Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
;;Scratch layout used by mul32. scrap is defined elsewhere and must provide
;;at least 12 bytes (var_z2 starts at scrap+8 and is 4 bytes long).
;;  scrap+0 .. scrap+3    var_x   first 32-bit operand
;;  scrap+4 .. scrap+7    var_y   second 32-bit operand
;;  scrap+4 .. scrap+7    var_z0  low 32 bits of the product (same bytes as var_y)
;;  scrap+8 .. scrap+11   var_z2  high 32 bits of the product
var_x  = scrap          ; 4 bytes
var_y  = var_x+4        ; 4 bytes
var_z0 = var_y          ; 4 bytes, aliases var_y: storing the result destroys var_y
var_z2 = var_z0+4       ; 4 bytes
mul32:
;;32x32 -> 64-bit unsigned multiply using Karatsuba (three 16x16 multiplies).
;;Inputs:
;;  var_x, var_y = the two 32-bit operands, low word first
;;Outputs:
;;  64-bit product stored at var_z0 (low 32 bits) and var_z2 (high 32 bits).
;;  var_z0 occupies the same bytes as var_y, so var_y is destroyed.
;;  var_x is left unchanged.
;;Destroys:
;;  AF, BC, DE, HL
;;Requires:
;;  mul16 (DE*BC => DEHL)
;;Method, with x = x1:x0 and y = y1:y0 split into 16-bit words:
;;  z0 = x0*y0
;;  z2 = x1*y1
;;  z1 = (x0+x1)*(y0+y1) - z0 - z2      (x0*y1 + x1*y0, up to 33 bits)
;;  product = z2<<32 + z1<<16 + z0
;;  x0+x1 and y0+y1 are 17-bit values. Writing them as xs + cx*2^16 and
;;  ys + cy*2^16, their product is
;;    xs*ys + (cy*xs + cx*ys)<<16 + (cx AND cy)<<32
;;  and every one of those terms, including the last, must be accumulated.
;;Timing:
;;  Dominated by the three mul16 calls; the glue code has not been re-timed.

    ld de,(var_x)       ;\
    ld bc,(var_y)       ; | z0 = x0*y0
    push bc             ; | keep y0: storing z0 overwrites var_y
    call mul16          ; |
    ld (var_z0),hl      ; |
    ld bc,(var_y+2)     ; | fetch y1 before the high word of z0 lands on it
    ld (var_z0+2),de    ; |
    ld de,(var_x+2)     ; |
    push bc             ; | keep y1
    call mul16          ; | z2 = x1*y1
    ld (var_z2),hl      ; |
    ld (var_z2+2),de    ;/

    xor a               ;\
    ld hl,(var_x)       ; |
    ld de,(var_x+2)     ; |
    add hl,de           ; | HL = xs (low 16 bits of x0+x1), carry = cx
    rra                 ; | park cx in bit 7 of A; carry is clear afterwards
    pop de              ; | DE = y1
    ex (sp),hl          ; | HL = y0, top of stack = xs
    add hl,de           ; | HL = ys (low 16 bits of y0+y1), carry = cy
    pop bc              ; | BC = xs
    ex de,hl            ; | DE = ys
    push de             ; | keep ys for the cx correction
    push bc             ; | keep xs for the cy correction
    push af             ; | keep cx (A bit 7) and cy (carry flag)
    call mul16          ; | DEHL = xs*ys
    ex de,hl            ;/  HLDE = xs*ys

;;Build the full 17x17-bit product in A:HL:DE (A = bits 32 and up).
    pop af              ; A bit 7 = cx, carry = cy
    pop bc              ; BC = xs
    jr nc,mul32_no_cy
    add hl,bc           ; cy set: add xs<<16, carry = overflow into bit 32
    rla                 ; A = that overflow, carry = cx
    pop bc              ; BC = ys
    jr nc,mul32_sub     ; cx clear: nothing more to add (carry is 0 here)
    inc a               ; cx AND cy: the 2^32 cross term
    jr mul32_add_ys
mul32_no_cy:
    rla                 ; carry was 0, so A = 0; carry = cx
    pop bc              ; BC = ys
    jr nc,mul32_sub     ; cx clear: product is just xs*ys (carry is 0 here)
mul32_add_ys:
    add hl,bc           ; cx set: add ys<<16
    adc a,0             ; overflow into bit 32; A <= 3 so carry ends up clear

;;z1 = A:HL:DE - z0 - z2. Carry is clear on entry on every path above, and
;;stays clear after each "sbc a,0" because the running value never goes
;;negative (z1 itself is non-negative).
mul32_sub:
    ex de,hl \ ld bc,(var_z0) \ sbc hl,bc       ; low word  -= low word of z0
    ex de,hl \ ld bc,(var_z0+2) \ sbc hl,bc     ; high word -= high word of z0
    sbc a,0
    ex de,hl \ ld bc,(var_z2) \ sbc hl,bc       ; low word  -= low word of z2
    ex de,hl \ ld bc,(var_z2+2) \ sbc hl,bc     ; high word -= high word of z2
    sbc a,0             ; z1 = A:HL:DE, A is 0 or 1
    ld b,h \ ld c,l     ; z1 = A:BC:DE

;;Add z1<<16 into the 64-bit value z2:z0 already sitting in memory.
    ld hl,(var_z0+2)    ;\
    add hl,de           ; | bits 16..31 += low word of z1
    ld (var_z0+2),hl    ; |
    ld hl,(var_z2)      ; |
    adc hl,bc           ; | bits 32..47 += high word of z1 + carry
    ld (var_z2),hl      ; |
    ld hl,(var_z2+2)    ; |
    ld c,a              ; | BC = bit 32 of z1 (0 or 1)
    ld b,0              ; |
    adc hl,bc           ; | bits 48..63 += bit 32 of z1 + carry
    ld (var_z2+2),hl    ; |
    ret                 ;/
mul16:
;;16x16 -> 32-bit unsigned multiply.
;;Inputs:
;;  DE = multiplicand
;;  BC = multiplier
;;Outputs:
;;  DEHL = DE*BC (DE = high word, HL = low word)
;;Destroys:
;;  AF, BC
;;Stack:
;;  one word, pushed and popped inside the routine
;;Method:
;;  Two unrolled 16x8 shift-and-add passes, DE*B then DE*C, each producing a
;;  24-bit partial product in A:HL. The result is (DE*B)<<8 + DE*C.
;;  Inside a pass A does double duty: multiplier bits leave through bit 7
;;  while the bits that overflow HL enter through bit 0. After k steps the
;;  partial product is below 2^(16+k), so the two halves of A never collide.
;;  The "adc a,<zero register>" after each add folds the carry out of HL
;;  into A, which is why each pass first zeroes the register it uses there.
;;  A pass is skipped entirely when its multiplier byte is 0.
;;Timing:
;;  146cc when BC = 0 (both passes skipped). Cycle counts for other inputs
;;  have not been re-derived.

;;Pass 1: A:HL = DE * B
    ld a,b
    ld hl,0 \ ld b,h \ or a \ jr z,mul16_hi_done    ; B = 0 for "adc a,b"
;;First step: HL is still 0, so there is nothing to shift. When the bit is
;;clear, $+5 lands on the rla of the next step, skipping its add hl,hl
;;(which would only have produced the same clear carry).
    rla \ jr nc,$+5 \ ld h,d \ ld l,e
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
mul16_hi_done:
    ld b,a              ; B = top byte of DE*B (0 if the pass was skipped)
    push hl             ; low 16 bits of DE*B

;;Pass 2: A:HL = DE * C
    ld a,c
    ld hl,0 \ ld c,h \ or a \ jr z,mul16_lo_done    ; C = 0 for "adc a,c"
    rla \ jr nc,$+5 \ ld h,d \ ld l,e
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
    add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c

;;Combine. With pass 1 = B:D:E (after the pop) and pass 2 = A:H:L:
;;  byte 0 = L
;;  byte 1 = H + E
;;  byte 2 = A + D + carry
;;  byte 3 = B + carry
;;The register-to-register loads between the adds leave the carry untouched.
mul16_lo_done:
    pop de              ; DE = low 16 bits of DE*B
    ld c,a              ; C = top byte of DE*C
    ld a,h \ add a,e \ ld h,a       ; byte 1
    ld a,c \ adc a,d \ ld e,a       ; byte 2
    ld a,b \ adc a,0 \ ld d,a       ; byte 3
    ret
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement