Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Karatsuba64:
- ;64x64->128 bit unsigned multiply built from three calls to KaratsubaMul32.
- ;With x=2^32, w1 = h1*x+l1 and w2 = h2*x+l2, the code forms
- ;  z0 = l1*l2, z2 = h1*h2, z1 = (l1+h1)*(l2+h2) - z0 - z2
- ;and stores z0 + z1*x + z2*x^2 at (outp128), least significant word first.
- ;Input: (word64_1),(word64_2)
- ;Output: (outp128)
- ;Scratch: (word32_1),(word32_2),(outp64) are overwritten. A,BC,DE,HL and flags are
- ;  modified by the code below, in addition to whatever KaratsubaMul32 changes.
- ;NOTE(review): results are read back from (outp64) while KaratsubaMul32 stores to
- ;  (outp); presumably both names refer to the same address - confirm.
- ;best =1776+3*best(mul32) =5292cc
- ;worst =2243+3*worst(mul32) =11165cc
- ;avg =2009.5+3*avg(mul32) =10011.8699cc
- ;--- z0 = low32(word64_1) * low32(word64_2) -> (outp128+0..7) ---
- ld hl,(word64_1) \ ld (word32_1),hl
- ld hl,(word64_1+2) \ ld (word32_1+2),hl
- ld hl,(word64_2) \ ld (word32_2),hl
- ld hl,(word64_2+2) \ ld (word32_2+2),hl
- call KaratsubaMul32
- ld hl,(outp64) \ ld (outp128),hl
- ld hl,(outp64+2) \ ld (outp128+2),hl
- ld hl,(outp64+4) \ ld (outp128+4),hl
- ld hl,(outp64+6) \ ld (outp128+6),hl
- ;--- z2 = high32(word64_1) * high32(word64_2) -> (outp128+8..15) ---
- ld hl,(word64_1+4) \ ld (word32_1),hl
- ld hl,(word64_1+6) \ ld (word32_1+2),hl
- ld hl,(word64_2+4) \ ld (word32_2),hl
- ld hl,(word64_2+6) \ ld (word32_2+2),hl
- call KaratsubaMul32
- ld hl,(outp64) \ ld (outp128+8),hl
- ld hl,(outp64+2) \ ld (outp128+10),hl
- ld hl,(outp64+4) \ ld (outp128+12),hl
- ld hl,(outp64+6) \ ld (outp128+14),hl
- ;512+2*Karatsuba
- ;--- word32_1 = l1+h1 (mod 2^32); its carry-out is captured in bit 0 of A ---
- xor a
- ld hl,(word64_1)
- ld de,(word64_1+4)
- add hl,de
- ld (word32_1),hl
- ld hl,(word64_1+2)
- ld de,(word64_1+6)
- adc hl,de
- ld (word32_1+2),hl
- rla
- ;--- word32_2 = l2+h2 (mod 2^32); its carry-out stays in the carry flag ---
- ld hl,(word64_2)
- ld de,(word64_2+4)
- add hl,de
- ld (word32_2),hl
- ld hl,(word64_2+2)
- ld de,(word64_2+6)
- adc hl,de
- ld (word32_2+2),hl
- ;save both carries across the call: A bit 0 = carry of first sum, CF = carry of second sum
- push af
- call KaratsubaMul32
- ;790+3*Karatsuba
- pop af
- ;C = carry of the first sum. ld a,0 is used instead of xor a so the carry flag
- ;(carry of the second sum) survives until the jr.
- ld c,a
- ld a,0
- jr nc,Addmore_2
- ;second sum overflowed: add (l1+h1) to the upper 32 bits of the product at (outp64+4)
- ld a,c
- ld bc,(word64_1)
- ld hl,(word64_1+4)
- add hl,bc
- ex de,hl
- ld bc,(word64_1+2)
- ld hl,(word64_1+6)
- adc hl,bc
- ex de,hl
- ;HL = low word of (l1+h1), DE = high word
- ld bc,(outp64+4)
- add hl,bc
- ld (outp64+4),hl
- ex de,hl
- ld bc,(outp64+6)
- adc hl,bc
- ld (outp64+6),hl
- ;A = carry of first sum + carry out of the addition above; C keeps the carry of the first sum
- ld c,a
- ld a,0
- adc a,c
- Addmore_2:
- ;rr c moves bit 0 of C (carry of the first sum) into the carry flag
- rr c
- jr nc,label_2
- ;first sum overflowed: add (l2+h2) to the upper 32 bits of the product at (outp64+4)
- ld bc,(word64_2)
- ld hl,(word64_2+4)
- add hl,bc
- ex de,hl
- ld bc,(word64_2+2)
- ld hl,(word64_2+6)
- adc hl,bc
- ex de,hl
- ld bc,(outp64+4)
- add hl,bc
- ld (outp64+4),hl
- ex de,hl
- ld bc,(outp64+6)
- adc hl,bc
- ld (outp64+6),hl
- label_2:
- ;fold the pending carry into A; D=0 is reused below as the zero operand for sbc a,d / adc hl,de
- ld d,0 \ adc a,d
- ;A:(outp64) now holds the full product of the two sums (A is the byte above bit 63)
- ;(outp64) - (outp128) - (outp128+8)
- ;the sbc chains rely on the carry flag being clear on entry (A is small, so the adc above does not carry)
- ld hl,(outp64)
- ld bc,(outp128)
- sbc hl,bc
- ld (outp64),hl
- ld hl,(outp64+2)
- ld bc,(outp128+2)
- sbc hl,bc
- ld (outp64+2),hl
- ld hl,(outp64+4)
- ld bc,(outp128+4)
- sbc hl,bc
- ld (outp64+4),hl
- ld hl,(outp64+6)
- ld bc,(outp128+6)
- sbc hl,bc
- ld (outp64+6),hl
- ;propagate the borrow into A (D=0)
- sbc a,d
- ;second subtraction: A:(outp64) -= (outp128+8..15)
- ld hl,(outp64)
- ld bc,(outp128+8)
- sbc hl,bc
- ld (outp64),hl
- ld hl,(outp64+2)
- ld bc,(outp128+10)
- sbc hl,bc
- ld (outp64+2),hl
- ld hl,(outp64+4)
- ld bc,(outp128+12)
- sbc hl,bc
- ld (outp64+4),hl
- ld hl,(outp64+6)
- ld bc,(outp128+14)
- sbc hl,bc
- ld (outp64+6),hl
- sbc a,d
- ;A:(outp64) = z1; add it into the result starting 4 bytes up (i.e. z1*2^32)
- ;(outp64) + (outp128+4)
- ld hl,(outp64)
- ld bc,(outp128+4)
- add hl,bc
- ld (outp128+4),hl
- ld hl,(outp64+2)
- ld bc,(outp128+6)
- adc hl,bc
- ld (outp128+6),hl
- ld hl,(outp64+4)
- ld bc,(outp128+8)
- adc hl,bc
- ld (outp128+8),hl
- ld hl,(outp64+6)
- ld bc,(outp128+10)
- adc hl,bc
- ld (outp128+10),hl
- ;DE = 00:A (D is still 0), added with carry into the word at (outp128+12)
- ld hl,(outp128+12)
- ld e,a
- adc hl,de
- ld (outp128+12),hl
- ;a carry out of that word increments the top word
- ret nc
- ld hl,(outp128+14)
- inc hl
- ld (outp128+14),hl
- ret
- .echo " Karatsuba64:",$-Karatsuba64
- KaratsubaMul32:
- ;32x32->64 bit unsigned multiply built from three calls to BC_Times_DE.
- ;With x=2^16, w1 = h1*x+l1 and w2 = h2*x+l2, the code forms
- ;  z0 = l1*l2, z2 = h1*h2, z1 = (l1+h1)*(l2+h2) - z0 - z2
- ;and stores z0 + z1*x + z2*x^2, least significant word first.
- ;Input: (word32_1), (word32_2)
- ;Output: (outp64)
- ;NOTE(review): the code below stores to (outp), not (outp64); presumably both names
- ;  refer to the same address - confirm.
- ;A,BC,DE,HL and flags are modified; the stack is used for temporaries (balanced on return).
- ;worst: 925+3*worst(Mul) 2974cc
- ;best : 785+3*best(Mul) 1172cc
- ;avg : 855+3*avg(Mul) 2667.4566cc
- ;Previous best optimized: 3666 t-states worst case, 2880 lower bound
- ;has a bug. For example, in pi*e, multiplying the upper 32 bits of each ends in A189, but should be a288
- ;--- z0 = l1*l2 -> (outp+0..3) ---
- ld bc,(word32_1)
- ld de,(word32_2)
- call BC_Times_DE
- ;BC_Times_DE returns BHLA; repack so HL = low word (L:A) and BC = high word (B:H)
- ld c,h
- ld h,l
- ld l,a
- ld (outp),hl
- ld (outp+2),bc
- ;--- z2 = h1*h2 -> (outp+4..7) ---
- ld bc,(word32_1+2)
- ld de,(word32_2+2)
- call BC_Times_DE
- ld c,h
- ld h,l
- ld l,a
- ld (outp+4),hl
- ld (outp+6),bc
- ;--- DE = l1+h1 (mod 2^16); its carry-out is captured in bit 0 of A ---
- xor a
- ld hl,(word32_1)
- ld bc,(word32_1+2)
- add hl,bc
- rla
- ex de,hl
- ;--- BC = l2+h2 (mod 2^16); its carry-out stays in the carry flag ---
- ld hl,(word32_2)
- ld bc,(word32_2+2)
- add hl,bc
- ld b,h
- ld c,l
- ;save both carries across the call: A bit 0 = carry of first sum, CF = carry of second sum
- push af
- call BC_Times_DE
- ;repack BHLA so DE = high word (B:H) and HL = low word (L:A)
- ld e,h
- ld d,b
- ld h,l
- ld l,a
- ;DEHL
- pop af
- ;park the low word on the stack; HL is needed as scratch for the corrections below
- push hl
- ;C = carry of the first sum. ld a,0 is used instead of xor a so the carry flag
- ;(carry of the second sum) survives until the jr.
- ld c,a
- ld a,0
- jr nc,Addmore_1
- ;(ax+b)(cx+d) = acx^2+axd+bcx+bd
- ;c flag is c
- ;
- ;x=2^16
- ;a,c are 0 or 1
- ; If a = 1, add c to A (A is the overflow thing), add (word32_2)+(word32_2+2) to DE
- ; If c = 1, add b to DE
- ;second sum overflowed: DE += (l1+h1) mod 2^16, A = carry of first sum + carry out
- ld a,c
- ld bc,(word32_1)
- ld hl,(word32_1+2)
- add hl,bc
- add hl,de
- ex de,hl
- ld c,a
- ld a,0
- adc a,c
- Addmore_1:
- ;rr c moves bit 0 of C (carry of the first sum) into the carry flag
- rr c
- jr nc,label_1
- ;if bit 7 is set, A =2, else A=0
- ;NOTE(review): the comment above looks stale; as written, A holds 0..2 here
- ;(0 when the branch to Addmore_1 was taken) - confirm intent.
- ;first sum overflowed: DE += (l2+h2) mod 2^16, carry out goes into A
- ld bc,(word32_2)
- ld hl,(word32_2+2)
- add hl,bc
- add hl,de
- ex de,hl
- adc a,0
- label_1:
- pop hl
- ;A:DE:HL now holds the full product of the two sums.
- ;The sbc chains below rely on the carry flag being clear on entry, which is why
- ;the "or a" is commented out.
- ;ADEHL - (outp) - (outp+4)
- ld bc,(outp)
- ; or a
- sbc hl,bc
- ex de,hl
- ;HLDE
- ld bc,(outp+2)
- sbc hl,bc \ sbc a,0
- ex de,hl
- ;DEHL
- ld bc,(outp+4)
- sbc hl,bc
- ex de,hl
- ;HLDE
- ld bc,(outp+6)
- sbc hl,bc \ sbc a,0
- ex de,hl
- ;A:DE:HL = z1; add it into the result starting 2 bytes up (i.e. z1*2^16)
- ;DEHL + (outp+2)
- ld bc,(outp+2)
- add hl,bc
- ld (outp+2),hl
- ex de,hl
- ;HLDE
- ld bc,(outp+4)
- ld de,(outp+6)
- adc hl,bc
- ld (outp+4),hl
- ;top word: (outp+6) += 00:A + carry
- ld h,0 \ ld l,a
- adc hl,de
- ld (outp+6),hl
- ret
- .echo " Karatsuba32:",$-KaratsubaMul32
- BCM:
- BC_Times_DE:
- ;16x16->32 bit unsigned multiply.
- ;BC*DE->BHLA (B = most significant byte, A = least significant byte)
- ;Method: two unrolled 8x16 shift-and-add products, B*DE then C*DE,
- ;combined as (B*DE)*256 + C*DE.
- ;In:  BC, DE = factors
- ;Out: BHLA = product
- ;     C  = top byte of the 24-bit product C*DE
- ;     DE = low 16 bits of the 24-bit product B*DE (the input DE is lost at the final pop)
- ;     flags modified; the stack is used for one temporary (balanced on return)
- ;Fix: the zero operand for the second loop is now loaded with "ld c,h" (H=0 there).
- ;     It used to be "ld c,b" after "ld b,a", which put the top byte of B*DE into C,
- ;     so every "adc a,c" added that byte as well as the carry
- ;     (e.g. $0201*$FFFF gave $0201FDFF instead of $0200FDFF).
- ;     Both instructions take the same time, so the timings below are unchanged.
- ;Assuming B=0, C=0 129cc
- ;Assuming B!=0,C=0 329cc~410cc, avg 373cc-3.5
- ; B=0, C!=0
- ;Assuming B!=0,C!=0 529cc~683cc, avg 609.5cc-3.5cc
- ;Overall average: 79187439/131072=604.15221405029296875cc
- ;--- first partial product: A:HL = B*DE ---
- ;A holds the multiplier bits (shifted out at the top) and, at the bottom,
- ;the growing top byte of the product. B=0 is the zero operand for adc.
- ld a,b
- ld hl,0
- ld b,h
- or a
- jr z,+_
- ;top multiplier bit: if set HL=DE. If clear, $+5 also skips the next line's
- ;"add hl,hl"; that is harmless because HL=0 and carry is clear at that point.
- add a,a \ jr nc,$+5 \ ld h,d \ ld l,e
- ;remaining 7 bits: shift A:HL left, add DE (with carry into A) when the bit shifted out is set
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,b
- _:
- ;save the low 16 bits of B*DE; keep its top byte in B
- push hl
- ld h,b
- ld l,b
- ld b,a
- ;--- second partial product: A:HL = C*DE, with C reloaded as the zero operand ---
- ld a,c
- ld c,h
- or a
- jr z,+_
- add a,a \ jr nc,$+5 \ ld h,d \ ld l,e
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
- add hl,hl \ rla \ jr nc,$+4 \ add hl,de \ adc a,c
- _:
- ;--- combine: B:DE = B*DE, A:HL = C*DE; result = B:DE*256 + A:HL ---
- pop de
- ;the lowest byte of C*DE is the lowest result byte; its upper 16 bits are added to DE
- ld c,a
- ld a,l
- ld l,h
- ld h,c
- add hl,de
- ;carry out of the middle word goes into the top byte
- ret nc
- inc b
- ret
- .echo " Base Mult :",$-BCM
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement