Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ;********************************************
- ;* sqrt32_unrolled
- ;*
- ;* Computes the integer square root of a 32-bit number, one result bit per pass (16 passes in four groups: 1..7, 8, 9..15, 16).
- ;********************************************
- ;* by Verz - Jul2019
- ;********************************************
- ;* Uses A, Y and the scratch bytes T+2/T+3; X is never touched.
- ;* input: square, 32-bit source number, low byte first; it is overwritten, because the work area M is equated to square
- ;* output: sqrt, 16-bit value (low byte in sqrt, high byte in sqrt+1)
- ;* rmnd, 17-bit value left in M+2..M+4 (rmnd is equated to M+2)
- ;********************************************
- sqrt32_unrolled
- lda #0 ; A = 0 for the three clears below
- sta sqrt ; R=0 (R is the result being built in sqrt/sqrt+1)
- sta sqrt+1
- sta M+4 ; aka T+0: cleared so that the final rol M+4 leaves only the top remainder bit there
- ;sta T+1 ; left out: T+1 would be 0 until the last iteration (T+0 is always 0) and the code never reads T+1
- clc ; nothing has been shifted out of M yet, so the first bcs elab1 must fall through to the compare
- ; iterations 1..7: D is a single bit in the high byte of the result, moving from $80 down to $02
- ldy #6 ; 7 iterations (6-->0)
- _loop1
- ; T =((2*R+D) LSR 1) ASL 16 ; actually: T+2 = R+(D LSR 1), taken as the 16-bit word T+2/T+3
- ;lda sqrt / ora stablo,y / sta T+2 ; left out: Dlo=0 / sqrt+0=0 during these iterations, so T+2 would be 0
- lda sqrt+1
- ora stablo,y ; would be stabhi,y but changed the counter; this is D LSR 1 ($40 down to $01)
- sta T+3 ; T+3 = trial value for this pass
- bcs elab1 ; carry is the bit shifted out of M+3 by the previous pass (a 33rd bit of M): M >= T for sure
- lda M+3
- cmp T+3 ; the top byte decides, since the lower bytes of T are 0 here
- bcc nxt1 ; continue only if T <= M (branch taken if T>M: this result bit stays 0)
- ; bne skip06 ; left out together with the next two lines
- ; lda M+2 / sbc sqrt ; T+2 = sqrt =0 during these iterations, so no second-byte compare is needed
- ; bcc skip16
- elab1 ;sec ; not needed: carry is already set on both ways in (bcs taken, or cmp found M+3 >= T+3)
- ; M=M-T ; T+2 = sqrt+0 =0 during these iterations, so only the top byte is subtracted
- ; lda M+2 / sbc sqrt / sta M+2 ; left out for that reason
- lda M+3
- sbc T+3
- sta M+3
- ; R=R+D ; the low byte of D is 0 during these iterations, so only sqrt+1 changes
- ; lda sqrt / ora stablo+1,y / sta sqrt ; left out for that reason
- lda sqrt+1
- ora stablo+1,y ; would be stabhi,y but changed the counter; stablo+1,y is D, one bit above stablo,y
- sta sqrt+1
- nxt1 asl M ; M=M*2 (32-bit shift; the bit leaving M+3 stays in carry for the next pass)
- rol M+1
- rol M+2
- rol M+3
- dey ; implicit: D=D/2, by the move of .Y
- bpl _loop1 ; repeat until .Y drops below 0
- ; 8th iteration: D is bit 0 of sqrt+1, so T+3 = sqrt+1 and T+2 = $80
- _loop8
- ; sqrt+0 = 0, stabhi,y = 0 during this iteration
- ; lda #$80 / ora sqrt / sta T+2 / lda sqrt+1 / sta T+3 ; left out: T is never stored, $80 and sqrt+1 are used directly
- bcs elab8 ; bit shifted out of M+3 by pass 7: M >= T without comparing
- lda M+3
- cmp sqrt+1 ; T+3=sqrt+1
- bcc nxt8 ; continue only if T <= M (branch taken if T>M)
- bne elab8 ; M+3 > T+3, so M > T: go subtract
- lda M+2 ; top bytes are equal: the next byte decides
- sbc #$80 ; T+2=$80; carry is set after the equal cmp, so this sbc only serves as a compare
- bcc nxt8 ; borrow: M+2 < $80, so T > M
- elab8 ;sec ; not needed: carry is set on every way in
- lda M+2 ; M=M-T
- sbc #$80 ; T+2=$80
- sta M+2
- lda M+3
- sbc sqrt+1 ; T+3=sqrt+1
- sta M+3
- ; lda sqrt ; R=R+D (these three lines are left out)
- ; ora stablo+1,y ; the low byte of D is 0 during this iteration
- ; sta sqrt
- inc sqrt+1 ; lda sqrt+1 / ora #1 / sta sqrt+1 ; same effect: passes 1..7 only set bits 7..1, bit 0 is still clear
- nxt8 asl M ; M=M*2
- rol M+1
- rol M+2
- rol M+3
- ; iterations 9..15: D is a single bit in the low byte of the result, moving from $80 down to $02
- ldy #6 ; 7 iterations (6-->0)
- _loop9
- lda stablo,y ; D LSR 1 ($40 down to $01)
- ora sqrt
- sta T+2 ; T+2 = second byte of the trial value
- ; lda sqrt+1 / sta T+3 ; left out: stabhi,y = 0 during these iterations, so sqrt+1 is used as T+3 directly
- bcs elab9 ; bit shifted out of M+3 by the previous pass: M >= T without comparing
- lda M+3
- cmp sqrt+1 ; T+3=sqrt+1
- bcc nxt9 ; continue only if T <= M (branch taken if T>M)
- bne elab9 ; M+3 > T+3, so M > T: go subtract
- lda M+2 ; top bytes are equal: the next byte decides
- sbc T+2 ; carry is set after the equal cmp, so this sbc only serves as a compare
- bcc nxt9 ; borrow: M+2 < T+2, so T > M
- elab9 ;sec ; not needed: carry is set on every way in
- lda M+2 ; M=M-T
- sbc T+2
- sta M+2
- lda M+3
- sbc sqrt+1 ; T+3=sqrt+1
- sta M+3
- lda sqrt ; R=R+D
- ora stablo+1,y ; stablo+1,y is D, one bit above stablo,y
- sta sqrt
- ; lda sqrt+1 / adc #0 / sta sqrt+1 ; left out: stabhi+1,y =0 during these iterations
- nxt9 ;asl M ; M=M*2; asl M is left out because M+0 =0 during these iterations (emptied by the eight shifts of passes 1..8)
- asl M+1
- rol M+2
- rol M+3
- dey ; implicit: D=D/2, by the move of .Y
- bpl _loop9 ; repeat until .Y drops below 0
- ; 16th and last iteration: three-byte compare and subtract, T is $80 / sqrt / sqrt+1 and is never stored
- _lastiter ; code for last iteration
- ; during last iteration D=1, so (2*R+D) LSR 1 makes D the MSB of T+1
- bcs elab16 ; bit shifted out of M+3 by pass 15: M >= T without comparing
- lda M+3
- cmp sqrt+1 ; T+3 = sqrt+1
- bcc nxt16 ; continue only if T <= M (branch taken if T>M)
- bne elab16 ; M+3 > T+3, so M > T: go subtract
- lda M+2
- cmp sqrt ; T+2 = sqrt
- bcc nxt16 ; M+2 < T+2 with equal top bytes: T > M
- bne elab16 ; M+2 > T+2, so M > T: go subtract
- lda M+1
- cmp #$80 ; T+1 = $80
- bcc nxt16 ; M+1 < $80 with equal upper bytes: T > M
- elab16 ;sec ; not needed: carry is set on every way in
- lda M+1
- sbc #$80 ; T+1 = $80
- sta M+1
- lda M+2 ; M=M-T
- sbc sqrt
- sta M+2
- lda M+3
- sbc sqrt+1
- sta M+3
- inc sqrt ; R=R+D with D=1; passes 9..15 only set bits 7..1, so bit 0 is still clear
- nxt16 ;asl M ; M=M*2; asl M is left out because M+0 =0 during this iteration
- asl M+1
- rol M+2
- rol M+3
- rol M+4 ; catches the bit leaving M+3; M+2..M+4 (rmnd) now hold the remainder
- _sqrend rts
- ;**** Variables and Shift table
- ; stablo holds the single-bit masks $01..$80; the routine reads it as stablo,y and stablo+1,y with y = 6..0.
- ; The all-zero companion tables are not needed by the unrolled code and stay commented out.
- ;stabhi byte 0,0,0,0,0,0,0,0
- stablo BYTE $01,$02,$04,$08,$10,$20,$40,$80
- ; byte 0,0,0,0,0,0,0,0
- ; Zero-page layout. M, T and rmnd are all derived from square, each defined before it is used,
- ; so relocating square moves the whole work area and keeps T+0 on top of M+4.
- ; sqrt is placed independently: keep it clear of the nine bytes square .. square+8 (M plus T).
- square = $57 ; 4 bytes: $57-$5a; input value (overwritten: M lies on top of it)
- M = square ; 5 bytes: $57-$5b, over the input square and over T+0
- T = M+4 ; = $5b; 4 bytes: $5b-$5e; could be 2 bytes: T+0 is always 0; T+1 is 0 until last iteration; only T+2/T+3 are written
- rmnd = M+2 ; 3 bytes: $59-$5b; in the high bytes of M (M LSR 16)
- sqrt = $60 ; 2 bytes: $60-$61; result
Add Comment
Please, Sign In to add comment