; WIP Float16 Library for the GBZ80 processor

; 16-bit floats pack a 5-bit signed precision (the top 5 bits) above an
; 11-bit signed value (the low 11 bits).  The number represented is
; value * 2^precision.

; Special-value encodings; these control codes are required by the standard.
; Each one is spelled out as (precision << 11) | value so the two fields of
; the encoding can be read directly.
F16_INF    EQU (15 << 11) | $03FF ; $7BFF: precision 15, value bits %011_1111_1111
F16_NEGINF EQU (15 << 11) | $07FF ; $7FFF: precision 15, value bits all set
F16_NAN    EQU (31 << 11) | $07FF ; $FFFF: every bit set

CastFloat16ToFixed48:
; Expand a float16 into a signed, big-endian 48-bit fixed-point number.
; de: f16
; hl: q48 *
; returns to *hl
; hl is preserved.  af, bc and de are clobbered.

; Arithmetically right-shift the precision out of the top 5 bits of d
    ld b, d
    sra b
    sra b
    sra b

; Mask away the precision and extend the sign of the value
    ld a, d
    and $7
    bit 2, a
    jr z, .no_sign_extend
    or $f8
.no_sign_extend
    ld d, a

; Prepare the output buffer.  Fixed, big-endian 48-bit numbers have the binary point 32 bits in.
; Z still says whether the value is non-negative: it was set by `bit` when the
; jump above was taken and cleared by `or $f8` otherwise.  `ld` does not touch
; the flags, which is why this is `ld a, 0` and not `xor a`.
    ld a, 0
    jr z, .no_output_sign_extend
    dec a
.no_output_sign_extend
; Sign extend
    push hl
    ld [hli], a
    ld [hli], a
; Load the input float
    ld [hl], d
    inc hl
    ld [hl], e
    inc hl
; Fill the rest with 0
    xor a
    ld [hli], a
    ld [hl], a
    pop hl

; If the precision is 0, no shifting is needed
    ld a, b
    and a
    ret z
; Check the sign of the precision
    bit 7, a
    jr z, .precision_positive
; Right-shift to gain the required fixed-point precision
    cpl
    inc a
    ld b, a ; b = number of single-bit right shifts (1..16)
.neg_prec_outer
    push hl
; The shift has to be arithmetic, otherwise a negative value would turn into
; a large positive one.  sra keeps bit 7 of the top byte and moves bit 0 into
; carry, which the rr chain then passes down through the other five bytes.
    sra [hl]
    inc hl
    ld c, 5
.neg_prec_inner
    rr [hl]
    inc hl ; 16-bit inc and dec c both leave carry untouched
    dec c
    jr nz, .neg_prec_inner
    pop hl
    dec b
    jr nz, .neg_prec_outer
    ret

.precision_positive
; Left-shift to gain the required fixed-point magnitude
    push hl
    ld de, 5
    add hl, de ; start at the least-significant byte
.pos_prec_outer
    push hl
    ld c, 6
    and a ; clear carry so that a 0 is shifted into the bottom bit
.pos_prec_inner
    rl [hl]
    dec hl
    dec c
    jr nz, .pos_prec_inner
    pop hl
    dec b
    jr nz, .pos_prec_outer
    pop hl
    ret

CastFixed48ToFloat16:
; Convert a signed, big-endian 48-bit fixed-point number (binary point 32 bits
; in) to a float16.
; hl: q48 *
; returns f16 de
; The buffer at *hl is used as scratch space and is overwritten.
; A zero input returns 0.  A magnitude too large for precision 15 returns
; F16_INF or F16_NEGINF.  Bits that do not fit in the value are dropped, so
; the result is truncated toward zero.
; af, bc and hl are clobbered.

    bit 7, [hl]
    push af ; save sign (Z set = input is non-negative)
    jr z, .magnitude_ready

; Take the absolute value: compute 0 - x over all 48 bits, starting at the
; least-significant byte so that the borrow travels upward through carry.
    push hl
    ld de, 5
    add hl, de
    ld c, 6
    and a ; clear carry: no borrow going into the lowest byte
.negate_loop
    ld a, 0 ; not xor a, which would wipe the borrow held in carry
    sbc a, [hl]
    ld [hld], a
    dec c ; dec leaves carry untouched
    jr nz, .negate_loop
    pop hl
.magnitude_ready

; Find the position (0..47) of the most significant set bit.
    push hl ; keep the buffer base
    ld b, 47 ; position of bit 7 of the first byte
    ld c, 6
.find_byte
    ld a, [hli]
    and a
    jr nz, .find_bit
    ld a, b ; the whole byte is clear: skip its 8 positions
    sub 8
    ld b, a
    dec c
    jr nz, .find_byte
; Every byte was 0, so the result is 0.
    pop hl
    pop af
    ld de, 0
    ret

.find_bit
; a is non-zero here, so a set bit reaches carry within 8 shifts.
    add a, a
    jr c, .found
    dec b
    jr .find_bit

.found
    pop hl ; hl = buffer base again
    ld a, b
    cp 41
    jr c, .in_range
; The top bit sits above position 40, which would need a precision above 15.
    pop af
    ld de, F16_INF ; ld does not disturb the restored Z flag
    ret z
    ld de, F16_NEGINF
    ret

.in_range
; The value holds a 10-bit magnitude plus a sign bit, so the top bit has to
; end up at position 9 of the buffer.  That takes (position - 9) right shifts,
; and the precision is that shift count minus the 16 fraction bits.  Inputs
; whose top bit is already below position 9 are not shifted and get the
; lowest precision, -16.
    sub 9
    jr nc, .have_shift
    xor a
.have_shift
    ld b, a ; b = shift count (0..31)
    sub 16
    ld d, a ; d = precision (-16..15)
    inc b ; enter the loop at its test so that a count of 0 shifts nothing
    jr .shift_check
.shift_outer
    push hl
    srl [hl] ; the magnitude is unsigned: shift a 0 into the top bit
    inc hl
    ld c, 5
.shift_inner
    rr [hl]
    inc hl
    dec c
    jr nz, .shift_inner
    pop hl
.shift_check
    dec b
    jr nz, .shift_outer

; Move the precision into the top 5 bits of the high byte.
    ld a, d
    add a, a
    add a, a
    add a, a
    ld b, a

; The magnitude now sits in the last two bytes of the buffer.
    ld de, 4
    add hl, de
    ld a, [hli]
    ld d, a
    ld e, [hl]

; Restore the sign with an 11-bit two's-complement negate, carrying the
; borrow from the low byte into the high bits.
    pop af
    jr z, .pack
    xor a
    sub e
    ld e, a
    ld a, 0 ; keep the borrow from the low byte
    sbc a, d
    and $07
    ld d, a
.pack
    ld a, d
    or b
    ld d, a
    ret