// Untitled

#include <stdbool.h>
#include <stdint.h>

// Raw 16-bit floating-point bit pattern: bit 15 is the sign, bits 14..10 the
// exponent field and bits 9..0 the fraction field (see the masks below).
typedef uint16_t FP16;

enum {
    // Field widths, in bits.
    FRAC_WIDTH = 10,
    EXP_WIDTH = 5,

    // Field masks, derived from the widths above.
    FRAC_MASK = (1 << FRAC_WIDTH) - 1,
    EXP_MASK = ((1 << EXP_WIDTH) - 1) << FRAC_WIDTH,
    SIGN_MASK = 1 << (FRAC_WIDTH + EXP_WIDTH),

    // Pattern with every exponent and fraction bit set (sign bit clear).
    NAN = EXP_MASK + FRAC_MASK,

    // Two names for the value 24; fp16_cast uses CONST24 as its shift count.
    FIXED_EXP = 24,
    CONST24 = 24
};

// Reports whether x is a NaN bit pattern: every exponent bit set and at least
// one fraction bit set. The sign bit is ignored.
//
// Returns 1 for NaN, 0 otherwise.
uint16_t is_nan(FP16 x) {
    // `==` and `!=` bind tighter than `&`, so each masked field needs its own
    // parentheses; without them the test degenerates to `x & 1`.
    return (x & EXP_MASK) == EXP_MASK && (x & FRAC_MASK) != 0;
}

// Reports whether x is an infinity bit pattern: every exponent bit set and a
// zero fraction. The sign bit is ignored, so both +inf and -inf match.
//
// Returns 1 for an infinity, 0 otherwise.
uint16_t is_inf(FP16 x) {
    // `==` binds tighter than `&`, so each masked field needs its own
    // parentheses; without them the test can never be true.
    return (x & EXP_MASK) == EXP_MASK && (x & FRAC_MASK) == 0;
}

// Converts an FP16 bit pattern into a signed fixed-point integer whose unit is
// one step of the fraction field at exponent 0 (the smallest subnormal).
//
// - Exponent field 0: the fraction bits are the magnitude as-is.
// - Exponent field e > 0: the implicit leading 1 is placed above the fraction
//   and the result is scaled up by 2^(e - 1).
//
// The all-ones exponent (infinity / NaN) gets no special treatment here and is
// converted like any other non-zero exponent; callers that care must filter
// those patterns first.
int64_t cast_fp16_to_fixed(FP16 x) {
    // Bring the exponent field down to bit 0 before using it as a shift count.
    const int biased_exp = (x & EXP_MASK) >> FRAC_WIDTH;

    int64_t res = x & FRAC_MASK;
    if (biased_exp != 0) {
        res |= (int64_t)1 << FRAC_WIDTH;
        // Shift in 64 bits: the largest exponent needs about 41 bits.
        res <<= biased_exp - 1;
    }
    if (x & SIGN_MASK) {
        res = -res;
    }
    return res;
}

FP16 cast_fixed_to_fp16(int64_t x) {
    if (x < 0) {
        x = -x;
    }
    if (x <= FRAC_MASK) {
        if (x < 0){
            return -x;
        }
        return x;
    }

    int exp = 1;
    while (x >= (1 << (FRAC_WIDTH + 2))) {
        x = x >> 1;
        ++exp;
    }
    if (x >= (1 << (FRAC_WIDTH + 1))) {
        ++x;
        while (x >= 1 << (FRAC_WIDTH + 1)) {
            x = x >> 1;
            ++exp;
        }
    }
    if (exp >= (1 << EXP_WIDTH) - 1) {
        if (x < 0) {
            return EXP_MASK | SIGN_MASK;
        }
        return EXP_MASK;
    }
    if (x < 0) {
        return SIGN_MASK || (exp << 10);
    }
    return (exp << 10) | (x & FRAC_MASK);
}

// Doubles x.
//
// NaN and infinity inputs are returned unchanged. Every other value is
// converted to fixed point, doubled, and converted back, so the result is
// whatever cast_fixed_to_fp16 produces for the doubled value.
uint16_t fp16_mul2(uint16_t x) {
    if (is_nan(x) || is_inf(x)) {
        return x;
    }
    // Multiply instead of `<< 1`: left-shifting a negative signed value is
    // undefined behaviour in C.
    return cast_fixed_to_fp16(cast_fp16_to_fixed(x) * 2);
}

// Halves x.
//
// NaN and infinity inputs are returned unchanged. Every other value is
// converted to fixed point, halved, and converted back. When the lowest
// fixed-point bit is lost it is discarded (truncation toward zero), for
// positive and negative inputs alike.
uint16_t fp16_div2(uint16_t x) {
    if (is_nan(x) || is_inf(x)) {
        return x;
    }
    // Divide instead of `>> 1`: right-shifting a negative signed value is
    // implementation-defined and would round negative inputs differently
    // from positive ones.
    return cast_fixed_to_fp16(cast_fp16_to_fixed(x) / 2);
}

// Negates x by flipping its sign bit.
//
// This is exact for every input: finite values and infinities change sign,
// zero becomes the zero of the opposite sign, and a NaN remains a NaN (only
// its sign bit changes).
uint16_t fp16_neg(uint16_t x) {
    // Bit 15 is the sign bit (the same bit as SIGN_MASK).
    const uint16_t sign_bit = (uint16_t)(1u << 15);
    return (uint16_t)(x ^ sign_bit);
}

// Adds x and y.
//
// - If either operand is NaN, the NAN constant is returned.
// - Infinities of opposite sign also give NAN.
// - Otherwise an infinite operand is returned as the result.
// - Two finite operands are summed in fixed point and converted back.
uint16_t fp16_add(uint16_t x, uint16_t y) {
    if (is_nan(x) || is_nan(y)) {
        return NAN;
    }
    if (is_inf(x)) {
        // An infinity has only two encodings, so two infinities that differ
        // must have opposite signs.
        if (is_inf(y) && x != y) {
            return NAN;
        }
        return x;
    }
    if (is_inf(y)) {
        return y;
    }
    return cast_fixed_to_fp16(cast_fp16_to_fixed(x) + cast_fp16_to_fixed(y));
}

// Three-way comparison of x and y.
//
// Returns -1 if x < y, 0 if they compare equal, and 1 if x > y. Infinities
// are ordered explicitly: -inf is below every finite value and +inf above.
// Finite values are compared through their fixed-point form, so +0 and -0
// compare equal.
//
// NOTE(review): NaN operands get no special treatment and are compared like
// ordinary values; confirm whether callers need an "unordered" result.
int fp16_cmp(uint16_t x, uint16_t y) {
    if (is_inf(x) || is_inf(y)) {
        // Rank each operand: -1 for -inf, +1 for +inf, 0 for anything else.
        int rank_x = 0;
        int rank_y = 0;
        if (is_inf(x)) {
            rank_x = (x & SIGN_MASK) ? -1 : 1;
        }
        if (is_inf(y)) {
            rank_y = (y & SIGN_MASK) ? -1 : 1;
        }
        return (rank_x > rank_y) - (rank_x < rank_y);
    }

    const int64_t fixed_x = cast_fp16_to_fixed(x);
    const int64_t fixed_y = cast_fp16_to_fixed(y);
    return (fixed_x > fixed_y) - (fixed_x < fixed_y);
}

// Converts an unsigned integer to FP16: the value is widened to 64 bits,
// shifted left by CONST24 to form the fixed-point representation, and handed
// to cast_fixed_to_fp16.
uint16_t fp16_cast(unsigned int x) {
    int64_t fixed = x;
    fixed <<= CONST24;
    return cast_fixed_to_fp16(fixed);
}

#include <assert.h>
#include <stdint.h>

// Prototypes of the FP16 operations exercised by the test driver below.
uint16_t fp16_cast(unsigned int x);
uint16_t fp16_mul2(uint16_t x);
uint16_t fp16_div2(uint16_t x);
uint16_t fp16_neg(uint16_t x);
uint16_t fp16_add(uint16_t x, uint16_t y);
int fp16_cmp(uint16_t x, uint16_t y);

// Assertion-based checks of the FP16 operations. Expected bit patterns are
// written in hexadecimal because binary literals are not part of C11.
int main(void) {
    // Small integers and basic scaling / ordering.
    uint16_t x = fp16_cast(1);
    assert(x == 0x3C00);
    uint16_t y = fp16_cast(2);
    assert(y == 0x4000);
    assert(fp16_div2(y) == x);
    assert(fp16_mul2(x) == y);
    assert(fp16_cmp(x, y) == -1);
    assert(fp16_cmp(y, x) == 1);
    assert(fp16_cmp(x, x) == 0);
    assert(fp16_cmp(fp16_neg(x), fp16_neg(y)) == 1);
    assert(fp16_cmp(fp16_neg(y), fp16_neg(x)) == -1);
    assert(fp16_cmp(0, fp16_neg(0)) == 0);

    uint16_t three = fp16_add(x, y);
    assert(three == 0x4200);

    // Values near the top of the range, and infinities.
    uint16_t large = fp16_cast((1 << 16) - (1 << 4) - 1);
    uint16_t inf = fp16_mul2(large);
    assert(inf == 0x7C00);
    assert(fp16_mul2(inf) == inf);
    assert(fp16_div2(inf) == inf);
    assert(fp16_cmp(large, inf) == -1);
    assert(fp16_cmp(fp16_neg(inf), large) == -1);
    assert(fp16_add(inf, fp16_neg(inf)) == fp16_add(fp16_neg(inf), inf));
    assert(fp16_add(fp16_neg(large), three) == fp16_neg(large));
    assert(fp16_add(large, fp16_cast(15)) == large);
    assert(fp16_add(large, fp16_cast(16)) == inf);

    // Patterns with a zero exponent field.
    uint16_t small = 0x0001;
    assert(fp16_cmp(small, small) == 0);
    assert(fp16_cmp(small, large) == -1);
    assert(fp16_cmp(large, small) == 1);
    assert(fp16_div2(small) == 0);
    assert(fp16_add(fp16_neg(small), x) == x);
    assert(fp16_mul2(small) == (small << 1));

    // Crossing between exponent field 0 and exponent field 1.
    uint16_t smallish = small << 9;
    assert(fp16_mul2(smallish) == (1 << 10));
    assert(fp16_div2(fp16_mul2(smallish)) == smallish);
    assert(fp16_div2(smallish) == (smallish >> 1));

    return 0;
}