// Untitled

#include <stdbool.h>
#include <stdint.h>

typedef uint16_t FP16;

/**
 * Convert an unsigned integer value to an IEEE 754 binary16 (FP16) bit
 * pattern.
 *
 * The argument is treated as a numeric value (1 -> 1.0, 2 -> 2.0, ...), not
 * as the raw bits of a 32-bit float. Values that need more than 11
 * significant bits are rounded to nearest, ties to even. Values that round
 * to 65520 or above do not fit in FP16 and yield +infinity.
 *
 * @param x  Unsigned integer value to convert.
 * @return   FP16 bit pattern: sign(1) | exponent(5) | fraction(10). The sign
 *           bit is always clear because the input cannot be negative.
 */
uint16_t fp16_cast(unsigned int x) {
    enum {
        FP16_FRAC_BITS = 10,     // Explicit fraction bits in FP16
        FP16_EXPONENT_BIAS = 15, // Bias of the FP16 exponent field
        FP16_MAX_EXPONENT = 15,  // Largest unbiased exponent of a finite FP16
        FP16_INFINITY = 0x7C00   // Exponent all ones, fraction zero
    };

    if (x == 0) {
        return 0;
    }

    // Position of the highest set bit == unbiased exponent of the value.
    int msb = 0;
    while ((x >> msb) > 1u) {
        msb++;
    }

    if (msb > FP16_MAX_EXPONENT) {
        // Magnitude is at least 2^16: not representable as a finite FP16.
        return (uint16_t)FP16_INFINITY;
    }

    // 11-bit significand including the implicit leading one (0x400..0x7FF),
    // or 0x800 if rounding carries out of the top bit.
    uint32_t significand;
    if (msb <= FP16_FRAC_BITS) {
        // Exactly representable: align the leading one with bit 10.
        significand = (uint32_t)x << (FP16_FRAC_BITS - msb);
    } else {
        // Low bits must be discarded: round to nearest, ties to even.
        int dropped = msb - FP16_FRAC_BITS;
        uint32_t remainder = x & ((1u << dropped) - 1u);
        uint32_t halfway = 1u << (dropped - 1);
        significand = (uint32_t)(x >> dropped);
        if (remainder > halfway || (remainder == halfway && (significand & 1u))) {
            significand++;
        }
    }

    // The significand still carries its leading one at bit 10, so adding it
    // to (biased exponent - 1) << 10 yields the final exponent field. A
    // rounding carry (significand == 0x800) bumps the exponent once more, and
    // at the top of the range that lands exactly on the infinity pattern.
    uint32_t bits = ((uint32_t)(msb + FP16_EXPONENT_BIAS - 1) << FP16_FRAC_BITS) + significand;
    return (uint16_t)bits;
}

#include <assert.h>
#include <stdint.h>

// Forward declarations of the FP16 routines exercised by main() below.
// All uint16_t arguments and results are raw 16-bit patterns. The notes here
// reflect only what main() asserts; apart from fp16_cast, the implementations
// are not visible in this file.

// main() expects integer values to map to FP16 bits (1 -> 0x3C00, 2 -> 0x4000).
uint16_t fp16_cast(unsigned);
// Presumably doubles its argument: main() expects fp16_mul2(1.0) == 2.0.
uint16_t fp16_mul2(uint16_t);
// Presumably halves its argument: main() expects fp16_div2(2.0) == 1.0.
uint16_t fp16_div2(uint16_t);
// Presumably negation; main() only uses it in combination with cmp/add.
uint16_t fp16_neg(uint16_t);
// main() expects fp16_add(1.0, 2.0) == 0x4200.
uint16_t fp16_add(uint16_t, uint16_t);
// main() expects -1, 0 or 1 for less-than, equal and greater-than.
int fp16_cmp(uint16_t, uint16_t);

// Assert-based self-test for the fp16_* routines declared above. Each check
// aborts via assert() on the first mismatch; falling off the end means every
// check passed.
int main() {
    // Expected bit patterns in hex: sign(1) | exponent(5) | fraction(10).
    enum {
        BITS_ONE = 0x3C00,
        BITS_TWO = 0x4000,
        BITS_THREE = 0x4200,
        BITS_POS_INF = 0x7C00
    };

    // --- Integer conversion ---
    uint16_t one = fp16_cast(1);
    assert(one == BITS_ONE);
    uint16_t two = fp16_cast(2);
    assert(two == BITS_TWO);
    // The round trip through cast_fp16_to_fixed(two) == 2 is currently disabled.

    // --- Halving, doubling and ordering ---
    assert(fp16_div2(two) == one);
    assert(fp16_mul2(one) == two);
    assert(fp16_cmp(one, two) == -1);
    assert(fp16_cmp(two, one) == 1);
    assert(fp16_cmp(one, one) == 0);
    assert(fp16_cmp(fp16_neg(one), fp16_neg(two)) == 1);
    assert(fp16_cmp(fp16_neg(two), fp16_neg(one)) == -1);
    // A zero pattern and its negation must compare equal.
    assert(fp16_cmp(0, fp16_neg(0)) == 0);

    // --- Addition ---
    uint16_t sum_three = fp16_add(one, two);
    assert(sum_three == BITS_THREE);

    // --- Large values and infinity ---
    // 65519 = 2^16 - 2^4 - 1; doubling it is expected to overflow.
    uint16_t big = fp16_cast((1 << 16) - (1 << 4) - 1);
    uint16_t pos_inf = fp16_mul2(big);
    assert(pos_inf == BITS_POS_INF);
    assert(fp16_mul2(pos_inf) == pos_inf);
    assert(fp16_div2(pos_inf) == pos_inf);
    assert(fp16_cmp(big, pos_inf) == -1);
    assert(fp16_cmp(fp16_neg(pos_inf), big) == -1);
    // inf + (-inf) must give the same result regardless of operand order.
    assert(fp16_add(pos_inf, fp16_neg(pos_inf)) == fp16_add(fp16_neg(pos_inf), pos_inf));
    // Adding 3 to -big is expected to leave -big unchanged.
    assert(fp16_add(fp16_neg(big), sum_three) == fp16_neg(big));
    // Adding 15 leaves big unchanged, while adding 16 reaches infinity.
    assert(fp16_add(big, fp16_cast(15)) == big);
    assert(fp16_add(big, fp16_cast(16)) == pos_inf);

    // --- Patterns with a zero exponent field ---
    uint16_t tiny = 0x0001; // only the lowest fraction bit set
    assert(fp16_cmp(tiny, tiny) == 0);
    assert(fp16_cmp(tiny, big) == -1);
    assert(fp16_cmp(big, tiny) == 1);
    assert(fp16_div2(tiny) == 0);
    assert(fp16_add(fp16_neg(tiny), one) == one);
    assert(fp16_mul2(tiny) == tiny << 1);

    uint16_t top_frac = tiny << 9; // only the highest fraction bit set
    // Doubling must carry out of the fraction into the exponent field.
    assert(fp16_mul2(top_frac) == 1 << 10);
    assert(fp16_div2(fp16_mul2(top_frac)) == top_frac);
    assert(fp16_div2(top_frac) == top_frac >> 1);
}