Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdbool.h>
- #include <stdint.h>
- typedef uint16_t FP16;
/*
 * Convert an unsigned integer to its IEEE 754 binary16 (FP16) bit pattern.
 *
 * x is interpreted as an integer VALUE (fp16_cast(1) yields 0x3C00, the
 * encoding of 1.0), not as the raw bits of a 32-bit float.
 *
 * Integers up to 2048 are represented exactly. Larger values keep their top
 * 11 significant bits, rounded to nearest with ties to even. Anything that
 * rounds past the largest finite FP16 value (65504) becomes +infinity
 * (0x7C00). The result is never negative, subnormal or NaN; 0 maps to +0.
 */
uint16_t fp16_cast(unsigned int x) {
    enum {
        FP16_FRAC_BITS = 10,     /* explicit fraction bits in binary16 */
        FP16_EXPONENT_BIAS = 15, /* binary16 exponent bias */
        FP16_MAX_EXPONENT = 15,  /* largest unbiased exponent of a finite value */
        FP16_INFINITY = 0x7C00   /* exponent all ones, fraction zero */
    };

    if (x == 0) {
        return 0;
    }

    /* Index of the highest set bit == unbiased exponent of x. */
    int msb = 0;
    for (unsigned int rest = x >> 1; rest != 0; rest >>= 1) {
        msb++;
    }

    /* x >= 65536 is beyond every finite FP16 value. */
    if (msb > FP16_MAX_EXPONENT) {
        return (uint16_t)FP16_INFINITY;
    }

    /* Build the 11-bit significand (implicit leading 1 at bit 10). */
    uint32_t significand;
    if (msb <= FP16_FRAC_BITS) {
        /* Fits exactly: just left-align the leading 1 to bit 10. */
        significand = (uint32_t)x << (FP16_FRAC_BITS - msb);
    } else {
        /* Too many bits: drop the low ones, rounding to nearest-even. */
        const int dropped = msb - FP16_FRAC_BITS;
        const uint32_t lost = (uint32_t)x & ((UINT32_C(1) << dropped) - 1);
        const uint32_t half = UINT32_C(1) << (dropped - 1);

        significand = (uint32_t)x >> dropped;
        if (lost > half || (lost == half && (significand & 1u) != 0)) {
            significand++;
        }
    }

    /*
     * Adding (significand - 2^10) instead of OR-ing the fraction lets a
     * rounding carry (significand == 2^11) bump the exponent field; at the
     * top exponent that carry produces exactly the infinity pattern.
     */
    const uint32_t exponent_bits =
        (uint32_t)(msb + FP16_EXPONENT_BIAS) << FP16_FRAC_BITS;
    return (uint16_t)(exponent_bits +
                      (significand - (UINT32_C(1) << FP16_FRAC_BITS)));
}
- #include <assert.h>
- #include <stdint.h>
/*
 * FP16 routines exercised by main() below. Every uint16_t here is a raw
 * 16-bit pattern. The note on each prototype records only what main()
 * asserts; apart from fp16_cast, the definitions are not visible in this
 * file, so confirm the details against them.
 */
uint16_t fp16_cast(unsigned int x);             /* main() expects fp16_cast(1) == 0x3C00 */
uint16_t fp16_mul2(uint16_t value);             /* main() expects mul2(cast(1)) == cast(2) */
uint16_t fp16_div2(uint16_t value);             /* main() expects div2(cast(2)) == cast(1) */
uint16_t fp16_neg(uint16_t value);              /* main() expects cmp(0, neg(0)) == 0 */
uint16_t fp16_add(uint16_t lhs, uint16_t rhs);  /* main() expects add(cast(1), cast(2)) == 0x4200 */
int fp16_cmp(uint16_t lhs, uint16_t rhs);       /* main() expects results of -1, 0 or 1 */
/*
 * Self-test for the FP16 routines declared above: each assert pins one
 * expected bit pattern or comparison result. Returns 0 when every assertion
 * holds (assert aborts the program otherwise).
 */
int main(void) {
    /*
     * Expected FP16 bit patterns, written in hex because the 0b binary
     * literal prefix is not part of ISO C before C23.
     */
    const uint16_t FP16_ONE = 0x3C00;   /* 0 01111 0000000000 */
    const uint16_t FP16_TWO = 0x4000;   /* 0 10000 0000000000 */
    const uint16_t FP16_THREE = 0x4200; /* 0 10000 1000000000 */
    const uint16_t FP16_INF = 0x7C00;   /* 0 11111 0000000000 */

    /* Integer conversion. */
    uint16_t x = fp16_cast(1);
    assert(x == FP16_ONE);
    uint16_t y = fp16_cast(2);
    assert(y == FP16_TWO);
    /* TODO: a cast_fp16_to_fixed(y) == 2 round-trip check was disabled here;
     * no declaration of cast_fp16_to_fixed is visible in this file. */

    /* Halving / doubling. */
    assert(fp16_div2(y) == x);
    assert(fp16_mul2(x) == y);

    /* Ordering, including negated operands and signed zero. */
    assert(fp16_cmp(x, y) == -1);
    assert(fp16_cmp(y, x) == 1);
    assert(fp16_cmp(x, x) == 0);
    assert(fp16_cmp(fp16_neg(x), fp16_neg(y)) == 1);
    assert(fp16_cmp(fp16_neg(y), fp16_neg(x)) == -1);
    assert(fp16_cmp(0, fp16_neg(0)) == 0);

    /* Addition. */
    uint16_t three = fp16_add(x, y);
    assert(three == FP16_THREE);

    /* Behaviour near the top of the range and at infinity. */
    uint16_t large = fp16_cast((1 << 16) - (1 << 4) - 1);
    uint16_t inf = fp16_mul2(large);
    assert(inf == FP16_INF);
    assert(fp16_mul2(inf) == inf);
    assert(fp16_div2(inf) == inf);
    assert(fp16_cmp(large, inf) == -1);
    assert(fp16_cmp(fp16_neg(inf), large) == -1);
    assert(fp16_add(inf, fp16_neg(inf)) == fp16_add(fp16_neg(inf), inf));
    assert(fp16_add(fp16_neg(large), three) == fp16_neg(large));
    assert(fp16_add(large, fp16_cast(15)) == large);
    assert(fp16_add(large, fp16_cast(16)) == inf);

    /* Patterns with a zero exponent field (only low fraction bits set). */
    uint16_t small = 0x0001;
    assert(fp16_cmp(small, small) == 0);
    assert(fp16_cmp(small, large) == -1);
    assert(fp16_cmp(large, small) == 1);
    assert(fp16_div2(small) == 0);
    assert(fp16_add(fp16_neg(small), x) == x);
    assert(fp16_mul2(small) == small << 1);

    /* Doubling the pattern with only fraction bit 9 set must give 1 << 10. */
    uint16_t smallish = small << 9;
    assert(fp16_mul2(smallish) == 1 << 10);
    assert(fp16_div2(fp16_mul2(smallish)) == smallish);
    assert(fp16_div2(smallish) == smallish >> 1);

    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement