Advertisement
pasholnahuy

Untitled

Dec 15th, 2023
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.37 KB | None | 0 0
  1. #include <stdbool.h>
  2. #include <stdint.h>
  3.  
  4. typedef uint16_t FP16;
  5.  
  /**
   * Convert an unsigned integer value to its IEEE 754 binary16 (half-precision)
   * encoding.
   *
   * The argument is the integer VALUE to convert, not the bit pattern of a
   * 32-bit float: fp16_cast(1) yields 0x3C00 (1.0) and fp16_cast(2) yields
   * 0x4000 (2.0), which is what the assertions in main() require.
   *
   * Rounding is to nearest, ties to even. Every integer up to 2048 is
   * represented exactly. Inputs that round to 65536 or more (x >= 65520)
   * overflow to +infinity (0x7C00). The result is never negative, never NaN
   * and never subnormal, because the smallest non-zero input is 1.
   *
   * @param x  non-negative integer value to convert
   * @return   binary16 bit pattern: sign (bit 15, always 0 here),
   *           5 exponent bits, 10 fraction bits
   */
  uint16_t fp16_cast(unsigned int x) {
      enum {
          FP16_FRAC_BITS = 10,     /* explicit fraction bits in binary16 */
          FP16_EXPONENT_BIAS = 15, /* bias of the stored exponent */
          FP16_MAX_EXPONENT = 15   /* largest unbiased exponent of a finite value */
      };
      const uint16_t FP16_INFINITY = 0x7C00;

      if (x == 0) {
          return 0; /* +0.0 */
      }

      /* Index of the highest set bit == unbiased exponent of x. */
      int msb = 0;
      for (unsigned int rest = x >> 1; rest != 0; rest >>= 1) {
          msb++;
      }

      if (msb > FP16_MAX_EXPONENT) {
          return FP16_INFINITY; /* x >= 65536: beyond the finite range */
      }

      /* Build the 11-bit significand (hidden leading 1 plus 10 fraction bits). */
      uint32_t significand;
      if (msb <= FP16_FRAC_BITS) {
          /* Fits exactly: just align the leading 1 to bit 10. */
          significand = (uint32_t)x << (FP16_FRAC_BITS - msb);
      } else {
          /* Too many bits: drop the low ones and round to nearest, ties to even. */
          const int drop = msb - FP16_FRAC_BITS; /* 1..5 */
          const uint32_t half = UINT32_C(1) << (drop - 1);
          const uint32_t lost = (uint32_t)x & ((UINT32_C(1) << drop) - 1);

          significand = (uint32_t)x >> drop;
          if (lost > half || (lost == half && (significand & 1u) != 0)) {
              significand++;
          }
      }

      /*
       * Adding (significand - hidden bit) instead of OR-ing the fraction lets a
       * rounding carry (significand == 0x800) bump the exponent by one; when
       * the exponent was already 30 the sum is exactly 0x7C00, i.e. +infinity.
       */
      const uint32_t hidden_bit = UINT32_C(1) << FP16_FRAC_BITS;
      const uint32_t bits =
          ((uint32_t)(msb + FP16_EXPONENT_BIAS) << FP16_FRAC_BITS) +
          (significand - hidden_bit);

      return (uint16_t)bits;
  }
  37.  
  38. #include <assert.h>
  39. #include <stdint.h>
  40.  
  /*
   * Half-precision helpers exercised by main(). Values are passed around as
   * raw uint16_t bit patterns. The one-line notes below are inferred from the
   * assertions in main() -- confirm against the actual definitions.
   */
  uint16_t fp16_cast(unsigned value);             /* integer value -> fp16 encoding */
  uint16_t fp16_mul2(uint16_t value);             /* presumably value * 2 */
  uint16_t fp16_div2(uint16_t value);             /* presumably value / 2 */
  uint16_t fp16_neg(uint16_t value);              /* presumably -value */
  uint16_t fp16_add(uint16_t lhs, uint16_t rhs);  /* presumably lhs + rhs */
  int fp16_cmp(uint16_t lhs, uint16_t rhs);       /* main() expects -1, 0 or 1 */
  47.  
  /*
   * Self-checks for the fp16_* helpers declared above.
   *
   * Expected encodings are spelled as named hex constants rather than 0b...
   * literals, which are a compiler extension before C23. Each assert aborts
   * the program on the first mismatch; reaching the end means every check
   * passed.
   */
  int main(void) {
      /* binary16 bit patterns used as expected values */
      enum {
          FP16_ONE = 0x3C00,        /* 0 01111 0000000000 */
          FP16_TWO = 0x4000,        /* 0 10000 0000000000 */
          FP16_THREE = 0x4200,      /* 0 10000 1000000000 */
          FP16_POS_INF = 0x7C00,    /* 0 11111 0000000000 */
          FP16_MIN_NORMAL = 0x0400, /* 0 00001 0000000000 */
          FP16_MIN_SUBNORMAL = 0x0001
      };

      /* Integer casts of small values. */
      uint16_t one = fp16_cast(1);
      assert(one == FP16_ONE);
      uint16_t two = fp16_cast(2);
      assert(two == FP16_TWO);
      /*
       * Disabled check (cast_fp16_to_fixed is not among the prototypes above):
       *   uint64_t z = cast_fp16_to_fixed(two);
       *   assert(z == 2);
       */

      /* Doubling, halving and ordering of ordinary values. */
      assert(fp16_div2(two) == one);
      assert(fp16_mul2(one) == two);
      assert(fp16_cmp(one, two) == -1);
      assert(fp16_cmp(two, one) == 1);
      assert(fp16_cmp(one, one) == 0);
      assert(fp16_cmp(fp16_neg(one), fp16_neg(two)) == 1);
      assert(fp16_cmp(fp16_neg(two), fp16_neg(one)) == -1);
      assert(fp16_cmp(0, fp16_neg(0)) == 0); /* +0 and -0 compare equal */

      uint16_t three = fp16_add(one, two);
      assert(three == FP16_THREE);

      /* Largest finite value, overflow and infinities. */
      uint16_t large = fp16_cast((1 << 16) - (1 << 4) - 1); /* 65519 */
      uint16_t inf = fp16_mul2(large);
      assert(inf == FP16_POS_INF);
      assert(fp16_mul2(inf) == inf);
      assert(fp16_div2(inf) == inf);
      assert(fp16_cmp(large, inf) == -1);
      assert(fp16_cmp(fp16_neg(inf), large) == -1);
      assert(fp16_add(inf, fp16_neg(inf)) == fp16_add(fp16_neg(inf), inf));
      assert(fp16_add(fp16_neg(large), three) == fp16_neg(large));
      assert(fp16_add(large, fp16_cast(15)) == large);
      assert(fp16_add(large, fp16_cast(16)) == inf);

      /* Smallest subnormal. */
      uint16_t small = FP16_MIN_SUBNORMAL;
      assert(fp16_cmp(small, small) == 0);
      assert(fp16_cmp(small, large) == -1);
      assert(fp16_cmp(large, small) == 1);
      assert(fp16_div2(small) == 0);
      assert(fp16_add(fp16_neg(small), one) == one);
      assert(fp16_mul2(small) == (small << 1));

      /* Crossing the subnormal/normal boundary. */
      uint16_t smallish = (uint16_t)(small << 9);
      assert(fp16_mul2(smallish) == FP16_MIN_NORMAL);
      assert(fp16_div2(fp16_mul2(smallish)) == smallish);
      assert(fp16_div2(smallish) == (smallish >> 1));

      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement