Advertisement
Skytrias

trie odin

Oct 26th, 2022
1,316
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Go 6.32 KB | None | 0 0
  1. package art
  2.  
  3. import "core:os"
  4. import "core:mem"
  5. import "core:math/bits"
  6. import "core:fmt"
  7.  
  8. ALPHABET_SIZE :: 26
  9.  
  10. /*
  11.     CTrie (104B)
  12.         uses smallest index possible to reduce size constraints
  13.         data allocated in array -> easy to free/clear/delete
  14.         should be the same speed of the default Trie
  15. */
  16.  
  17. // treating 0 as nil as 0 is the root
  18. CTrie :: struct {
  19.     array: [ALPHABET_SIZE]u32,
  20. }
  21.  
  22. ctrie_data: [dynamic]CTrie
  23.  
  24. ctrie_init :: proc(cap: int) {
  25.     ctrie_data = make([dynamic]CTrie, 0, cap)
  26.     append(&ctrie_data, CTrie {})
  27. }
  28.  
  29. ctrie_destroy :: proc() {
  30.     delete(ctrie_data)
  31. }
  32.  
  33. // push a node to the ctrie array and return its index
  34. ctrie_push :: proc() -> u32 {
  35.     append(&ctrie_data, CTrie {})
  36.     return u32(len(ctrie_data) - 1)
  37. }
  38.  
  39. // simple helper
  40. ctrie_get :: #force_inline proc(index: u32) -> ^CTrie {
  41.     return &ctrie_data[index]
  42. }
  43.  
  44. // get the root of the ctrie tree
  45. ctrie_root :: #force_inline proc() -> ^CTrie {
  46.     return &ctrie_data[0]
  47. }
  48.  
  49. // insert a key
  50. ctrie_insert :: proc(key: string) {
  51.     t := ctrie_root()
  52.  
  53.     for b in key {
  54.         idx := b - 'a'
  55.  
  56.         if t.array[idx] == 0 {
  57.             t.array[idx] = ctrie_push()
  58.         }
  59.  
  60.         t = ctrie_get(t.array[idx])
  61.     }  
  62. }
  63.  
  64. // print the ctrie tree
  65. ctrie_print :: proc() {
  66.     depth: int
  67.     ctrie_print_recursive(ctrie_root(), &depth, 0)
  68. }
  69.  
  70. // print the ctrie tree recursively by nodes
  71. ctrie_print_recursive :: proc(t: ^CTrie, depth: ^int, b: u8) {
  72.     if depth^ != 0 {
  73.         for i in 0..<depth^ - 1 {
  74.             fmt.eprint('\t')
  75.         }
  76.         codepoint := rune(b + 'a')
  77.         fmt.eprint(codepoint, '\n')
  78.     }
  79.  
  80.     for i in 0..<ALPHABET_SIZE {
  81.         if t.array[i] != 0 {
  82.             depth^ += 1
  83.             ctrie_print_recursive(ctrie_get(t.array[i]), depth, u8(i))
  84.             depth^ -= 1
  85.         }
  86.     }
  87. }
  88.  
  89. // DEBUG print the ctrie tree size
  90. ctrie_print_size :: proc() {
  91.     size := len(ctrie_data) * size_of(CTrie)
  92.     fmt.eprintf("SIZE in %dB %dKB %dMB for CTrie\n", size, size / 1024, size / 1024 / 1024)
  93. }
  94.  
  95. /*
  96.     Compressed trie in flat memory
  97.         uses the least amount of memory possible while still being accessible
  98.         bitfield with 32 bits describing 26 wanted bits (a-z)
  99. */
  100.  
  101. // dynamic byte array containing flexible compressed trie data
  102. comp: []byte
  103. comp_index: int
  104.  
  105. // init the comp to some cap
  106. comp_init :: proc(cap := mem.Megabyte) {
  107.     comp = make([]byte, cap)
  108. }
  109.  
  110. // destroy the comp data
  111. comp_destroy :: proc() {
  112.     delete(comp)   
  113. }
  114.  
  115. // push u32 alphabet bits data
  116. comp_push_bits :: proc() -> (res: ^u32) {
  117.     old := comp_index
  118.     res = cast(^u32) &comp[old]
  119.     comp_index += size_of(u32)
  120.     return
  121. }
  122.  
  123. // push trie children nodes as indexes
  124. comp_push_data :: proc(count: int) -> (res: []u32) {
  125.     old := comp_index
  126.     res = mem.slice_ptr(cast(^u32) &comp[old], count)
  127.     comp_index += size_of(u32) * count
  128.     return
  129. }
  130.  
  131. // push a ctrie bitfield and its dynamic data
  132. comp_push_ctrie :: proc(t: ^CTrie, previous_data: ^u32) {
  133.     if previous_data != nil {
  134.         previous_data^ = u32(comp_index)
  135.     }
  136.  
  137.     alphabet_bits := comp_push_bits()
  138.  
  139.     for i in 0..<ALPHABET_SIZE {
  140.         if t.array[i] != 0 {
  141.             alphabet_bits^ = bits.bitfield_insert_u32(alphabet_bits^, 1, uint(i), 1)
  142.         }
  143.     }
  144.  
  145.     if alphabet_bits^ != 0 {
  146.         ones := bits.count_ones(alphabet_bits^)
  147.         data := comp_push_data(int(ones))
  148.         index: int
  149.  
  150.         // TODO optimize to store actual used characters to iterate?
  151.         for i in 0..<ALPHABET_SIZE {
  152.             if t.array[i] != 0 {
  153.                 comp_push_ctrie(ctrie_get(t.array[i]), &data[index])
  154.                 index += 1
  155.             }
  156.         }
  157.     }
  158. }
  159.  
  160. // DEBUG print the trie tree
  161. comp_print :: proc() {
  162.     assert(comp_index != 0)
  163.     depth: int
  164.     comp_print_recursive(cast(^u32) &comp[0], &depth)
  165. }
  166.  
  167. // DEBUG print the trie tree recursively
  168. comp_print_recursive :: proc(alphabet_bits: ^u32, depth: ^int) {
  169.     b := alphabet_bits^
  170.  
  171.     if b == 0 {
  172.         return
  173.     }
  174.  
  175.     index: int
  176.     mask: u32
  177.     for i in 0..<u32(ALPHABET_SIZE) {
  178.         mask = (1 << i)
  179.        
  180.         // search for matching bits
  181.         if b & mask == mask {
  182.             depth^ += 1
  183.             for i in 0..<depth^ - 1 {
  184.                 fmt.eprint('\t')
  185.             }
  186.             codepoint := rune(i + 'a')
  187.             fmt.eprint(codepoint, '\n')
  188.  
  189.             next := mem.ptr_offset(alphabet_bits, index + 1)
  190.             comp_print_recursive(cast(^u32) &comp[next^], depth)
  191.             depth^ -= 1
  192.             index += 1
  193.         }
  194.     }  
  195. }
  196.  
  197. // comp_bits_contains_byte :: proc(bits: u32, b: u8) -> bool {
  198. //  mask := u32(1 << u32(b - 'a'))
  199. //  return bits & mask == mask
  200. // }
  201.  
  202. // true when the idx exists in the bitfield
  203. comp_bits_contains_index :: proc(bits: u32, idx: u32) -> bool {
  204.     mask := u32(1 << idx)
  205.     return bits & mask == mask
  206. }
  207.  
  208. // print logical size of the compressed trie
  209. comp_print_size :: proc() {
  210.     size := comp_index
  211.     fmt.eprintf("SIZE in %dB %dKB %dMB for compressed\n", size, size / 1024, size / 1024 / 1024)
  212. }
  213.  
  214. // converts alphabetic byte index into the remapped bitfield space
  215. comp_bits_index_to_counted_one :: proc(bits: u32, idx: u32) -> (res: u32, ok: bool) {
  216.     mask: u32
  217.     for i in 0..<u32(ALPHABET_SIZE) {
  218.         mask = u32(1 << i)
  219.        
  220.         if bits & mask == mask {
  221.             if idx == i {
  222.                 ok = true
  223.                 return
  224.             }
  225.  
  226.             res += 1
  227.         }
  228.     }
  229.  
  230.     return
  231. }
  232.  
  233. // prints used characters in the bitset
  234. comp_print_characters :: proc(bits: u32) {
  235.     if bits == 0 {
  236.         fmt.eprintln("EMPTY BITS")
  237.         return
  238.     }
  239.  
  240.     for i in 0..<ALPHABET_SIZE {
  241.         if comp_bits_contains_index(bits, u32(i)) {
  242.             fmt.eprint(rune(i + 'a'), ' ')
  243.         }
  244.     }
  245.  
  246.     fmt.eprintln()
  247. }
  248.  
  249. // TODO escape on utf8 byte?
  250. // lowercase valid alpha
  251. ascii_check_lower :: proc(b: u8) -> u8 {
  252.     if 'A' <= b && b <= 'Z' {
  253.         return b + 32
  254.     } else {
  255.         return b
  256.     }
  257. }
  258.  
  259. // wether the byte is a letter
  260. ascii_is_letter :: #force_inline proc(b: u8) -> bool {
  261.     return 'a' <= b && b <= 'z'
  262. }
  263.  
  264. // searches the compressed trie for the wanted word
  265. comp_search :: proc(key: string) -> bool {
  266.     alphabet_bits := cast(^u32) &comp[0]
  267.  
  268.     for i in 0..<len(key) {
  269.         b := ascii_check_lower(key[i])
  270.  
  271.         if ascii_is_letter(b) {
  272.             idx := b - 'a'
  273.             // comp_print_characters(alphabet_bits^)
  274.            
  275.             if res, ok := comp_bits_index_to_counted_one(alphabet_bits^, u32(idx)); ok {
  276.                 next := mem.ptr_offset(alphabet_bits, res + 1)
  277.                 alphabet_bits = cast(^u32) &comp[next^]
  278.             } else {
  279.                 return false
  280.             }
  281.         } else {
  282.             return false
  283.         }
  284.     }
  285.  
  286.     return true
  287. }
  288.  
  289. comp_write_to_file :: proc(path: string) -> bool {
  290.     return os.write_entire_file(path, comp[:])
  291. }
  292.  
  293. comp_read_from_file :: proc(path: string) {
  294.     content, ok := os.read_entire_file(path)
  295.  
  296.     if ok {
  297.         comp = content
  298.         comp_index = len(content)
  299.     }
  300. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement