hashy.lua (4105B)
1 -- HASHY McHASHFACE 2 3 local M = {} 4 _G.d = M 5 6 local function setdefault(table, key) 7 local val = table[key] 8 if val == nil then 9 val = {} 10 table[key] = val 11 end 12 return val 13 end 14 15 function M.build_pos_hash(strings) 16 local len_buckets = {} 17 local maxlen = 0 18 for _, s in ipairs(strings) do 19 table.insert(setdefault(len_buckets, #s), s) 20 if #s > maxlen then 21 maxlen = #s 22 end 23 end 24 25 local len_pos_buckets = {} 26 local worst_buck_size = 0 27 28 for len = 1, maxlen do 29 local strs = len_buckets[len] 30 if strs then 31 -- the best position so far generates `best_bucket` 32 -- with `minsize` worst case collisions 33 local bestpos, minsize, best_bucket = nil, #strs * 2, nil 34 for pos = 1, len do 35 local try_bucket = {} 36 for _, str in ipairs(strs) do 37 local poschar = string.sub(str, pos, pos) 38 table.insert(setdefault(try_bucket, poschar), str) 39 end 40 local maxsize = 1 41 for _, pos_strs in pairs(try_bucket) do 42 maxsize = math.max(maxsize, #pos_strs) 43 end 44 if maxsize < minsize then 45 bestpos = pos 46 minsize = maxsize 47 best_bucket = try_bucket 48 end 49 end 50 len_pos_buckets[len] = { bestpos, best_bucket } 51 worst_buck_size = math.max(worst_buck_size, minsize) 52 end 53 end 54 return len_pos_buckets, maxlen, worst_buck_size 55 end 56 57 function M.switcher(put, tab, maxlen, worst_buck_size, icase) 58 local neworder = {} --- @type string[] 59 put ' switch (len) {\n' 60 local bucky = worst_buck_size > 1 61 for len = 1, maxlen do 62 local vals = tab[len] 63 if vals then 64 put(' case ' .. len .. ': ') 65 local pos, posbuck = unpack(vals) 66 local keys = vim.tbl_keys(posbuck) 67 if #keys > 1 then 68 table.sort(keys) 69 put('switch (str[' .. (pos - 1) .. ']) {\n') 70 for _, c in ipairs(keys) do 71 local buck = posbuck[c] 72 local startidx = #neworder 73 vim.list_extend(neworder, buck) 74 local endidx = #neworder 75 if icase and c:upper() ~= c:lower() then 76 put((" case '%s': case '%s': "):format(c:upper(), c:lower())) 77 else 78 put((" case '%s': "):format(c)) 79 end 80 if len == 1 then 81 put('return ' .. startidx .. ';\n') 82 else 83 put('low = ' .. startidx .. '; ') 84 if bucky then 85 put('high = ' .. endidx .. '; ') 86 end 87 put 'break;\n' 88 end 89 end 90 put ' default: break;\n' 91 put ' }\n ' 92 else 93 local startidx = #neworder 94 table.insert(neworder, posbuck[keys[1]][1]) 95 local endidx = #neworder 96 put('low = ' .. startidx .. '; ') 97 if bucky then 98 put('high = ' .. endidx .. '; ') 99 end 100 end 101 put 'break;\n' 102 end 103 end 104 put ' default: break;\n' 105 put ' }\n' 106 return neworder 107 end 108 109 --- @param icase? boolean generate a case-insensitive hash function. 110 --- `strings` must not have mixed case when using this. 111 function M.hashy_hash(name, strings, access, icase) 112 local stats = {} 113 local put = function(str) 114 table.insert(stats, str) 115 end 116 local len_pos_buckets, maxlen, worst_buck_size = M.build_pos_hash(strings) 117 put('int ' .. name .. '_hash(const char *str, size_t len)\n{\n') 118 if maxlen == 1 then 119 put('\n') -- nothing 120 elseif worst_buck_size > 1 then 121 put(' int low = 0, high = 0;\n') 122 else 123 put(' int low = -1;\n') 124 end 125 local neworder = M.switcher(put, len_pos_buckets, maxlen, worst_buck_size, icase) 126 if maxlen == 1 then 127 put([[ 128 return -1; 129 ]]) 130 elseif worst_buck_size > 1 then 131 put(([[ 132 for (int i = low; i < high; i++) { 133 if (!%s(str, %s, len)) { 134 return i; 135 } 136 } 137 return -1; 138 ]]):format(icase and 'vim_strnicmp_asc' or 'memcmp', access('i'))) 139 else 140 put(([[ 141 if (low < 0 || %s(str, %s, len)) { 142 return -1; 143 } 144 return low; 145 ]]):format(icase and 'vim_strnicmp_asc' or 'memcmp', access('low'))) 146 end 147 put '}\n\n' 148 return neworder, table.concat(stats) 149 end 150 151 return M