byteSize-of-string.js (19002B)
// |jit-test| skip-if: !getBuildConfiguration("moz-memory"); --setpref=objectfuse_for_global=false
//
// Run this test only if we're using jemalloc. Other malloc implementations
// exhibit surprising behaviors. For example, 32-bit Fedora builds have
// non-deterministic allocation sizes.
//
// Don't use object fuses for this test because atomizing global constants
// affects what we're testing.

// Check JS::ubi::Node::size results for strings.

// We actually hard-code specific sizes into this test, even though they're
// implementation details, because in practice there are only two architecture
// variants to consider (32-bit and 64-bit), and if these sizes change, that's
// something SpiderMonkey hackers really want to know; they're supposed to be
// stable.

gczeal(0); // Need to control when tenuring happens
gcparam('semispaceNurseryEnabled', 0);

// When values change, it's nice to see *all* of the failures, rather than
// stopping at the first.
var checkFailures = 0;

/**
 * Record a failure (without throwing) when |receive| is not strictly equal to
 * |expect|.
 *
 * On mismatch, prints one TEST-UNEXPECTED-FAIL line through printErr and
 * increments |checkFailures|.
 *
 * @param {*} expect - The expected value.
 * @param {*} receive - The value actually observed.
 */
function checkEq(expect, receive) {
  if (expect === receive) {
    return;
  }

  // Prefer the "tests/<path>:<line>" location from the stack line that
  // follows the first one. If the stack does not have that shape (for example
  // when the file is not run from under a tests/ directory), fall back to the
  // raw second stack line instead of throwing, so that every failure is still
  // counted and reported.
  const stack = String(new Error().stack);
  const located = stack.match(/[^\n]*\n[^\n]*?tests\/([^\n]*:\d+):\d+\n/);
  const line = located
    ? located[1]
    : (stack.split("\n")[1] || "unknown location").trim();
  printErr(`TEST-UNEXPECTED-FAIL | ${line} | Error: Assertion failed. Got ${receive}, expected ${expect}`);
  checkFailures++;
}

// Hack to skip this test if strings are not allocated in the nursery: if the
// reported size of a freshly built string is unchanged by a GC, assume it was
// never in the nursery and bail out successfully.
{
  const sample_nursery = "x" + "abc".substr(1);
  const before = byteSize(sample_nursery);
  gc();
  const after = byteSize(sample_nursery);
  if (before === after) {
    printErr("nursery strings appear to be disabled");
    quit(0);
  }
}

// Ion eager runs much of this code in Ion, and Ion nursery-allocates more
// aggressively than other modes.
if (getJitCompilerOptions()["ion.warmup.trigger"] <= 100) {
  setJitCompilerOption("ion.warmup.trigger", 100);
}

// Selector for expected values: returns its first argument on builds whose
// pointer size is 4 bytes and its second argument otherwise.
var s = getBuildConfiguration("pointer-byte-size") == 4
  ? (s32, s64) => s32
  : (s32, s64) => s64;

// Convert an input string, which is probably an atom because it's a literal in
// the source text, to a nursery-allocated string with the same contents. Note
// that the string's characters may be allocated in the nursery.
function copyString(str) {
  if (str.length == 0) {
    return str; // Nothing we can do here
  }
  const head = str.substr(0, 1);
  const tail = str.substr(1);
  return ensureLinearString(head + tail);
}

// Return the nursery byte size of |str|.
function nByteSize(str) {
  // Strings that appear in the source will always be atomized and therefore
  // will never be in the nursery, so measure a fresh copy instead.
  const nurseryCopy = copyString(str);
  return byteSize(nurseryCopy);
}

// Return the tenured byte size of |str|.
function tByteSize(str) {
  // Strings that appear in the source will always be atomized and therefore
  // will never be in the nursery. But we'll make a copy get tenured instead
  // of using the atom.
  const copy = copyString(str);
  minorgc();
  return byteSize(copy);
}

// If a dependent string uses a small percentage of its base's characters, then
// it will be cloned if nothing else is keeping the base alive. This introduces
// a tracing order dependency, and so both possibilities must be allowed.
//
// |depSize| and |clonedSize| are [32-bit, 64-bit] pairs; the returned value is
// the expected size for whichever form |str| is found to be in.
function s_ifDependent(str, depSize, clonedSize) {
  // Resolve 32/64 bit width.
  const expectedIfDependent = s(...depSize);
  const expectedIfCloned = s(...clonedSize);

  let isDependent;
  if (this.stringRepresentation) {
    // Ask the shell directly whether the string is dependent.
    const { flags } = JSON.parse(stringRepresentation(str));
    isDependent = flags.includes("DEPENDENT_BIT");
  } else {
    // No introspection available: if the size matches the dependent option,
    // expect that size; otherwise expect the cloned size.
    isDependent = byteSize(str) == expectedIfDependent;
  }
  return isDependent ? expectedIfDependent : expectedIfCloned;
}

// Linear string representations and their inline capacities:
//
//                         32-bit               64-bit             test
// representation          Latin-1  char16_t    Latin-1  char16_t  label
// ========================================================================
// JSExternalString        - limited by MaxStringLength -          E
// JSThinInlineString      8        4           16       8         T
// JSFatInlineString       24       12          24       12        F
// ThinInlineAtom          12       6           20       10        T
// FatInlineAtom           20       10          20       10        F
// JSExtensibleString      - limited by MaxStringLength -          X

// Notes:
//  - labels are suffixed with A for atoms and N for non-atoms
//  - atoms store a 4 byte hash code, and some add to the size to adjust
//  - Nursery-allocated strings require a header that stores the zone.

// Expected sizes based on type of string
const m32 = (getBuildConfiguration("pointer-byte-size") == 4);
const TA = m32 ? 24 : 32; // ThinInlineAtom (includes a hash value)
const FA = m32 ? 32 : 32; // FatInlineAtom (includes a hash value)
const NA = m32 ? 24 : 32; // NormalAtom
const TN = m32 ? 16 : 24; // ThinInlineString
const FN = m32 ? 32 : 32; // FatInlineString
const LN = m32 ? 16 : 24; // LinearString, has additional storage
const XN = m32 ? 16 : 24; // ExtensibleString, has additional storage
const RN = m32 ? 16 : 24; // Rope
const DN = m32 ? 16 : 24; // DependentString
const EN = m32 ? 16 : 24; // ExternalString

// A function that pads out a tenured size to the nursery size. We store a zone
// pointer in the nursery just before the string (4 bytes on 32-bit, 8 bytes on
// 64-bit), and the string struct itself must be 8-byte aligned (resulting in
// +4 bytes on 32-bit, +0 bytes on 64-bit). The end result is that nursery
// strings are 8 bytes larger.
const Nursery = m32
  ? size => size + 4 + 4
  : size => size + 8 + 0;

// Latin-1
checkEq(tByteSize(""), s(TA, TA));
checkEq(tByteSize("1"), s(TA, TA));
checkEq(tByteSize("1234567"), s(TN, TN));
checkEq(tByteSize("12345678"), s(TN, TN));
checkEq(tByteSize("123456789"), s(FN, TN));
checkEq(tByteSize("123456789.12345"), s(FN, TN));
checkEq(tByteSize("123456789.123456"), s(FN, TN));
checkEq(tByteSize("123456789.1234567"), s(FN, FN));
checkEq(tByteSize("123456789.123456789.123"), s(FN, FN));
checkEq(tByteSize("123456789.123456789.1234"), s(FN, FN));
checkEq(tByteSize("123456789.123456789.12345"), s(XN+32, XN+32));
checkEq(tByteSize("123456789.123456789.123456789.1"), s(XN+32, XN+32));
checkEq(tByteSize("123456789.123456789.123456789.12"), s(XN+32, XN+32));
checkEq(tByteSize("123456789.123456789.123456789.123"), s(XN+64, XN+64));

checkEq(nByteSize(""), s(TA, TA));
checkEq(nByteSize("1"), s(TA, TA));
checkEq(nByteSize("1234567"), s(Nursery(TN), Nursery(TN)));
checkEq(nByteSize("12345678"), s(Nursery(TN), Nursery(TN)));
checkEq(nByteSize("123456789"), s(Nursery(FN), Nursery(TN)));
checkEq(nByteSize("123456789.12345"), s(Nursery(FN), Nursery(TN)));
checkEq(nByteSize("123456789.123456"), s(Nursery(FN), Nursery(TN)));
checkEq(nByteSize("123456789.1234567"), s(Nursery(FN), Nursery(FN)));
checkEq(nByteSize("123456789.123456789.123"), s(Nursery(FN), Nursery(FN)));
checkEq(nByteSize("123456789.123456789.1234"), s(Nursery(FN), Nursery(FN)));
checkEq(nByteSize("123456789.123456789.12345"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("123456789.123456789.123456789.1"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("123456789.123456789.123456789.12"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("123456789.123456789.123456789.123"), s(Nursery(XN), Nursery(XN)));

// Return the string obtained by round-tripping |s| through an object property
// key.
function Atom(s) {
  const holder = { [s]: true };
  return Object.keys(holder)[0];
}
checkEq(byteSize(Atom("1234567")), s(TA, TA));
checkEq(byteSize(Atom("12345678")), s(TA, FA));
checkEq(byteSize(Atom("123456789.12")), s(TA, FA));
checkEq(byteSize(Atom("123456789.123")), s(FA, FA));
checkEq(byteSize(Atom("123456789.12345")), s(FA, FA));
checkEq(byteSize(Atom("123456789.123456")), s(FA, FA));
checkEq(byteSize(Atom("123456789.1234567")), s(FA, FA));
checkEq(byteSize(Atom("123456789.123456789.")), s(FA, FA));
checkEq(byteSize(Atom("123456789.123456789.1")), s(NA+32, NA+32));
checkEq(byteSize(Atom("123456789.123456789.123")), s(NA+32, NA+32));
checkEq(byteSize(Atom("123456789.123456789.1234")), s(NA+32, NA+32));
checkEq(byteSize(Atom("123456789.123456789.12345")), s(NA+32, NA+32));
checkEq(byteSize(Atom("123456789.123456789.123456789.1")), s(NA+32, NA+32));
checkEq(byteSize(Atom("123456789.123456789.123456789.12")), s(NA+32, NA+32));
checkEq(byteSize(Atom("123456789.123456789.123456789.123")), s(NA+48, NA+48));

// Inline char16_t atoms.
// "Impassionate gods have never seen the red that is the Tatsuta River."
//   - Ariwara no Narihira
checkEq(tByteSize("千"), s(TA, TA));
checkEq(tByteSize("千早"), s(TN, TN));
checkEq(tByteSize("千早ぶ"), s(TN, TN));
checkEq(tByteSize("千早ぶる"), s(TN, TN));
checkEq(tByteSize("千早ぶる神"), s(FN, TN));
checkEq(tByteSize("千早ぶる神代"), s(FN, TN));
checkEq(tByteSize("千早ぶる神代も"), s(FN, TN));
checkEq(tByteSize("千早ぶる神代もき"), s(FN, TN));
checkEq(tByteSize("千早ぶる神代もきか"), s(FN, FN));
checkEq(tByteSize("千早ぶる神代もきかず龍"), s(FN, FN));
checkEq(tByteSize("千早ぶる神代もきかず龍田"), s(FN, FN));
checkEq(tByteSize("千早ぶる神代もきかず龍田川"), s(XN+32, XN+32));
checkEq(tByteSize("千早ぶる神代もきかず龍田川 か"), s(XN+32, XN+32));
checkEq(tByteSize("千早ぶる神代もきかず龍田川 から"), s(XN+32, XN+32));
checkEq(tByteSize("千早ぶる神代もきかず龍田川 からく"), s(XN+64, XN+64));
checkEq(tByteSize("千早ぶる神代もきかず龍田川 からくれなゐに水く"), s(XN+64, XN+64));
checkEq(tByteSize("千早ぶる神代もきかず龍田川 からくれなゐに水くく"), s(XN+64, XN+64));
checkEq(tByteSize("千早ぶる神代もきかず龍田川 からくれなゐに水くくるとは"), s(XN+64, XN+64));

checkEq(nByteSize("千"), s(TA, TA));
checkEq(nByteSize("千早"), s(Nursery(TN), Nursery(TN)));
checkEq(nByteSize("千早ぶ"), s(Nursery(TN), Nursery(TN)));
checkEq(nByteSize("千早ぶる"), s(Nursery(TN), Nursery(TN)));
checkEq(nByteSize("千早ぶる神"), s(Nursery(FN), Nursery(TN)));
checkEq(nByteSize("千早ぶる神代"), s(Nursery(FN), Nursery(TN)));
checkEq(nByteSize("千早ぶる神代も"), s(Nursery(FN), Nursery(TN)));
checkEq(nByteSize("千早ぶる神代もき"), s(Nursery(FN), Nursery(TN)));
checkEq(nByteSize("千早ぶる神代もきか"), s(Nursery(FN), Nursery(FN)));
checkEq(nByteSize("千早ぶる神代もきかず龍"), s(Nursery(FN), Nursery(FN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田"), s(Nursery(FN), Nursery(FN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田川"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田川 か"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田川 から"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田川 からく"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田川 からくれなゐに水く"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田川 からくれなゐに水くく"), s(Nursery(XN), Nursery(XN)));
checkEq(nByteSize("千早ぶる神代もきかず龍田川 からくれなゐに水くくるとは"), s(Nursery(XN), Nursery(XN)));

// A Latin-1 rope. This changes size when flattened.
// "In a village of La Mancha, the name of which I have no desire to call to mind"
//   - Miguel de Cervantes, Don Quixote
var fragment8 = "En un lugar de la Mancha, de cuyo nombre no quiero acordarme"; // 60 characters
var rope8 = fragment8;
// Ten doublings give 1024 repetitions of the fragment.
for (var i = 0; i < 10; i++) {
  rope8 = rope8 + rope8;
}

// The size checks below are interleaved with the operations that change the
// string's form (minor GC, regexp match), so their order matters.
checkEq(byteSize(rope8), s(Nursery(RN), Nursery(RN)));
minorgc();
checkEq(byteSize(rope8), s(RN, RN));
var matches8 = rope8.match(/(de cuyo nombre no quiero acordarme)/);
checkEq(byteSize(rope8), s(XN + 64 * 1024, XN + 64 * 1024));
var ext8 = rope8; // Stop calling it what it's not (though it'll change again soon.)

// Test extensible strings.
//
// Appending another copy of the fragment should yield another rope.
//
// Flattening that should turn the original rope into a dependent string, and
// yield a new linear string, of the same size as the original.
var rope8a = ext8 + fragment8;
checkEq(byteSize(rope8a), s(Nursery(RN), Nursery(RN)));
rope8a.match(/x/, function() { checkEq(true, false); });
checkEq(byteSize(rope8a), s(Nursery(XN) + 65536, Nursery(XN) + 65536));
checkEq(byteSize(ext8), s(DN, DN));

// Latin-1 dependent strings in the nursery.
checkEq(byteSize(ext8.substr(1000, 2000)), s(Nursery(DN), Nursery(DN)));
checkEq(byteSize(matches8[0]), s(Nursery(DN), Nursery(DN)));
checkEq(byteSize(matches8[1]), s(Nursery(DN), Nursery(DN)));

// Tenure everything and do it again.
ext8 = copyString(ext8);
rope8a = ext8 + fragment8;
minorgc();
checkEq(byteSize(rope8a), s(RN, RN));
rope8a.match(/x/, function() { checkEq(true, false); });
checkEq(byteSize(rope8a), s(XN + 65536, XN + 65536));
checkEq(byteSize(rope8), s(RN, RN));

// Latin-1 tenured dependent strings.

// Run a minor GC and hand |s| back, so the caller's string is measured after
// that collection.
function tenure(s) {
  minorgc();
  return s;
}
var sub = tenure(rope8.substr(1000, 2000));
checkEq(byteSize(sub), s_ifDependent(sub, [DN, DN], [LN+2048, LN+2048]));
// Each match is checked against its own representation: the two strings are
// distinct and may independently be dependent or cloned.
checkEq(byteSize(matches8[0]), s_ifDependent(matches8[0], [DN, DN], [LN+48, LN+48]));
checkEq(byteSize(matches8[1]), s_ifDependent(matches8[1], [DN, DN], [LN+48, LN+48]));

// A char16_t rope. This changes size when flattened.
// "From the Heliconian Muses let us begin to sing"
//   --- Hesiod, Theogony
var fragment16 = "μουσάων Ἑλικωνιάδων ἀρχώμεθ᾽ ἀείδειν";
var rope16 = fragment16;
// Ten doublings give 1024 repetitions of the fragment.
for (var i = 0; i < 10; i++) {
  rope16 = rope16 + rope16;
}
checkEq(byteSize(rope16), s(Nursery(RN), Nursery(RN)));
let matches16 = rope16.match(/(Ἑλικωνιάδων ἀρχώμεθ᾽)/);
checkEq(byteSize(rope16), s(Nursery(XN) + 128 * 1024, Nursery(XN) + 128 * 1024));
var ext16 = rope16;

// char16_t dependent strings in the nursery.
checkEq(byteSize(ext16.substr(1000, 2000)), s(Nursery(DN), Nursery(DN)));
checkEq(byteSize(matches16[0]), s(Nursery(DN), Nursery(DN)));
checkEq(byteSize(matches16[1]), s(Nursery(DN), Nursery(DN)));

// Test extensible strings.
//
// Appending another copy of the fragment should yield another rope.
//
// Flattening that should turn the original rope into a dependent string, and
// yield a new linear string, of the same size as the original.
// Declared explicitly (like rope8a) rather than created as an implicit global
// by a bare assignment.
var rope16a = ext16 + fragment16;
checkEq(byteSize(rope16a), s(Nursery(RN), Nursery(RN)));
rope16a.match(/x/, function() { checkEq(true, false); });
checkEq(byteSize(rope16a), s(Nursery(XN) + 128 * 1024, Nursery(XN) + 128 * 1024));
checkEq(byteSize(ext16), s(Nursery(DN), Nursery(DN)));

// Tenure everything and try again. This time it should steal the extensible
// characters and convert the root into an extensible string using them.
ext16 = copyString(ext16);
rope16a = ext16 + fragment16;
minorgc();
finishBackgroundFree();
checkEq(byteSize(rope16a), s(RN, RN));
rope16a.match(/x/, function() { checkEq(true, false); });
checkEq(byteSize(rope16a), s(XN + 128 * 1024, XN + 128 * 1024));
checkEq(byteSize(ext16), s(RN, RN));

// Test external strings.
//
// We only support char16_t external strings and external strings are never
// allocated in the nursery. If this ever changes, please add tests for the new
// cases. Also note that on Windows mozmalloc's smallest allocation size is
// two words compared to one word on other platforms.
checkEq(byteSize(newString("", {external: true})), s(EN+16, EN+16));
checkEq(byteSize(newString("1", {external: true})), s(EN+16, EN+16));
checkEq(byteSize(newString("12", {external: true})), s(EN+16, EN+16));
checkEq(byteSize(newString("123", {external: true})), s(EN+16, EN+16));
checkEq(byteSize(newString("1234", {external: true})), s(EN+16, EN+16));
checkEq(byteSize(newString("12345", {external: true})), s(EN+16, EN+16));
checkEq(byteSize(newString("123456789.123456789.1234", {external: true})), s(EN+48, EN+48));
checkEq(byteSize(newString("123456789.123456789.12345", {external: true})), s(EN+64, EN+64));

// Nursery-allocated chars.
//
// byteSize will not include the space used by the nursery for the chars.
checkEq(byteSize(newString("123456789.123456789.12345")), s(Nursery(XN)+0,Nursery(XN)+0));
checkEq(byteSize(newString("123456789.123456789.123456789.123")), s(Nursery(XN)+0,Nursery(XN)+0));

// Fail the test if any checkEq above recorded a mismatch.
assertEq(`${checkFailures} failure(s)`, "0 failure(s)");