hb-buffer-verify.cc (13216B)
1 /* 2 * Copyright © 2022 Behdad Esfahbod 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #include "hb.hh" 28 29 #ifndef HB_NO_BUFFER_VERIFY 30 31 #include "hb-buffer.hh" 32 33 34 #define BUFFER_VERIFY_ERROR "buffer verify error: " 35 static inline void 36 buffer_verify_error (hb_buffer_t *buffer, 37 hb_font_t *font, 38 const char *fmt, 39 ...) HB_PRINTF_FUNC(3, 4); 40 41 static inline void 42 buffer_verify_error (hb_buffer_t *buffer, 43 hb_font_t *font, 44 const char *fmt, 45 ...) 46 { 47 va_list ap; 48 va_start (ap, fmt); 49 if (buffer->messaging ()) 50 { 51 buffer->message_impl (font, fmt, ap); 52 } 53 else 54 { 55 fprintf (stderr, "harfbuzz "); 56 vfprintf (stderr, fmt, ap); 57 fprintf (stderr, "\n"); 58 } 59 va_end (ap); 60 } 61 62 static bool 63 buffer_verify_monotone (hb_buffer_t *buffer, 64 hb_font_t *font) 65 { 66 if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) 67 { 68 /* Cannot perform this check without monotone clusters. */ 69 return true; 70 } 71 72 bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); 73 74 unsigned int num_glyphs; 75 hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); 76 77 for (unsigned int i = 1; i < num_glyphs; i++) 78 if (info[i-1].cluster != info[i].cluster && 79 (info[i-1].cluster < info[i].cluster) != is_forward) 80 { 81 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone."); 82 return false; 83 } 84 85 return true; 86 } 87 88 static bool 89 buffer_verify_unsafe_to_break (hb_buffer_t *buffer, 90 hb_buffer_t *text_buffer, 91 hb_font_t *font, 92 const hb_feature_t *features, 93 unsigned int num_features, 94 const char * const *shapers) 95 { 96 if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) 97 { 98 /* Cannot perform this check without monotone clusters. */ 99 return true; 100 } 101 102 /* Check that breaking up shaping at safe-to-break is indeed safe. */ 103 104 hb_buffer_t *fragment = hb_buffer_create_similar (buffer); 105 hb_buffer_set_flags (fragment, (hb_buffer_flags_t (hb_buffer_get_flags (fragment) & ~HB_BUFFER_FLAG_VERIFY))); 106 hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); 107 hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); 108 109 unsigned int num_glyphs; 110 hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); 111 112 unsigned int num_chars; 113 hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); 114 115 /* Chop text and shape fragments. */ 116 bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); 117 unsigned int start = 0; 118 unsigned int text_start = forward ? 0 : num_chars; 119 unsigned int text_end = text_start; 120 for (unsigned int end = 1; end < num_glyphs + 1; end++) 121 { 122 if (end < num_glyphs && 123 (info[end].cluster == info[end-1].cluster || 124 info[end-(forward?0:1)].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)) 125 continue; 126 127 /* Shape segment corresponding to glyphs start..end. */ 128 if (end == num_glyphs) 129 { 130 if (forward) 131 text_end = num_chars; 132 else 133 text_start = 0; 134 } 135 else 136 { 137 if (forward) 138 { 139 unsigned int cluster = info[end].cluster; 140 while (text_end < num_chars && text[text_end].cluster < cluster) 141 text_end++; 142 } 143 else 144 { 145 unsigned int cluster = info[end - 1].cluster; 146 while (text_start && text[text_start - 1].cluster >= cluster) 147 text_start--; 148 } 149 } 150 assert (text_start < text_end); 151 152 if (false) 153 printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end); 154 155 hb_buffer_clear_contents (fragment); 156 157 hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); 158 if (0 < text_start) 159 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); 160 if (text_end < num_chars) 161 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); 162 hb_buffer_set_flags (fragment, flags); 163 164 hb_buffer_append (fragment, text_buffer, text_start, text_end); 165 if (!hb_shape_full (font, fragment, features, num_features, shapers) || 166 fragment->successful) 167 { 168 hb_buffer_destroy (reconstruction); 169 hb_buffer_destroy (fragment); 170 return true; 171 } 172 hb_buffer_append (reconstruction, fragment, 0, -1); 173 174 start = end; 175 if (forward) 176 text_start = text_end; 177 else 178 text_end = text_start; 179 } 180 181 bool ret = true; 182 if (likely (reconstruction->successful)) 183 { 184 hb_buffer_diff_flags_t diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); 185 if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) 186 { 187 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-break test failed."); 188 ret = false; 189 190 /* Return the reconstructed result instead so it can be inspected. */ 191 hb_buffer_set_length (buffer, 0); 192 hb_buffer_append (buffer, reconstruction, 0, -1); 193 } 194 } 195 196 hb_buffer_destroy (reconstruction); 197 hb_buffer_destroy (fragment); 198 199 return ret; 200 } 201 202 static bool 203 buffer_verify_unsafe_to_concat (hb_buffer_t *buffer, 204 hb_buffer_t *text_buffer, 205 hb_font_t *font, 206 const hb_feature_t *features, 207 unsigned int num_features, 208 const char * const *shapers) 209 { 210 if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) 211 { 212 /* Cannot perform this check without monotone clusters. */ 213 return true; 214 } 215 216 /* Check that shuffling up text before shaping at safe-to-concat points 217 * is indeed safe. */ 218 219 /* This is what we do: 220 * 221 * 1. We shape text once. Then segment the text at all the safe-to-concat 222 * points; 223 * 224 * 2. Then we create two buffers, one containing all the even segments and 225 * one all the odd segments. 226 * 227 * 3. Because all these segments were safe-to-concat at both ends, we 228 * expect that concatenating them and shaping should NOT change the 229 * shaping results of each segment. As such, we expect that after 230 * shaping the two buffers, we still get cluster boundaries at the 231 * segment boundaries, and that those all are safe-to-concat points. 232 * Moreover, that there are NOT any safe-to-concat points within the 233 * segments. 234 * 235 * 4. Finally, we reconstruct the shaping results of the original text by 236 * simply interleaving the shaping results of the segments from the two 237 * buffers, and assert that the total shaping results is the same as 238 * the one from original buffer in step 1. 239 */ 240 241 hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer), 242 hb_buffer_create_similar (buffer)}; 243 hb_buffer_set_flags (fragments[0], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[0]) & ~HB_BUFFER_FLAG_VERIFY))); 244 hb_buffer_set_flags (fragments[1], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[1]) & ~HB_BUFFER_FLAG_VERIFY))); 245 hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); 246 hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); 247 hb_segment_properties_t props; 248 hb_buffer_get_segment_properties (buffer, &props); 249 hb_buffer_set_segment_properties (fragments[0], &props); 250 hb_buffer_set_segment_properties (fragments[1], &props); 251 hb_buffer_set_segment_properties (reconstruction, &props); 252 253 unsigned num_glyphs; 254 hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); 255 256 unsigned num_chars; 257 hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); 258 259 bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); 260 261 if (!forward) 262 hb_buffer_reverse (buffer); 263 264 /* 265 * Split text into segments and collect into to fragment streams. 266 */ 267 { 268 unsigned fragment_idx = 0; 269 unsigned start = 0; 270 unsigned text_start = 0; 271 unsigned text_end = 0; 272 for (unsigned end = 1; end < num_glyphs + 1; end++) 273 { 274 if (end < num_glyphs && 275 (info[end].cluster == info[end-1].cluster || 276 info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) 277 continue; 278 279 /* Accumulate segment corresponding to glyphs start..end. */ 280 if (end == num_glyphs) 281 text_end = num_chars; 282 else 283 { 284 unsigned cluster = info[end].cluster; 285 while (text_end < num_chars && text[text_end].cluster < cluster) 286 text_end++; 287 } 288 assert (text_start < text_end); 289 290 if (false) 291 printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end); 292 293 #if 0 294 hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); 295 if (0 < text_start) 296 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); 297 if (text_end < num_chars) 298 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); 299 hb_buffer_set_flags (fragment, flags); 300 #endif 301 302 hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); 303 304 start = end; 305 text_start = text_end; 306 fragment_idx = 1 - fragment_idx; 307 } 308 } 309 310 bool ret = true; 311 hb_buffer_diff_flags_t diff; 312 /* 313 * Shape the two fragment streams. 314 */ 315 if (!hb_shape_full (font, fragments[0], features, num_features, shapers) || 316 !fragments[0]->successful) 317 goto out; 318 319 if (!hb_shape_full (font, fragments[1], features, num_features, shapers) || 320 !fragments[1]->successful) 321 goto out; 322 323 if (!forward) 324 { 325 hb_buffer_reverse (fragments[0]); 326 hb_buffer_reverse (fragments[1]); 327 } 328 329 /* 330 * Reconstruct results. 331 */ 332 { 333 unsigned fragment_idx = 0; 334 unsigned fragment_start[2] {0, 0}; 335 unsigned fragment_num_glyphs[2]; 336 hb_glyph_info_t *fragment_info[2]; 337 for (unsigned i = 0; i < 2; i++) 338 fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); 339 while (fragment_start[0] < fragment_num_glyphs[0] || 340 fragment_start[1] < fragment_num_glyphs[1]) 341 { 342 unsigned fragment_end = fragment_start[fragment_idx] + 1; 343 while (fragment_end < fragment_num_glyphs[fragment_idx] && 344 (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || 345 fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) 346 fragment_end++; 347 348 hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); 349 350 fragment_start[fragment_idx] = fragment_end; 351 fragment_idx = 1 - fragment_idx; 352 } 353 } 354 355 if (!forward) 356 { 357 hb_buffer_reverse (buffer); 358 hb_buffer_reverse (reconstruction); 359 } 360 361 if (likely (reconstruction->successful)) 362 { 363 /* 364 * Diff results. 365 */ 366 diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); 367 if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) 368 { 369 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-concat test failed."); 370 ret = false; 371 372 /* Return the reconstructed result instead so it can be inspected. */ 373 hb_buffer_set_length (buffer, 0); 374 hb_buffer_append (buffer, reconstruction, 0, -1); 375 } 376 } 377 378 out: 379 hb_buffer_destroy (reconstruction); 380 hb_buffer_destroy (fragments[0]); 381 hb_buffer_destroy (fragments[1]); 382 383 return ret; 384 } 385 386 bool 387 hb_buffer_t::verify (hb_buffer_t *text_buffer, 388 hb_font_t *font, 389 const hb_feature_t *features, 390 unsigned int num_features, 391 const char * const *shapers) 392 { 393 bool ret = true; 394 if (!buffer_verify_monotone (this, font)) 395 ret = false; 396 if (!buffer_verify_unsafe_to_break (this, text_buffer, font, features, num_features, shapers)) 397 ret = false; 398 if ((flags & HB_BUFFER_FLAG_PRODUCE_UNSAFE_TO_CONCAT) != 0 && 399 !buffer_verify_unsafe_to_concat (this, text_buffer, font, features, num_features, shapers)) 400 ret = false; 401 if (!ret) 402 { 403 #ifndef HB_NO_BUFFER_SERIALIZE 404 unsigned len = text_buffer->len; 405 hb_vector_t<char> bytes; 406 if (likely (bytes.resize (len * 10 + 16))) 407 { 408 hb_buffer_serialize_unicode (text_buffer, 409 0, len, 410 bytes.arrayZ, bytes.length, 411 &len, 412 HB_BUFFER_SERIALIZE_FORMAT_TEXT, 413 HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS); 414 buffer_verify_error (this, font, BUFFER_VERIFY_ERROR "text was: %s.", bytes.arrayZ ? bytes.arrayZ : ""); 415 } 416 #endif 417 } 418 return ret; 419 } 420 421 422 #endif