hb-sanitize.hh (13759B)
1 /* 2 * Copyright © 2007,2008,2009,2010 Red Hat, Inc. 3 * Copyright © 2012,2018 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Red Hat Author(s): Behdad Esfahbod 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29 #ifndef HB_SANITIZE_HH 30 #define HB_SANITIZE_HH 31 32 #include "hb.hh" 33 #include "hb-blob.hh" 34 #include "hb-dispatch.hh" 35 36 37 /* 38 * Sanitize 39 * 40 * 41 * === Introduction === 42 * 43 * The sanitize machinery is at the core of our zero-cost font loading. We 44 * mmap() font file into memory and create a blob out of it. Font subtables 45 * are returned as a readonly sub-blob of the main font blob. These table 46 * blobs are then sanitized before use, to ensure invalid memory access does 47 * not happen. The toplevel sanitize API use is like, eg. 
to load the 'head' 48 * table: 49 * 50 * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face); 51 * 52 * The blob then can be converted to a head table struct with: 53 * 54 * const head *head_table = head_blob->as<head> (); 55 * 56 * What the reference_table does is, to call hb_face_reference_table() to load 57 * the table blob, sanitize it and return either the sanitized blob, or empty 58 * blob if sanitization failed. The blob->as() function returns the null 59 * object of its template type argument if the blob is empty. Otherwise, it 60 * just casts the blob contents to the desired type. 61 * 62 * Sanitizing a blob of data with a type T works as follows (with minor 63 * simplification): 64 * 65 * - Cast blob content to T*, call sanitize() method of it, 66 * - If sanitize succeeded, return blob. 67 * - Return empty blob otherwise. 68 * 69 * 70 * === The sanitize() contract === 71 * 72 * The sanitize() method of each object type shall return `true` if it's safe to 73 * call other methods of the object, and `false` otherwise. 74 * 75 * Note that what sanitize() checks for might align with what the specification 76 * describes as valid table data, but does not have to be. In particular, we 77 * do NOT want to be pedantic and concern ourselves with validity checks that 78 * are irrelevant to our use of the table. On the contrary, we want to be 79 * lenient with error handling and accept invalid data to the extent that it 80 * does not impose extra burden on us. 81 * 82 * Based on the sanitize contract, one can see that what we check for depends 83 * on how we use the data in other table methods. Ie. if other table methods 84 * assume that offsets do NOT point out of the table data block, then that's 85 * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On 86 * the other hand, if other methods do such checks themselves, then sanitize() 87 * does not have to bother with them (glyf/loca work this way). 
The choice 88 * depends on the table structure and sanitize() performance. For example, to 89 * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard 90 * to avoid such costs during font loading. By postponing such checks to the 91 * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime 92 * cost to O(used-glyphs). As such, this is preferred. 93 * 94 * The same argument can be made re GSUB/GPOS/GDEF, but there, the table 95 * structure is so complicated that by checking all offsets at sanitize() time, 96 * we make the code much simpler in other methods, as offsets and referenced 97 * objects do not need to be validated at each use site. 98 * 99 * Note: 100 * Sanitize was named so because it used to try to recover from errors by 101 * modifying the data to make it valid. This is no longer the case, as it 102 * could make HarfBuzz hallucinate new rules if there was aliasing in the 103 * data. However, the name stuck. See: https://behdad.github.io/harfbust/ 104 */ 105 106 /* This limits sanitizing time on really broken fonts. 
*/ 107 #ifndef HB_SANITIZE_MAX_EDITS 108 #define HB_SANITIZE_MAX_EDITS 32 109 #endif 110 #ifndef HB_SANITIZE_MAX_OPS_FACTOR 111 #define HB_SANITIZE_MAX_OPS_FACTOR 64 112 #endif 113 #ifndef HB_SANITIZE_MAX_OPS_MIN 114 #define HB_SANITIZE_MAX_OPS_MIN 16384 115 #endif 116 #ifndef HB_SANITIZE_MAX_OPS_MAX 117 #define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF 118 #endif 119 #ifndef HB_SANITIZE_MAX_SUBTABLES 120 #define HB_SANITIZE_MAX_SUBTABLES 0x4000 121 #endif 122 123 struct hb_sanitize_context_t : 124 hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE> 125 { 126 hb_sanitize_context_t (const char *start_ = nullptr, const char *end_ = nullptr) : 127 start (start_), end (end_), 128 length (0), 129 max_ops (0), max_subtables (0), 130 recursion_depth (0), 131 writable (false), 132 blob (nullptr), 133 num_glyphs (65536), 134 num_glyphs_set (false), 135 lazy_some_gpos (false) {} 136 137 const char *get_name () { return "SANITIZE"; } 138 template <typename T, typename F> 139 bool may_dispatch (const T *obj HB_UNUSED, const F *format) 140 { 141 return format->sanitize (this) && 142 hb_barrier (); 143 } 144 static return_t default_return_value () { return true; } 145 static return_t no_dispatch_return_value () { return false; } 146 bool stop_sublookup_iteration (const return_t r) const { return !r; } 147 148 bool visit_subtables (unsigned count) 149 { 150 max_subtables += count; 151 return max_subtables < HB_SANITIZE_MAX_SUBTABLES; 152 } 153 154 private: 155 template <typename T, typename ...Ts> auto 156 _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN 157 ( obj.sanitize (this, std::forward<Ts> (ds)...) ) 158 template <typename T, typename ...Ts> auto 159 _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN 160 ( obj.dispatch (this, std::forward<Ts> (ds)...) ) 161 public: 162 template <typename T, typename ...Ts> auto 163 dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN 164 ( _dispatch (obj, hb_prioritize, std::forward<Ts> (ds)...) 
) 165 166 hb_sanitize_context_t (hb_blob_t *b) : hb_sanitize_context_t () 167 { 168 init (b); 169 170 if (blob) 171 start_processing (); 172 } 173 174 ~hb_sanitize_context_t () 175 { 176 if (blob) 177 end_processing (); 178 } 179 180 void init (hb_blob_t *b) 181 { 182 this->blob = hb_blob_reference (b); 183 this->writable = false; 184 } 185 186 void set_num_glyphs (unsigned int num_glyphs_) 187 { 188 num_glyphs = num_glyphs_; 189 num_glyphs_set = true; 190 } 191 unsigned int get_num_glyphs () { return num_glyphs; } 192 193 void set_max_ops (int max_ops_) { max_ops = max_ops_; } 194 195 template <typename T> 196 void set_object (const T *obj) 197 { 198 reset_object (); 199 200 if (!obj) return; 201 202 const char *obj_start = (const char *) obj; 203 if (unlikely (obj_start < this->start || this->end <= obj_start)) 204 { 205 this->start = this->end = nullptr; 206 this->length = 0; 207 } 208 else 209 { 210 this->start = obj_start; 211 this->end = obj_start + hb_min (size_t (this->end - obj_start), obj->get_size ()); 212 this->length = this->end - this->start; 213 } 214 } 215 216 void reset_object () 217 { 218 if (this->blob) 219 { 220 this->start = this->blob->data; 221 this->end = this->start + this->blob->length; 222 } 223 this->length = this->end - this->start; 224 assert (this->start <= this->end); /* Must not overflow. 
*/ 225 } 226 227 void start_processing (const char *start_ = nullptr, const char *end_ = nullptr) 228 { 229 if (start_) 230 { 231 this->start = start_; 232 this->end = end_; 233 } 234 reset_object (); 235 unsigned m; 236 if (unlikely (hb_unsigned_mul_overflows (this->end - this->start, HB_SANITIZE_MAX_OPS_FACTOR, &m))) 237 this->max_ops = HB_SANITIZE_MAX_OPS_MAX; 238 else 239 this->max_ops = hb_clamp (m, 240 (unsigned) HB_SANITIZE_MAX_OPS_MIN, 241 (unsigned) HB_SANITIZE_MAX_OPS_MAX); 242 this->debug_depth = 0; 243 this->recursion_depth = 0; 244 245 DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1, 246 "start [%p..%p] (%lu bytes)", 247 this->start, this->end, 248 (unsigned long) (this->end - this->start)); 249 } 250 251 void end_processing () 252 { 253 DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1, 254 "end [%p..%p]", 255 this->start, this->end); 256 257 hb_blob_destroy (this->blob); 258 this->blob = nullptr; 259 this->start = this->end = nullptr; 260 this->length = 0; 261 } 262 263 bool check_ops(unsigned count) 264 { 265 /* Avoid underflow */ 266 if (unlikely (this->max_ops < 0 || count >= (unsigned) this->max_ops)) 267 { 268 this->max_ops = -1; 269 return false; 270 } 271 this->max_ops -= (int) count; 272 return true; 273 } 274 275 #ifndef HB_OPTIMIZE_SIZE 276 HB_ALWAYS_INLINE 277 #endif 278 bool check_range (const void *base, 279 unsigned int len) const 280 { 281 const char *p = (const char *) base; 282 bool ok = (uintptr_t) (p - this->start) <= this->length && 283 (unsigned int) (this->end - p) >= len && 284 ((this->max_ops -= len) > 0); 285 286 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, 287 "check_range [%p..%p]" 288 " (%u bytes) in [%p..%p] -> %s", 289 p, p + len, len, 290 this->start, this->end, 291 ok ? 
"OK" : "OUT-OF-RANGE"); 292 293 return likely (ok); 294 } 295 #ifndef HB_OPTIMIZE_SIZE 296 HB_ALWAYS_INLINE 297 #endif 298 bool check_range_fast (const void *base, 299 unsigned int len) const 300 { 301 const char *p = (const char *) base; 302 bool ok = ((uintptr_t) (p - this->start) <= this->length && 303 (unsigned int) (this->end - p) >= len); 304 305 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, 306 "check_range_fast [%p..%p]" 307 " (%u bytes) in [%p..%p] -> %s", 308 p, p + len, len, 309 this->start, this->end, 310 ok ? "OK" : "OUT-OF-RANGE"); 311 312 return likely (ok); 313 } 314 315 #ifndef HB_OPTIMIZE_SIZE 316 HB_ALWAYS_INLINE 317 #endif 318 bool check_point (const void *base) const 319 { 320 const char *p = (const char *) base; 321 bool ok = (uintptr_t) (p - this->start) <= this->length; 322 323 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, 324 "check_point [%p]" 325 " in [%p..%p] -> %s", 326 p, 327 this->start, this->end, 328 ok ? "OK" : "OUT-OF-RANGE"); 329 330 return likely (ok); 331 } 332 333 template <typename T> 334 bool check_range (const T *base, 335 unsigned int a, 336 unsigned int b) const 337 { 338 unsigned m; 339 return !hb_unsigned_mul_overflows (a, b, &m) && 340 this->check_range (base, m); 341 } 342 343 template <typename T> 344 bool check_range (const T *base, 345 unsigned int a, 346 unsigned int b, 347 unsigned int c) const 348 { 349 unsigned m; 350 return !hb_unsigned_mul_overflows (a, b, &m) && 351 this->check_range (base, m, c); 352 } 353 354 template <typename T> 355 HB_ALWAYS_INLINE 356 bool check_array_sized (const T *base, unsigned int len, unsigned len_size) const 357 { 358 if (len_size >= 4) 359 { 360 if (unlikely (hb_unsigned_mul_overflows (len, hb_static_size (T), &len))) 361 return false; 362 } 363 else 364 len = len * hb_static_size (T); 365 return this->check_range (base, len); 366 } 367 368 template <typename T> 369 bool check_array (const T *base, unsigned int len) const 370 { 371 return this->check_range 
(base, len, hb_static_size (T)); 372 } 373 374 template <typename T> 375 bool check_array (const T *base, 376 unsigned int a, 377 unsigned int b) const 378 { 379 return this->check_range (base, hb_static_size (T), a, b); 380 } 381 382 bool check_start_recursion (int max_depth) 383 { 384 if (unlikely (recursion_depth >= max_depth)) return false; 385 return ++recursion_depth; 386 } 387 388 bool end_recursion (bool result) 389 { 390 recursion_depth--; 391 return result; 392 } 393 394 template <typename Type> 395 #ifndef HB_OPTIMIZE_SIZE 396 HB_ALWAYS_INLINE 397 #endif 398 bool check_struct (const Type *obj) const 399 { 400 if (sizeof (uintptr_t) == sizeof (uint32_t)) 401 return likely (this->check_range_fast (obj, obj->min_size)); 402 else 403 return likely (this->check_point ((const char *) obj + obj->min_size)); 404 } 405 406 template <typename Type> 407 hb_blob_t *sanitize_blob (hb_blob_t *blob) 408 { 409 bool sane; 410 411 init (blob); 412 413 DEBUG_MSG_FUNC (SANITIZE, start, "start"); 414 415 start_processing (); 416 417 if (unlikely (!start)) 418 { 419 end_processing (); 420 return blob; 421 } 422 423 Type *t = reinterpret_cast<Type *> (const_cast<char *> (start)); 424 425 sane = t->sanitize (this); 426 427 end_processing (); 428 429 DEBUG_MSG_FUNC (SANITIZE, start, sane ? 
"PASSED" : "FAILED"); 430 if (sane) 431 { 432 hb_blob_make_immutable (blob); 433 return blob; 434 } 435 else 436 { 437 hb_blob_destroy (blob); 438 return hb_blob_get_empty (); 439 } 440 } 441 442 template <typename Type> 443 hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag) 444 { 445 if (!num_glyphs_set) 446 set_num_glyphs (hb_face_get_glyph_count (face)); 447 return sanitize_blob<Type> (hb_face_reference_table (face, tableTag)); 448 } 449 450 const char *start, *end; 451 unsigned length; 452 mutable int max_ops, max_subtables; 453 private: 454 int recursion_depth; 455 bool writable; 456 hb_blob_t *blob; 457 unsigned int num_glyphs; 458 bool num_glyphs_set; 459 public: 460 bool lazy_some_gpos; 461 }; 462 463 struct hb_sanitize_with_object_t 464 { 465 template <typename T> 466 hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c) 467 { c->set_object (obj); } 468 ~hb_sanitize_with_object_t () 469 { c->reset_object (); } 470 471 private: 472 hb_sanitize_context_t *c; 473 }; 474 475 476 #endif /* HB_SANITIZE_HH */