tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

utf16.h (23910B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  utf16.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 1999sep09
     16 *   created by: Markus W. Scherer
     17 */
     18 
     19 /**
     20 * \file
     21 * \brief C API: 16-bit Unicode handling macros
     22 * 
     23 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
     24 *
     25 * For more information see utf.h and the ICU User Guide Strings chapter
     26 * (https://unicode-org.github.io/icu/userguide/strings).
     27 *
     28 * <em>Usage:</em>
     29 * ICU coding guidelines for if() statements should be followed when using these macros.
     30 * Compound statements (curly braces {}) must be used  for if-else-while... 
     31 * bodies and all macro statements should be terminated with semicolon.
     32 */
     33 
     34 #ifndef __UTF16_H__
     35 #define __UTF16_H__
     36 
     37 #include <stdbool.h>
     38 #include "unicode/umachine.h"
     39 #ifndef __UTF_H__
     40 #   include "unicode/utf.h"
     41 #endif
     42 
     43 /* single-code point definitions -------------------------------------------- */
     44 
     45 /**
     46 * Does this code unit alone encode a code point (BMP, not a surrogate)?
     47 * @param c 16-bit code unit
     48 * @return true or false
     49 * @stable ICU 2.4
     50 */
     51 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
     52 
     53 /**
     54 * Is this code unit a lead surrogate (U+d800..U+dbff)?
     55 * @param c 16-bit code unit
     56 * @return true or false
     57 * @stable ICU 2.4
     58 */
     59 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
     60 
     61 /**
     62 * Is this code unit a trail surrogate (U+dc00..U+dfff)?
     63 * @param c 16-bit code unit
     64 * @return true or false
     65 * @stable ICU 2.4
     66 */
     67 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
     68 
     69 /**
     70 * Is this code unit a surrogate (U+d800..U+dfff)?
     71 * @param c 16-bit code unit
     72 * @return true or false
     73 * @stable ICU 2.4
     74 */
     75 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
     76 
     77 /**
     78 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
     79 * is it a lead surrogate?
     80 * @param c 16-bit code unit
     81 * @return true or false
     82 * @stable ICU 2.4
     83 */
     84 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
     85 
     86 /**
     87 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
     88 * is it a trail surrogate?
     89 * @param c 16-bit code unit
     90 * @return true or false
     91 * @stable ICU 4.2
     92 */
     93 #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
     94 
     95 /**
     96 * Helper constant for U16_GET_SUPPLEMENTARY.
     97 * @internal
     98 */
     99 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
    100 
    101 /**
    102 * Get a supplementary code point value (U+10000..U+10ffff)
    103 * from its lead and trail surrogates.
    104 * The result is undefined if the input values are not
    105 * lead and trail surrogates.
    106 *
    107 * @param lead lead surrogate (U+d800..U+dbff)
    108 * @param trail trail surrogate (U+dc00..U+dfff)
    109 * @return supplementary code point (U+10000..U+10ffff)
    110 * @stable ICU 2.4
    111 */
    112 #define U16_GET_SUPPLEMENTARY(lead, trail) \
    113    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
    114 
    115 
    116 /**
    117 * Get the lead surrogate (0xd800..0xdbff) for a
    118 * supplementary code point (0x10000..0x10ffff).
    119 * @param supplementary 32-bit code point (U+10000..U+10ffff)
    120 * @return lead surrogate (U+d800..U+dbff) for supplementary
    121 * @stable ICU 2.4
    122 */
    123 #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
    124 
    125 /**
    126 * Get the trail surrogate (0xdc00..0xdfff) for a
    127 * supplementary code point (0x10000..0x10ffff).
    128 * @param supplementary 32-bit code point (U+10000..U+10ffff)
    129 * @return trail surrogate (U+dc00..U+dfff) for supplementary
    130 * @stable ICU 2.4
    131 */
    132 #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
    133 
    134 /**
    135 * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
    136 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
    137 * @param c 32-bit code point
    138 * @return 1 or 2
    139 * @stable ICU 2.4
    140 */
    141 #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
    142 
/**
* Upper bound on the number of 16-bit code units that one Unicode code point
* (U+0000..U+10ffff) occupies in UTF-16; matches the largest value
* produced by U16_LENGTH.
* @return 2
* @stable ICU 2.4
*/
#define U16_MAX_LENGTH 2
    149 
    150 /**
    151 * Get a code point from a string at a random-access offset,
    152 * without changing the offset.
    153 * "Unsafe" macro, assumes well-formed UTF-16.
    154 *
    155 * The offset may point to either the lead or trail surrogate unit
    156 * for a supplementary code point, in which case the macro will read
    157 * the adjacent matching surrogate as well.
    158 * The result is undefined if the offset points to a single, unpaired surrogate.
    159 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
    160 *
    161 * @param s const UChar * string
    162 * @param i string offset
    163 * @param c output UChar32 variable
    164 * @see U16_GET
    165 * @stable ICU 2.4
    166 */
    167 #define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    168    (c)=(s)[i]; \
    169    if(U16_IS_SURROGATE(c)) { \
    170        if(U16_IS_SURROGATE_LEAD(c)) { \
    171            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
    172        } else { \
    173            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
    174        } \
    175    } \
    176 } UPRV_BLOCK_MACRO_END
    177 
    178 /**
    179 * Get a code point from a string at a random-access offset,
    180 * without changing the offset.
    181 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    182 *
    183 * The offset may point to either the lead or trail surrogate unit
    184 * for a supplementary code point, in which case the macro will read
    185 * the adjacent matching surrogate as well.
    186 *
    187 * The length can be negative for a NUL-terminated string.
    188 *
    189 * If the offset points to a single, unpaired surrogate, then
    190 * c is set to that unpaired surrogate.
    191 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
    192 *
    193 * @param s const UChar * string
    194 * @param start starting string offset (usually 0)
    195 * @param i string offset, must be start<=i<length
    196 * @param length string length
    197 * @param c output UChar32 variable
    198 * @see U16_GET_UNSAFE
    199 * @stable ICU 2.4
    200 */
    201 #define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    202    (c)=(s)[i]; \
    203    if(U16_IS_SURROGATE(c)) { \
    204        uint16_t __c2; \
    205        if(U16_IS_SURROGATE_LEAD(c)) { \
    206            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
    207                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    208            } \
    209        } else { \
    210            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    211                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    212            } \
    213        } \
    214    } \
    215 } UPRV_BLOCK_MACRO_END
    216 
    217 /**
    218 * Get a code point from a string at a random-access offset,
    219 * without changing the offset.
    220 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    221 *
    222 * The offset may point to either the lead or trail surrogate unit
    223 * for a supplementary code point, in which case the macro will read
    224 * the adjacent matching surrogate as well.
    225 *
    226 * The length can be negative for a NUL-terminated string.
    227 *
    228 * If the offset points to a single, unpaired surrogate, then
    229 * c is set to U+FFFD.
    230 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
    231 *
    232 * @param s const UChar * string
    233 * @param start starting string offset (usually 0)
    234 * @param i string offset, must be start<=i<length
    235 * @param length string length
    236 * @param c output UChar32 variable
    237 * @see U16_GET_UNSAFE
    238 * @stable ICU 60
    239 */
    240 #define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    241    (c)=(s)[i]; \
    242    if(U16_IS_SURROGATE(c)) { \
    243        uint16_t __c2; \
    244        if(U16_IS_SURROGATE_LEAD(c)) { \
    245            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
    246                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    247            } else { \
    248                (c)=0xfffd; \
    249            } \
    250        } else { \
    251            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    252                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    253            } else { \
    254                (c)=0xfffd; \
    255            } \
    256        } \
    257    } \
    258 } UPRV_BLOCK_MACRO_END
    259 
    260 /* definitions with forward iteration --------------------------------------- */
    261 
/**
* Reads the code point that starts at offset i and moves i past it
* to the next code point boundary (post-incrementing forward iteration).
* "Unsafe" macro: the text must be well-formed UTF-16 and no bounds are checked.
*
* If i is at a lead surrogate, the following unit is consumed as its trail
* without being tested, and the supplementary code point is produced.
* If i is at a trail surrogate, that unit alone is returned as the code point.
* The result is undefined if i is at an unpaired lead surrogate.
* The macro arguments are evaluated more than once.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_NEXT
* @stable ICU 2.4
*/
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
   (c)=(s)[(i)++]; \
   if(U16_IS_LEAD(c)) { \
       (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
   } \
} UPRV_BLOCK_MACRO_END
    287 
    288 /**
    289 * Get a code point from a string at a code point boundary offset,
    290 * and advance the offset to the next code point boundary.
    291 * (Post-incrementing forward iteration.)
    292 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    293 *
    294 * The length can be negative for a NUL-terminated string.
    295 *
    296 * The offset may point to the lead surrogate unit
    297 * for a supplementary code point, in which case the macro will read
    298 * the following trail surrogate as well.
    299 * If the offset points to a trail surrogate or
    300 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
    301 *
    302 * @param s const UChar * string
    303 * @param i string offset, must be i<length
    304 * @param length string length
    305 * @param c output UChar32 variable
    306 * @see U16_NEXT_UNSAFE
    307 * @stable ICU 2.4
    308 */
    309 #define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    310    (c)=(s)[(i)++]; \
    311    if(U16_IS_LEAD(c)) { \
    312        uint16_t __c2; \
    313        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
    314            ++(i); \
    315            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    316        } \
    317    } \
    318 } UPRV_BLOCK_MACRO_END
    319 
    320 /**
    321 * Get a code point from a string at a code point boundary offset,
    322 * and advance the offset to the next code point boundary.
    323 * (Post-incrementing forward iteration.)
    324 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    325 *
    326 * The length can be negative for a NUL-terminated string.
    327 *
    328 * The offset may point to the lead surrogate unit
    329 * for a supplementary code point, in which case the macro will read
    330 * the following trail surrogate as well.
    331 * If the offset points to a trail surrogate or
    332 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
    333 *
    334 * @param s const UChar * string
    335 * @param i string offset, must be i<length
    336 * @param length string length
    337 * @param c output UChar32 variable
    338 * @see U16_NEXT_UNSAFE
    339 * @stable ICU 60
    340 */
    341 #define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    342    (c)=(s)[(i)++]; \
    343    if(U16_IS_SURROGATE(c)) { \
    344        uint16_t __c2; \
    345        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
    346            ++(i); \
    347            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    348        } else { \
    349            (c)=0xfffd; \
    350        } \
    351    } \
    352 } UPRV_BLOCK_MACRO_END
    353 
    354 /**
    355 * Append a code point to a string, overwriting 1 or 2 code units.
    356 * The offset points to the current end of the string contents
    357 * and is advanced (post-increment).
    358 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
    359 * Otherwise, the result is undefined.
    360 *
    361 * @param s const UChar * string buffer
    362 * @param i string offset
    363 * @param c code point to append
    364 * @see U16_APPEND
    365 * @stable ICU 2.4
    366 */
    367 #define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    368    if((uint32_t)(c)<=0xffff) { \
    369        (s)[(i)++]=(uint16_t)(c); \
    370    } else { \
    371        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
    372        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    373    } \
    374 } UPRV_BLOCK_MACRO_END
    375 
    376 /**
    377 * Append a code point to a string, overwriting 1 or 2 code units.
    378 * The offset points to the current end of the string contents
    379 * and is advanced (post-increment).
    380 * "Safe" macro, checks for a valid code point.
    381 * If a surrogate pair is written, checks for sufficient space in the string.
    382 * If the code point is not valid or a trail surrogate does not fit,
    383 * then isError is set to true.
    384 *
    385 * @param s const UChar * string buffer
    386 * @param i string offset, must be i<capacity
    387 * @param capacity size of the string buffer
    388 * @param c code point to append
    389 * @param isError output UBool set to true if an error occurs, otherwise not modified
    390 * @see U16_APPEND_UNSAFE
    391 * @stable ICU 2.4
    392 */
    393 #define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    394    if((uint32_t)(c)<=0xffff) { \
    395        (s)[(i)++]=(uint16_t)(c); \
    396    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
    397        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
    398        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    399    } else /* c>0x10ffff or not enough space */ { \
    400        (isError)=true; \
    401    } \
    402 } UPRV_BLOCK_MACRO_END
    403 
/**
* Moves the offset forward over one code point, from one code point
* boundary to the next (post-incrementing iteration).
* "Unsafe" macro: the text must be well-formed UTF-16 and no bounds are checked.
* The unit at i is always skipped; if it is a lead surrogate, the unit
* after it is skipped too without being tested.
*
* @param s const UChar * string
* @param i string offset
* @see U16_FWD_1
* @stable ICU 2.4
*/
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
   if(U16_IS_LEAD((s)[(i)++])) { \
       ++(i); \
   } \
} UPRV_BLOCK_MACRO_END
    419 
    420 /**
    421 * Advance the string offset from one code point boundary to the next.
    422 * (Post-incrementing iteration.)
    423 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    424 *
    425 * The length can be negative for a NUL-terminated string.
    426 *
    427 * @param s const UChar * string
    428 * @param i string offset, must be i<length
    429 * @param length string length
    430 * @see U16_FWD_1_UNSAFE
    431 * @stable ICU 2.4
    432 */
    433 #define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    434    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
    435        ++(i); \
    436    } \
    437 } UPRV_BLOCK_MACRO_END
    438 
    439 /**
    440 * Advance the string offset from one code point boundary to the n-th next one,
    441 * i.e., move forward by n code points.
    442 * (Post-incrementing iteration.)
    443 * "Unsafe" macro, assumes well-formed UTF-16.
    444 *
    445 * @param s const UChar * string
    446 * @param i string offset
    447 * @param n number of code points to skip
    448 * @see U16_FWD_N
    449 * @stable ICU 2.4
    450 */
    451 #define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    452    int32_t __N=(n); \
    453    while(__N>0) { \
    454        U16_FWD_1_UNSAFE(s, i); \
    455        --__N; \
    456    } \
    457 } UPRV_BLOCK_MACRO_END
    458 
    459 /**
    460 * Advance the string offset from one code point boundary to the n-th next one,
    461 * i.e., move forward by n code points.
    462 * (Post-incrementing iteration.)
    463 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    464 *
    465 * The length can be negative for a NUL-terminated string.
    466 *
    467 * @param s const UChar * string
    468 * @param i int32_t string offset, must be i<length
    469 * @param length int32_t string length
    470 * @param n number of code points to skip
    471 * @see U16_FWD_N_UNSAFE
    472 * @stable ICU 2.4
    473 */
    474 #define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    475    int32_t __N=(n); \
    476    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
    477        U16_FWD_1(s, i, length); \
    478        --__N; \
    479    } \
    480 } UPRV_BLOCK_MACRO_END
    481 
/**
* Snaps a random-access offset back to the start of the code point it falls in.
* If the unit at i is a trail surrogate, i is decremented so that it
* addresses the preceding unit; otherwise i is left as it is.
* "Unsafe" macro: the text must be well-formed UTF-16; the preceding unit
* is not examined and no bounds are checked.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_START
* @stable ICU 2.4
*/
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
   if(U16_IS_TRAIL((s)[i])) { \
       --(i); \
   } \
} UPRV_BLOCK_MACRO_END
    500 
    501 /**
    502 * Adjust a random-access offset to a code point boundary
    503 * at the start of a code point.
    504 * If the offset points to the trail surrogate of a surrogate pair,
    505 * then the offset is decremented.
    506 * Otherwise, it is not modified.
    507 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    508 *
    509 * @param s const UChar * string
    510 * @param start starting string offset (usually 0)
    511 * @param i string offset, must be start<=i
    512 * @see U16_SET_CP_START_UNSAFE
    513 * @stable ICU 2.4
    514 */
    515 #define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    516    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
    517        --(i); \
    518    } \
    519 } UPRV_BLOCK_MACRO_END
    520 
    521 /* definitions with backward iteration -------------------------------------- */
    522 
/**
* Steps the offset back to the previous code point boundary and reads
* the code point that was stepped over (pre-decrementing backward iteration).
* "Unsafe" macro: the text must be well-formed UTF-16 and no bounds are checked.
*
* The input offset may be the same as the string length.
* If the unit before i is a trail surrogate, the unit before that is
* consumed as its lead without being tested, and the supplementary
* code point is produced.
* If the unit before i is a lead surrogate, that unit alone is returned as the code point.
* The result is undefined if the unit before i is an unpaired trail surrogate.
* The macro arguments are evaluated more than once.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_PREV
* @stable ICU 2.4
*/
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
   (c)=(s)[--(i)]; \
   if(U16_IS_TRAIL(c)) { \
       (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
   } \
} UPRV_BLOCK_MACRO_END
    549 
    550 /**
    551 * Move the string offset from one code point boundary to the previous one
    552 * and get the code point between them.
    553 * (Pre-decrementing backward iteration.)
    554 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    555 *
    556 * The input offset may be the same as the string length.
    557 * If the offset is behind a trail surrogate unit
    558 * for a supplementary code point, then the macro will read
    559 * the preceding lead surrogate as well.
    560 * If the offset is behind a lead surrogate or behind a single, unpaired
    561 * trail surrogate, then c is set to that unpaired surrogate.
    562 *
    563 * @param s const UChar * string
    564 * @param start starting string offset (usually 0)
    565 * @param i string offset, must be start<i
    566 * @param c output UChar32 variable
    567 * @see U16_PREV_UNSAFE
    568 * @stable ICU 2.4
    569 */
    570 #define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    571    (c)=(s)[--(i)]; \
    572    if(U16_IS_TRAIL(c)) { \
    573        uint16_t __c2; \
    574        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    575            --(i); \
    576            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    577        } \
    578    } \
    579 } UPRV_BLOCK_MACRO_END
    580 
    581 /**
    582 * Move the string offset from one code point boundary to the previous one
    583 * and get the code point between them.
    584 * (Pre-decrementing backward iteration.)
    585 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    586 *
    587 * The input offset may be the same as the string length.
    588 * If the offset is behind a trail surrogate unit
    589 * for a supplementary code point, then the macro will read
    590 * the preceding lead surrogate as well.
    591 * If the offset is behind a lead surrogate or behind a single, unpaired
    592 * trail surrogate, then c is set to U+FFFD.
    593 *
    594 * @param s const UChar * string
    595 * @param start starting string offset (usually 0)
    596 * @param i string offset, must be start<i
    597 * @param c output UChar32 variable
    598 * @see U16_PREV_UNSAFE
    599 * @stable ICU 60
    600 */
    601 #define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    602    (c)=(s)[--(i)]; \
    603    if(U16_IS_SURROGATE(c)) { \
    604        uint16_t __c2; \
    605        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    606            --(i); \
    607            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    608        } else { \
    609            (c)=0xfffd; \
    610        } \
    611    } \
    612 } UPRV_BLOCK_MACRO_END
    613 
/**
* Moves the offset backward over one code point, from one code point
* boundary to the previous one (pre-decrementing backward iteration).
* The input offset may be the same as the string length.
* "Unsafe" macro: the text must be well-formed UTF-16 and no bounds are checked.
* The unit before i is always stepped over; if it is a trail surrogate,
* the unit before that is stepped over too without being tested.
*
* @param s const UChar * string
* @param i string offset
* @see U16_BACK_1
* @stable ICU 2.4
*/
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
   if(U16_IS_TRAIL((s)[--(i)])) { \
       --(i); \
   } \
} UPRV_BLOCK_MACRO_END
    630 
    631 /**
    632 * Move the string offset from one code point boundary to the previous one.
    633 * (Pre-decrementing backward iteration.)
    634 * The input offset may be the same as the string length.
    635 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    636 *
    637 * @param s const UChar * string
    638 * @param start starting string offset (usually 0)
    639 * @param i string offset, must be start<i
    640 * @see U16_BACK_1_UNSAFE
    641 * @stable ICU 2.4
    642 */
    643 #define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    644    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
    645        --(i); \
    646    } \
    647 } UPRV_BLOCK_MACRO_END
    648 
    649 /**
    650 * Move the string offset from one code point boundary to the n-th one before it,
    651 * i.e., move backward by n code points.
    652 * (Pre-decrementing backward iteration.)
    653 * The input offset may be the same as the string length.
    654 * "Unsafe" macro, assumes well-formed UTF-16.
    655 *
    656 * @param s const UChar * string
    657 * @param i string offset
    658 * @param n number of code points to skip
    659 * @see U16_BACK_N
    660 * @stable ICU 2.4
    661 */
    662 #define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    663    int32_t __N=(n); \
    664    while(__N>0) { \
    665        U16_BACK_1_UNSAFE(s, i); \
    666        --__N; \
    667    } \
    668 } UPRV_BLOCK_MACRO_END
    669 
    670 /**
    671 * Move the string offset from one code point boundary to the n-th one before it,
    672 * i.e., move backward by n code points.
    673 * (Pre-decrementing backward iteration.)
    674 * The input offset may be the same as the string length.
    675 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    676 *
    677 * @param s const UChar * string
    678 * @param start start of string
    679 * @param i string offset, must be start<i
    680 * @param n number of code points to skip
    681 * @see U16_BACK_N_UNSAFE
    682 * @stable ICU 2.4
    683 */
    684 #define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    685    int32_t __N=(n); \
    686    while(__N>0 && (i)>(start)) { \
    687        U16_BACK_1(s, start, i); \
    688        --__N; \
    689    } \
    690 } UPRV_BLOCK_MACRO_END
    691 
/**
* Snaps a random-access offset forward to the boundary just after a code point.
* If the unit before i is a lead surrogate, i is incremented so that it
* lies behind the following unit; otherwise i is left as it is.
* The input offset may be the same as the string length.
* "Unsafe" macro: the text must be well-formed UTF-16; the unit at i
* is not examined and no bounds are checked.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_LIMIT
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
   if(U16_IS_LEAD((s)[(i)-1])) { \
       ++(i); \
   } \
} UPRV_BLOCK_MACRO_END
    710 
    711 /**
    712 * Adjust a random-access offset to a code point boundary after a code point.
    713 * If the offset is behind the lead surrogate of a surrogate pair,
    714 * then the offset is incremented.
    715 * Otherwise, it is not modified.
    716 * The input offset may be the same as the string length.
    717 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    718 *
    719 * The length can be negative for a NUL-terminated string.
    720 *
    721 * @param s const UChar * string
    722 * @param start int32_t starting string offset (usually 0)
    723 * @param i int32_t string offset, start<=i<=length
    724 * @param length int32_t string length
    725 * @see U16_SET_CP_LIMIT_UNSAFE
    726 * @stable ICU 2.4
    727 */
    728 #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    729    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
    730        ++(i); \
    731    } \
    732 } UPRV_BLOCK_MACRO_END
    733 
    734 #endif