tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

utext.h (59495B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2004-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  utext.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2004oct06
     16 *   created by: Markus W. Scherer
     17 */
     18 
     19 #ifndef __UTEXT_H__
     20 #define __UTEXT_H__
     21 
     22 /**
     23 * \file
     24 * \brief C API: Abstract Unicode Text API
     25 *
     26 * The Text Access API provides a means to allow text that is stored in alternative
     27 * formats to work with ICU services.  ICU normally operates on text that is
     28 * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
     29 * UnicodeString for C++ APIs.
     30 *
     31 * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
     32 * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
     33 *
     34 * There are three general classes of usage for UText:
     35 *
     36 *     Application Level Use.  This is the simplest usage - applications would
     37 *     use one of the utext_open() functions on their input text, and pass
     38 *     the resulting UText to the desired ICU service.
     39 *
     40 *     Second is usage in ICU Services, such as break iteration, that will need to
     41 *     operate on input presented to them as a UText.  These implementations
     42 *     will need to use the iteration and related UText functions to gain
     43 *     access to the actual text.
     44 *
     45 *     The third class of UText users are "text providers."  These are the
     46 *     UText implementations for the various text storage formats.  An application
     47 *     or system with a unique text storage format can implement a set of
     48 *     UText provider functions for that format, which will then allow
     49 *     ICU services to operate on that format.
     50 *
     51 *
     52 * <em>Iterating over text</em>
     53 *
     54 * Here is sample code for a forward iteration over the contents of a UText
     55 *
     56 * \code
     57 *    UChar32  c;
     58 *    UText    *ut = whatever();
     59 *
     60 *    for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
     61 *       // do whatever with the codepoint c here.
     62 *    }
     63 * \endcode
     64 *
     65 * And here is similar code to iterate in the reverse direction, from the end
     66 * of the text towards the beginning.
     67 *
     68 * \code
     69 *    UChar32  c;
     70 *    UText    *ut = whatever();
     71 *    int64_t  textLength = utext_nativeLength(ut);
     72 *    for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
     73 *       // do whatever with the codepoint c here.
     74 *    }
     75 * \endcode
     76 *
     77 * <em>Characters and Indexing</em>
     78 *
     79 * Indexing into text by UText functions is nearly always in terms of the native
     80 * indexing of the underlying text storage.  The storage format could be UTF-8
     81 * or UTF-32, for example.  When coding to the UText access API, no assumptions
     82 * can be made regarding the size of characters, or how far an index
     83 * may move when iterating between characters.
     84 *
     85 * All indices supplied to UText functions are pinned to the length of the
     86 * text.  An out-of-bounds index is not considered to be an error, but is
     87 * adjusted to be in the range  0 <= index <= length of input text.
     88 *
     89 *
     90 * When an index position is returned from a UText function, it will be
     91 * a native index to the underlying text.  In the case of multi-unit characters,
     92 * it will  always refer to the first position of the character,
     93 * never to the interior.  This is essentially the same thing as saying that
     94 * a returned index will always point to a boundary between characters.
     95 *
     96 * When a native index is supplied to a UText function, all indices that
     97 * refer to any part of a multi-unit character representation are considered
     98 * to be equivalent.  In the case of multi-unit characters, an incoming index
     99 * will be logically normalized to refer to the start of the character.
    100 * 
    101 * It is possible to test whether a native index is on a code point boundary
    102 * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
    103 * If the index is returned unchanged, it was on a code point boundary.  If
    104 * an adjusted index is returned, the original index referred to the
    105 * interior of a character.
    106 *
    107 * <em>Conventions for calling UText functions</em>
    108 *
    109 * Most UText access functions have as their first parameter a (UText *) pointer,
    110 * which specifies the UText to be used.  Unless otherwise noted, the
    111 * pointer must refer to a valid, open UText.  Attempting to
    112 * use a closed UText or passing a NULL pointer is a programming error and
    113 * will produce undefined results or NULL pointer exceptions.
    114 * 
    115 * The utext_open family of functions can either open an existing (closed)
    116 * UText, or heap-allocate a new UText.  Here is sample code for creating
    117 * a stack-allocated UText.
    118 *
    119 * \code
    120 *    char     *s = whatever();  // A utf-8 string 
    121 *    UErrorCode status = U_ZERO_ERROR;
    122 *    UText    ut = UTEXT_INITIALIZER;
    123 *    utext_openUTF8(&ut, s, -1, &status);
    124 *    if (U_FAILURE(status)) {
    125 *        // error handling
    126 *    } else {
    127 *        // work with the UText
    128 *    }
    129 * \endcode
    130 *
    131 * Any existing UText passed to an open function _must_ have been initialized, 
    132 * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
    133 * by an open function.  Passing NULL will cause the open function to
    134 * heap-allocate and fully initialize a new UText.
    135 *
    136 */
    137 
    138 
    139 
    140 #include "unicode/utypes.h"
    141 #include "unicode/uchar.h"
    142 #if U_SHOW_CPLUSPLUS_API
    143 #include "unicode/localpointer.h"
    144 #include "unicode/rep.h"
    145 #include "unicode/unistr.h"
    146 #include "unicode/chariter.h"
    147 #endif
    148 
    149 
    150 U_CDECL_BEGIN
    151 
    152 struct UText;
    153 typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
    154 
    155 
    156 /***************************************************************************************
    157 *
    158 *   C Functions for creating UText wrappers around various kinds of text strings.
    159 *
    160 ****************************************************************************************/
    161 
    162 
    163 /**
    164  * Close function for UText instances.
    165  * Cleans up, releases any resources being held by an open UText.
    166  * <p>
    167  *   If the UText was originally allocated by one of the utext_open functions,
    168  *   the storage associated with the utext will also be freed.
    169  *   If the UText storage originated with the application, as it would with
    170  *   a local or static instance, the storage will not be deleted.
    171  *
    172  *   An open UText can be reset to refer to a new string by using one of the utext_open()
    173  *   functions without first closing the UText.  
    174  *
    175  * @param ut  The UText to be closed.
    176  * @return    NULL if the UText struct was deleted by the close.  If the UText struct
    177  *            was originally provided by the caller to the open function, it is
    178  *            returned by this function, and may be safely used again in
    179  *            a subsequent utext_open.
    180  *
    181  * @stable ICU 3.4
    182  */
    183 U_CAPI UText * U_EXPORT2
    184 utext_close(UText *ut);
    185 
    186 /**
    187 * Open a read-only UText implementation for UTF-8 strings.
    188 * 
    189 * \htmlonly
    190 * Any invalid UTF-8 in the input will be handled in this way:
    191 * a sequence of bytes that has the form of a truncated, but otherwise valid,
    192 * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. 
    193 * Any other illegal bytes will each be replaced by a \uFFFD.
    194 * \endhtmlonly
    195 * 
    196 * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
    197 *               If non-NULL, must refer to an initialized UText struct, which will then
    198 *               be reset to reference the specified UTF-8 string.
    199 * @param s      A UTF-8 string.  Must not be NULL.
    200 * @param length The length of the UTF-8 string in bytes, or -1 if the string is
    201 *               zero terminated.
    202 * @param status Errors are returned here.
    203 * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
    204 *               will always be used and returned.
    205 * @stable ICU 3.4
    206 */
    207 U_CAPI UText * U_EXPORT2
    208 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
    209 
    210 
    211 /**
    212 * Open a read-only UText for UChar * string.
    213 * 
    214 * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
    215 *               If non-NULL, must refer to an initialized UText struct, which will then
    216 *               be reset to reference the specified UChar string.
    217 * @param s      A UChar (UTF-16) string
    218 * @param length The number of UChars in the input string, or -1 if the string is
    219 *               zero terminated.
    220 * @param status Errors are returned here.
    221 * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
    222 *               will always be used and returned.
    223 * @stable ICU 3.4
    224 */
    225 U_CAPI UText * U_EXPORT2
    226 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
    227 
    228 
    229 #if U_SHOW_CPLUSPLUS_API
    230 /**
    231 * Open a writable UText for a non-const UnicodeString. 
    232 * 
    233 * @param ut      Pointer to a UText struct.  If nullptr, a new UText will be created.
    234 *                 If non-nullptr, must refer to an initialized UText struct, which will then
    235 *                 be reset to reference the specified input string.
    236 * @param s       A UnicodeString.
    237 * @param status Errors are returned here.
    238 * @return        Pointer to the UText.  If a UText was supplied as input, this
    239 *                 will always be used and returned.
    240 * @stable ICU 3.4
    241 */
    242 U_CAPI UText * U_EXPORT2
    243 utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
    244 
    245 
    246 /**
    247 * Open a UText for a const UnicodeString.   The resulting UText will not be writable.
    248 * 
    249 * @param ut    Pointer to a UText struct.  If nullptr, a new UText will be created.
    250 *               If non-nullptr, must refer to an initialized UText struct, which will then
    251 *               be reset to reference the specified input string.
    252 * @param s      A const UnicodeString to be wrapped.
    253 * @param status Errors are returned here.
    254 * @return       Pointer to the UText.  If a UText was supplied as input, this
    255 *               will always be used and returned.
    256 * @stable ICU 3.4
    257 */
    258 U_CAPI UText * U_EXPORT2
    259 utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
    260 
    261 
    262 /**
    263 * Open a writable UText implementation for an ICU Replaceable object.
    264 * @param ut    Pointer to a UText struct.  If nullptr, a new UText will be created.
    265 *               If non-nullptr, must refer to an already existing UText, which will then
    266 *               be reset to reference the specified replaceable text.
    267 * @param rep    A Replaceable text object.
    268 * @param status Errors are returned here.
    269 * @return       Pointer to the UText.  If a UText was supplied as input, this
    270 *               will always be used and returned.
    271 * @see Replaceable
    272 * @stable ICU 3.4
    273 */
    274 U_CAPI UText * U_EXPORT2
    275 utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
    276 
    277 /**
    278 * Open a UText implementation over an ICU CharacterIterator.
    279 * @param ut    Pointer to a UText struct.  If nullptr, a new UText will be created.
    280 *               If non-nullptr, must refer to an already existing UText, which will then
    281 *               be reset to reference the specified character iterator.
    282 * @param ci     A Character Iterator.
    283 * @param status Errors are returned here.
    284 * @return       Pointer to the UText.  If a UText was supplied as input, this
    285 *               will always be used and returned.
    286 * @see CharacterIterator
    287 * @stable ICU 3.4
    288 */
    289 U_CAPI UText * U_EXPORT2
    290 utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
    291 
    292 #endif
    293 
    294 
    295 /**
    296  *  Clone a UText.  This is much like opening a UText where the source text is itself
    297  *  another UText.
    298  *
    299  *  A deep clone will copy both the UText data structures and the underlying text.
    300  *  The original and cloned UText will operate completely independently; modifications
    301  *  made to the text in one will not affect the other.  Text providers are not
    302  *  required to support deep clones.  The user of clone() must check the status return
    303  *  and be prepared to handle failures.
    304  *
    305  *  The standard UText implementations for UTF8, UChar *, UnicodeString and
    306  *  Replaceable all support deep cloning.
    307  *
    308  *  The UText returned from a deep clone will be writable, assuming that the text
    309  *  provider is able to support writing, even if the source UText had been made
    310  *  non-writable by means of utext_freeze().
    311  *
    312  *  A shallow clone replicates only the UText data structures; it does not make
    313  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
    314  *  have multiple iterators active in a single text string that is not being
    315  *  modified.
    316  *
    317  *  A shallow clone operation will not fail, barring truly exceptional conditions such
    318  *  as memory allocation failures.
    319  *
    320  *  Shallow UText clones should be avoided if the UText functions that modify the
    321  *  text are expected to be used, either on the original or the cloned UText.
    322  *  Any such modifications  can cause unpredictable behavior.  Read Only
    323  *  shallow clones provide some protection against errors of this type by
    324  *  disabling text modification via the cloned UText.
    325  *
    326  *  A shallow clone made with the readOnly parameter == false will preserve the 
    327  *  utext_isWritable() state of the source object.  Note, however, that
    328  *  write operations must be avoided while more than one UText exists that refer
    329  *  to the same underlying text.
    330  *
    331  *  A UText and its clone may be safely concurrently accessed by separate threads.
    332  *  This is true for read access only with shallow clones, and for both read and
    333  *  write access with deep clones.
    334  *  It is the responsibility of the Text Provider to ensure that this thread safety
    335  *  constraint is met.
    336  *
    337  *  @param dest   A UText struct to be filled in with the result of the clone operation,
    338  *                or NULL if the clone function should heap-allocate a new UText struct.
    339  *                If non-NULL, must refer to an already existing UText, which will then
    340  *                be reset to become the clone.
    341  *  @param src    The UText to be cloned.
    342  *  @param deep   true to request a deep clone, false for a shallow clone.
    343  *  @param readOnly true to request that the cloned UText have read-only access to the
    344  *                underlying text.
    345  *
    346  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
    347  *                will be returned if the text provider is unable to clone the
    348  *                original text.
    349  *  @return       The newly created clone, or NULL if the clone operation failed.
    350  *  @stable ICU 3.4
    351  */
    352 U_CAPI UText * U_EXPORT2
    353 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
    354 
    355 
    356 /**
    357  *  Compare two UText objects for equality.
    358  *  UTexts are equal if they are iterating over the same text, and
    359  *    have the same iteration position within the text.
    360  *    If either or both of the parameters are NULL, the comparison is false.
    361  *
    362  *  @param a   The first of the two UTexts to compare.
    363  *  @param b   The other UText to be compared.
    364  *  @return    true if the two UTexts are equal.
    365  *  @stable ICU 3.6
    366  */
    367 U_CAPI UBool U_EXPORT2
    368 utext_equals(const UText *a, const UText *b);
    369 
    370 
    371 /*****************************************************************************
    372 *
    373 *   Functions to work with the text represented by a UText wrapper
    374 *
    375 *****************************************************************************/
    376 
    377 /**
    378  * Get the length of the text.  Depending on the characteristics
    379  * of the underlying text representation, this may be expensive.  
    380  * @see  utext_isLengthExpensive()
    381  *
    382  *
    383  * @param ut  the text to be accessed.
    384  * @return the length of the text, expressed in native units.
    385  *
    386  * @stable ICU 3.4
    387  */
    388 U_CAPI int64_t U_EXPORT2
    389 utext_nativeLength(UText *ut);
    390 
    391 /**
    392 *  Return true if calculating the length of the text could be expensive.
    393 *  Finding the length of NUL terminated strings is considered to be expensive.
    394 *
    395 *  Note that the value of this function may change
    396 *  as the result of other operations on a UText.
    397 *  Once the length of a string has been discovered, it will no longer
    398 *  be expensive to report it.
    399 *
    400 * @param ut the text to be accessed.
    401 * @return true if determining the length of the text could be time consuming.
    402 * @stable ICU 3.4
    403 */
    404 U_CAPI UBool U_EXPORT2
    405 utext_isLengthExpensive(const UText *ut);
    406 
    407 /**
    408 * Returns the code point at the requested index,
    409 * or U_SENTINEL (-1) if it is out of bounds.
    410 *
    411 * If the specified index points to the interior of a multi-unit
    412 * character - one of the trail bytes of a UTF-8 sequence, for example -
    413 * the complete code point will be returned.
    414 *
    415 * The iteration position will be set to the start of the returned code point.
    416 *
    417 * This function is roughly equivalent to the sequence
    418 *    utext_setNativeIndex(index);
    419 *    utext_current32();
    420 * (There is a subtle difference if the index is out of bounds by being less than zero -
    421 * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32()
    422 * will return the char at zero.  utext_char32At(negative index), on the other hand, will
    423 * return the U_SENTINEL value of -1.)
    424 * 
    425 * @param ut the text to be accessed
    426 * @param nativeIndex the native index of the character to be accessed.  If the index points
    427 *        to other than the first unit of a multi-unit character, it will be adjusted
    428 *        to the start of the character.
    429 * @return the code point at the specified index.
    430 * @stable ICU 3.4
    431 */
    432 U_CAPI UChar32 U_EXPORT2
    433 utext_char32At(UText *ut, int64_t nativeIndex);
    434 
    435 
    436 /**
    437 *
    438 * Get the code point at the current iteration position,
    439 * or U_SENTINEL (-1) if the iteration has reached the end of
    440 * the input text.
    441 *
    442 * @param ut the text to be accessed.
    443 * @return the Unicode code point at the current iterator position.
    444 * @stable ICU 3.4
    445 */
    446 U_CAPI UChar32 U_EXPORT2
    447 utext_current32(UText *ut);
    448 
    449 
    450 /**
    451 * Get the code point at the current iteration position of the UText, and
    452 * advance the position to the first index following the character.
    453 *
    454 * If the position is at the end of the text (the index following
    455 * the last character, which is also the length of the text), 
    456 * return U_SENTINEL (-1) and do not advance the index. 
    457 *
    458 * This is a post-increment operation.
    459 *
    460 * An inline macro version of this function, UTEXT_NEXT32(), 
    461 * is available for performance critical use.
    462 *
    463 * @param ut the text to be accessed.
    464 * @return the Unicode code point at the iteration position.
    465 * @see UTEXT_NEXT32
    466 * @stable ICU 3.4
    467 */
    468 U_CAPI UChar32 U_EXPORT2
    469 utext_next32(UText *ut);
    470 
    471 
    472 /**
    473 *  Move the iterator position to the character (code point) whose
    474 *  index precedes the current position, and return that character.
    475 *  This is a pre-decrement operation.
    476 *
    477 *  If the initial position is at the start of the text (index of 0) 
    478 *  return U_SENTINEL (-1), and leave the position unchanged.
    479 *
    480 *  An inline macro version of this function, UTEXT_PREVIOUS32(), 
    481 *  is available for performance critical use.
    482 *
    483 *  @param ut the text to be accessed.
    484 *  @return the previous UChar32 code point, or U_SENTINEL (-1) 
    485 *          if the iteration has reached the start of the text.
    486 *  @see UTEXT_PREVIOUS32
    487 *  @stable ICU 3.4
    488 */
    489 U_CAPI UChar32 U_EXPORT2
    490 utext_previous32(UText *ut);
    491 
    492 
    493 /**
    494  * Set the iteration index and return the code point at that index. 
    495  * Leave the iteration index at the start of the following code point.
    496  *
    497  * This function is the most efficient and convenient way to
    498  * begin a forward iteration.  The results are identical to those
    499  * from the sequence
    500  * \code
    501  *    utext_setNativeIndex();
    502  *    utext_next32();
    503  * \endcode
    504  *
    505  *  @param ut the text to be accessed.
    506  *  @param nativeIndex Iteration index, in the native units of the text provider.
    507  *  @return Code point which starts at or before index,
    508  *         or U_SENTINEL (-1) if it is out of bounds.
    509  * @stable ICU 3.4
    510  */
    511 U_CAPI UChar32 U_EXPORT2
    512 utext_next32From(UText *ut, int64_t nativeIndex);
    513 
    514 
    515 
    516 /**
    517  * Set the iteration index, and return the code point preceding the
    518  * one specified by the initial index.  Leave the iteration position
    519  * at the start of the returned code point.
    520  *
    521  * This function is the most efficient and convenient way to
    522  * begin a backwards iteration.
    523  *
    524  * @param ut the text to be accessed.
    525  * @param nativeIndex Iteration index in the native units of the text provider.
    526  * @return Code point preceding the one at the initial index,
    527  *         or U_SENTINEL (-1) if it is out of bounds.
    528  *
    529  * @stable ICU 3.4
    530  */
    531 U_CAPI UChar32 U_EXPORT2
    532 utext_previous32From(UText *ut, int64_t nativeIndex);
    533 
    534 /**
    535  * Get the current iterator position, which can range from 0 to 
    536  * the length of the text.
    537  * The position is a native index into the input text, in whatever format it
    538  * may have (possibly UTF-8 for example), and may not always be the same as
    539  * the corresponding UChar (UTF-16) index.
    540  * The returned position will always be aligned to a code point boundary. 
    541  *
    542  * @param ut the text to be accessed.
    543  * @return the current index position, in the native units of the text provider.
    544  * @stable ICU 3.4
    545  */
    546 U_CAPI int64_t U_EXPORT2
    547 utext_getNativeIndex(const UText *ut);
    548 
    549 /**
    550 * Set the current iteration position to the nearest code point
    551 * boundary at or preceding the specified index.
    552 * The index is in the native units of the original input text.
    553 * If the index is out of range, it will be pinned to be within
    554 * the range of the input text.
    555 * <p>
    556 * It will usually be more efficient to begin an iteration
    557 * using the functions utext_next32From() or utext_previous32From()
    558 * rather than utext_setNativeIndex().
    559 * <p>
    560 * Moving the index position to an adjacent character is best done
    561 * with utext_next32(), utext_previous32() or utext_moveIndex32().
    562 * Attempting to do direct arithmetic on the index position is
    563 * complicated by the fact that the size (in native units) of a
    564 * character depends on the underlying representation of the character
    565 * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
    566 * easily knowable.
    567 *
    568 * @param ut the text to be accessed.
    569 * @param nativeIndex the native unit index of the new iteration position.
    570 * @stable ICU 3.4
    571 */
    572 U_CAPI void U_EXPORT2
    573 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
    574 
    575 /**
    576 * Move the iterator position by delta code points.  The number of code points
    577 * is a signed number; a negative delta will move the iterator backwards,
    578 * towards the start of the text.
    579 * <p>
    580 * The index is moved by <code>delta</code> code points
    581 * forward or backward, but no further backward than to 0 and
    582 * no further forward than to utext_nativeLength().
    583 * The resulting index value will be in between 0 and length, inclusive.
    584 *
    585 * @param ut the text to be accessed.
    586 * @param delta the signed number of code points to move the iteration position.
    587 * @return true if the position could be moved the requested number of positions while
    588 *              staying within the range [0 - text length].
    589 * @stable ICU 3.4
    590 */
    591 U_CAPI UBool U_EXPORT2
    592 utext_moveIndex32(UText *ut, int32_t delta);
    593 
    594 /**
    595 * Get the native index of the character preceding the current position.
    596 * If the iteration position is already at the start of the text, zero
    597 * is returned.
    598 * The value returned is the same as that obtained from the following sequence,
    599 * but without the side effect of changing the iteration position.
    600 *   
    601 * \code
    602 *    UText  *ut = whatever();
    603 *      ...
    604 *    utext_previous32(ut);
    605 *    utext_getNativeIndex(ut);
    606 * \endcode
    607 *
    608 * This function is most useful during forwards iteration, where it will get the
    609 *   native index of the character most recently returned from utext_next32().
    610 *
    611 * @param ut the text to be accessed
    612 * @return the native index of the character preceding the current index position,
    613 *         or zero if the current position is at the start of the text.
    614 * @stable ICU 3.6
    615 */
    616 U_CAPI int64_t U_EXPORT2
    617 utext_getPreviousNativeIndex(UText *ut); 
    618 
    619 
    620 /**
    621 *
    622 * Extract text from a UText into a UChar buffer.  The range of text to be extracted
    623 * is specified in the native indices of the UText provider.  These may not necessarily
    624 * be UTF-16 indices.
    625 * <p>
    626 * The size (number of 16 bit UChars) of the data to be extracted is returned.  The
    627 * full number of UChars is returned, even when the extracted text is truncated
    628 * because the specified buffer size is too small.
    629 * <p>
    630 * The extracted string will (if you are a user) / must (if you are a text provider)
    631 * be NUL-terminated if there is sufficient space in the destination buffer.  This
    632 * terminating NUL is not included in the returned length.
    633 * <p>
    634 * The iteration index is left at the position following the last extracted character.
    635 *
    636 * @param  ut    the UText from which to extract data.
    637 * @param  nativeStart the native index of the first character to extract.
    638 *               If the specified index is out of range,
    639 *               it will be pinned to be within 0 <= index <= textLength.
    640 * @param  nativeLimit the native string index of the position following the last
    641 *               character to extract.  If the specified index is out of range,
    642 *               it will be pinned to be within 0 <= index <= textLength.
    643 *               nativeLimit must be >= nativeStart.
    644 * @param  dest  the UChar (UTF-16) buffer into which the extracted text is placed
    645 * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
    646 *               for precomputing the required size.
    647 * @param  status receives any error status.
    648 *         U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 
    649 *         buffer was too small.  Returns number of UChars for preflighting.
    650 * @return Number of UChars in the data to be extracted.  Does not include a trailing NUL.
    651 *
    652 * @stable ICU 3.4
    653 */
    654 U_CAPI int32_t U_EXPORT2
    655 utext_extract(UText *ut,
    656             int64_t nativeStart, int64_t nativeLimit,
    657             UChar *dest, int32_t destCapacity,
    658             UErrorCode *status);
    659 
    660 
    661 
    662 /************************************************************************************
    663 *
    664 *  #define inline versions of selected performance-critical text access functions
    665 *          Caution:  do not use auto increment++ or decrement-- expressions
    666 *                    as parameters to these macros.
    667 *
    668 *          For most use, where there is no extreme performance constraint, the
    669 *          normal, non-inline functions are a better choice.  The resulting code
    670 *          will be smaller, and, if the need ever arises, easier to debug.
    671 *
    672 *          These are implemented as #defines rather than real functions
    673 *          because there is no fully portable way to do inline functions in plain C.
    674 *
    675 ************************************************************************************/
    676 
    677 #ifndef U_HIDE_INTERNAL_API
    678 /**
    679 * inline version of utext_current32(), for performance-critical situations.
    680 *
    681 * Get the code point at the current iteration position of the UText; returns
    682 * U_SENTINEL (-1) if the position is at the end of the text.  A code unit below 0xd800
    683 * inside the current chunk is read inline; all other cases call utext_current32().
    684 *
    685 * @internal ICU 4.4 technology preview
    686 */
    687 #define UTEXT_CURRENT32(ut)  \
    688    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
    689    ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
    690 #endif  /* U_HIDE_INTERNAL_API */
    691 
    692 /**
    693 * inline version of utext_next32(), for performance-critical situations.
    694 *
    695 * Get the code point at the current iteration position of the UText, and
    696 * advance the position to the first index following the character (post-increment).
    697 * Returns U_SENTINEL (-1) if the position is at the end of the text.
    698 * A code unit below 0xd800 inside the current chunk is consumed inline by
    699 * incrementing chunkOffset; all other cases call utext_next32().
    700 *
    701 * @stable ICU 3.4
    702 */
    703 #define UTEXT_NEXT32(ut)  \
    704    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
    705    ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
    706 
    707 /**
    708 * inline version of utext_previous32(), for performance-critical situations.
    709 *
    710 *  Move the iterator position to the character (code point) whose index precedes
    711 *  the current position, and return that character (pre-decrement).
    712 *  Returns U_SENTINEL (-1) if the position is at the start of the text.  Done inline only
    713 *  when chunkOffset > 0 and the preceding code unit is below 0xd800; else calls utext_previous32().
    714 *
    715 * @stable ICU 3.4
    716 */
    717 #define UTEXT_PREVIOUS32(ut)  \
    718    ((ut)->chunkOffset > 0 && \
    719     (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
    720          (ut)->chunkContents[--((ut)->chunkOffset)]  :  utext_previous32(ut))
    721 
    722 /**
    723  *  inline version of utext_getNativeIndex(), for performance-critical situations.
    724  *
    725  * Get the current iterator position, which can range from 0 to the length of the text.
    726  * The position is a native index into the input text, in whatever format it
    727  * may have (possibly UTF-8 for example), and may not always be the same as
    728  * the corresponding UChar (UTF-16) index.  It is always aligned to a code point boundary.
    729  * The index is computed inline as chunkNativeStart + chunkOffset while
    730  * chunkOffset <= nativeIndexingLimit; otherwise pFuncs->mapOffsetToNative() is called.
    731  *
    732  * @stable ICU 3.6
    733  */
    734 #define UTEXT_GETNATIVEINDEX(ut)                       \
    735    ((ut)->chunkOffset <= (ut)->nativeIndexingLimit?   \
    736        (ut)->chunkNativeStart+(ut)->chunkOffset :     \
    737        (ut)->pFuncs->mapOffsetToNative(ut))    
    738 
    739 /**
    740  *  inline version of utext_setNativeIndex(), for performance-critical situations.
    741  *
    742  * Set the current iteration position to the nearest code point boundary at or
    743  * preceding the specified index, given in the native units of the original input text.
    744  * If the index is out of range, it will be pinned to be within the range of the input text.
    745  * chunkOffset is stored inline only if (ix - chunkNativeStart) lies in [0, nativeIndexingLimit)
    746  * and the code unit there is below 0xdc00; otherwise utext_setNativeIndex() is called.
    747  *
    748  * @stable ICU 3.8
    749  */
    750 #define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
    751    int64_t __offset = (ix) - (ut)->chunkNativeStart; \
    752    if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
    753        (ut)->chunkOffset=(int32_t)__offset; \
    754    } else { \
    755        utext_setNativeIndex((ut), (ix)); \
    756    } \
    757 } UPRV_BLOCK_MACRO_END
    758 
    759 
    760 
    761 /************************************************************************************
    762 *
    763 *   Functions related to writing or modifying the text.
    764 *   These will work only with modifiable UTexts.  Attempting to
    765 *   modify a read-only UText will return an error status.
    766 *
    767 ************************************************************************************/
    768 
    769 
    770 /**
    771 *  Return true if the text can be written (modified) with utext_replace() or
    772 *  utext_copy().  For the text to be writable, the text provider must
    773 *  be of a type that supports writing and the UText must not be frozen.
    774 *
    775 *  Attempting to modify text when utext_isWritable() is false will fail -
    776 *  the text will not be modified, and an error will be returned from the function
    777 *  that attempted the modification.
    778 *
    779 * @param  ut   the UText to be tested.
    780 * @return true if the text is modifiable.
    781 *
    782 * @see    utext_freeze()
    783 * @see    utext_replace()
    784 * @see    utext_copy()
    785 * @stable ICU 3.4
    786 *
    787 */
    788 U_CAPI UBool U_EXPORT2
    789 utext_isWritable(const UText *ut);
    790 
    791 
    792 /**
    793  * Test whether there is meta data associated with the text.
    794  * @see Replaceable::hasMetaData()
    795  *
    796  * @param ut The UText to be tested
    797  * @return true if the underlying text includes meta data.
    798  * @stable ICU 3.4
    799  */
    800 U_CAPI UBool U_EXPORT2
    801 utext_hasMetaData(const UText *ut);
    802 
    803 
    804 /**
    805 * Replace a range of the original text with a replacement text.
    806 *
    807 * Leaves the current iteration position at the position following the
    808 *  newly inserted replacement text.
    809 *
    810 * This function is only available on UText types that support writing,
    811 * that is, ones where utext_isWritable() returns true.
    812 *
    813 * When using this function, there should be only a single UText opened onto the
    814 * underlying native text string.  Behavior after a replace operation
    815 * on a UText is undefined for any other additional UTexts that refer to the
    816 * modified string.
    817 *
    818 * @param ut               the UText representing the text to be operated on.
    819 * @param nativeStart      the native index of the start of the region to be replaced
    820 * @param nativeLimit      the native index of the character following the region to be replaced.
    821 * @param replacementText  pointer to the replacement text
    822 * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
    823 * @param status           receives any error status.  Possible errors include
    824 *                         U_NO_WRITE_PERMISSION
    825 *
    826 * @return The signed number of (native) storage units by which
    827 *         the length of the text expanded or contracted.
    828 *
    829 * @stable ICU 3.4
    830 */
    831 U_CAPI int32_t U_EXPORT2
    832 utext_replace(UText *ut,
    833             int64_t nativeStart, int64_t nativeLimit,
    834             const UChar *replacementText, int32_t replacementLength,
    835             UErrorCode *status);
    836 
    837 
    838 
    839 /**
    840 *
    841 * Copy or move a substring from one position to another within the text,
    842 * while retaining any metadata associated with the text.
    843 * This function is used to duplicate or reorder substrings.
    844 * The destination index must not overlap the source range.
    845 *
    846 * The text to be copied or moved is inserted at destIndex;
    847 * it does not replace or overwrite any existing text.
    848 *
    849 * The iteration position is left following the newly inserted text
    850 * at the destination position.
    851 *
    852 * This function is only available on UText types that support writing,
    853 * that is, ones where utext_isWritable() returns true.
    854 *
    855 * When using this function, there should be only a single UText opened onto the
    856 * underlying native text string.  Behavior after a copy operation
    857 * on a UText is undefined in any other additional UTexts that refer to the
    858 * modified string.
    859 *
    860 * @param ut           The UText representing the text to be operated on.
    861 * @param nativeStart  The native index of the start of the region to be copied or moved
    862 * @param nativeLimit  The native index of the character position following the region
    863 *                     to be copied.
    864 * @param destIndex    The native destination index to which the source substring is
    865 *                     copied or moved.
    866 * @param move         If true, then the substring is moved, not copied/duplicated.
    867 * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
    868 *                       
    869 * @stable ICU 3.4
    870 */
    871 U_CAPI void U_EXPORT2
    872 utext_copy(UText *ut,
    873          int64_t nativeStart, int64_t nativeLimit,
    874          int64_t destIndex,
    875          UBool move,
    876          UErrorCode *status);
    877 
    878 
    879 /**
    880  *  <p>
    881  *  Freeze a UText.  This prevents any modification to the underlying text itself
    882  *  by means of functions operating on this UText.
    883  *  </p>
    884  *  <p>
    885  *  Once frozen, a UText can not be unfrozen.  The intent is to ensure
    886  *  that the text underlying a frozen UText wrapper cannot be modified via that UText.
    887  *  </p>
    888  *  <p>
    889  *  Caution:  freezing a UText will disable changes made via the specific
    890  *   frozen UText wrapper only; it will not have any effect on the ability to
    891  *   directly modify the text by bypassing the UText.  Any such backdoor modifications
    892  *   are always an error while UText access is occurring because the underlying
    893  *   text can get out of sync with UText's buffering.
    894  *  </p>
    895  *
    896  *  @param ut  The UText to be frozen.
    897  *  @see   utext_isWritable()
    898  *  @stable ICU 3.6
    899  */
    900 U_CAPI void U_EXPORT2
    901 utext_freeze(UText *ut);
    902 
    903 
    904 /**
    905 * UText provider properties (bit field indexes).
    906 *
    907 * @see UText
    908 * @stable ICU 3.4
    909 */
    910 enum {
    911    /**
    912     * It is potentially time consuming for the provider to determine the length of the text.
    913     * @stable ICU 3.4
    914     */
    915    UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
    916    /**
    917     * Text chunks remain valid and usable until the text object is modified or
    918     * deleted, not just until the next time the access() function is called
    919     * (which is the default).
    920     * @stable ICU 3.4
    921     */
    922    UTEXT_PROVIDER_STABLE_CHUNKS = 2,
    923    /**
    924     * The provider supports modifying the text via the replace() and copy()
    925     * functions.
    926     * @see Replaceable
    927     * @stable ICU 3.4
    928     */
    929    UTEXT_PROVIDER_WRITABLE = 3,
    930    /**
    931     * There is meta data associated with the text.
    932     * @see Replaceable::hasMetaData()
    933     * @stable ICU 3.4
    934     */ 
    935    UTEXT_PROVIDER_HAS_META_DATA = 4,
    936    /**
    937     * Text provider owns the text storage.
    938     *  Generally occurs as the result of a deep clone of the UText.
    939     *  When closing the UText, the associated text must
    940     *  also be closed/deleted/freed/ whatever is appropriate.
    941     * @stable ICU 3.6
    942     */
    943     UTEXT_PROVIDER_OWNS_TEXT = 5
    944 };
    945 
    946 /**
    947  * Function type declaration for UText.clone().
    948  *
    949  *  clone a UText.  Much like opening a UText where the source text is itself
    950  *  another UText.
    951  *
    952  *  A deep clone will copy both the UText data structures and the underlying text.
    953  *  The original and cloned UText will operate completely independently; modifications
    954  *  made to the text in one will not affect the other.  Text providers are not
    955  *  required to support deep clones.  The user of clone() must check the status return
    956  *  and be prepared to handle failures.
    957  *
    958  *  A shallow clone replicates only the UText data structures; it does not make
    959  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
    960  *  have multiple iterators active in a single text string that is not being
    961  *  modified.
    962  *
    963  *  A shallow clone operation must not fail except for truly exceptional conditions such
    964  *  as memory allocation failures.
    965  *
    966  *  A UText and its clone may be safely concurrently accessed by separate threads.
    967  *  This is true for both shallow and deep clones.
    968  *  It is the responsibility of the Text Provider to ensure that this thread safety
    969  *  constraint is met.
    970 
    971  *
    972  *  @param dest   A UText struct to be filled in with the result of the clone operation,
    973  *                or NULL if the clone function should heap-allocate a new UText struct.
    974  *  @param src    The UText to be cloned.
    975  *  @param deep   true to request a deep clone, false for a shallow clone.
    976  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
    977  *                should be returned if the text provider is unable to clone the
    978  *                original text.
    979  *  @return       The newly created clone, or NULL if the clone operation failed.
    980  *
    981  * @stable ICU 3.4
    982  */
    983 typedef UText * U_CALLCONV
    984 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
    985 
    986 
    987 /**
    988 * Function type declaration for UText.nativeLength().
    989 *
    990 * @param ut the UText to get the length of.
    991 * @return the length, in the native units of the original text string.
    992 * @see UText
    993 * @stable ICU 3.4
    994 */
    995 typedef int64_t U_CALLCONV
    996 UTextNativeLength(UText *ut);
    997 
    998 /**
    999 * Function type declaration for UText.access().  Get the description of the text chunk
   1000 *  containing the text at a requested native index.  The UText's iteration
   1001 *  position will be left at the requested index.  If the index is out
   1002 *  of bounds, the iteration position will be left at the start or end
   1003 *  of the string, as appropriate.
   1004 *
   1005 *  Chunks must begin and end on code point boundaries.  A single code point
   1006 *  comprised of multiple storage units must never span a chunk boundary.
   1007 *
   1008 *
   1009 * @param ut          the UText being accessed.
   1010 * @param nativeIndex Requested index of the text to be accessed.
   1011 * @param forward     If true, then the returned chunk must contain text
   1012 *                    starting from the index, so that start<=index<limit.
   1013 *                    If false, then the returned chunk must contain text
   1014 *                    before the index, so that start<index<=limit.
   1015 * @return            True if the requested index could be accessed.  The chunk
   1016 *                    will contain the requested text.
   1017 *                    False value if a chunk cannot be accessed
   1018 *                    (the requested index is out of bounds).
   1019 *
   1020 * @see UText
   1021 * @stable ICU 3.4
   1022 */
   1023 typedef UBool U_CALLCONV
   1024 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
   1025 
   1026 /**
   1027 * Function type declaration for UText.extract().
   1028 *
   1029 * Extract text from a UText into a UChar buffer.  The range of text to be extracted
   1030 * is specified in the native indices of the UText provider.  These may not necessarily
   1031 * be UTF-16 indices.
   1032 * <p>
   1033 * The size (number of 16 bit UChars) of the data to be extracted is returned.  The
   1034 * full amount is returned, even when the specified buffer size is smaller.
   1035 * <p>
   1036 * The extracted string will (if you are a user) / must (if you are a text provider)
   1037 * be NUL-terminated if there is sufficient space in the destination buffer.
   1038 *
   1039 * @param  ut            the UText from which to extract data.
   1040 * @param  nativeStart   the native index of the first character to extract.
   1041 * @param  nativeLimit   the native string index of the position following the last
   1042 *                       character to extract.
   1043 * @param  dest          the UChar (UTF-16) buffer into which the extracted text is placed
   1044 * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
   1045 *                       for precomputing the required size.
   1046 * @param  status        receives any error status.
   1047 *                       If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
   1048 *                       preflighting.
   1049 * @return Number of UChars in the data.  Does not include a trailing NUL.
   1050 *
   1051 * @stable ICU 3.4
   1052 */
   1053 typedef int32_t U_CALLCONV
   1054 UTextExtract(UText *ut,
   1055             int64_t nativeStart, int64_t nativeLimit,
   1056             UChar *dest, int32_t destCapacity,
   1057             UErrorCode *status);
   1058 
   1059 /**
   1060 * Function type declaration for UText.replace().
   1061 *
   1062 * Replace a range of the original text with a replacement text.
   1063 *
   1064 * Leaves the current iteration position at the position following the
   1065 *  newly inserted replacement text.
   1066 *
   1067 * This function need only be implemented on UText types that support writing.
   1068 *
   1069 * When using this function, there should be only a single UText opened onto the
   1070 * underlying native text string.  The function is responsible for updating the
   1071 * text chunk within the UText to reflect the updated iteration position,
   1072 * taking into account any changes to the underlying string's structure caused
   1073 * by the replace operation.
   1074 *
   1075 * @param ut               the UText representing the text to be operated on.
   1076 * @param nativeStart      the index of the start of the region to be replaced
   1077 * @param nativeLimit      the index of the character following the region to be replaced.
   1078 * @param replacementText  pointer to the replacement text
   1079 * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
   1080 * @param status           receives any error status.  Possible errors include
   1081 *                         U_NO_WRITE_PERMISSION
   1082 *
   1083 * @return The signed number of (native) storage units by which
   1084 *         the length of the text expanded or contracted.
   1085 *
   1086 * @stable ICU 3.4
   1087 */
   1088 typedef int32_t U_CALLCONV
   1089 UTextReplace(UText *ut,
   1090             int64_t nativeStart, int64_t nativeLimit,
   1091             const UChar *replacementText, int32_t replacmentLength,
   1092             UErrorCode *status);
   1093 
   1094 /**
   1095 * Function type declaration for UText.copy().
   1096 *
   1097 * Copy or move a substring from one position to another within the text,
   1098 * while retaining any metadata associated with the text.
   1099 * This function is used to duplicate or reorder substrings.
   1100 * The destination index must not overlap the source range.
   1101 *
   1102 * The text to be copied or moved is inserted at destIndex;
   1103 * it does not replace or overwrite any existing text.
   1104 *
   1105 * This function need only be implemented for UText types that support writing.
   1106 *
   1107 * When using this function, there should be only a single UText opened onto the
   1108 * underlying native text string.  The function is responsible for updating the
   1109 * text chunk within the UText to reflect the updated iteration position,
   1110 * taking into account any changes to the underlying string's structure caused
   1111 * by the copy or move operation.
   1112 *
   1113 * @param ut           The UText representing the text to be operated on.
   1114 * @param nativeStart  The index of the start of the region to be copied or moved
   1115 * @param nativeLimit  The index of the character following the region to be copied or moved.
   1116 * @param nativeDest   The destination index to which the source substring is copied or moved.
   1117 * @param move         If true, then the substring is moved, not copied/duplicated.
   1118 * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
   1119 *
   1120 * @stable ICU 3.4
   1121 */
   1122 typedef void U_CALLCONV
   1123 UTextCopy(UText *ut,
   1124          int64_t nativeStart, int64_t nativeLimit,
   1125          int64_t nativeDest,
   1126          UBool move,
   1127          UErrorCode *status);
   1128 
   1129 /**
   1130 * Function type declaration for UText.mapOffsetToNative().
   1131 * Map from the current UChar offset within the current text chunk to
   1132 *  the corresponding native index in the original source text.
   1133 *
   1134 * This is required only for text providers that do not use native UTF-16 indexes.
   1135 *
   1136 * @param ut     the UText.
   1137 * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
   1138 *         The returned native index should always be to a code point boundary.
   1139 *
   1140 * @stable ICU 3.4
   1141 */
   1142 typedef int64_t U_CALLCONV
   1143 UTextMapOffsetToNative(const UText *ut);
   1144 
   1145 /**
   1146 * Function type declaration for UText.mapNativeIndexToUTF16().
   1147 * Map from a native index to a UChar offset within a text chunk.
   1148 * Behavior is undefined if the native index does not fall within the
   1149 *   current chunk.
   1150 *
   1151 * This function is required only for text providers that do not use native UTF-16 indexes.
   1152 *
   1153 * @param ut          The UText containing the text chunk.
   1154 * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
   1155 * @return            Chunk-relative UTF-16 offset corresponding to the specified native
   1156 *                    index.
   1157 *
   1158 * @stable ICU 3.4
   1159 */
   1160 typedef int32_t U_CALLCONV
   1161 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
   1162 
   1163 
   1164 /**
   1165 * Function type declaration for UText.close().
   1166 *
   1167 * A Text Provider close function is only required for provider types that make
   1168 *  allocations in their open function (or other functions) that must be 
   1169 *  cleaned when the UText is closed.
   1170 *
   1171 * The allocation of the UText struct itself and any "extra" storage
   1172 * associated with the UText is handled by the common UText implementation
   1173 * and does not require provider specific cleanup in a close function.
   1174 *
   1175 * Most UText provider implementations do not need to implement this function.
   1176 *
   1177 * @param ut A UText object to be closed.
   1178 *
   1179 * @stable ICU 3.4
   1180 */
   1181 typedef void U_CALLCONV
   1182 UTextClose(UText *ut);
   1183 
   1184 
   1185 /**
   1186  *   (public)  Function dispatch table for UText.
   1187  *             Conceptually very much like a C++ Virtual Function Table.
   1188  *             This struct defines the organization of the table.
   1189  *             Each text provider implementation must provide an
   1190  *              actual table that is initialized with the appropriate functions
   1191  *              for the type of text being handled.
   1192  *   @stable ICU 3.6
   1193  */
   1194 struct UTextFuncs {
   1195    /**
   1196     *   (public)  Function table size, sizeof(UTextFuncs)
   1197     *             Intended for use should the table grow to accommodate added
   1198     *             functions in the future, to allow tests for older format
   1199     *             function tables that do not contain the extensions.
   1200     *
   1201     *             Fields are placed for optimal alignment on
   1202     *             32/64/128-bit-pointer machines, by normally grouping together
   1203     *             4 32-bit fields,
   1204     *             4 pointers,
   1205     *             2 64-bit fields
   1206     *             in sequence.
   1207     *   @stable ICU 3.6
   1208     */
   1209    int32_t       tableSize;
   1210 
   1211    /**
   1212      *   (private)  Alignment padding.
   1213      *              Do not use, reserved for use by the UText framework only.
   1214      *   @internal
   1215      */
   1216    int32_t       reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
   1217 
   1218 
   1219    /**
   1220     * (public) Function pointer for UTextClone
   1221     *
   1222     * @see UTextClone
   1223     * @stable ICU 3.6
   1224     */
   1225    UTextClone *clone;
   1226 
   1227    /**
   1228     * (public) Function pointer for UTextNativeLength.
   1229     * May be expensive to compute!
   1230     *
   1231     * @see UTextNativeLength
   1232     * @stable ICU 3.6
   1233     */
   1234    UTextNativeLength *nativeLength;
   1235 
   1236    /**
   1237     * (public) Function pointer for UTextAccess.
   1238     *
   1239     * @see UTextAccess
   1240     * @stable ICU 3.6
   1241     */
   1242    UTextAccess *access;
   1243 
   1244    /**
   1245     * (public) Function pointer for UTextExtract.
   1246     *
   1247     * @see UTextExtract
   1248     * @stable ICU 3.6
   1249     */
   1250    UTextExtract *extract;
   1251 
   1252    /**
   1253     * (public) Function pointer for UTextReplace.
   1254     *
   1255     * @see UTextReplace
   1256     * @stable ICU 3.6
   1257     */
   1258    UTextReplace *replace;
   1259 
   1260    /**
   1261     * (public) Function pointer for UTextCopy.
   1262     *
   1263     * @see UTextCopy
   1264     * @stable ICU 3.6
   1265     */
   1266    UTextCopy *copy;
   1267 
   1268    /**
   1269     * (public) Function pointer for UTextMapOffsetToNative.
   1270     *
   1271     * @see UTextMapOffsetToNative
   1272     * @stable ICU 3.6
   1273     */
   1274    UTextMapOffsetToNative *mapOffsetToNative;
   1275 
   1276    /**
   1277     * (public) Function pointer for UTextMapNativeIndexToUTF16.
   1278     *
   1279     * @see UTextMapNativeIndexToUTF16
   1280     * @stable ICU 3.6
   1281     */
   1282    UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
   1283 
   1284    /**
   1285     * (public) Function pointer for UTextClose.
   1286      *
   1287      * @see UTextClose
   1288      * @stable ICU 3.6
   1289      */
   1290    UTextClose  *close;
   1291 
   1292    /**
   1293      * (private)  Spare function pointer
   1294      * @internal
   1295      */
   1296    UTextClose  *spare1;
   1297    
   1298    /**
   1299      * (private)  Spare function pointer
   1300      * @internal
   1301      */
   1302    UTextClose  *spare2;
   1303 
   1304    /**
   1305      * (private)  Spare function pointer
   1306      * @internal
   1307      */
   1308    UTextClose  *spare3;
   1309 
   1310 };
   1311 /**
   1312 * Function dispatch table for UText
   1313 * @see UTextFuncs
   1314 */
   1315 typedef struct UTextFuncs UTextFuncs;
   1316 
   1317 /**
   1318  *   UText struct.  Provides the interface between the generic UText access code
   1319  *                  and the UText provider code that works on specific kinds of
   1320  *                  text  (UTF-8, noncontiguous UTF-16, whatever.)
   1321  *
   1322  *                  Applications that are using predefined types of text providers
   1323  *                  to pass text data to ICU services will have no need to view the
   1324  *                  internals of the UText structs that they open.
   1325  *
   1326  * @stable ICU 3.6
   1327  */
   1328 struct UText {
   1329    /**
   1330     *     (private)  Magic.  Used to help detect when UText functions are handed
   1331     *                        invalid or uninitialized UText structs.
   1332     *                        utext_openXYZ() functions take an initialized,
   1333     *                        but not necessarily open, UText struct as an
   1334     *                        optional fill-in parameter.  This magic field
   1335     *                        is used to check for that initialization.
   1336     *                        Text provider close functions must NOT clear
   1337     *                        the magic field because that would prevent
   1338     *                        reuse of the UText struct.
   1339     * @internal
   1340     */
   1341    uint32_t       magic;
   1342 
   1343 
   1344    /**
   1345     *     (private)  Flags for managing the allocation and freeing of
   1346     *                memory associated with this UText.
   1347     * @internal
   1348     */
   1349    int32_t        flags;
   1350 
   1351 
   1352    /**
   1353      *  Text provider properties.  This set of flags is maintained by the
   1354      *                             text provider implementation.
   1355      *  @stable ICU 3.4
   1356      */
   1357    int32_t         providerProperties;
   1358 
   1359    /**
   1360     * (public) sizeOfStruct=sizeof(UText)
   1361     * Allows possible backward compatible extension.
   1362     *
   1363     * @stable ICU 3.4
   1364     */
   1365    int32_t         sizeOfStruct;
   1366    
   1367    /* ------ 16 byte alignment boundary -----------  */
   1368    
   1369 
   1370    /**
   1371      *  (protected) Native index of the first character position following
   1372      *              the current chunk.
   1373      *  @stable ICU 3.6
   1374      */
   1375    int64_t         chunkNativeLimit;
   1376 
   1377    /**
   1378     *   (protected)  Size in bytes of the extra space (pExtra).
   1379     *  @stable ICU 3.4
   1380     */
   1381    int32_t        extraSize;
   1382 
   1383    /**
   1384      *    (protected) The highest chunk offset where native indexing and
   1385      *    chunk (UTF-16) indexing correspond.  For UTF-16 sources, value
   1386      *    will be equal to chunkLength.
   1387      *
   1388      *    @stable ICU 3.6
   1389      */
   1390    int32_t         nativeIndexingLimit;
   1391 
   1392    /* ---- 16 byte alignment boundary------ */
   1393    
   1394    /**
   1395     *  (protected) Native index of the first character in the text chunk.
   1396     *  @stable ICU 3.6
   1397     */
   1398    int64_t         chunkNativeStart;
   1399 
   1400    /**
   1401     *  (protected) Current iteration position within the text chunk (UTF-16 buffer).
   1402     *  This is the index to the character that will be returned by utext_next32().
   1403     *  @stable ICU 3.6
   1404     */
   1405    int32_t         chunkOffset;
   1406 
   1407    /**
   1408     *  (protected) Length of the text chunk (UTF-16 buffer), in UChars.
   1409     *  @stable ICU 3.6
   1410     */
   1411    int32_t         chunkLength;
   1412 
   1413    /* ---- 16  byte alignment boundary-- */
   1414    
   1415 
   1416    /**
   1417     *  (protected)  pointer to a chunk of text in UTF-16 format.
   1418     *  May refer either to original storage of the source of the text, or
   1419     *  if conversion was required, to a buffer owned by the UText.
   1420     *  @stable ICU 3.6
   1421     */
   1422    const UChar    *chunkContents;
   1423 
   1424     /**
   1425      * (public)     Pointer to Dispatch table for accessing functions for this UText.
   1426      * @stable ICU 3.6
   1427      */
   1428    const UTextFuncs     *pFuncs;
   1429 
   1430    /**
   1431     *  (protected)  Pointer to additional space requested by the
   1432     *               text provider during the utext_open operation.
   1433     * @stable ICU 3.4
   1434     */
   1435    void          *pExtra;
   1436 
   1437    /**
   1438     * (protected) Pointer to string or text-containing object or similar.
   1439     * This is the source of the text that this UText is wrapping, in a format
   1440     *  that is known to the text provider functions.
   1441     * @stable ICU 3.4
   1442     */
   1443    const void   *context;
   1444 
   1445    /* --- 16 byte alignment boundary--- */
   1446 
   1447    /**
   1448     * (protected) Pointer fields available for use by the text provider.
   1449     * Not used by UText common code.
   1450     * @stable ICU 3.6
   1451     */
   1452    const void     *p; 
   1453    /**
   1454     * (protected) Pointer fields available for use by the text provider.
   1455     * Not used by UText common code.
   1456     * @stable ICU 3.6
   1457     */
   1458    const void     *q;
   1459     /**
   1460     * (protected) Pointer fields available for use by the text provider.
   1461     * Not used by UText common code.
   1462     * @stable ICU 3.6
   1463      */
   1464    const void     *r;
   1465 
   1466    /**
   1467      *  Private field reserved for future use by the UText framework
   1468      *     itself.  This is not to be touched by the text providers.
   1469      * @internal ICU 3.4
   1470      */
   1471    void           *privP;
   1472 
   1473 
   1474    /* --- 16 byte alignment boundary--- */
   1475    
   1476 
   1477    /**
   1478      * (protected) Integer field reserved for use by the text provider.
   1479      * Not used by the UText framework, or by the client (user) of the UText.
   1480      * @stable ICU 3.4
   1481      */
   1482    int64_t         a;
   1483 
   1484    /**
   1485      * (protected) Integer field reserved for use by the text provider.
   1486      * Not used by the UText framework, or by the client (user) of the UText.
   1487      * @stable ICU 3.4
   1488      */
   1489    int32_t         b;
   1490 
   1491    /**
   1492      * (protected) Integer field reserved for use by the text provider.
   1493      * Not used by the UText framework, or by the client (user) of the UText.
   1494      * @stable ICU 3.4
   1495      */
   1496    int32_t         c;
   1497 
   1498    /*  ---- 16 byte alignment boundary---- */
   1499 
   1500 
   1501    /**
   1502      *  Private field reserved for future use by the UText framework
   1503      *     itself.  This is not to be touched by the text providers.
   1504      * @internal ICU 3.4
   1505      */
   1506    int64_t         privA;
   1507    /**
   1508      *  Private field reserved for future use by the UText framework
   1509      *     itself.  This is not to be touched by the text providers.
   1510      * @internal ICU 3.4
   1511      */
   1512    int32_t         privB;
   1513    /**
   1514      *  Private field reserved for future use by the UText framework
   1515      *     itself.  This is not to be touched by the text providers.
   1516      * @internal ICU 3.4
   1517      */
   1518    int32_t         privC;
   1519 };
   1520 
   1521 
   1522 /**
   1523 *  Common function for use by Text Provider implementations to allocate and/or initialize
   1524 *  a UText struct.  Intended to be called from the implementation of utext_open() functions.
   1525 *  If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
   1526 *  If the supplied UText is already open, the provider's close function will be called first,
   1527 *  so that the struct can be reused by the open that is in progress.
   1528 *
   1529 * @param ut   Pointer to a UText struct to be re-used, or null if a new UText
   1530 *             should be allocated.
   1531 * @param extraSpace The amount of additional space to be allocated as part
   1532 *             of this UText, for use by types of providers that require
   1533 *             additional storage (see the pExtra and extraSize fields of UText).
   1534 * @param status Receives any error reported by this function.
   1535 * @return Pointer to the UText, allocated if necessary, with extra space set up if requested.
   1536 * @stable ICU 3.4
   1537 */
   1538 U_CAPI UText * U_EXPORT2
   1539 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
   1540 
   1541 // Do not use #ifndef U_HIDE_INTERNAL_API around the following: UTEXT_INITIALIZER expands to it.
   1542 /**
   1543  * @internal
   1544  *  Value used to help identify correctly initialized UText structs; UTEXT_INITIALIZER uses it as the "magic" value.
   1545  *  Note:  must be publicly visible so that UTEXT_INITIALIZER can access it.
   1546  */
   1547 enum {
   1548    UTEXT_MAGIC = 0x345ad82c
   1549 };
   1550 
   1551 /**
   1552 * Initializer to be used with local (stack) instances of a UText
   1553 *  struct.  UText structs must be initialized before passing
   1554 *  them to one of the utext_open functions.  Sets magic to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); all else is 0 or NULL.
   1555 *  The values are positional, so they must stay in the same order as the fields of struct UText.
   1556 * @stable ICU 3.6
   1557 */
   1558 #define UTEXT_INITIALIZER {                                        \
   1559                  UTEXT_MAGIC,          /* magic                */ \
   1560                  0,                    /* flags                */ \
   1561                  0,                    /* providerProps        */ \
   1562                  sizeof(UText),        /* sizeOfStruct         */ \
   1563                  0,                    /* chunkNativeLimit     */ \
   1564                  0,                    /* extraSize            */ \
   1565                  0,                    /* nativeIndexingLimit  */ \
   1566                  0,                    /* chunkNativeStart     */ \
   1567                  0,                    /* chunkOffset          */ \
   1568                  0,                    /* chunkLength          */ \
   1569                  NULL,                 /* chunkContents        */ \
   1570                  NULL,                 /* pFuncs               */ \
   1571                  NULL,                 /* pExtra               */ \
   1572                  NULL,                 /* context              */ \
   1573                  NULL, NULL, NULL,     /* p, q, r              */ \
   1574                  NULL,                 /* privP                */ \
   1575                  0, 0, 0,              /* a, b, c              */ \
   1576                  0, 0, 0               /* privA,B,C,           */ \
   1577                  }
   1578 
   1579 
   1580 U_CDECL_END
   1581 
   1582 
   1583 #if U_SHOW_CPLUSPLUS_API
   1584 
   1585 U_NAMESPACE_BEGIN
   1586 
   1587 /**
   1588 * \class LocalUTextPointer
   1589 * "Smart pointer" class for a UText; closes it via utext_close().
   1590 * For most methods see the LocalPointerBase base class.
   1591 * Only declared when U_SHOW_CPLUSPLUS_API is enabled.
   1592 * @see LocalPointerBase
   1593 * @see LocalPointer
   1594 * @stable ICU 4.4
   1595 */
   1596 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
   1597 
   1598 U_NAMESPACE_END
   1599 
   1600 #endif
   1601 
   1602 
   1603 #endif