tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucnv_err.h (21486B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1999-2009, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *
      9 *
     10 *   ucnv_err.h:
     11 */
     12 
     13 /**
     14 * \file
     15 * \brief C API: UConverter predefined error callbacks
     16 *
     17 *  <h2>Error Behaviour Functions</h2>
     18 *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
     19 *  These are provided as part of ICU and many are stable, but they
     20 *  can also be considered only as an example of what can be done with
     21 *  callbacks.  You may of course write your own.
     22 *
     23 *  If you want to write your own, you may also find the functions from
     24 *  ucnv_cb.h useful when writing your own callbacks.
     25 *
     26 *  These functions, although public, should NEVER be called directly.
     27 *  They should be used as parameters to the ucnv_setFromUCallBack
     28 *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
     29 *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
     30 *
     31 *  usage example:  'STOP' doesn't need any context, but newContext
     32 *    could be set to something other than 'NULL' if needed. The available
     33 *    contexts in this header can modify the default behavior of the callback.
     34 *
     35 *  \code
     36 *  UErrorCode err = U_ZERO_ERROR;
     37 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
     38 *  const void *oldContext;
     39 *  UConverterFromUCallback oldAction;
     40 *
     41 *
     42 *  if (U_SUCCESS(err))
     43 *  {
     44 *      ucnv_setFromUCallBack(myConverter,
     45 *                       UCNV_FROM_U_CALLBACK_STOP,
     46 *                       NULL,
     47 *                       &oldAction,
     48 *                       &oldContext,
     49 *                       &err);
     50 *  }
     51 *  \endcode
     52 *
     53 *  The code above tells "myConverter" to stop when it encounters an
     54 *  ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
     55 *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
     56 *  and ucnv_setToUCallBack would need to be called in order to change
     57 *  that behavior too.
     58 *
     59 *  Here is an example with a context:
     60 *
     61 *  \code
     62 *  UErrorCode err = U_ZERO_ERROR;
     63 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
     64 *  const void *oldContext;
     65 *  UConverterToUCallback oldAction;
     66 *
     67 *
     68 *  if (U_SUCCESS(err))
     69 *  {
     70 *      ucnv_setToUCallBack(myConverter,
     71 *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
     72 *                       UCNV_SUB_STOP_ON_ILLEGAL,
     73 *                       &oldAction,
     74 *                       &oldContext,
     75 *                       &err);
     76 *  }
     77 *  \endcode
     78 *
     79 *  The code above tells "myConverter" to stop when it encounters an
     80 *  ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from
     81 *  Codepage -> Unicode. Any unmapped but legal characters will be
     82 *  replaced with the default substitution character.
     83 */
     84 
     85 #ifndef UCNV_ERR_H
     86 #define UCNV_ERR_H
     87 
     88 #include "unicode/utypes.h"
     89 
     90 #if !UCONFIG_NO_CONVERSION
     91 
     92 /** Forward declaring the UConverter structure. @stable ICU 2.0 */
     93 struct UConverter;
     94 
     95 /** Convenience typedef for struct UConverter. @stable ICU 2.0 */
     96 typedef struct UConverter UConverter;
     97 
     98 /**
     99 * FROM_U, TO_U context option for the substitute callbacks: stop at an illegal sequence instead of substituting it
    100 * @stable ICU 2.0
    101 */
    102 #define UCNV_SUB_STOP_ON_ILLEGAL "i"
    103 
    104 /**
    105 * FROM_U, TO_U context option for the skip callbacks: stop at an illegal sequence instead of skipping it
    106 * @stable ICU 2.0
    107 */
    108 #define UCNV_SKIP_STOP_ON_ILLEGAL "i"
    109 
    110 /**
    111 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
    112 * @stable ICU 2.0
    113 */
    114 #define UCNV_ESCAPE_ICU       NULL
    115 /**
    116 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
    117 * @stable ICU 2.0
    118 */
    119 #define UCNV_ESCAPE_JAVA      "J"
    120 /**
    121 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
    122 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
    123 * @stable ICU 2.0
    124 */
    125 #define UCNV_ESCAPE_C         "C"
    126 /**
    127 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
    128 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
    129 * @stable ICU 2.0
    130 */
    131 #define UCNV_ESCAPE_XML_DEC   "D"
    132 /**
    133 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
    134 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
    135 * @stable ICU 2.0
    136 */
    137 #define UCNV_ESCAPE_XML_HEX   "X"
    138 /**
    139 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
    140 * @stable ICU 2.0
    141 */
    142 #define UCNV_ESCAPE_UNICODE   "U"
    143 
    144 /**
    145 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
    146 * a backslash, 1..6 hex digits, and a space)
    147 * @stable ICU 4.0
    148 */
    149 #define UCNV_ESCAPE_CSS2   "S"
    150 
    151 /**
    152 * The process condition code to be used with the callbacks.
    153 * Codes which are greater than UCNV_IRREGULAR should be
    154 * passed on to any chained callbacks.
    155 * @stable ICU 2.0
    156 */
    157 typedef enum {
    158    UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
    159                             The error code U_INVALID_CHAR_FOUND will be set. */
    160    UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example,
    161                             \\x81\\x2E is illegal in SJIS because \\x2E
    162                             is not a valid trail byte for the \\x81
    163                             lead byte.
    164                             Also, starting with Unicode 3.0.1, non-shortest byte sequences
    165                             in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
    166                             are also illegal, not just irregular.
    167                             The error code U_ILLEGAL_CHAR_FOUND will be set. */
    168    UCNV_IRREGULAR = 2,   /**< The codepoint is not a regular sequence in
    169                             the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
    170                             are irregular UTF-8 byte sequences for single surrogate
    171                             code points.
    172                             The error code U_INVALID_CHAR_FOUND will be set. */
    173    UCNV_RESET = 3,       /**< The callback is called with this reason when a
    174                             'reset' has occurred. Callback should reset all
    175                             state. */
    176    UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
    177                             callback should release any allocated memory.*/
    178    UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
    179                              converter. The pointer available as the
    180                              'context' is an alias to the original converter's
    181                              context pointer. If the context must be owned
    182                              by the new converter, the callback must clone
    183                              the data and call ucnv_setFromUCallBack
    184                              (or ucnv_setToUCallBack) with the correct pointer.
    185                              @stable ICU 2.2
    186                           */
    187 } UConverterCallbackReason;
    188 
    189 
    190 /**
    191 * The structure for the fromUnicode callback function parameter.
    192 * @stable ICU 2.0
    193 */
    194 typedef struct {
    195    uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
    196    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0    */
    197    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
    198    const UChar *source;        /**< Pointer to the source buffer. @stable ICU 2.0    */
    199    const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
    200    char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
    201    const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
    202    int32_t *offsets;           /**< Pointer to the buffer that receives the offsets (usage pattern: *offsets = value; offsets++;). @stable ICU 2.0  */
    203 } UConverterFromUnicodeArgs;
    204 
    205 
    206 /**
    207 * The structure for the toUnicode callback function parameter.
    208 * @stable ICU 2.0
    209 */
    210 typedef struct {
    211    uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
    212    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0   */
    213    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
    214    const char *source;         /**< Pointer to the source buffer. @stable ICU 2.0    */
    215    const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
    216    UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
    217    const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
    218    int32_t *offsets;           /**< Pointer to the buffer that receives the offsets (usage pattern: *offsets = value; offsets++;). @stable ICU 2.0  */
    219 } UConverterToUnicodeArgs;
    220 
    221 
    222 /**
    223 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
    224 * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
    225 * returning the error code back to the caller immediately.
    226 *
    227 * @param context Pointer to the callback's private data
    228 * @param fromUArgs Information about the conversion in progress
    229 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
    230 * @param length Size (in UChars) of the concerned Unicode sequence
    231 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
    232 * @param reason Defines the reason the callback was invoked
    233 * @param err This should always be set to a failure status prior to calling.
    234 * @stable ICU 2.0
    235 */
    236 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
    237                  const void *context,
    238                  UConverterFromUnicodeArgs *fromUArgs,
    239                  const UChar* codeUnits,
    240                  int32_t length,
    241                  UChar32 codePoint,
    242                  UConverterCallbackReason reason,
    243                  UErrorCode * err);
    244 
    245 
    246 
    247 /**
    248 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
    249 * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
    250 * returning the error code back to the caller immediately.
    251 *
    252 * @param context Pointer to the callback's private data
    253 * @param toUArgs Information about the conversion in progress
    254 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
    255 * @param length Size (in bytes) of the concerned codepage sequence
    256 * @param reason Defines the reason the callback was invoked
    257 * @param err This should always be set to a failure status prior to calling.
    258 * @stable ICU 2.0
    259 */
    260 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
    261                  const void *context,
    262                  UConverterToUnicodeArgs *toUArgs,
    263                  const char* codeUnits,
    264                  int32_t length,
    265                  UConverterCallbackReason reason,
    266                  UErrorCode * err);
    267 
    268 /**
    269 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
    270 * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
    271 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
    272 * simply ignoring those characters.
    273 *
    274 * @param context  The function currently recognizes the callback options:
    275 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
    276 *                      returning the error code back to the caller immediately.
    277 *                 NULL: Skips any ILLEGAL_SEQUENCE
    278 * @param fromUArgs Information about the conversion in progress
    279 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
    280 * @param length Size (in UChars) of the concerned Unicode sequence
    281 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
    282 * @param reason Defines the reason the callback was invoked
    283 * @param err Return value will be set to success if the callback was handled,
    284 *      otherwise this value will be set to a failure status.
    285 * @stable ICU 2.0
    286 */
    287 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
    288                  const void *context,
    289                  UConverterFromUnicodeArgs *fromUArgs,
    290                  const UChar* codeUnits,
    291                  int32_t length,
    292                  UChar32 codePoint,
    293                  UConverterCallbackReason reason,
    294                  UErrorCode * err);
    295 
    296 /**
    297 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
    298 * This From Unicode callback will substitute the ILLEGAL SEQUENCE, or
    299 * UNASSIGNED_SEQUENCE depending on the context parameter, with the
    300 * current substitution string for the converter. This is the default
    301 * callback.
    302 *
    303 * @param context The function currently recognizes the callback options:
    304 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
    305 *                      returning the error code back to the caller immediately.
    306 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
    307 * @param fromUArgs Information about the conversion in progress
    308 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
    309 * @param length Size (in UChars) of the concerned Unicode sequence
    310 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
    311 * @param reason Defines the reason the callback was invoked
    312 * @param err Return value will be set to success if the callback was handled,
    313 *      otherwise this value will be set to a failure status.
    314 * @see ucnv_setSubstChars
    315 * @stable ICU 2.0
    316 */
    317 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
    318                  const void *context,
    319                  UConverterFromUnicodeArgs *fromUArgs,
    320                  const UChar* codeUnits,
    321                  int32_t length,
    322                  UChar32 codePoint,
    323                  UConverterCallbackReason reason,
    324                  UErrorCode * err);
    325 
    326 /**
    327 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
    328 * This From Unicode callback will substitute the ILLEGAL SEQUENCE with an
    329 * escaped numeric representation of the illegal codepoints
    330 *
    331 * @param context The function currently recognizes the callback options:
    332 *        <ul>
    333 *        <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
    334 *          representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
    335 *          In the event the converter doesn't support the characters {%,U}[A-F][0-9],
    336 *          it will substitute the illegal sequence with the substitution characters.
    337 *          Note that a supplementary code point (e.g. U+23456, a surrogate pair in UTF-16) is represented as
    338 *          %UD84D%UDC56</li>
    339 *        <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
    340 *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
    341 *          In the event the converter doesn't support the characters {\,u}[A-F][0-9],
    342 *          it will substitute the illegal sequence with the substitution characters.
    343 *          Note that a supplementary code point (e.g. U+23456, a surrogate pair in UTF-16) is represented as
    344 *          \\uD84D\\uDC56</li>
    345 *        <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
    346 *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
    347 *          In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
    348 *          it will substitute the illegal sequence with the substitution characters.
    349 *          Note that a supplementary code point (e.g. U+23456, a surrogate pair in UTF-16) is represented as
    350 *          \\U00023456</li>
    351 *        <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
    352 *          representation in the format \htmlonly&amp;#DDDDDDDD; (e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
    353 *          In the event the converter doesn't support the characters {&amp;,#}[0-9],
    354 *          it will substitute the illegal sequence with the substitution characters.
    355 *          Note that a supplementary code point (e.g. U+23456, a surrogate pair in UTF-16) is represented as
    356 *          &amp;#144470; and zero padding is ignored.</li>
    357 *        <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
    358 *          representation in the format \htmlonly&amp;#xXXXX; (e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
    359 *          In the event the converter doesn't support the characters {&amp;,#,x}[A-F][0-9],
    360 *          it will substitute the illegal sequence with the substitution characters.
    361 *          Note that a supplementary code point (e.g. U+23456, a surrogate pair in UTF-16) is represented as
    362 *          \htmlonly&amp;#x23456;\endhtmlonly</li>
    363 *        </ul>
    364 * @param fromUArgs Information about the conversion in progress
    365 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
    366 * @param length Size (in UChars) of the concerned Unicode sequence
    367 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
    368 * @param reason Defines the reason the callback was invoked
    369 * @param err Return value will be set to success if the callback was handled,
    370 *      otherwise this value will be set to a failure status.
    371 * @stable ICU 2.0
    372 */
    373 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
    374                  const void *context,
    375                  UConverterFromUnicodeArgs *fromUArgs,
    376                  const UChar* codeUnits,
    377                  int32_t length,
    378                  UChar32 codePoint,
    379                  UConverterCallbackReason reason,
    380                  UErrorCode * err);
    381 
    382 
    383 /**
    384 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
    385 * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
    386 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
    387 * simply ignoring those characters.
    388 *
    389 * @param context  The function currently recognizes the callback options:
    390 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
    391 *                      returning the error code back to the caller immediately.
    392 *                 NULL: Skips any ILLEGAL_SEQUENCE
    393 * @param toUArgs Information about the conversion in progress
    394 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
    395 * @param length Size (in bytes) of the concerned codepage sequence
    396 * @param reason Defines the reason the callback was invoked
    397 * @param err Return value will be set to success if the callback was handled,
    398 *      otherwise this value will be set to a failure status.
    399 * @stable ICU 2.0
    400 */
    401 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
    402                  const void *context,
    403                  UConverterToUnicodeArgs *toUArgs,
    404                  const char* codeUnits,
    405                  int32_t length,
    406                  UConverterCallbackReason reason,
    407                  UErrorCode * err);
    408 
    409 /**
    410 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
    411 * This To Unicode callback will substitute the ILLEGAL SEQUENCE, or
    412 * UNASSIGNED_SEQUENCE depending on the context parameter, with the
    413 * Unicode substitution character, U+FFFD.
    414 *
    415 * @param context  The function currently recognizes the callback options:
    416 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
    417 *                      returning the error code back to the caller immediately.
    418 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
    419 * @param toUArgs Information about the conversion in progress
    420 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
    421 * @param length Size (in bytes) of the concerned codepage sequence
    422 * @param reason Defines the reason the callback was invoked
    423 * @param err Return value will be set to success if the callback was handled,
    424 *      otherwise this value will be set to a failure status.
    425 * @stable ICU 2.0
    426 */
    427 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
    428                  const void *context,
    429                  UConverterToUnicodeArgs *toUArgs,
    430                  const char* codeUnits,
    431                  int32_t length,
    432                  UConverterCallbackReason reason,
    433                  UErrorCode * err);
    434 
    435 /**
    436 * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
    437 * This To Unicode callback will substitute the ILLEGAL SEQUENCE with the
    438 * hexadecimal representation of the illegal bytes
    439 *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
    440 *
    441 * @param context This function currently recognizes the callback options:
    442 *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
    443 *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
    444 * @param toUArgs Information about the conversion in progress
    445 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
    446 * @param length Size (in bytes) of the concerned codepage sequence
    447 * @param reason Defines the reason the callback was invoked
    448 * @param err Return value will be set to success if the callback was handled,
    449 *      otherwise this value will be set to a failure status.
    450 * @stable ICU 2.0
    451 */
    452 
    453 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
    454                  const void *context,
    455                  UConverterToUnicodeArgs *toUArgs,
    456                  const char* codeUnits,
    457                  int32_t length,
    458                  UConverterCallbackReason reason,
    459                  UErrorCode * err);
    460 
    461 #endif
    462 
    463 #endif
    464 
    465 /*UCNV_ERR_H*/