tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

idna.h (13239B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010-2012, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  idna.h
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010mar05
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __IDNA_H__
     18 #define __IDNA_H__
     19 
     20 /**
     21 * \file
     22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
     23 */
     24 
     25 #include "unicode/utypes.h"
     26 
     27 #if U_SHOW_CPLUSPLUS_API
     28 
     29 #if !UCONFIG_NO_IDNA
     30 
     31 #include "unicode/bytestream.h"
     32 #include "unicode/stringpiece.h"
     33 #include "unicode/uidna.h"
     34 #include "unicode/unistr.h"
     35 
     36 U_NAMESPACE_BEGIN
     37 
     38 class IDNAInfo;
     39 
     40 /**
     41 * Abstract base class for IDNA processing.
     42 * See http://www.unicode.org/reports/tr46/
     43 * and http://www.ietf.org/rfc/rfc3490.txt
     44 *
     45 * The IDNA class is not intended for public subclassing.
     46 *
     47 * This C++ API currently only implements UTS #46.
     48 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
     49 * and IDNA2003 (functions that do not use a service object).
     50 * @stable ICU 4.6
     51 */
     52 class U_COMMON_API IDNA : public UObject {
     53 public:
     54    /**
     55     * Destructor.
     56     * @stable ICU 4.6
     57     */
     58    ~IDNA();
     59 
     60    /**
     61     * Returns an IDNA instance which implements UTS #46.
     62     * Returns an unmodifiable instance, owned by the caller.
     63     * Cache it for multiple operations, and delete it when done.
     64     * The instance is thread-safe, that is, it can be used concurrently.
     65     *
     66     * UTS #46 defines Unicode IDNA Compatibility Processing,
     67     * updated to the latest version of Unicode and compatible with both
     68     * IDNA2003 and IDNA2008.
     69     *
     70     * The worker functions use transitional processing, including deviation mappings,
     71     * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
     72     * is used in which case the deviation characters are passed through without change.
     73     * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
     74     *
     75     * Disallowed characters are mapped to U+FFFD.
     76     *
     77     * For available options see the uidna.h header.
     78     * Operations with the UTS #46 instance do not support the
     79     * UIDNA_ALLOW_UNASSIGNED option.
     80     *
     81     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
     82     * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
     83     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
     84     *
     85     * @param options Bit set to modify the processing and error checking.
     86     *                These should include UIDNA_DEFAULT, or
     87     *                UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
     88     *                See option bit set values in uidna.h.
     89     * @param errorCode Standard ICU error code. Its input value must
     90     *                  pass the U_SUCCESS() test, or else the function returns
     91     *                  immediately. Check for U_FAILURE() on output or use with
     92     *                  function chaining. (See User Guide for details.)
     93     * @return the UTS #46 IDNA instance, if successful
     94     * @stable ICU 4.6
     95     */
     96    static IDNA *
     97    createUTS46Instance(uint32_t options, UErrorCode &errorCode);
     98 
     99    /**
    100     * Converts a single domain name label into its ASCII form for DNS lookup.
    101     * If any processing step fails, then info.hasErrors() will be true and
    102     * the result might not be an ASCII string.
    103     * The label might be modified according to the types of errors.
    104     * Labels with severe errors will be left in (or turned into) their Unicode form.
    105     *
    106     * The UErrorCode indicates an error only in exceptional cases,
    107     * such as a U_MEMORY_ALLOCATION_ERROR.
    108     *
    109     * @param label Input domain name label
    110     * @param dest Destination string object
    111     * @param info Output container of IDNA processing details.
    112     * @param errorCode Standard ICU error code. Its input value must
    113     *                  pass the U_SUCCESS() test, or else the function returns
    114     *                  immediately. Check for U_FAILURE() on output or use with
    115     *                  function chaining. (See User Guide for details.)
    116     * @return dest
    117     * @stable ICU 4.6
    118     */
    119    virtual UnicodeString &
    120    labelToASCII(const UnicodeString &label, UnicodeString &dest,
    121                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
    122 
    123    /**
    124     * Converts a single domain name label into its Unicode form for human-readable display.
    125     * If any processing step fails, then info.hasErrors() will be true.
    126     * The label might be modified according to the types of errors.
    127     *
    128     * The UErrorCode indicates an error only in exceptional cases,
    129     * such as a U_MEMORY_ALLOCATION_ERROR.
    130     *
    131     * @param label Input domain name label
    132     * @param dest Destination string object
    133     * @param info Output container of IDNA processing details.
    134     * @param errorCode Standard ICU error code. Its input value must
    135     *                  pass the U_SUCCESS() test, or else the function returns
    136     *                  immediately. Check for U_FAILURE() on output or use with
    137     *                  function chaining. (See User Guide for details.)
    138     * @return dest
    139     * @stable ICU 4.6
    140     */
    141    virtual UnicodeString &
    142    labelToUnicode(const UnicodeString &label, UnicodeString &dest,
    143                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
    144 
    145    /**
    146     * Converts a whole domain name into its ASCII form for DNS lookup.
    147     * If any processing step fails, then info.hasErrors() will be true and
    148     * the result might not be an ASCII string.
    149     * The domain name might be modified according to the types of errors.
    150     * Labels with severe errors will be left in (or turned into) their Unicode form.
    151     *
    152     * The UErrorCode indicates an error only in exceptional cases,
    153     * such as a U_MEMORY_ALLOCATION_ERROR.
    154     *
    155     * @param name Input domain name
    156     * @param dest Destination string object
    157     * @param info Output container of IDNA processing details.
    158     * @param errorCode Standard ICU error code. Its input value must
    159     *                  pass the U_SUCCESS() test, or else the function returns
    160     *                  immediately. Check for U_FAILURE() on output or use with
    161     *                  function chaining. (See User Guide for details.)
    162     * @return dest
    163     * @stable ICU 4.6
    164     */
    165    virtual UnicodeString &
    166    nameToASCII(const UnicodeString &name, UnicodeString &dest,
    167                IDNAInfo &info, UErrorCode &errorCode) const = 0;
    168 
    169    /**
    170     * Converts a whole domain name into its Unicode form for human-readable display.
    171     * If any processing step fails, then info.hasErrors() will be true.
    172     * The domain name might be modified according to the types of errors.
    173     *
    174     * The UErrorCode indicates an error only in exceptional cases,
    175     * such as a U_MEMORY_ALLOCATION_ERROR.
    176     *
    177     * @param name Input domain name
    178     * @param dest Destination string object
    179     * @param info Output container of IDNA processing details.
    180     * @param errorCode Standard ICU error code. Its input value must
    181     *                  pass the U_SUCCESS() test, or else the function returns
    182     *                  immediately. Check for U_FAILURE() on output or use with
    183     *                  function chaining. (See User Guide for details.)
    184     * @return dest
    185     * @stable ICU 4.6
    186     */
    187    virtual UnicodeString &
    188    nameToUnicode(const UnicodeString &name, UnicodeString &dest,
    189                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
    190 
    191    // UTF-8 versions of the processing methods (virtual, but not pure) ----- ***
    192 
    193    /**
    194     * Converts a single domain name label into its ASCII form for DNS lookup.
    195     * UTF-8 version of labelToASCII(), same behavior.
    196     *
    197     * @param label Input domain name label
    198     * @param dest Destination byte sink; Flush()ed if successful
    199     * @param info Output container of IDNA processing details.
    200     * @param errorCode Standard ICU error code. Its input value must
    201     *                  pass the U_SUCCESS() test, or else the function returns
    202     *                  immediately. Check for U_FAILURE() on output or use with
    203     *                  function chaining. (See User Guide for details.)
    204     * @note No return value (void); output is written to the dest byte sink.
    205     * @stable ICU 4.6
    206     */
    207    virtual void
    208    labelToASCII_UTF8(StringPiece label, ByteSink &dest,
    209                      IDNAInfo &info, UErrorCode &errorCode) const;
    210 
    211    /**
    212     * Converts a single domain name label into its Unicode form for human-readable display.
    213     * UTF-8 version of labelToUnicode(), same behavior.
    214     *
    215     * @param label Input domain name label
    216     * @param dest Destination byte sink; Flush()ed if successful
    217     * @param info Output container of IDNA processing details.
    218     * @param errorCode Standard ICU error code. Its input value must
    219     *                  pass the U_SUCCESS() test, or else the function returns
    220     *                  immediately. Check for U_FAILURE() on output or use with
    221     *                  function chaining. (See User Guide for details.)
    222     * @note No return value (void); output is written to the dest byte sink.
    223     * @stable ICU 4.6
    224     */
    225    virtual void
    226    labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
    227                       IDNAInfo &info, UErrorCode &errorCode) const;
    228 
    229    /**
    230     * Converts a whole domain name into its ASCII form for DNS lookup.
    231     * UTF-8 version of nameToASCII(), same behavior.
    232     *
    233     * @param name Input domain name
    234     * @param dest Destination byte sink; Flush()ed if successful
    235     * @param info Output container of IDNA processing details.
    236     * @param errorCode Standard ICU error code. Its input value must
    237     *                  pass the U_SUCCESS() test, or else the function returns
    238     *                  immediately. Check for U_FAILURE() on output or use with
    239     *                  function chaining. (See User Guide for details.)
    240     * @note No return value (void); output is written to the dest byte sink.
    241     * @stable ICU 4.6
    242     */
    243    virtual void
    244    nameToASCII_UTF8(StringPiece name, ByteSink &dest,
    245                     IDNAInfo &info, UErrorCode &errorCode) const;
    246 
    247    /**
    248     * Converts a whole domain name into its Unicode form for human-readable display.
    249     * UTF-8 version of nameToUnicode(), same behavior.
    250     *
    251     * @param name Input domain name
    252     * @param dest Destination byte sink; Flush()ed if successful
    253     * @param info Output container of IDNA processing details.
    254     * @param errorCode Standard ICU error code. Its input value must
    255     *                  pass the U_SUCCESS() test, or else the function returns
    256     *                  immediately. Check for U_FAILURE() on output or use with
    257     *                  function chaining. (See User Guide for details.)
    258     * @note No return value (void); output is written to the dest byte sink.
    259     * @stable ICU 4.6
    260     */
    261    virtual void
    262    nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
    263                      IDNAInfo &info, UErrorCode &errorCode) const;
    264 };
    265 
    266 class UTS46;
    267 
    268 /**
    269 * Output container for IDNA processing errors.
    270 * The IDNAInfo class is not suitable for subclassing.
    271 * @stable ICU 4.6
    272 */
    273 class U_COMMON_API IDNAInfo : public UMemory {
    274 public:
    275    /**
    276     * Creates an info object with no errors recorded; suitable for stack allocation.
    277     * @stable ICU 4.6
    278     */
    279    IDNAInfo() { reset(); }
    280    /**
    281     * Reports whether any IDNA processing error has been recorded.
    282     * @return true if at least one processing error bit is set
    283     * @stable ICU 4.6
    284     */
    285    UBool hasErrors() const { return getErrors()!=0; }
    286    /**
    287     * Gives access to the recorded IDNA processing errors as a bit set.
    288     * The individual bits are the UIDNA_ERROR_... constants from uidna.h.
    289     * @return bit set of processing errors
    290     * @stable ICU 4.6
    291     */
    292    uint32_t getErrors() const { return errors; }
    293    /**
    294     * Tells whether transitional and nontransitional processing yield different results.
    295     * They differ when the input label or domain name has at least one
    296     * deviation character outside a Punycode label (see UTS #46):
    297     * <ul>
    298     * <li>Nontransitional processing copies such characters
    299     * to the destination string.
    300     * <li>Transitional processing maps them (sharp s/sigma)
    301     * or removes them (joiner/nonjoiner).
    302     * </ul>
    303     * @return true if the two processing modes produce different results
    304     * @stable ICU 4.6
    305     */
    306    UBool isTransitionalDifferent() const { return isTransDiff; }
    307 
    308 private:
    309    friend class UTS46;
    310 
    311    IDNAInfo(const IDNAInfo &) = delete;             // not copy-constructible
    312    IDNAInfo &operator=(const IDNAInfo &) = delete;  // not copy-assignable
    313    // Puts every field back to the value a newly constructed object has.
    314    void reset() {
    315        errors=0;
    316        labelErrors=0;
    317        isTransDiff=isBiDi=false;
    318        isOkBiDi=true;
    319    }
    320    // Fields below are private; only this class and its UTS46 friend can touch them.
    321    uint32_t errors;
    322    uint32_t labelErrors;
    323    UBool isTransDiff;
    324    UBool isBiDi, isOkBiDi;
    325 };
    326 
    327 U_NAMESPACE_END
    328 
    329 #endif  // UCONFIG_NO_IDNA
    330 
    331 #endif /* U_SHOW_CPLUSPLUS_API */
    332 
    333 #endif  // __IDNA_H__