tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

prscmnts.cpp (7472B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2003-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *
      9 * File prscmnts.cpp
     10 *
     11 * Modification History:
     12 *
     13 *   Date          Name        Description
     14 *   08/22/2003    ram         Creation.
     15 *******************************************************************************
     16 */
     17 
     18 // Safer use of UnicodeString.
     19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     20 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     21 #endif
     22 
     23 // Less important, but still a good idea.
     24 #ifndef UNISTR_FROM_STRING_EXPLICIT
     25 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     26 #endif
     27 
     28 #include "unicode/regex.h"
     29 #include "unicode/unistr.h"
     30 #include "unicode/parseerr.h"
     31 #include "prscmnts.h"
     32 #include <stdio.h>
     33 #include <stdlib.h>
     34 
     35 U_NAMESPACE_USE
     36 
     37 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
     38 
     39 #define MAX_SPLIT_STRINGS 20
     40 
     41 const char *patternStrings[UPC_LIMIT]={
     42    "^translate\\s*(.*)",
     43    "^note\\s*(.*)"
     44 };
     45 
     46 U_CFUNC int32_t 
     47 removeText(char16_t *source, int32_t srcLen,
     48           UnicodeString patString,uint32_t options,  
     49           UnicodeString replaceText, UErrorCode *status){
     50 
     51    if(status == nullptr || U_FAILURE(*status)){
     52        return 0;
     53    }
     54 
     55    UnicodeString src(source, srcLen);
     56 
     57    RegexMatcher    myMatcher(patString, src, options, *status);
     58    if(U_FAILURE(*status)){
     59        return 0;
     60    }
     61    UnicodeString dest;
     62 
     63 
     64    dest = myMatcher.replaceAll(replaceText,*status);
     65    
     66    
     67    return dest.extract(source, srcLen, *status);
     68 
     69 }
     70 U_CFUNC int32_t
     71 trim(char16_t *src, int32_t srcLen, UErrorCode *status){
     72     srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
     73     srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
     74     srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remove trailing spcaes
     75     return srcLen;
     76 }
     77 
     78 U_CFUNC int32_t 
     79 removeCmtText(char16_t* source, int32_t srcLen, UErrorCode* status){
     80    srcLen = trim(source, srcLen, status);
     81    UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the beginning of the line
     82    srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
     83    return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
     84 }
     85 
     86 U_CFUNC int32_t 
     87 getText(const char16_t* source, int32_t srcLen,
     88        char16_t** dest, int32_t destCapacity,
     89        UnicodeString patternString, 
     90        UErrorCode* status){
     91    
     92    if(status == nullptr || U_FAILURE(*status)){
     93        return 0;
     94    }
     95 
     96    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
     97    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
     98    UnicodeString src (source,srcLen);
     99    
    100    if (U_FAILURE(*status)) {
    101        return 0;
    102    }
    103    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    104    
    105    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    106    if (U_FAILURE(*status)) {
    107        return 0;
    108    }
    109    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
    110        matcher.reset(stringArray[i]);
    111        if(matcher.lookingAt(*status)){
    112            UnicodeString out = matcher.group(1, *status);
    113 
    114            return out.extract(*dest, destCapacity,*status);
    115        }
    116    }
    117    return 0;
    118 }
    119 
    120 
    121 #define AT_SIGN  0x0040
    122 
    123 U_CFUNC int32_t
    124 getDescription( const char16_t* source, int32_t srcLen,
    125                char16_t** dest, int32_t destCapacity,
    126                UErrorCode* status){
    127    if(status == nullptr || U_FAILURE(*status)){
    128        return 0;
    129    }
    130 
    131    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    132    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    133    UnicodeString src(source, srcLen);
    134    
    135    if (U_FAILURE(*status)) {
    136        return 0;
    137    }
    138    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
    139 
    140    if(stringArray[0].indexOf((char16_t)AT_SIGN)==-1){
    141        int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
    142        return trim(*dest, destLen, status);
    143    }
    144    return 0;
    145 }
    146 
    147 U_CFUNC int32_t
    148 getCount(const char16_t* source, int32_t srcLen,
    149         UParseCommentsOption option, UErrorCode *status){
    150    
    151    if(status == nullptr || U_FAILURE(*status)){
    152        return 0;
    153    }
    154 
    155    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    156    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    157    UnicodeString src (source, srcLen);
    158 
    159 
    160    if (U_FAILURE(*status)) {
    161        return 0;
    162    }
    163    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    164    
    165    UnicodeString patternString(patternStrings[option]);
    166    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    167    if (U_FAILURE(*status)) {
    168        return 0;
    169    } 
    170    int32_t count = 0;
    171    for(int32_t i=0; i<retLen; i++){
    172        matcher.reset(stringArray[i]);
    173        if(matcher.lookingAt(*status)){
    174            count++;
    175        }
    176    }
    177    if(option == UPC_TRANSLATE && count > 1){
    178        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
    179        exit(U_UNSUPPORTED_ERROR);
    180    }
    181    return count;
    182 }
    183 
    184 U_CFUNC int32_t 
    185 getAt(const char16_t* source, int32_t srcLen,
    186        char16_t** dest, int32_t destCapacity,
    187        int32_t index,
    188        UParseCommentsOption option,
    189        UErrorCode* status){
    190 
    191    if(status == nullptr || U_FAILURE(*status)){
    192        return 0;
    193    }
    194 
    195    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    196    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    197    UnicodeString src (source, srcLen);
    198 
    199 
    200    if (U_FAILURE(*status)) {
    201        return 0;
    202    }
    203    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    204    
    205    UnicodeString patternString(patternStrings[option]);
    206    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    207    if (U_FAILURE(*status)) {
    208        return 0;
    209    } 
    210    int32_t count = 0;
    211    for(int32_t i=0; i<retLen; i++){
    212        matcher.reset(stringArray[i]);
    213        if(matcher.lookingAt(*status)){
    214            if(count == index){
    215                UnicodeString out = matcher.group(1, *status);
    216                return out.extract(*dest, destCapacity,*status);
    217            }
    218            count++;
    219            
    220        }
    221    }
    222    return 0;
    223 
    224 }
    225 
    226 U_CFUNC int32_t
    227 getTranslate( const char16_t* source, int32_t srcLen,
    228              char16_t** dest, int32_t destCapacity,
    229              UErrorCode* status){
    230    UnicodeString     notePatternString("^translate\\s*?(.*)");
    231    
    232    int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
    233    return trim(*dest, destLen, status);
    234 }
    235 
    236 U_CFUNC int32_t 
    237 getNote(const char16_t* source, int32_t srcLen,
    238        char16_t** dest, int32_t destCapacity,
    239        UErrorCode* status){
    240 
    241    UnicodeString     notePatternString("^note\\s*?(.*)");
    242    int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
    243    return trim(*dest, destLen, status);
    244 
    245 }
    246 
    247 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */