uspoof_build.cpp (3278B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 *************************************************************************** 5 * Copyright (C) 2008-2015, International Business Machines Corporation 6 * and others. All Rights Reserved. 7 *************************************************************************** 8 * file name: uspoof_build.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2008 Dec 8 14 * created by: Andy Heninger 15 * 16 * Unicode Spoof Detection Data Builder 17 * Builder-related functions are kept in separate files so that applications not needing 18 * the builder can more easily exclude them, typically by means of static linking. 19 * 20 * There are three relatively independent sets of Spoof data, 21 * Confusables, 22 * Whole Script Confusables 23 * ID character extensions. 24 * 25 * The data tables for each are built separately, each from its own definitions 26 */ 27 28 #include "unicode/utypes.h" 29 #include "unicode/uspoof.h" 30 #include "unicode/unorm.h" 31 #include "unicode/uregex.h" 32 #include "unicode/ustring.h" 33 #include "cmemory.h" 34 #include "uspoof_impl.h" 35 #include "uhash.h" 36 #include "uvector.h" 37 #include "uassert.h" 38 #include "uarrsort.h" 39 #include "uspoof_conf.h" 40 41 #if !UCONFIG_NO_NORMALIZATION 42 43 U_NAMESPACE_USE 44 45 // Defined in uspoof.cpp, initializes file-static variables. 46 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status); 47 48 // The main data building function 49 50 U_CAPI USpoofChecker * U_EXPORT2 51 uspoof_openFromSource(const char *confusables, int32_t confusablesLen, 52 const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/, 53 int32_t *errorType, UParseError *pe, UErrorCode *status) { 54 uspoof_internalInitStatics(status); 55 if (U_FAILURE(*status)) { 56 return nullptr; 57 } 58 #if UCONFIG_NO_REGULAR_EXPRESSIONS 59 *status = U_UNSUPPORTED_ERROR; 60 return nullptr; 61 #else 62 if (errorType!=nullptr) { 63 *errorType = 0; 64 } 65 if (pe != nullptr) { 66 pe->line = 0; 67 pe->offset = 0; 68 pe->preContext[0] = 0; 69 pe->postContext[0] = 0; 70 } 71 72 // Set up a shell of a spoof detector, with empty data. 73 SpoofData *newSpoofData = new SpoofData(*status); 74 75 if (newSpoofData == nullptr) { 76 *status = U_MEMORY_ALLOCATION_ERROR; 77 return nullptr; 78 } 79 80 if (U_FAILURE(*status)) { 81 delete newSpoofData; 82 return nullptr; 83 } 84 SpoofImpl *This = new SpoofImpl(newSpoofData, *status); 85 86 if (This == nullptr) { 87 *status = U_MEMORY_ALLOCATION_ERROR; 88 delete newSpoofData; // explicit delete as the destructor for SpoofImpl won't be called. 89 return nullptr; 90 } 91 92 if (U_FAILURE(*status)) { 93 delete This; // no delete for newSpoofData, as the SpoofImpl destructor will delete it. 94 return nullptr; 95 } 96 97 // Compile the binary data from the source (text) format. 98 ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); 99 100 if (U_FAILURE(*status)) { 101 delete This; 102 This = nullptr; 103 } 104 return (USpoofChecker *)This; 105 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS 106 } 107 108 #endif