1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (C) 2008-2009, International Business Machines Corporation 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *************************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * file name: uspoof_build.cpp 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * encoding: US-ASCII 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * tab size: 8 (not used) 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * indentation:4 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * created on: 2008 Dec 8 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * created by: Andy Heninger 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Unicode Spoof Detection Data Builder 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Builder-related functions are kept in separate files so that applications not needing 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the builder can more easily exclude them, typically by means of static linking. 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * There are three relatively independent sets of Spoof data, 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Confusables, 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Whole Script Confusables 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ID character extensions. 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The data tables for each are built separately, each from its own definitions 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uspoof.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unorm.h" 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uregex.h" 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h" 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uspoof_impl.h" 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uhash.h" 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h" 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uassert.h" 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uarrsort.h" 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uspoof_conf.h" 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uspoof_wsconf.h" 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_USE 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The main data building function 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI USpoofChecker * U_EXPORT2 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uspoof_openFromSource(const char *confusables, int32_t confusablesLen, 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *errorType, UParseError *pe, UErrorCode *status) { 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if UCONFIG_NO_REGULAR_EXPRESSIONS 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_UNSUPPORTED_ERROR; 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (errorType!=NULL) { 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *errorType = 0; 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pe != NULL) { 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pe->line = 0; 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pe->offset = 0; 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pe->preContext[0] = 0; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pe->postContext[0] = 0; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Set up a shell of a spoof detector, with empty data. 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpoofData *newSpoofData = new SpoofData(*status); 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpoofImpl *This = new SpoofImpl(newSpoofData, *status); 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Compile the binary data from the source (text) format. 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status); 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete This; 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This = NULL; 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (USpoofChecker *)This; 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif // UCONFIG_NO_REGULAR_EXPRESSIONS 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 86