1/* 2 ******************************************************************************* 3 * Copyright (C) 2003-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File prscmnts.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 08/22/2003 ram Creation. 13 ******************************************************************************* 14 */ 15 16// Safer use of UnicodeString. 17#ifndef UNISTR_FROM_CHAR_EXPLICIT 18# define UNISTR_FROM_CHAR_EXPLICIT explicit 19#endif 20 21// Less important, but still a good idea. 22#ifndef UNISTR_FROM_STRING_EXPLICIT 23# define UNISTR_FROM_STRING_EXPLICIT explicit 24#endif 25 26#include "unicode/regex.h" 27#include "unicode/unistr.h" 28#include "unicode/parseerr.h" 29#include "prscmnts.h" 30#include <stdio.h> 31#include <stdlib.h> 32 33U_NAMESPACE_USE 34 35#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ 36 37#define MAX_SPLIT_STRINGS 20 38 39const char *patternStrings[UPC_LIMIT]={ 40 "^translate\\s*(.*)", 41 "^note\\s*(.*)" 42}; 43 44U_CFUNC int32_t 45removeText(UChar *source, int32_t srcLen, 46 UnicodeString patString,uint32_t options, 47 UnicodeString replaceText, UErrorCode *status){ 48 49 if(status == NULL || U_FAILURE(*status)){ 50 return 0; 51 } 52 53 UnicodeString src(source, srcLen); 54 55 RegexMatcher myMatcher(patString, src, options, *status); 56 if(U_FAILURE(*status)){ 57 return 0; 58 } 59 UnicodeString dest; 60 61 62 dest = myMatcher.replaceAll(replaceText,*status); 63 64 65 return dest.extract(source, srcLen, *status); 66 67} 68U_CFUNC int32_t 69trim(UChar *src, int32_t srcLen, UErrorCode *status){ 70 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines 71 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces 72 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes 73 return srcLen; 74} 75 76U_CFUNC int32_t 77removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ 78 srcLen = trim(source, srcLen, status); 79 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line 80 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); 81 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; 82} 83 84U_CFUNC int32_t 85getText(const UChar* source, int32_t srcLen, 86 UChar** dest, int32_t destCapacity, 87 UnicodeString patternString, 88 UErrorCode* status){ 89 90 if(status == NULL || U_FAILURE(*status)){ 91 return 0; 92 } 93 94 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 95 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); 96 UnicodeString src (source,srcLen); 97 98 if (U_FAILURE(*status)) { 99 return 0; 100 } 101 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 102 103 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 104 if (U_FAILURE(*status)) { 105 return 0; 106 } 107 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ 108 matcher.reset(stringArray[i]); 109 if(matcher.lookingAt(*status)){ 110 UnicodeString out = matcher.group(1, *status); 111 112 return out.extract(*dest, destCapacity,*status); 113 } 114 } 115 return 0; 116} 117 118 119#define AT_SIGN 0x0040 120 121U_CFUNC int32_t 122getDescription( const UChar* source, int32_t srcLen, 123 UChar** dest, int32_t destCapacity, 124 UErrorCode* status){ 125 if(status == NULL || U_FAILURE(*status)){ 126 return 0; 127 } 128 129 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 130 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 131 UnicodeString src(source, srcLen); 132 133 if (U_FAILURE(*status)) { 134 return 0; 135 } 136 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); 137 138 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ 139 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); 140 return trim(*dest, destLen, status); 141 } 142 return 0; 143} 144 145U_CFUNC int32_t 146getCount(const UChar* source, int32_t srcLen, 147 UParseCommentsOption option, UErrorCode *status){ 148 149 if(status == NULL || U_FAILURE(*status)){ 150 return 0; 151 } 152 153 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 154 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 155 UnicodeString src (source, srcLen); 156 157 158 if (U_FAILURE(*status)) { 159 return 0; 160 } 161 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 162 163 UnicodeString patternString(patternStrings[option]); 164 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 165 if (U_FAILURE(*status)) { 166 return 0; 167 } 168 int32_t count = 0; 169 for(int32_t i=0; i<retLen; i++){ 170 matcher.reset(stringArray[i]); 171 if(matcher.lookingAt(*status)){ 172 count++; 173 } 174 } 175 if(option == UPC_TRANSLATE && count > 1){ 176 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); 177 exit(U_UNSUPPORTED_ERROR); 178 } 179 return count; 180} 181 182U_CFUNC int32_t 183getAt(const UChar* source, int32_t srcLen, 184 UChar** dest, int32_t destCapacity, 185 int32_t index, 186 UParseCommentsOption option, 187 UErrorCode* status){ 188 189 if(status == NULL || U_FAILURE(*status)){ 190 return 0; 191 } 192 193 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 194 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); 195 UnicodeString src (source, srcLen); 196 197 198 if (U_FAILURE(*status)) { 199 return 0; 200 } 201 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 202 203 UnicodeString patternString(patternStrings[option]); 204 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 205 if (U_FAILURE(*status)) { 206 return 0; 207 } 208 int32_t count = 0; 209 for(int32_t i=0; i<retLen; i++){ 210 matcher.reset(stringArray[i]); 211 if(matcher.lookingAt(*status)){ 212 if(count == index){ 213 UnicodeString out = matcher.group(1, *status); 214 return out.extract(*dest, destCapacity,*status); 215 } 216 count++; 217 218 } 219 } 220 return 0; 221 222} 223 224U_CFUNC int32_t 225getTranslate( const UChar* source, int32_t srcLen, 226 UChar** dest, int32_t destCapacity, 227 UErrorCode* status){ 228 UnicodeString notePatternString("^translate\\s*?(.*)"); 229 230 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 231 return trim(*dest, destLen, status); 232} 233 234U_CFUNC int32_t 235getNote(const UChar* source, int32_t srcLen, 236 UChar** dest, int32_t destCapacity, 237 UErrorCode* status){ 238 239 UnicodeString notePatternString("^note\\s*?(.*)"); 240 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 241 return trim(*dest, destLen, status); 242 243} 244 245#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 246 247