1/* 2 ******************************************************************************* 3 * Copyright (C) 2003-2007, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File prscmnts.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 08/22/2003 ram Creation. 13 ******************************************************************************* 14 */ 15 16#include "unicode/regex.h" 17#include "unicode/unistr.h" 18#include "unicode/parseerr.h" 19#include "prscmnts.h" 20#include <stdio.h> 21#include <stdlib.h> 22 23U_NAMESPACE_USE 24 25#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ 26 27#define MAX_SPLIT_STRINGS 20 28 29const char *patternStrings[UPC_LIMIT]={ 30 "^translate\\s*(.*)", 31 "^note\\s*(.*)" 32}; 33 34U_CFUNC int32_t 35removeText(UChar *source, int32_t srcLen, 36 UnicodeString patString,uint32_t options, 37 UnicodeString replaceText, UErrorCode *status){ 38 39 if(status == NULL || U_FAILURE(*status)){ 40 return 0; 41 } 42 43 UnicodeString src(source, srcLen); 44 45 RegexMatcher myMatcher(patString, src, options, *status); 46 if(U_FAILURE(*status)){ 47 return 0; 48 } 49 UnicodeString dest; 50 51 52 dest = myMatcher.replaceAll(replaceText,*status); 53 54 55 return dest.extract(source, srcLen, *status); 56 57} 58U_CFUNC int32_t 59trim(UChar *src, int32_t srcLen, UErrorCode *status){ 60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines 61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces 62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes 63 return srcLen; 64} 65 66U_CFUNC int32_t 67removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ 68 srcLen = trim(source, srcLen, status); 69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line 70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status); 71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines; 72} 73 74U_CFUNC int32_t 75getText(const UChar* source, int32_t srcLen, 76 UChar** dest, int32_t destCapacity, 77 UnicodeString patternString, 78 UErrorCode* status){ 79 80 if(status == NULL || U_FAILURE(*status)){ 81 return 0; 82 } 83 84 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status); 86 UnicodeString src (source,srcLen); 87 88 if (U_FAILURE(*status)) { 89 return 0; 90 } 91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 92 93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); 94 if (U_FAILURE(*status)) { 95 return 0; 96 } 97 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ 98 matcher.reset(stringArray[i]); 99 if(matcher.lookingAt(*status)){ 100 UnicodeString out = matcher.group(1, *status); 101 102 return out.extract(*dest, destCapacity,*status); 103 } 104 } 105 return 0; 106} 107 108 109#define AT_SIGN 0x0040 110 111U_CFUNC int32_t 112getDescription( const UChar* source, int32_t srcLen, 113 UChar** dest, int32_t destCapacity, 114 UErrorCode* status){ 115 if(status == NULL || U_FAILURE(*status)){ 116 return 0; 117 } 118 119 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); 121 UnicodeString src(source, srcLen); 122 123 if (U_FAILURE(*status)) { 124 return 0; 125 } 126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); 127 128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ 129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); 130 return trim(*dest, destLen, status); 131 } 132 return 0; 133} 134 135U_CFUNC int32_t 136getCount(const UChar* source, int32_t srcLen, 137 UParseCommentsOption option, UErrorCode *status){ 138 139 if(status == NULL || U_FAILURE(*status)){ 140 return 0; 141 } 142 143 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); 145 UnicodeString src (source, srcLen); 146 147 148 if (U_FAILURE(*status)) { 149 return 0; 150 } 151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 152 153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); 154 if (U_FAILURE(*status)) { 155 return 0; 156 } 157 int32_t count = 0; 158 for(int32_t i=0; i<retLen; i++){ 159 matcher.reset(stringArray[i]); 160 if(matcher.lookingAt(*status)){ 161 count++; 162 } 163 } 164 if(option == UPC_TRANSLATE && count > 1){ 165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); 166 exit(U_UNSUPPORTED_ERROR); 167 } 168 return count; 169} 170 171U_CFUNC int32_t 172getAt(const UChar* source, int32_t srcLen, 173 UChar** dest, int32_t destCapacity, 174 int32_t index, 175 UParseCommentsOption option, 176 UErrorCode* status){ 177 178 if(status == NULL || U_FAILURE(*status)){ 179 return 0; 180 } 181 182 UnicodeString stringArray[MAX_SPLIT_STRINGS]; 183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); 184 UnicodeString src (source, srcLen); 185 186 187 if (U_FAILURE(*status)) { 188 return 0; 189 } 190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); 191 192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); 193 if (U_FAILURE(*status)) { 194 return 0; 195 } 196 int32_t count = 0; 197 for(int32_t i=0; i<retLen; i++){ 198 matcher.reset(stringArray[i]); 199 if(matcher.lookingAt(*status)){ 200 if(count == index){ 201 UnicodeString out = matcher.group(1, *status); 202 return out.extract(*dest, destCapacity,*status); 203 } 204 count++; 205 206 } 207 } 208 return 0; 209 210} 211 212U_CFUNC int32_t 213getTranslate( const UChar* source, int32_t srcLen, 214 UChar** dest, int32_t destCapacity, 215 UErrorCode* status){ 216 UnicodeString notePatternString = "^translate\\s*?(.*)"; 217 218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 219 return trim(*dest, destLen, status); 220} 221 222U_CFUNC int32_t 223getNote(const UChar* source, int32_t srcLen, 224 UChar** dest, int32_t destCapacity, 225 UErrorCode* status){ 226 227 UnicodeString notePatternString = "^note\\s*?(.*)"; 228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); 229 return trim(*dest, destLen, status); 230 231} 232 233#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 234 235