1/*
2 *******************************************************************************
3 *   Copyright (C) 2003-2007, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *
7 * File prscmnts.cpp
8 *
9 * Modification History:
10 *
11 *   Date          Name        Description
12 *   08/22/2003    ram         Creation.
13 *******************************************************************************
14 */
15
16#include "unicode/regex.h"
17#include "unicode/unistr.h"
18#include "unicode/parseerr.h"
19#include "prscmnts.h"
20#include <stdio.h>
21#include <stdlib.h>
22
23U_NAMESPACE_USE
24
25#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
26
27#define MAX_SPLIT_STRINGS 20
28
/*
 * Regular expressions used to recognise a comment tag at the very start of a
 * text segment (the segments are produced by splitting a comment on '@').
 * The table is indexed by a UParseCommentsOption value and has UPC_LIMIT
 * entries; capture group 1 holds whatever follows the tag name.
 * NOTE(review): the order assumes UPC_TRANSLATE == 0 and UPC_NOTE == 1 --
 * confirm against the enum declaration in prscmnts.h.
 */
const char *patternStrings[UPC_LIMIT]={
    "^translate\\s*(.*)",
    "^note\\s*(.*)"
};
33
34U_CFUNC int32_t
35removeText(UChar *source, int32_t srcLen,
36           UnicodeString patString,uint32_t options,
37           UnicodeString replaceText, UErrorCode *status){
38
39    if(status == NULL || U_FAILURE(*status)){
40        return 0;
41    }
42
43    UnicodeString src(source, srcLen);
44
45    RegexMatcher    myMatcher(patString, src, options, *status);
46    if(U_FAILURE(*status)){
47        return 0;
48    }
49    UnicodeString dest;
50
51
52    dest = myMatcher.replaceAll(replaceText,*status);
53
54
55    return dest.extract(source, srcLen, *status);
56
57}
58U_CFUNC int32_t
59trim(UChar *src, int32_t srcLen, UErrorCode *status){
60     srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
61     srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
62     srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
63     return srcLen;
64}
65
66U_CFUNC int32_t
67removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
68    srcLen = trim(source, srcLen, status);
69    UnicodeString     patString = "^\\s*?\\*\\s*?";     // remove pattern like " * " at the begining of the line
70    srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
71    return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
72}
73
74U_CFUNC int32_t
75getText(const UChar* source, int32_t srcLen,
76        UChar** dest, int32_t destCapacity,
77        UnicodeString patternString,
78        UErrorCode* status){
79
80    if(status == NULL || U_FAILURE(*status)){
81        return 0;
82    }
83
84    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
85    RegexPattern      *pattern = RegexPattern::compile("@", 0, *status);
86    UnicodeString src (source,srcLen);
87
88    if (U_FAILURE(*status)) {
89        return 0;
90    }
91    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
92
93    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
94    if (U_FAILURE(*status)) {
95        return 0;
96    }
97    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
98        matcher.reset(stringArray[i]);
99        if(matcher.lookingAt(*status)){
100            UnicodeString out = matcher.group(1, *status);
101
102            return out.extract(*dest, destCapacity,*status);
103        }
104    }
105    return 0;
106}
107
108
109#define AT_SIGN  0x0040
110
111U_CFUNC int32_t
112getDescription( const UChar* source, int32_t srcLen,
113                UChar** dest, int32_t destCapacity,
114                UErrorCode* status){
115    if(status == NULL || U_FAILURE(*status)){
116        return 0;
117    }
118
119    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
120    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
121    UnicodeString src(source, srcLen);
122
123    if (U_FAILURE(*status)) {
124        return 0;
125    }
126    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
127
128    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
129        int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
130        return trim(*dest, destLen, status);
131    }
132    return 0;
133}
134
135U_CFUNC int32_t
136getCount(const UChar* source, int32_t srcLen,
137         UParseCommentsOption option, UErrorCode *status){
138
139    if(status == NULL || U_FAILURE(*status)){
140        return 0;
141    }
142
143    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
144    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
145    UnicodeString src (source, srcLen);
146
147
148    if (U_FAILURE(*status)) {
149        return 0;
150    }
151    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
152
153    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
154    if (U_FAILURE(*status)) {
155        return 0;
156    }
157    int32_t count = 0;
158    for(int32_t i=0; i<retLen; i++){
159        matcher.reset(stringArray[i]);
160        if(matcher.lookingAt(*status)){
161            count++;
162        }
163    }
164    if(option == UPC_TRANSLATE && count > 1){
165        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
166        exit(U_UNSUPPORTED_ERROR);
167    }
168    return count;
169}
170
171U_CFUNC int32_t
172getAt(const UChar* source, int32_t srcLen,
173        UChar** dest, int32_t destCapacity,
174        int32_t index,
175        UParseCommentsOption option,
176        UErrorCode* status){
177
178    if(status == NULL || U_FAILURE(*status)){
179        return 0;
180    }
181
182    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
183    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
184    UnicodeString src (source, srcLen);
185
186
187    if (U_FAILURE(*status)) {
188        return 0;
189    }
190    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
191
192    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
193    if (U_FAILURE(*status)) {
194        return 0;
195    }
196    int32_t count = 0;
197    for(int32_t i=0; i<retLen; i++){
198        matcher.reset(stringArray[i]);
199        if(matcher.lookingAt(*status)){
200            if(count == index){
201                UnicodeString out = matcher.group(1, *status);
202                return out.extract(*dest, destCapacity,*status);
203            }
204            count++;
205
206        }
207    }
208    return 0;
209
210}
211
212U_CFUNC int32_t
213getTranslate( const UChar* source, int32_t srcLen,
214              UChar** dest, int32_t destCapacity,
215              UErrorCode* status){
216    UnicodeString     notePatternString = "^translate\\s*?(.*)";
217
218    int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
219    return trim(*dest, destLen, status);
220}
221
222U_CFUNC int32_t
223getNote(const UChar* source, int32_t srcLen,
224        UChar** dest, int32_t destCapacity,
225        UErrorCode* status){
226
227    UnicodeString     notePatternString = "^note\\s*?(.*)";
228    int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
229    return trim(*dest, destLen, status);
230
231}
232
233#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
234
235