1/*
2 *******************************************************************************
3 *   Copyright (C) 2003-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *
7 * File prscmnts.cpp
8 *
9 * Modification History:
10 *
11 *   Date          Name        Description
12 *   08/22/2003    ram         Creation.
13 *******************************************************************************
14 */
15
16// Safer use of UnicodeString.
17#ifndef UNISTR_FROM_CHAR_EXPLICIT
18#   define UNISTR_FROM_CHAR_EXPLICIT explicit
19#endif
20
21// Less important, but still a good idea.
22#ifndef UNISTR_FROM_STRING_EXPLICIT
23#   define UNISTR_FROM_STRING_EXPLICIT explicit
24#endif
25
26#include "unicode/regex.h"
27#include "unicode/unistr.h"
28#include "unicode/parseerr.h"
29#include "prscmnts.h"
30#include <stdio.h>
31#include <stdlib.h>
32
33U_NAMESPACE_USE
34
35#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
36
37#define MAX_SPLIT_STRINGS 20
38
39const char *patternStrings[UPC_LIMIT]={
40    "^translate\\s*(.*)",
41    "^note\\s*(.*)"
42};
43
44U_CFUNC int32_t
45removeText(UChar *source, int32_t srcLen,
46           UnicodeString patString,uint32_t options,
47           UnicodeString replaceText, UErrorCode *status){
48
49    if(status == NULL || U_FAILURE(*status)){
50        return 0;
51    }
52
53    UnicodeString src(source, srcLen);
54
55    RegexMatcher    myMatcher(patString, src, options, *status);
56    if(U_FAILURE(*status)){
57        return 0;
58    }
59    UnicodeString dest;
60
61
62    dest = myMatcher.replaceAll(replaceText,*status);
63
64
65    return dest.extract(source, srcLen, *status);
66
67}
68U_CFUNC int32_t
69trim(UChar *src, int32_t srcLen, UErrorCode *status){
70     srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
71     srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
72     srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
73     return srcLen;
74}
75
76U_CFUNC int32_t
77removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
78    srcLen = trim(source, srcLen, status);
79    UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the begining of the line
80    srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
81    return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
82}
83
84U_CFUNC int32_t
85getText(const UChar* source, int32_t srcLen,
86        UChar** dest, int32_t destCapacity,
87        UnicodeString patternString,
88        UErrorCode* status){
89
90    if(status == NULL || U_FAILURE(*status)){
91        return 0;
92    }
93
94    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
95    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
96    UnicodeString src (source,srcLen);
97
98    if (U_FAILURE(*status)) {
99        return 0;
100    }
101    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
102
103    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
104    if (U_FAILURE(*status)) {
105        return 0;
106    }
107    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
108        matcher.reset(stringArray[i]);
109        if(matcher.lookingAt(*status)){
110            UnicodeString out = matcher.group(1, *status);
111
112            return out.extract(*dest, destCapacity,*status);
113        }
114    }
115    return 0;
116}
117
118
119#define AT_SIGN  0x0040
120
121U_CFUNC int32_t
122getDescription( const UChar* source, int32_t srcLen,
123                UChar** dest, int32_t destCapacity,
124                UErrorCode* status){
125    if(status == NULL || U_FAILURE(*status)){
126        return 0;
127    }
128
129    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
130    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
131    UnicodeString src(source, srcLen);
132
133    if (U_FAILURE(*status)) {
134        return 0;
135    }
136    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
137
138    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
139        int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
140        return trim(*dest, destLen, status);
141    }
142    return 0;
143}
144
145U_CFUNC int32_t
146getCount(const UChar* source, int32_t srcLen,
147         UParseCommentsOption option, UErrorCode *status){
148
149    if(status == NULL || U_FAILURE(*status)){
150        return 0;
151    }
152
153    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
154    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
155    UnicodeString src (source, srcLen);
156
157
158    if (U_FAILURE(*status)) {
159        return 0;
160    }
161    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
162
163    UnicodeString patternString(patternStrings[option]);
164    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
165    if (U_FAILURE(*status)) {
166        return 0;
167    }
168    int32_t count = 0;
169    for(int32_t i=0; i<retLen; i++){
170        matcher.reset(stringArray[i]);
171        if(matcher.lookingAt(*status)){
172            count++;
173        }
174    }
175    if(option == UPC_TRANSLATE && count > 1){
176        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
177        exit(U_UNSUPPORTED_ERROR);
178    }
179    return count;
180}
181
182U_CFUNC int32_t
183getAt(const UChar* source, int32_t srcLen,
184        UChar** dest, int32_t destCapacity,
185        int32_t index,
186        UParseCommentsOption option,
187        UErrorCode* status){
188
189    if(status == NULL || U_FAILURE(*status)){
190        return 0;
191    }
192
193    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
194    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
195    UnicodeString src (source, srcLen);
196
197
198    if (U_FAILURE(*status)) {
199        return 0;
200    }
201    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
202
203    UnicodeString patternString(patternStrings[option]);
204    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
205    if (U_FAILURE(*status)) {
206        return 0;
207    }
208    int32_t count = 0;
209    for(int32_t i=0; i<retLen; i++){
210        matcher.reset(stringArray[i]);
211        if(matcher.lookingAt(*status)){
212            if(count == index){
213                UnicodeString out = matcher.group(1, *status);
214                return out.extract(*dest, destCapacity,*status);
215            }
216            count++;
217
218        }
219    }
220    return 0;
221
222}
223
224U_CFUNC int32_t
225getTranslate( const UChar* source, int32_t srcLen,
226              UChar** dest, int32_t destCapacity,
227              UErrorCode* status){
228    UnicodeString     notePatternString("^translate\\s*?(.*)");
229
230    int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
231    return trim(*dest, destLen, status);
232}
233
234U_CFUNC int32_t
235getNote(const UChar* source, int32_t srcLen,
236        UChar** dest, int32_t destCapacity,
237        UErrorCode* status){
238
239    UnicodeString     notePatternString("^note\\s*?(.*)");
240    int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
241    return trim(*dest, destLen, status);
242
243}
244
245#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
246
247