1/*
2 * Copyright (C) 2007-2010 Júlio Vilmar Gesser.
3 * Copyright (C) 2011, 2013-2015 The JavaParser Team.
4 *
5 * This file is part of JavaParser.
6 *
7 * JavaParser can be used either under the terms of
8 * a) the GNU Lesser General Public License as published by
9 *     the Free Software Foundation, either version 3 of the License, or
10 *     (at your option) any later version.
11 * b) the terms of the Apache License
12 *
13 * You should have received a copy of both licenses in LICENCE.LGPL and
14 * LICENCE.APACHE. Please refer to those files for details.
15 *
16 * JavaParser is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU Lesser General Public License for more details.
20 */
21
22package com.github.javaparser.ast.comments;
23
24import java.io.*;
25import java.nio.charset.Charset;
26import java.util.*;
27
28/**
29 * This parser cares exclusively about comments.
30 */
31public class CommentsParser {
32
33    private enum State {
34        CODE,
35        IN_LINE_COMMENT,
36        IN_BLOCK_COMMENT,
37        IN_STRING,
38        IN_CHAR;
39    }
40
41    private static final int COLUMNS_PER_TAB = 4;
42
43    public CommentsCollection parse(final String source) throws IOException, UnsupportedEncodingException {
44        InputStream in = new ByteArrayInputStream(source.getBytes(Charset.defaultCharset()));
45        return parse(in, Charset.defaultCharset().name());
46    }
47
48    public CommentsCollection parse(final InputStream in, final String charsetName) throws IOException, UnsupportedEncodingException {
49        boolean lastWasASlashR = false;
50        BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetName));
51        CommentsCollection comments = new CommentsCollection();
52        int r;
53
54        Deque prevTwoChars = new LinkedList<Character>(Arrays.asList('z','z'));
55
56        State state = State.CODE;
57        LineComment currentLineComment = null;
58        BlockComment currentBlockComment = null;
59        StringBuffer currentContent = null;
60
61        int currLine = 1;
62        int currCol  = 1;
63
64        while ((r=br.read()) != -1){
65            char c = (char)r;
66            if (c=='\r'){
67                lastWasASlashR = true;
68            } else if (c=='\n'&&lastWasASlashR){
69                lastWasASlashR=false;
70                continue;
71            } else {
72                lastWasASlashR=false;
73            }
74            switch (state) {
75                case CODE:
76                    if (prevTwoChars.peekLast().equals('/') && c == '/') {
77                        currentLineComment = new LineComment();
78                        currentLineComment.setBeginLine(currLine);
79                        currentLineComment.setBeginColumn(currCol - 1);
80                        state = State.IN_LINE_COMMENT;
81                        currentContent = new StringBuffer();
82                    } else if (prevTwoChars.peekLast().equals('/') && c == '*') {
83                        currentBlockComment = new BlockComment();
84                        currentBlockComment.setBeginLine(currLine);
85                        currentBlockComment.setBeginColumn(currCol - 1);
86                        state = State.IN_BLOCK_COMMENT;
87                        currentContent = new StringBuffer();
88                    } else if (c == '"') {
89                        state = State.IN_STRING;
90                    } else if (c == '\'') {
91                        state = State.IN_CHAR;
92                    } else {
93                        // nothing to do
94                    }
95                    break;
96                case IN_LINE_COMMENT:
97                    if (c=='\n' || c=='\r'){
98                        currentLineComment.setContent(currentContent.toString());
99                        currentLineComment.setEndLine(currLine);
100                        currentLineComment.setEndColumn(currCol);
101                        comments.addComment(currentLineComment);
102                        state = State.CODE;
103                    } else {
104                        currentContent.append(c);
105                    }
106                    break;
107                case IN_BLOCK_COMMENT:
108                    if (prevTwoChars.peekLast().equals('*') && c=='/' && !prevTwoChars.peekFirst().equals('/')){
109
110                        // delete last character
111                        String content = currentContent.deleteCharAt(currentContent.toString().length()-1).toString();
112
113                        if (content.startsWith("*")){
114                            JavadocComment javadocComment = new JavadocComment();
115                            javadocComment.setContent(content.substring(1));
116                            javadocComment.setBeginLine(currentBlockComment.getBeginLine());
117                            javadocComment.setBeginColumn(currentBlockComment.getBeginColumn());
118                            javadocComment.setEndLine(currLine);
119                            javadocComment.setEndColumn(currCol+1);
120                            comments.addComment(javadocComment);
121                        } else {
122                            currentBlockComment.setContent(content);
123                            currentBlockComment.setEndLine(currLine);
124                            currentBlockComment.setEndColumn(currCol+1);
125                            comments.addComment(currentBlockComment);
126                        }
127                        state = State.CODE;
128                    } else {
129                        currentContent.append(c=='\r'?'\n':c);
130                    }
131                    break;
132                case IN_STRING:
133                    if (!prevTwoChars.peekLast().equals('\\') && c == '"') {
134                        state = State.CODE;
135                    }
136                    break;
137                case IN_CHAR:
138                    if (!prevTwoChars.peekLast().equals('\\') && c == '\'') {
139                        state = State.CODE;
140                    }
141                    break;
142                default:
143                    throw new RuntimeException("Unexpected");
144            }
145            switch (c){
146                case '\n':
147                case '\r':
148                    currLine+=1;
149                    currCol = 1;
150                    break;
151                case '\t':
152                    currCol+=COLUMNS_PER_TAB;
153                    break;
154                default:
155                    currCol+=1;
156            }
157            prevTwoChars.remove();
158            prevTwoChars.add(c);
159        }
160
161        if (state==State.IN_LINE_COMMENT){
162            currentLineComment.setContent(currentContent.toString());
163            currentLineComment.setEndLine(currLine);
164            currentLineComment.setEndColumn(currCol);
165            comments.addComment(currentLineComment);
166        }
167
168        return comments;
169    }
170
171}
172