1/*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "bookmaker.h"
9
10#include "SkOSFile.h"
11#include "SkOSPath.h"
12
13/*
14things to do
15if cap word is beginning of sentence, add it to table as lower-case
16   word must have only a single initial capital
17
18if word is camel cased, look for :: matches on suffix
19
20when function crosses lines, whole thing isn't seen as a 'word' e.g., search for largeArc in path
21
22words in external not seen
23 */
// Bookkeeping for one distinct word collected by the spell checker.
// Members are default-initialized so a default-constructed entry never holds
// indeterminate values.
struct CheckEntry {
    string fFile;    // file name in effect when the word was first recorded
    int fLine = 0;   // line count in effect when the word was first recorded
    int fCount = 0;  // number of times the word has been recorded
};
29
30class SpellCheck : public ParserCommon {
31public:
32    SpellCheck(const BmhParser& bmh) : ParserCommon()
33        , fBmhParser(bmh) {
34        this->reset();
35    }
36    bool check(const char* match);
37    void report(SkCommandLineFlags::StringArray report);
38private:
39    enum class TableState {
40        kNone,
41        kRow,
42        kColumn,
43    };
44
45    bool check(Definition* );
46    bool checkable(MarkType markType);
47    void childCheck(const Definition* def, const char* start);
48    void leafCheck(const char* start, const char* end);
49    bool parseFromFile(const char* path) override { return true; }
50    void printCheck(const string& str);
51
52    void reset() override {
53        INHERITED::resetCommon();
54        fMethod = nullptr;
55        fRoot = nullptr;
56        fTableState = TableState::kNone;
57        fInCode = false;
58        fInConst = false;
59        fInFormula = false;
60        fInDescription = false;
61        fInStdOut = false;
62    }
63
64    void wordCheck(const string& str);
65    void wordCheck(ptrdiff_t len, const char* ch);
66
67    unordered_map<string, CheckEntry> fCode;
68    unordered_map<string, CheckEntry> fColons;
69    unordered_map<string, CheckEntry> fDigits;
70    unordered_map<string, CheckEntry> fDots;
71    unordered_map<string, CheckEntry> fParens;  // also hold destructors, operators
72    unordered_map<string, CheckEntry> fUnderscores;
73    unordered_map<string, CheckEntry> fWords;
74    const BmhParser& fBmhParser;
75    Definition* fMethod;
76    RootDefinition* fRoot;
77    TableState fTableState;
78    bool fInCode;
79    bool fInConst;
80    bool fInDescription;
81    bool fInFormula;
82    bool fInStdOut;
83    typedef ParserCommon INHERITED;
84};
85
86/* This doesn't perform a traditional spell or grammar check, although
87   maybe it should. Instead it looks for words used uncommonly and lower
88   case words that match capitalized words that are not sentence starters.
   It also looks for articles preceding capitalized words and their
90   modifiers to try to maintain a consistent voice.
91   Maybe also look for passive verbs (e.g. 'is') and suggest active ones?
92 */
93void BmhParser::spellCheck(const char* match, SkCommandLineFlags::StringArray report) const {
94    SpellCheck checker(*this);
95    checker.check(match);
96    checker.report(report);
97}
98
99void BmhParser::spellStatus(const char* statusFile, SkCommandLineFlags::StringArray report) const {
100    SpellCheck checker(*this);
101    StatusIter iter(statusFile, ".bmh", StatusFilter::kInProgress);
102    string match = iter.baseDir();
103    checker.check(match.c_str());
104    checker.report(report);
105}
106
107bool SpellCheck::check(const char* match) {
108    for (const auto& topic : fBmhParser.fTopicMap) {
109        Definition* topicDef = topic.second;
110        if (topicDef->fParent) {
111            continue;
112        }
113        if (!topicDef->isRoot()) {
114            return this->reportError<bool>("expected root topic");
115        }
116        fRoot = topicDef->asRoot();
117        if (string::npos == fRoot->fFileName.rfind(match)) {
118            continue;
119        }
120       this->check(topicDef);
121    }
122    return true;
123}
124
// Returns true if every character of str is a lowercase letter. An empty
// string yields true.
static bool all_lower(const string& str) {
    // Iterate as unsigned char: handing islower() a negative value (any byte
    // >= 0x80 where char is signed) is undefined behavior.
    for (unsigned char c : str) {
        if (!islower(c)) {
            return false;
        }
    }
    return true;
}
133
134bool SpellCheck::check(Definition* def) {
135    fFileName = def->fFileName;
136    fLineCount = def->fLineCount;
137    string printable = def->printableName();
138    const char* textStart = def->fContentStart;
139    if (MarkType::kParam != def->fMarkType && MarkType::kConst != def->fMarkType &&
140            MarkType::kPrivate != def->fMarkType && TableState::kNone != fTableState) {
141        fTableState = TableState::kNone;
142    }
143    switch (def->fMarkType) {
144        case MarkType::kAlias:
145            break;
146        case MarkType::kAnchor:
147            break;
148        case MarkType::kBug:
149            break;
150        case MarkType::kClass:
151            this->wordCheck(def->fName);
152            break;
153        case MarkType::kCode:
154            fInCode = true;
155            break;
156        case MarkType::kColumn:
157            break;
158        case MarkType::kComment:
159            break;
160        case MarkType::kConst: {
161            fInConst = true;
162            if (TableState::kNone == fTableState) {
163                fTableState = TableState::kRow;
164            }
165            if (TableState::kRow == fTableState) {
166                fTableState = TableState::kColumn;
167            }
168            this->wordCheck(def->fName);
169            const char* lineEnd = strchr(textStart, '\n');
170            this->wordCheck(lineEnd - textStart, textStart);
171            textStart = lineEnd;
172        } break;
173        case MarkType::kDefine:
174            break;
175        case MarkType::kDefinedBy:
176            break;
177        case MarkType::kDeprecated:
178            break;
179        case MarkType::kDescription:
180            fInDescription = true;
181            break;
182        case MarkType::kDoxygen:
183            break;
184        case MarkType::kDuration:
185            break;
186        case MarkType::kEnum:
187        case MarkType::kEnumClass:
188            this->wordCheck(def->fName);
189            break;
190        case MarkType::kExample:
191            break;
192        case MarkType::kExperimental:
193            break;
194        case MarkType::kExternal:
195            break;
196        case MarkType::kFile:
197            break;
198        case MarkType::kFormula:
199            fInFormula = true;
200            break;
201        case MarkType::kFunction:
202            break;
203        case MarkType::kHeight:
204            break;
205        case MarkType::kImage:
206            break;
207        case MarkType::kLegend:
208            break;
209        case MarkType::kLink:
210            break;
211        case MarkType::kList:
212            break;
213        case MarkType::kLiteral:
214            break;
215        case MarkType::kMarkChar:
216            break;
217        case MarkType::kMember:
218            break;
219        case MarkType::kMethod: {
220            string method_name = def->methodName();
221            if (all_lower(method_name)) {
222                method_name += "()";
223            }
224            string formattedStr = def->formatFunction();
225            if (!def->isClone() && Definition::MethodType::kOperator != def->fMethodType) {
226                this->wordCheck(method_name);
227            }
228            fTableState = TableState::kNone;
229            fMethod = def;
230            } break;
231        case MarkType::kNoExample:
232            break;
233        case MarkType::kOutdent:
234            break;
235        case MarkType::kParam: {
236            if (TableState::kNone == fTableState) {
237                fTableState = TableState::kRow;
238            }
239            if (TableState::kRow == fTableState) {
240                fTableState = TableState::kColumn;
241            }
242            TextParser paramParser(def->fFileName, def->fStart, def->fContentStart,
243                    def->fLineCount);
244            paramParser.skipWhiteSpace();
245            SkASSERT(paramParser.startsWith("#Param"));
246            paramParser.next(); // skip hash
247            paramParser.skipToNonAlphaNum(); // skip Param
248            paramParser.skipSpace();
249            const char* paramName = paramParser.fChar;
250            paramParser.skipToSpace();
251            fInCode = true;
252            this->wordCheck(paramParser.fChar - paramName, paramName);
253            fInCode = false;
254       } break;
255        case MarkType::kPlatform:
256            break;
257        case MarkType::kPrivate:
258            break;
259        case MarkType::kReturn:
260            break;
261        case MarkType::kRow:
262            break;
263        case MarkType::kSeeAlso:
264            break;
265        case MarkType::kStdOut: {
266            fInStdOut = true;
267            TextParser code(def);
268            code.skipSpace();
269            while (!code.eof()) {
270                const char* end = code.trimmedLineEnd();
271                this->wordCheck(end - code.fChar, code.fChar);
272                code.skipToLineStart();
273            }
274            fInStdOut = false;
275            } break;
276        case MarkType::kStruct:
277            fRoot = def->asRoot();
278            this->wordCheck(def->fName);
279            break;
280        case MarkType::kSubstitute:
281            break;
282        case MarkType::kSubtopic:
283            this->printCheck(printable);
284            break;
285        case MarkType::kTable:
286            break;
287        case MarkType::kTemplate:
288            break;
289        case MarkType::kText:
290            break;
291        case MarkType::kTime:
292            break;
293        case MarkType::kToDo:
294            break;
295        case MarkType::kTopic:
296            this->printCheck(printable);
297            break;
298        case MarkType::kTrack:
299            // don't output children
300            return true;
301        case MarkType::kTypedef:
302            break;
303        case MarkType::kUnion:
304            break;
305        case MarkType::kVolatile:
306            break;
307        case MarkType::kWidth:
308            break;
309        default:
310            SkASSERT(0); // handle everything
311            break;
312    }
313    this->childCheck(def, textStart);
314    switch (def->fMarkType) {  // post child work, at least for tables
315        case MarkType::kCode:
316            fInCode = false;
317            break;
318        case MarkType::kColumn:
319            break;
320        case MarkType::kDescription:
321            fInDescription = false;
322            break;
323        case MarkType::kEnum:
324        case MarkType::kEnumClass:
325            break;
326        case MarkType::kExample:
327            break;
328        case MarkType::kFormula:
329            fInFormula = false;
330            break;
331        case MarkType::kLegend:
332            break;
333        case MarkType::kMethod:
334            fMethod = nullptr;
335            break;
336        case MarkType::kConst:
337            fInConst = false;
338        case MarkType::kParam:
339            SkASSERT(TableState::kColumn == fTableState);
340            fTableState = TableState::kRow;
341            break;
342        case MarkType::kReturn:
343        case MarkType::kSeeAlso:
344            break;
345        case MarkType::kRow:
346            break;
347        case MarkType::kStruct:
348            fRoot = fRoot->rootParent();
349            break;
350        case MarkType::kTable:
351            break;
352        default:
353            break;
354    }
355    return true;
356}
357
358bool SpellCheck::checkable(MarkType markType) {
359    return BmhParser::Resolvable::kYes == fBmhParser.fMaps[(int) markType].fResolve;
360}
361
362void SpellCheck::childCheck(const Definition* def, const char* start) {
363    const char* end;
364    fLineCount = def->fLineCount;
365    if (def->isRoot()) {
366        fRoot = const_cast<RootDefinition*>(def->asRoot());
367    }
368    for (auto& child : def->fChildren) {
369        end = child->fStart;
370        if (this->checkable(def->fMarkType)) {
371            this->leafCheck(start, end);
372        }
373        this->check(child);
374        start = child->fTerminator;
375    }
376    if (this->checkable(def->fMarkType)) {
377        end = def->fContentEnd;
378        this->leafCheck(start, end);
379    }
380}
381
382void SpellCheck::leafCheck(const char* start, const char* end) {
383    const char* chPtr = start;
384    int inAngles = 0;
385    int inParens = 0;
386    bool inQuotes = false;
387    bool allLower = true;
388    char priorCh = 0;
389    char lastCh = 0;
390    const char* wordStart = nullptr;
391    const char* wordEnd = nullptr;
392    const char* possibleEnd = nullptr;
393    do {
394        if (wordStart && wordEnd) {
395            if (!allLower || (!inQuotes && '\"' != lastCh && !inParens
396                    && ')' != lastCh && !inAngles && '>' != lastCh)) {
397                string word(wordStart, (possibleEnd ? possibleEnd : wordEnd) - wordStart);
398                wordCheck(word);
399            }
400            wordStart = nullptr;
401        }
402        if (chPtr == end) {
403            break;
404        }
405        switch (*chPtr) {
406            case '>':
407                if (isalpha(lastCh)) {
408                    --inAngles;
409                    SkASSERT(inAngles >= 0);
410                }
411                wordEnd = chPtr;
412                break;
413            case '(':
414                ++inParens;
415                possibleEnd = chPtr;
416                break;
417            case ')':
418                --inParens;
419                if ('(' == lastCh) {
420                    wordEnd = chPtr + 1;
421                } else {
422                    wordEnd = chPtr;
423                }
424                SkASSERT(inParens >= 0 || fInStdOut);
425                break;
426            case '\"':
427                inQuotes = !inQuotes;
428                wordEnd = chPtr;
429                SkASSERT(inQuotes == !wordStart);
430                break;
431            case 'A': case 'B': case 'C': case 'D': case 'E':
432            case 'F': case 'G': case 'H': case 'I': case 'J':
433            case 'K': case 'L': case 'M': case 'N': case 'O':
434            case 'P': case 'Q': case 'R': case 'S': case 'T':
435            case 'U': case 'V': case 'W': case 'X': case 'Y':
436            case 'Z':
437                allLower = false;
438            case 'a': case 'b': case 'c': case 'd': case 'e':
439            case 'f': case 'g': case 'h': case 'i': case 'j':
440            case 'k': case 'l': case 'm': case 'n': case 'o':
441            case 'p': case 'q': case 'r': case 's': case 't':
442            case 'u': case 'v': case 'w': case 'x': case 'y':
443            case 'z':
444                if (!wordStart) {
445                    wordStart = chPtr;
446                    wordEnd = nullptr;
447                    possibleEnd = nullptr;
448                    allLower = 'a' <= *chPtr;
449                    if ('<' == lastCh || ('<' == priorCh && '/' == lastCh)) {
450                        ++inAngles;
451                    }
452                }
453                break;
454            case '0': case '1': case '2': case '3': case '4':
455            case '5': case '6': case '7': case '8': case '9':
456            case '_':
457                allLower = false;
458            case '-':  // note that dash doesn't clear allLower
459                break;
460            default:
461                wordEnd = chPtr;
462                break;
463        }
464        priorCh = lastCh;
465        lastCh = *chPtr;
466    } while (++chPtr <= end);
467}
468
469void SpellCheck::printCheck(const string& str) {
470    string word;
471    for (std::stringstream stream(str); stream >> word; ) {
472        wordCheck(word);
473    }
474}
475
476static bool stringCompare(const std::pair<string, CheckEntry>& i, const std::pair<string, CheckEntry>& j) {
477    return i.first.compare(j.first) < 0;
478}
479
480void SpellCheck::report(SkCommandLineFlags::StringArray report) {
481    vector<std::pair<string, CheckEntry>> elems(fWords.begin(), fWords.end());
482    std::sort(elems.begin(), elems.end(), stringCompare);
483    if (report.contains("once")) {
484        for (auto iter : elems) {
485            if (string::npos != iter.second.fFile.find("undocumented.bmh")) {
486                continue;
487            }
488            if (string::npos != iter.second.fFile.find("markup.bmh")) {
489                continue;
490            }
491            if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) {
492                continue;
493            }
494            if (iter.second.fCount == 1) {
495                SkDebugf("%s(%d): %s\n", iter.second.fFile.c_str(), iter.second.fLine,
496                        iter.first.c_str());
497            }
498        }
499        SkDebugf("\n");
500        return;
501    }
502    if (report.contains("all")) {
503        int column = 0;
504        char lastInitial = 'a';
505        int count = 0;
506        for (auto iter : elems) {
507            if (string::npos != iter.second.fFile.find("undocumented.bmh")) {
508                continue;
509            }
510            if (string::npos != iter.second.fFile.find("markup.bmh")) {
511                continue;
512            }
513            if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) {
514                continue;
515            }
516            string check = iter.first.c_str();
517            bool allLower = true;
518            for (auto c : check) {
519                if (isupper(c)) {
520                    allLower = false;
521                    break;
522                }
523            }
524            if (!allLower) {
525                continue;
526            }
527            if (column + check.length() > 100 || check[0] != lastInitial) {
528                SkDebugf("\n");
529                column = 0;
530            }
531            if (check[0] != lastInitial) {
532                SkDebugf("\n");
533                lastInitial = check[0];
534            }
535            SkDebugf("%s ", check.c_str());
536            column += check.length();
537            ++count;
538        }
539        SkDebugf("\n\ncount = %d\n", count);
540        return;
541    }
542    int index = 0;
543    const char* mispelled = report[0];
544    for (auto iter : elems) {
545        if (string::npos != iter.second.fFile.find("undocumented.bmh")) {
546            continue;
547        }
548        if (string::npos != iter.second.fFile.find("markup.bmh")) {
549            continue;
550        }
551        if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) {
552            continue;
553        }
554        string check = iter.first.c_str();
555        while (check.compare(mispelled) > 0) {
556            SkDebugf("%s not found\n", mispelled);
557            if (report.count() == ++index) {
558                break;
559            }
560        }
561        if (report.count() == index) {
562            break;
563        }
564        if (check.compare(mispelled) == 0) {
565            SkDebugf("%s(%d): %s\n", iter.second.fFile.c_str(), iter.second.fLine,
566                    iter.first.c_str());
567            if (report.count() == ++index) {
568                break;
569            }
570        }
571    }
572}
573
574void SpellCheck::wordCheck(const string& str) {
575    if ("nullptr" == str) {
576        return;  // doesn't seem worth it, treating nullptr as a word in need of correction
577    }
578    bool hasColon = false;
579    bool hasDot = false;
580    bool hasParen = false;
581    bool hasUnderscore = false;
582    bool sawDash = false;
583    bool sawDigit = false;
584    bool sawSpecial = false;
585    SkASSERT(str.length() > 0);
586    SkASSERT(isalpha(str[0]) || '~' == str[0]);
587    for (char ch : str) {
588        if (isalpha(ch) || '-' == ch) {
589            sawDash |= '-' == ch;
590            continue;
591        }
592        bool isColon = ':' == ch;
593        hasColon |= isColon;
594        bool isDot = '.' == ch;
595        hasDot |= isDot;
596        bool isParen = '(' == ch || ')' == ch || '~' == ch || '=' == ch || '!' == ch ||
597                '[' == ch || ']' == ch;
598        hasParen |= isParen;
599        bool isUnderscore = '_' == ch;
600        hasUnderscore |= isUnderscore;
601        if (isColon || isDot || isUnderscore || isParen) {
602            continue;
603        }
604        if (isdigit(ch)) {
605            sawDigit = true;
606            continue;
607        }
608        if ('&' == ch || ',' == ch || ' ' == ch) {
609            sawSpecial = true;
610            continue;
611        }
612        SkASSERT(0);
613    }
614    if (sawSpecial && !hasParen) {
615        SkASSERT(0);
616    }
617    bool inCode = fInCode;
618    if (hasUnderscore && isupper(str[0]) && ('S' != str[0] || 'K' != str[1])
619            && !hasColon && !hasDot && !hasParen && !fInStdOut && !inCode && !fInConst
620            && !sawDigit && !sawSpecial && !sawDash) {
621        std::istringstream ss(str);
622        string token;
623        while (std::getline(ss, token, '_')) {
624            if (token.length()) {
625                this->wordCheck(token);
626            }
627        }
628        return;
629    }
630    if (!hasColon && !hasDot && !hasParen && !hasUnderscore
631            && !fInStdOut && !inCode && !fInConst && !sawDigit
632            && islower(str[0]) && isupper(str[1])) {
633        inCode = true;
634    }
635    bool methodParam = false;
636    if (fMethod) {
637        for (auto child : fMethod->fChildren) {
638            if (MarkType::kParam == child->fMarkType && str == child->fName) {
639                methodParam = true;
640                break;
641            }
642        }
643    }
644    auto& mappy = hasColon ? fColons :
645                  hasDot ? fDots :
646                  hasParen ? fParens :
647                  hasUnderscore ? fUnderscores :
648                  fInStdOut || fInFormula || inCode || fInConst || methodParam ? fCode :
649                  sawDigit ? fDigits : fWords;
650    auto iter = mappy.find(str);
651    if (mappy.end() != iter) {
652        iter->second.fCount += 1;
653    } else {
654        CheckEntry* entry = &mappy[str];
655        entry->fFile = fFileName;
656        entry->fLine = fLineCount;
657        entry->fCount = 1;
658    }
659}
660
661void SpellCheck::wordCheck(ptrdiff_t len, const char* ch) {
662    leafCheck(ch, ch + len);
663}
664