1/* 2 * Copyright 2017 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "bookmaker.h" 9 10#include "SkOSFile.h" 11#include "SkOSPath.h" 12 13/* 14things to do 15if cap word is beginning of sentence, add it to table as lower-case 16 word must have only a single initial capital 17 18if word is camel cased, look for :: matches on suffix 19 20when function crosses lines, whole thing isn't seen as a 'word' e.g., search for largeArc in path 21 22words in external not seen 23 */ 24struct CheckEntry { 25 string fFile; 26 int fLine; 27 int fCount; 28}; 29 30class SpellCheck : public ParserCommon { 31public: 32 SpellCheck(const BmhParser& bmh) : ParserCommon() 33 , fBmhParser(bmh) { 34 this->reset(); 35 } 36 bool check(const char* match); 37 void report(SkCommandLineFlags::StringArray report); 38private: 39 enum class TableState { 40 kNone, 41 kRow, 42 kColumn, 43 }; 44 45 bool check(Definition* ); 46 bool checkable(MarkType markType); 47 void childCheck(const Definition* def, const char* start); 48 void leafCheck(const char* start, const char* end); 49 bool parseFromFile(const char* path) override { return true; } 50 void printCheck(const string& str); 51 52 void reset() override { 53 INHERITED::resetCommon(); 54 fMethod = nullptr; 55 fRoot = nullptr; 56 fTableState = TableState::kNone; 57 fInCode = false; 58 fInConst = false; 59 fInFormula = false; 60 fInDescription = false; 61 fInStdOut = false; 62 } 63 64 void wordCheck(const string& str); 65 void wordCheck(ptrdiff_t len, const char* ch); 66 67 unordered_map<string, CheckEntry> fCode; 68 unordered_map<string, CheckEntry> fColons; 69 unordered_map<string, CheckEntry> fDigits; 70 unordered_map<string, CheckEntry> fDots; 71 unordered_map<string, CheckEntry> fParens; // also hold destructors, operators 72 unordered_map<string, CheckEntry> fUnderscores; 73 unordered_map<string, CheckEntry> fWords; 74 const BmhParser& fBmhParser; 75 Definition* fMethod; 76 RootDefinition* fRoot; 77 TableState fTableState; 78 bool fInCode; 79 bool fInConst; 80 bool fInDescription; 81 bool fInFormula; 82 bool fInStdOut; 83 typedef ParserCommon INHERITED; 84}; 85 86/* This doesn't perform a traditional spell or grammar check, although 87 maybe it should. Instead it looks for words used uncommonly and lower 88 case words that match capitalized words that are not sentence starters. 89 It also looks for articles preceeding capitalized words and their 90 modifiers to try to maintain a consistent voice. 91 Maybe also look for passive verbs (e.g. 'is') and suggest active ones? 92 */ 93void BmhParser::spellCheck(const char* match, SkCommandLineFlags::StringArray report) const { 94 SpellCheck checker(*this); 95 checker.check(match); 96 checker.report(report); 97} 98 99void BmhParser::spellStatus(const char* statusFile, SkCommandLineFlags::StringArray report) const { 100 SpellCheck checker(*this); 101 StatusIter iter(statusFile, ".bmh", StatusFilter::kInProgress); 102 string match = iter.baseDir(); 103 checker.check(match.c_str()); 104 checker.report(report); 105} 106 107bool SpellCheck::check(const char* match) { 108 for (const auto& topic : fBmhParser.fTopicMap) { 109 Definition* topicDef = topic.second; 110 if (topicDef->fParent) { 111 continue; 112 } 113 if (!topicDef->isRoot()) { 114 return this->reportError<bool>("expected root topic"); 115 } 116 fRoot = topicDef->asRoot(); 117 if (string::npos == fRoot->fFileName.rfind(match)) { 118 continue; 119 } 120 this->check(topicDef); 121 } 122 return true; 123} 124 125static bool all_lower(const string& str) { 126 for (auto c : str) { 127 if (!islower(c)) { 128 return false; 129 } 130 } 131 return true; 132} 133 134bool SpellCheck::check(Definition* def) { 135 fFileName = def->fFileName; 136 fLineCount = def->fLineCount; 137 string printable = def->printableName(); 138 const char* textStart = def->fContentStart; 139 if (MarkType::kParam != def->fMarkType && MarkType::kConst != def->fMarkType && 140 MarkType::kPrivate != def->fMarkType && TableState::kNone != fTableState) { 141 fTableState = TableState::kNone; 142 } 143 switch (def->fMarkType) { 144 case MarkType::kAlias: 145 break; 146 case MarkType::kAnchor: 147 break; 148 case MarkType::kBug: 149 break; 150 case MarkType::kClass: 151 this->wordCheck(def->fName); 152 break; 153 case MarkType::kCode: 154 fInCode = true; 155 break; 156 case MarkType::kColumn: 157 break; 158 case MarkType::kComment: 159 break; 160 case MarkType::kConst: { 161 fInConst = true; 162 if (TableState::kNone == fTableState) { 163 fTableState = TableState::kRow; 164 } 165 if (TableState::kRow == fTableState) { 166 fTableState = TableState::kColumn; 167 } 168 this->wordCheck(def->fName); 169 const char* lineEnd = strchr(textStart, '\n'); 170 this->wordCheck(lineEnd - textStart, textStart); 171 textStart = lineEnd; 172 } break; 173 case MarkType::kDefine: 174 break; 175 case MarkType::kDefinedBy: 176 break; 177 case MarkType::kDeprecated: 178 break; 179 case MarkType::kDescription: 180 fInDescription = true; 181 break; 182 case MarkType::kDoxygen: 183 break; 184 case MarkType::kDuration: 185 break; 186 case MarkType::kEnum: 187 case MarkType::kEnumClass: 188 this->wordCheck(def->fName); 189 break; 190 case MarkType::kExample: 191 break; 192 case MarkType::kExperimental: 193 break; 194 case MarkType::kExternal: 195 break; 196 case MarkType::kFile: 197 break; 198 case MarkType::kFormula: 199 fInFormula = true; 200 break; 201 case MarkType::kFunction: 202 break; 203 case MarkType::kHeight: 204 break; 205 case MarkType::kImage: 206 break; 207 case MarkType::kLegend: 208 break; 209 case MarkType::kLink: 210 break; 211 case MarkType::kList: 212 break; 213 case MarkType::kLiteral: 214 break; 215 case MarkType::kMarkChar: 216 break; 217 case MarkType::kMember: 218 break; 219 case MarkType::kMethod: { 220 string method_name = def->methodName(); 221 if (all_lower(method_name)) { 222 method_name += "()"; 223 } 224 string formattedStr = def->formatFunction(); 225 if (!def->isClone() && Definition::MethodType::kOperator != def->fMethodType) { 226 this->wordCheck(method_name); 227 } 228 fTableState = TableState::kNone; 229 fMethod = def; 230 } break; 231 case MarkType::kNoExample: 232 break; 233 case MarkType::kOutdent: 234 break; 235 case MarkType::kParam: { 236 if (TableState::kNone == fTableState) { 237 fTableState = TableState::kRow; 238 } 239 if (TableState::kRow == fTableState) { 240 fTableState = TableState::kColumn; 241 } 242 TextParser paramParser(def->fFileName, def->fStart, def->fContentStart, 243 def->fLineCount); 244 paramParser.skipWhiteSpace(); 245 SkASSERT(paramParser.startsWith("#Param")); 246 paramParser.next(); // skip hash 247 paramParser.skipToNonAlphaNum(); // skip Param 248 paramParser.skipSpace(); 249 const char* paramName = paramParser.fChar; 250 paramParser.skipToSpace(); 251 fInCode = true; 252 this->wordCheck(paramParser.fChar - paramName, paramName); 253 fInCode = false; 254 } break; 255 case MarkType::kPlatform: 256 break; 257 case MarkType::kPrivate: 258 break; 259 case MarkType::kReturn: 260 break; 261 case MarkType::kRow: 262 break; 263 case MarkType::kSeeAlso: 264 break; 265 case MarkType::kStdOut: { 266 fInStdOut = true; 267 TextParser code(def); 268 code.skipSpace(); 269 while (!code.eof()) { 270 const char* end = code.trimmedLineEnd(); 271 this->wordCheck(end - code.fChar, code.fChar); 272 code.skipToLineStart(); 273 } 274 fInStdOut = false; 275 } break; 276 case MarkType::kStruct: 277 fRoot = def->asRoot(); 278 this->wordCheck(def->fName); 279 break; 280 case MarkType::kSubstitute: 281 break; 282 case MarkType::kSubtopic: 283 this->printCheck(printable); 284 break; 285 case MarkType::kTable: 286 break; 287 case MarkType::kTemplate: 288 break; 289 case MarkType::kText: 290 break; 291 case MarkType::kTime: 292 break; 293 case MarkType::kToDo: 294 break; 295 case MarkType::kTopic: 296 this->printCheck(printable); 297 break; 298 case MarkType::kTrack: 299 // don't output children 300 return true; 301 case MarkType::kTypedef: 302 break; 303 case MarkType::kUnion: 304 break; 305 case MarkType::kVolatile: 306 break; 307 case MarkType::kWidth: 308 break; 309 default: 310 SkASSERT(0); // handle everything 311 break; 312 } 313 this->childCheck(def, textStart); 314 switch (def->fMarkType) { // post child work, at least for tables 315 case MarkType::kCode: 316 fInCode = false; 317 break; 318 case MarkType::kColumn: 319 break; 320 case MarkType::kDescription: 321 fInDescription = false; 322 break; 323 case MarkType::kEnum: 324 case MarkType::kEnumClass: 325 break; 326 case MarkType::kExample: 327 break; 328 case MarkType::kFormula: 329 fInFormula = false; 330 break; 331 case MarkType::kLegend: 332 break; 333 case MarkType::kMethod: 334 fMethod = nullptr; 335 break; 336 case MarkType::kConst: 337 fInConst = false; 338 case MarkType::kParam: 339 SkASSERT(TableState::kColumn == fTableState); 340 fTableState = TableState::kRow; 341 break; 342 case MarkType::kReturn: 343 case MarkType::kSeeAlso: 344 break; 345 case MarkType::kRow: 346 break; 347 case MarkType::kStruct: 348 fRoot = fRoot->rootParent(); 349 break; 350 case MarkType::kTable: 351 break; 352 default: 353 break; 354 } 355 return true; 356} 357 358bool SpellCheck::checkable(MarkType markType) { 359 return BmhParser::Resolvable::kYes == fBmhParser.fMaps[(int) markType].fResolve; 360} 361 362void SpellCheck::childCheck(const Definition* def, const char* start) { 363 const char* end; 364 fLineCount = def->fLineCount; 365 if (def->isRoot()) { 366 fRoot = const_cast<RootDefinition*>(def->asRoot()); 367 } 368 for (auto& child : def->fChildren) { 369 end = child->fStart; 370 if (this->checkable(def->fMarkType)) { 371 this->leafCheck(start, end); 372 } 373 this->check(child); 374 start = child->fTerminator; 375 } 376 if (this->checkable(def->fMarkType)) { 377 end = def->fContentEnd; 378 this->leafCheck(start, end); 379 } 380} 381 382void SpellCheck::leafCheck(const char* start, const char* end) { 383 const char* chPtr = start; 384 int inAngles = 0; 385 int inParens = 0; 386 bool inQuotes = false; 387 bool allLower = true; 388 char priorCh = 0; 389 char lastCh = 0; 390 const char* wordStart = nullptr; 391 const char* wordEnd = nullptr; 392 const char* possibleEnd = nullptr; 393 do { 394 if (wordStart && wordEnd) { 395 if (!allLower || (!inQuotes && '\"' != lastCh && !inParens 396 && ')' != lastCh && !inAngles && '>' != lastCh)) { 397 string word(wordStart, (possibleEnd ? possibleEnd : wordEnd) - wordStart); 398 wordCheck(word); 399 } 400 wordStart = nullptr; 401 } 402 if (chPtr == end) { 403 break; 404 } 405 switch (*chPtr) { 406 case '>': 407 if (isalpha(lastCh)) { 408 --inAngles; 409 SkASSERT(inAngles >= 0); 410 } 411 wordEnd = chPtr; 412 break; 413 case '(': 414 ++inParens; 415 possibleEnd = chPtr; 416 break; 417 case ')': 418 --inParens; 419 if ('(' == lastCh) { 420 wordEnd = chPtr + 1; 421 } else { 422 wordEnd = chPtr; 423 } 424 SkASSERT(inParens >= 0 || fInStdOut); 425 break; 426 case '\"': 427 inQuotes = !inQuotes; 428 wordEnd = chPtr; 429 SkASSERT(inQuotes == !wordStart); 430 break; 431 case 'A': case 'B': case 'C': case 'D': case 'E': 432 case 'F': case 'G': case 'H': case 'I': case 'J': 433 case 'K': case 'L': case 'M': case 'N': case 'O': 434 case 'P': case 'Q': case 'R': case 'S': case 'T': 435 case 'U': case 'V': case 'W': case 'X': case 'Y': 436 case 'Z': 437 allLower = false; 438 case 'a': case 'b': case 'c': case 'd': case 'e': 439 case 'f': case 'g': case 'h': case 'i': case 'j': 440 case 'k': case 'l': case 'm': case 'n': case 'o': 441 case 'p': case 'q': case 'r': case 's': case 't': 442 case 'u': case 'v': case 'w': case 'x': case 'y': 443 case 'z': 444 if (!wordStart) { 445 wordStart = chPtr; 446 wordEnd = nullptr; 447 possibleEnd = nullptr; 448 allLower = 'a' <= *chPtr; 449 if ('<' == lastCh || ('<' == priorCh && '/' == lastCh)) { 450 ++inAngles; 451 } 452 } 453 break; 454 case '0': case '1': case '2': case '3': case '4': 455 case '5': case '6': case '7': case '8': case '9': 456 case '_': 457 allLower = false; 458 case '-': // note that dash doesn't clear allLower 459 break; 460 default: 461 wordEnd = chPtr; 462 break; 463 } 464 priorCh = lastCh; 465 lastCh = *chPtr; 466 } while (++chPtr <= end); 467} 468 469void SpellCheck::printCheck(const string& str) { 470 string word; 471 for (std::stringstream stream(str); stream >> word; ) { 472 wordCheck(word); 473 } 474} 475 476static bool stringCompare(const std::pair<string, CheckEntry>& i, const std::pair<string, CheckEntry>& j) { 477 return i.first.compare(j.first) < 0; 478} 479 480void SpellCheck::report(SkCommandLineFlags::StringArray report) { 481 vector<std::pair<string, CheckEntry>> elems(fWords.begin(), fWords.end()); 482 std::sort(elems.begin(), elems.end(), stringCompare); 483 if (report.contains("once")) { 484 for (auto iter : elems) { 485 if (string::npos != iter.second.fFile.find("undocumented.bmh")) { 486 continue; 487 } 488 if (string::npos != iter.second.fFile.find("markup.bmh")) { 489 continue; 490 } 491 if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) { 492 continue; 493 } 494 if (iter.second.fCount == 1) { 495 SkDebugf("%s(%d): %s\n", iter.second.fFile.c_str(), iter.second.fLine, 496 iter.first.c_str()); 497 } 498 } 499 SkDebugf("\n"); 500 return; 501 } 502 if (report.contains("all")) { 503 int column = 0; 504 char lastInitial = 'a'; 505 int count = 0; 506 for (auto iter : elems) { 507 if (string::npos != iter.second.fFile.find("undocumented.bmh")) { 508 continue; 509 } 510 if (string::npos != iter.second.fFile.find("markup.bmh")) { 511 continue; 512 } 513 if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) { 514 continue; 515 } 516 string check = iter.first.c_str(); 517 bool allLower = true; 518 for (auto c : check) { 519 if (isupper(c)) { 520 allLower = false; 521 break; 522 } 523 } 524 if (!allLower) { 525 continue; 526 } 527 if (column + check.length() > 100 || check[0] != lastInitial) { 528 SkDebugf("\n"); 529 column = 0; 530 } 531 if (check[0] != lastInitial) { 532 SkDebugf("\n"); 533 lastInitial = check[0]; 534 } 535 SkDebugf("%s ", check.c_str()); 536 column += check.length(); 537 ++count; 538 } 539 SkDebugf("\n\ncount = %d\n", count); 540 return; 541 } 542 int index = 0; 543 const char* mispelled = report[0]; 544 for (auto iter : elems) { 545 if (string::npos != iter.second.fFile.find("undocumented.bmh")) { 546 continue; 547 } 548 if (string::npos != iter.second.fFile.find("markup.bmh")) { 549 continue; 550 } 551 if (string::npos != iter.second.fFile.find("usingBookmaker.bmh")) { 552 continue; 553 } 554 string check = iter.first.c_str(); 555 while (check.compare(mispelled) > 0) { 556 SkDebugf("%s not found\n", mispelled); 557 if (report.count() == ++index) { 558 break; 559 } 560 } 561 if (report.count() == index) { 562 break; 563 } 564 if (check.compare(mispelled) == 0) { 565 SkDebugf("%s(%d): %s\n", iter.second.fFile.c_str(), iter.second.fLine, 566 iter.first.c_str()); 567 if (report.count() == ++index) { 568 break; 569 } 570 } 571 } 572} 573 574void SpellCheck::wordCheck(const string& str) { 575 if ("nullptr" == str) { 576 return; // doesn't seem worth it, treating nullptr as a word in need of correction 577 } 578 bool hasColon = false; 579 bool hasDot = false; 580 bool hasParen = false; 581 bool hasUnderscore = false; 582 bool sawDash = false; 583 bool sawDigit = false; 584 bool sawSpecial = false; 585 SkASSERT(str.length() > 0); 586 SkASSERT(isalpha(str[0]) || '~' == str[0]); 587 for (char ch : str) { 588 if (isalpha(ch) || '-' == ch) { 589 sawDash |= '-' == ch; 590 continue; 591 } 592 bool isColon = ':' == ch; 593 hasColon |= isColon; 594 bool isDot = '.' == ch; 595 hasDot |= isDot; 596 bool isParen = '(' == ch || ')' == ch || '~' == ch || '=' == ch || '!' == ch || 597 '[' == ch || ']' == ch; 598 hasParen |= isParen; 599 bool isUnderscore = '_' == ch; 600 hasUnderscore |= isUnderscore; 601 if (isColon || isDot || isUnderscore || isParen) { 602 continue; 603 } 604 if (isdigit(ch)) { 605 sawDigit = true; 606 continue; 607 } 608 if ('&' == ch || ',' == ch || ' ' == ch) { 609 sawSpecial = true; 610 continue; 611 } 612 SkASSERT(0); 613 } 614 if (sawSpecial && !hasParen) { 615 SkASSERT(0); 616 } 617 bool inCode = fInCode; 618 if (hasUnderscore && isupper(str[0]) && ('S' != str[0] || 'K' != str[1]) 619 && !hasColon && !hasDot && !hasParen && !fInStdOut && !inCode && !fInConst 620 && !sawDigit && !sawSpecial && !sawDash) { 621 std::istringstream ss(str); 622 string token; 623 while (std::getline(ss, token, '_')) { 624 if (token.length()) { 625 this->wordCheck(token); 626 } 627 } 628 return; 629 } 630 if (!hasColon && !hasDot && !hasParen && !hasUnderscore 631 && !fInStdOut && !inCode && !fInConst && !sawDigit 632 && islower(str[0]) && isupper(str[1])) { 633 inCode = true; 634 } 635 bool methodParam = false; 636 if (fMethod) { 637 for (auto child : fMethod->fChildren) { 638 if (MarkType::kParam == child->fMarkType && str == child->fName) { 639 methodParam = true; 640 break; 641 } 642 } 643 } 644 auto& mappy = hasColon ? fColons : 645 hasDot ? fDots : 646 hasParen ? fParens : 647 hasUnderscore ? fUnderscores : 648 fInStdOut || fInFormula || inCode || fInConst || methodParam ? fCode : 649 sawDigit ? fDigits : fWords; 650 auto iter = mappy.find(str); 651 if (mappy.end() != iter) { 652 iter->second.fCount += 1; 653 } else { 654 CheckEntry* entry = &mappy[str]; 655 entry->fFile = fFileName; 656 entry->fLine = fLineCount; 657 entry->fCount = 1; 658 } 659} 660 661void SpellCheck::wordCheck(ptrdiff_t len, const char* ch) { 662 leafCheck(ch, ch + len); 663} 664