localize.cpp revision b798689749c64baba81f02e10cf2157c747d6b46
1#include "SourcePos.h" 2#include "ValuesFile.h" 3#include "XLIFFFile.h" 4#include "Perforce.h" 5#include "merge_res_and_xliff.h" 6#include "localize.h" 7#include "file_utils.h" 8#include "res_check.h" 9#include "xmb.h" 10 11#include <host/pseudolocalize.h> 12 13#include <stdlib.h> 14#include <stdarg.h> 15#include <sstream> 16#include <stdio.h> 17#include <string.h> 18 19using namespace std; 20 21FILE* g_logFile = NULL; 22 23int test(); 24 25int 26read_settings(const string& filename, map<string,Settings>* result, const string& rootDir) 27{ 28 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY); 29 if (root == NULL) { 30 SourcePos(filename, -1).Error("Error reading file."); 31 return 1; 32 } 33 34 // <configuration> 35 vector<XMLNode*> configNodes = root->GetElementsByName("", "configuration"); 36 const size_t I = configNodes.size(); 37 for (size_t i=0; i<I; i++) { 38 const XMLNode* configNode = configNodes[i]; 39 40 Settings settings; 41 settings.id = configNode->GetAttribute("", "id", ""); 42 if (settings.id == "") { 43 configNode->Position().Error("<configuration> needs an id attribute."); 44 delete root; 45 return 1; 46 } 47 48 settings.oldVersion = configNode->GetAttribute("", "old-cl", ""); 49 50 settings.currentVersion = configNode->GetAttribute("", "new-cl", ""); 51 if (settings.currentVersion == "") { 52 configNode->Position().Error("<configuration> needs a new-cl attribute."); 53 delete root; 54 return 1; 55 } 56 57 // <app> 58 vector<XMLNode*> appNodes = configNode->GetElementsByName("", "app"); 59 60 const size_t J = appNodes.size(); 61 for (size_t j=0; j<J; j++) { 62 const XMLNode* appNode = appNodes[j]; 63 64 string dir = appNode->GetAttribute("", "dir", ""); 65 if (dir == "") { 66 appNode->Position().Error("<app> needs a dir attribute."); 67 delete root; 68 return 1; 69 } 70 71 settings.apps.push_back(dir); 72 } 73 74 // <reject> 75 vector<XMLNode*> rejectNodes = configNode->GetElementsByName("", "reject"); 76 77 const size_t K = rejectNodes.size(); 78 for (size_t k=0; k<K; k++) { 79 const XMLNode* rejectNode = rejectNodes[k]; 80 81 Reject reject; 82 83 reject.file = rejectNode->GetAttribute("", "file", ""); 84 if (reject.file == "") { 85 rejectNode->Position().Error("<reject> needs a file attribute."); 86 delete root; 87 return 1; 88 } 89 string f = reject.file; 90 reject.file = rootDir; 91 reject.file += '/'; 92 reject.file += f; 93 94 reject.name = rejectNode->GetAttribute("", "name", ""); 95 if (reject.name == "") { 96 rejectNode->Position().Error("<reject> needs a name attribute."); 97 delete root; 98 return 1; 99 } 100 101 reject.comment = trim_string(rejectNode->CollapseTextContents()); 102 103 settings.reject.push_back(reject); 104 } 105 106 (*result)[settings.id] = settings; 107 } 108 109 delete root; 110 return 0; 111} 112 113 114static void 115ValuesFile_to_XLIFFFile(const ValuesFile* values, XLIFFFile* xliff, const string& englishFilename) 116{ 117 const set<StringResource>& strings = values->GetStrings(); 118 for (set<StringResource>::const_iterator it=strings.begin(); it!=strings.end(); it++) { 119 StringResource res = *it; 120 res.file = englishFilename; 121 xliff->AddStringResource(res); 122 } 123} 124 125static bool 126contains_reject(const Settings& settings, const string& file, const TransUnit& tu) 127{ 128 const string name = tu.id; 129 const vector<Reject>& reject = settings.reject; 130 const size_t I = reject.size(); 131 for (size_t i=0; i<I; i++) { 132 const Reject& r = reject[i]; 133 if (r.file == file && r.name == name) { 134 return true; 135 } 136 } 137 return false; 138} 139 140/** 141 * If it's been rejected, then we keep whatever info we have. 142 * 143 * Implements this truth table: 144 * 145 * S AT AS Keep 146 * ----------------------- 147 * 0 0 0 0 (this case can't happen) 148 * 0 0 1 0 (it was there, never translated, and removed) 149 * 0 1 0 0 (somehow it got translated, but it was removed) 150 * 0 1 1 0 (it was removed after having been translated) 151 * 152 * 1 0 0 1 (it was just added) 153 * 1 0 1 1 (it was added, has been changed, but it never got translated) 154 * 1 1 0 1 (somehow it got translated, but we don't know based on what) 155 * 1 1 1 0/1 (it's in both. 0 if S=AS b/c there's no need to retranslate if they're 156 * the same. 1 if S!=AS because S changed, so it should be retranslated) 157 * 158 * The first four are cases where, whatever happened in the past, the string isn't there 159 * now, so it shouldn't be in the XLIFF file. 160 * 161 * For cases 4 and 5, the string has never been translated, so get it translated. 162 * 163 * For case 6, it's unclear where the translated version came from, so we're conservative 164 * and send it back for them to have another shot at. 165 * 166 * For case 7, we have some data. We have two choices. We could rely on the translator's 167 * translation memory or tools to notice that the strings haven't changed, and populate the 168 * <target> field themselves. Or if the string hasn't changed since last time, we can just 169 * not even tell them about it. As the project nears the end, it will be convenient to see 170 * the xliff files reducing in size, so we pick the latter. Obviously, if the string has 171 * changed, then we need to get it retranslated. 172 */ 173bool 174keep_this_trans_unit(const string& file, const TransUnit& unit, void* cookie) 175{ 176 const Settings* settings = reinterpret_cast<const Settings*>(cookie); 177 178 if (contains_reject(*settings, file, unit)) { 179 return true; 180 } 181 182 if (unit.source.id == "") { 183 return false; 184 } 185 if (unit.altTarget.id == "" || unit.altSource.id == "") { 186 return true; 187 } 188 return unit.source.value->ContentsToString(XLIFF_NAMESPACES) 189 != unit.altSource.value->ContentsToString(XLIFF_NAMESPACES); 190} 191 192int 193validate_config(const string& settingsFile, const map<string,Settings>& settings, 194 const string& config) 195{ 196 if (settings.find(config) == settings.end()) { 197 SourcePos(settingsFile, -1).Error("settings file does not contain setting: %s\n", 198 config.c_str()); 199 return 1; 200 } 201 return 0; 202} 203 204int 205validate_configs(const string& settingsFile, const map<string,Settings>& settings, 206 const vector<string>& configs) 207{ 208 int err = 0; 209 for (size_t i=0; i<configs.size(); i++) { 210 string config = configs[i]; 211 err |= validate_config(settingsFile, settings, config); 212 } 213 return err; 214} 215 216int 217select_files(vector<string> *resFiles, const string& config, 218 const map<string,Settings>& settings, const string& rootDir) 219{ 220 int err; 221 vector<vector<string> > allResFiles; 222 vector<string> configs; 223 configs.push_back(config); 224 err = select_files(&allResFiles, configs, settings, rootDir); 225 if (err == 0) { 226 *resFiles = allResFiles[0]; 227 } 228 return err; 229} 230 231int 232select_files(vector<vector<string> > *allResFiles, const vector<string>& configs, 233 const map<string,Settings>& settings, const string& rootDir) 234{ 235 int err; 236 printf("Selecting files..."); 237 fflush(stdout); 238 239 for (size_t i=0; i<configs.size(); i++) { 240 const string& config = configs[i]; 241 const Settings& setting = settings.find(config)->second; 242 243 vector<string> resFiles; 244 err = Perforce::GetResourceFileNames(setting.currentVersion, rootDir, 245 setting.apps, &resFiles, true); 246 if (err != 0) { 247 fprintf(stderr, "error with perforce. bailing\n"); 248 return err; 249 } 250 251 allResFiles->push_back(resFiles); 252 } 253 return 0; 254} 255 256static int 257do_export(const string& settingsFile, const string& rootDir, const string& outDir, 258 const string& targetLocale, const vector<string>& configs) 259{ 260 bool success = true; 261 int err; 262 263 if (false) { 264 printf("settingsFile=%s\n", settingsFile.c_str()); 265 printf("rootDir=%s\n", rootDir.c_str()); 266 printf("outDir=%s\n", outDir.c_str()); 267 for (size_t i=0; i<configs.size(); i++) { 268 printf("config[%zd]=%s\n", i, configs[i].c_str()); 269 } 270 } 271 272 map<string,Settings> settings; 273 err = read_settings(settingsFile, &settings, rootDir); 274 if (err != 0) { 275 return err; 276 } 277 278 err = validate_configs(settingsFile, settings, configs); 279 if (err != 0) { 280 return err; 281 } 282 283 vector<vector<string> > allResFiles; 284 err = select_files(&allResFiles, configs, settings, rootDir); 285 if (err != 0) { 286 return err; 287 } 288 289 size_t totalFileCount = 0; 290 for (size_t i=0; i<allResFiles.size(); i++) { 291 totalFileCount += allResFiles[i].size(); 292 } 293 totalFileCount *= 3; // we try all 3 versions of the file 294 295 size_t fileProgress = 0; 296 vector<Stats> stats; 297 vector<pair<string,XLIFFFile*> > xliffs; 298 299 for (size_t i=0; i<configs.size(); i++) { 300 const string& config = configs[i]; 301 const Settings& setting = settings[config]; 302 303 if (false) { 304 fprintf(stderr, "Configuration: %s (%zd of %zd)\n", config.c_str(), i+1, 305 configs.size()); 306 fprintf(stderr, " Old CL: %s\n", setting.oldVersion.c_str()); 307 fprintf(stderr, " Current CL: %s\n", setting.currentVersion.c_str()); 308 } 309 310 Configuration english; 311 english.locale = "en_US"; 312 Configuration translated; 313 translated.locale = targetLocale; 314 XLIFFFile* xliff = XLIFFFile::Create(english, translated, setting.currentVersion); 315 316 const vector<string>& resFiles = allResFiles[i]; 317 const size_t J = resFiles.size(); 318 for (size_t j=0; j<J; j++) { 319 string resFile = resFiles[j]; 320 321 // parse the files into a ValuesFile 322 // pull out the strings and add them to the XLIFFFile 323 324 // current file 325 print_file_status(++fileProgress, totalFileCount); 326 ValuesFile* currentFile = get_values_file(resFile, english, CURRENT_VERSION, 327 setting.currentVersion, true); 328 if (currentFile != NULL) { 329 ValuesFile_to_XLIFFFile(currentFile, xliff, resFile); 330 //printf("currentFile=[%s]\n", currentFile->ToString().c_str()); 331 } else { 332 fprintf(stderr, "error reading file %s@%s\n", resFile.c_str(), 333 setting.currentVersion.c_str()); 334 success = false; 335 } 336 337 // old file 338 print_file_status(++fileProgress, totalFileCount); 339 ValuesFile* oldFile = get_values_file(resFile, english, OLD_VERSION, 340 setting.oldVersion, false); 341 if (oldFile != NULL) { 342 ValuesFile_to_XLIFFFile(oldFile, xliff, resFile); 343 //printf("oldFile=[%s]\n", oldFile->ToString().c_str()); 344 } 345 346 // translated version 347 // (get the head of the tree for the most recent translation, but it's considered 348 // the old one because the "current" one hasn't been made yet, and this goes into 349 // the <alt-trans> tag if necessary 350 print_file_status(++fileProgress, totalFileCount); 351 string transFilename = translated_file_name(resFile, targetLocale); 352 ValuesFile* transFile = get_values_file(transFilename, translated, OLD_VERSION, 353 setting.currentVersion, false); 354 if (transFile != NULL) { 355 ValuesFile_to_XLIFFFile(transFile, xliff, resFile); 356 } 357 358 delete currentFile; 359 delete oldFile; 360 delete transFile; 361 } 362 363 Stats beforeFilterStats = xliff->GetStats(config); 364 365 // run through the XLIFFFile and strip out TransUnits that have identical 366 // old and current source values and are not in the reject list, or just 367 // old values and no source values 368 xliff->Filter(keep_this_trans_unit, (void*)&setting); 369 370 Stats afterFilterStats = xliff->GetStats(config); 371 afterFilterStats.totalStrings = beforeFilterStats.totalStrings; 372 373 // add the reject comments 374 for (vector<Reject>::const_iterator reject = setting.reject.begin(); 375 reject != setting.reject.end(); reject++) { 376 TransUnit* tu = xliff->EditTransUnit(reject->file, reject->name); 377 tu->rejectComment = reject->comment; 378 } 379 380 // config-locale-current_cl.xliff 381 stringstream filename; 382 if (outDir != "") { 383 filename << outDir << '/'; 384 } 385 filename << config << '-' << targetLocale << '-' << setting.currentVersion << ".xliff"; 386 xliffs.push_back(pair<string,XLIFFFile*>(filename.str(), xliff)); 387 388 stats.push_back(afterFilterStats); 389 } 390 391 // today is a good day to die 392 if (!success || SourcePos::HasErrors()) { 393 return 1; 394 } 395 396 // write the XLIFF files 397 printf("\nWriting %zd file%s...\n", xliffs.size(), xliffs.size() == 1 ? "" : "s"); 398 for (vector<pair<string,XLIFFFile*> >::iterator it = xliffs.begin(); it != xliffs.end(); it++) { 399 const string& filename = it->first; 400 XLIFFFile* xliff = it->second; 401 string text = xliff->ToString(); 402 write_to_file(filename, text); 403 } 404 405 // the stats 406 printf("\n" 407 " to without total\n" 408 " config files translate comments strings\n" 409 "-----------------------------------------------------------------------\n"); 410 Stats totals; 411 totals.config = "total"; 412 totals.files = 0; 413 totals.toBeTranslated = 0; 414 totals.noComments = 0; 415 totals.totalStrings = 0; 416 for (vector<Stats>::iterator it=stats.begin(); it!=stats.end(); it++) { 417 string cfg = it->config; 418 if (cfg.length() > 20) { 419 cfg.resize(20); 420 } 421 printf(" %-20s %-9zd %-9zd %-9zd %-19zd\n", cfg.c_str(), it->files, 422 it->toBeTranslated, it->noComments, it->totalStrings); 423 totals.files += it->files; 424 totals.toBeTranslated += it->toBeTranslated; 425 totals.noComments += it->noComments; 426 totals.totalStrings += it->totalStrings; 427 } 428 if (stats.size() > 1) { 429 printf("-----------------------------------------------------------------------\n" 430 " %-20s %-9zd %-9zd %-9zd %-19zd\n", totals.config.c_str(), totals.files, 431 totals.toBeTranslated, totals.noComments, totals.totalStrings); 432 } 433 printf("\n"); 434 return 0; 435} 436 437struct PseudolocalizeSettings { 438 XLIFFFile* xliff; 439 bool expand; 440}; 441 442 443string 444pseudolocalize_string(const string& source, const PseudolocalizeSettings* settings) 445{ 446 return pseudolocalize_string(source); 447} 448 449static XMLNode* 450pseudolocalize_xml_node(const XMLNode* source, const PseudolocalizeSettings* settings) 451{ 452 if (source->Type() == XMLNode::TEXT) { 453 return XMLNode::NewText(source->Position(), pseudolocalize_string(source->Text(), settings), 454 source->Pretty()); 455 } else { 456 XMLNode* target; 457 if (source->Namespace() == XLIFF_XMLNS && source->Name() == "g") { 458 // XXX don't translate these 459 target = XMLNode::NewElement(source->Position(), source->Namespace(), 460 source->Name(), source->Attributes(), source->Pretty()); 461 } else { 462 target = XMLNode::NewElement(source->Position(), source->Namespace(), 463 source->Name(), source->Attributes(), source->Pretty()); 464 } 465 466 const vector<XMLNode*>& children = source->Children(); 467 const size_t I = children.size(); 468 for (size_t i=0; i<I; i++) { 469 target->EditChildren().push_back(pseudolocalize_xml_node(children[i], settings)); 470 } 471 472 return target; 473 } 474} 475 476void 477pseudolocalize_trans_unit(const string&file, TransUnit* unit, void* cookie) 478{ 479 const PseudolocalizeSettings* settings = (PseudolocalizeSettings*)cookie; 480 481 const StringResource& source = unit->source; 482 StringResource* target = &unit->target; 483 *target = source; 484 485 target->config = settings->xliff->TargetConfig(); 486 487 delete target->value; 488 target->value = pseudolocalize_xml_node(source.value, settings); 489} 490 491int 492pseudolocalize_xliff(XLIFFFile* xliff, bool expand) 493{ 494 PseudolocalizeSettings settings; 495 496 settings.xliff = xliff; 497 settings.expand = expand; 498 xliff->Map(pseudolocalize_trans_unit, &settings); 499 return 0; 500} 501 502static int 503do_pseudo(const string& infile, const string& outfile, bool expand) 504{ 505 int err; 506 507 XLIFFFile* xliff = XLIFFFile::Parse(infile); 508 if (xliff == NULL) { 509 return 1; 510 } 511 512 pseudolocalize_xliff(xliff, expand); 513 514 err = write_to_file(outfile, xliff->ToString()); 515 516 delete xliff; 517 518 return err; 519} 520 521void 522log_printf(const char *fmt, ...) 523{ 524 int ret; 525 va_list ap; 526 527 if (g_logFile != NULL) { 528 va_start(ap, fmt); 529 ret = vfprintf(g_logFile, fmt, ap); 530 va_end(ap); 531 fflush(g_logFile); 532 } 533} 534 535void 536close_log_file() 537{ 538 if (g_logFile != NULL) { 539 fclose(g_logFile); 540 } 541} 542 543void 544open_log_file(const char* file) 545{ 546 g_logFile = fopen(file, "w"); 547 printf("log file: %s -- %p\n", file, g_logFile); 548 atexit(close_log_file); 549} 550 551static int 552usage() 553{ 554 fprintf(stderr, 555 "usage: localize export OPTIONS CONFIGS...\n" 556 " REQUIRED OPTIONS\n" 557 " --settings SETTINGS The settings file to use. See CONFIGS below.\n" 558 " --root TREE_ROOT The location in Perforce of the files. e.g. //device\n" 559 " --target LOCALE The target locale. See LOCALES below.\n" 560 "\n" 561 " OPTIONAL OPTIONS\n" 562 " --out DIR Directory to put the output files. Defaults to the\n" 563 " current directory if not supplied. Files are\n" 564 " named as follows:\n" 565 " CONFIG-LOCALE-CURRENT_CL.xliff\n" 566 "\n" 567 "\n" 568 "usage: localize import XLIFF_FILE...\n" 569 "\n" 570 "Import a translated XLIFF file back into the tree.\n" 571 "\n" 572 "\n" 573 "usage: localize xlb XMB_FILE VALUES_FILES...\n" 574 "\n" 575 "Read resource files from the tree file and write the corresponding XLB file\n" 576 "\n" 577 "Supply all of the android resource files (values files) to export after that.\n" 578 "\n" 579 "\n" 580 "\n" 581 "CONFIGS\n" 582 "\n" 583 "LOCALES\n" 584 "Locales are specified in the form en_US They will be processed correctly\n" 585 "to locate the resouce files in the tree.\n" 586 "\n" 587 "\n" 588 "usage: localize pseudo OPTIONS INFILE [OUTFILE]\n" 589 " OPTIONAL OPTIONS\n" 590 " --big Pad strings so they get longer.\n" 591 "\n" 592 "Read INFILE, an XLIFF file, and output a pseudotranslated version of that file. If\n" 593 "OUTFILE is specified, the results are written there; otherwise, the results are\n" 594 "written back to INFILE.\n" 595 "\n" 596 "\n" 597 "usage: localize rescheck FILES...\n" 598 "\n" 599 "Reads the base strings and prints warnings about bad resources from the given files.\n" 600 "\n"); 601 return 1; 602} 603 604int 605main(int argc, const char** argv) 606{ 607 //open_log_file("log.txt"); 608 //g_logFile = stdout; 609 610 if (argc == 2 && 0 == strcmp(argv[1], "--test")) { 611 return test(); 612 } 613 614 if (argc < 2) { 615 return usage(); 616 } 617 618 int index = 1; 619 620 if (0 == strcmp("export", argv[index])) { 621 string settingsFile; 622 string rootDir; 623 string outDir; 624 string baseLocale = "en"; 625 string targetLocale; 626 string language, region; 627 vector<string> configs; 628 629 index++; 630 while (index < argc) { 631 if (0 == strcmp("--settings", argv[index])) { 632 settingsFile = argv[index+1]; 633 index += 2; 634 } 635 else if (0 == strcmp("--root", argv[index])) { 636 rootDir = argv[index+1]; 637 index += 2; 638 } 639 else if (0 == strcmp("--out", argv[index])) { 640 outDir = argv[index+1]; 641 index += 2; 642 } 643 else if (0 == strcmp("--target", argv[index])) { 644 targetLocale = argv[index+1]; 645 index += 2; 646 } 647 else if (argv[index][0] == '-') { 648 fprintf(stderr, "unknown argument %s\n", argv[index]); 649 return usage(); 650 } 651 else { 652 break; 653 } 654 } 655 for (; index<argc; index++) { 656 configs.push_back(argv[index]); 657 } 658 659 if (settingsFile == "" || rootDir == "" || configs.size() == 0 || targetLocale == "") { 660 return usage(); 661 } 662 if (!split_locale(targetLocale, &language, ®ion)) { 663 fprintf(stderr, "illegal --target locale: '%s'\n", targetLocale.c_str()); 664 return usage(); 665 } 666 667 668 return do_export(settingsFile, rootDir, outDir, targetLocale, configs); 669 } 670 else if (0 == strcmp("import", argv[index])) { 671 vector<string> xliffFilenames; 672 673 index++; 674 for (; index<argc; index++) { 675 xliffFilenames.push_back(argv[index]); 676 } 677 678 return do_merge(xliffFilenames); 679 } 680 else if (0 == strcmp("xlb", argv[index])) { 681 string outfile; 682 vector<string> resFiles; 683 684 index++; 685 if (argc < index+1) { 686 return usage(); 687 } 688 689 outfile = argv[index]; 690 691 index++; 692 for (; index<argc; index++) { 693 resFiles.push_back(argv[index]); 694 } 695 696 return do_xlb_export(outfile, resFiles); 697 } 698 else if (0 == strcmp("pseudo", argv[index])) { 699 string infile; 700 string outfile; 701 bool big = false; 702 703 index++; 704 while (index < argc) { 705 if (0 == strcmp("--big", argv[index])) { 706 big = true; 707 index += 1; 708 } 709 else if (argv[index][0] == '-') { 710 fprintf(stderr, "unknown argument %s\n", argv[index]); 711 return usage(); 712 } 713 else { 714 break; 715 } 716 } 717 718 if (index == argc-1) { 719 infile = argv[index]; 720 outfile = argv[index]; 721 } 722 else if (index == argc-2) { 723 infile = argv[index]; 724 outfile = argv[index+1]; 725 } 726 else { 727 fprintf(stderr, "unknown argument %s\n", argv[index]); 728 return usage(); 729 } 730 731 return do_pseudo(infile, outfile, big); 732 } 733 else if (0 == strcmp("rescheck", argv[index])) { 734 vector<string> files; 735 736 index++; 737 while (index < argc) { 738 if (argv[index][0] == '-') { 739 fprintf(stderr, "unknown argument %s\n", argv[index]); 740 return usage(); 741 } 742 else { 743 break; 744 } 745 } 746 for (; index<argc; index++) { 747 files.push_back(argv[index]); 748 } 749 750 if (files.size() == 0) { 751 return usage(); 752 } 753 754 return do_rescheck(files); 755 } 756 else { 757 return usage(); 758 } 759 760 if (SourcePos::HasErrors()) { 761 SourcePos::PrintErrors(stderr); 762 return 1; 763 } 764 765 return 0; 766} 767 768