localize.cpp revision b798689749c64baba81f02e10cf2157c747d6b46
1#include "SourcePos.h"
2#include "ValuesFile.h"
3#include "XLIFFFile.h"
4#include "Perforce.h"
5#include "merge_res_and_xliff.h"
6#include "localize.h"
7#include "file_utils.h"
8#include "res_check.h"
9#include "xmb.h"
10
11#include <host/pseudolocalize.h>
12
13#include <stdlib.h>
14#include <stdarg.h>
15#include <sstream>
16#include <stdio.h>
17#include <string.h>
18
19using namespace std;
20
/** Stream that log_printf() writes to; nothing is logged while this is NULL. */
FILE* g_logFile = NULL;

/** Entry point invoked by "localize --test"; its definition is not visible here. */
int test();
24
25int
26read_settings(const string& filename, map<string,Settings>* result, const string& rootDir)
27{
28    XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
29    if (root == NULL) {
30        SourcePos(filename, -1).Error("Error reading file.");
31        return 1;
32    }
33
34    // <configuration>
35    vector<XMLNode*> configNodes = root->GetElementsByName("", "configuration");
36    const size_t I = configNodes.size();
37    for (size_t i=0; i<I; i++) {
38        const XMLNode* configNode = configNodes[i];
39
40        Settings settings;
41        settings.id = configNode->GetAttribute("", "id", "");
42        if (settings.id == "") {
43            configNode->Position().Error("<configuration> needs an id attribute.");
44            delete root;
45            return 1;
46        }
47
48        settings.oldVersion = configNode->GetAttribute("", "old-cl", "");
49
50        settings.currentVersion = configNode->GetAttribute("", "new-cl", "");
51        if (settings.currentVersion == "") {
52            configNode->Position().Error("<configuration> needs a new-cl attribute.");
53            delete root;
54            return 1;
55        }
56
57        // <app>
58        vector<XMLNode*> appNodes = configNode->GetElementsByName("", "app");
59
60        const size_t J = appNodes.size();
61        for (size_t j=0; j<J; j++) {
62            const XMLNode* appNode = appNodes[j];
63
64            string dir = appNode->GetAttribute("", "dir", "");
65            if (dir == "") {
66                appNode->Position().Error("<app> needs a dir attribute.");
67                delete root;
68                return 1;
69            }
70
71            settings.apps.push_back(dir);
72        }
73
74        // <reject>
75        vector<XMLNode*> rejectNodes = configNode->GetElementsByName("", "reject");
76
77        const size_t K = rejectNodes.size();
78        for (size_t k=0; k<K; k++) {
79            const XMLNode* rejectNode = rejectNodes[k];
80
81            Reject reject;
82
83            reject.file = rejectNode->GetAttribute("", "file", "");
84            if (reject.file == "") {
85                rejectNode->Position().Error("<reject> needs a file attribute.");
86                delete root;
87                return 1;
88            }
89            string f =  reject.file;
90            reject.file = rootDir;
91            reject.file += '/';
92            reject.file += f;
93
94            reject.name = rejectNode->GetAttribute("", "name", "");
95            if (reject.name == "") {
96                rejectNode->Position().Error("<reject> needs a name attribute.");
97                delete root;
98                return 1;
99            }
100
101            reject.comment = trim_string(rejectNode->CollapseTextContents());
102
103            settings.reject.push_back(reject);
104        }
105
106        (*result)[settings.id] = settings;
107    }
108
109    delete root;
110    return 0;
111}
112
113
114static void
115ValuesFile_to_XLIFFFile(const ValuesFile* values, XLIFFFile* xliff, const string& englishFilename)
116{
117    const set<StringResource>& strings = values->GetStrings();
118    for (set<StringResource>::const_iterator it=strings.begin(); it!=strings.end(); it++) {
119        StringResource res = *it;
120        res.file = englishFilename;
121        xliff->AddStringResource(res);
122    }
123}
124
125static bool
126contains_reject(const Settings& settings, const string& file, const TransUnit& tu)
127{
128    const string name = tu.id;
129    const vector<Reject>& reject = settings.reject;
130    const size_t I = reject.size();
131    for (size_t i=0; i<I; i++) {
132        const Reject& r = reject[i];
133        if (r.file == file && r.name == name) {
134            return true;
135        }
136    }
137    return false;
138}
139
140/**
141 * If it's been rejected, then we keep whatever info we have.
142 *
143 * Implements this truth table:
144 *
145 *    S   AT   AS     Keep
146 *   -----------------------
147 *    0    0    0      0    (this case can't happen)
148 *    0    0    1      0    (it was there, never translated, and removed)
149 *    0    1    0      0    (somehow it got translated, but it was removed)
150 *    0    1    1      0    (it was removed after having been translated)
151 *
152 *    1    0    0      1    (it was just added)
153 *    1    0    1      1    (it was added, has been changed, but it never got translated)
154 *    1    1    0      1    (somehow it got translated, but we don't know based on what)
155 *    1    1    1     0/1   (it's in both.  0 if S=AS b/c there's no need to retranslate if they're
156 *                           the same.  1 if S!=AS because S changed, so it should be retranslated)
157 *
158 * The first four are cases where, whatever happened in the past, the string isn't there
159 * now, so it shouldn't be in the XLIFF file.
160 *
161 * For cases 4 and 5, the string has never been translated, so get it translated.
162 *
163 * For case 6, it's unclear where the translated version came from, so we're conservative
164 * and send it back for them to have another shot at.
165 *
166 * For case 7, we have some data.  We have two choices.  We could rely on the translator's
167 * translation memory or tools to notice that the strings haven't changed, and populate the
168 * <target> field themselves.  Or if the string hasn't changed since last time, we can just
169 * not even tell them about it.  As the project nears the end, it will be convenient to see
170 * the xliff files reducing in size, so we pick the latter.  Obviously, if the string has
171 * changed, then we need to get it retranslated.
172 */
173bool
174keep_this_trans_unit(const string& file, const TransUnit& unit, void* cookie)
175{
176    const Settings* settings = reinterpret_cast<const Settings*>(cookie);
177
178    if (contains_reject(*settings, file, unit)) {
179        return true;
180    }
181
182    if (unit.source.id == "") {
183        return false;
184    }
185    if (unit.altTarget.id == "" || unit.altSource.id == "") {
186        return true;
187    }
188    return unit.source.value->ContentsToString(XLIFF_NAMESPACES)
189            != unit.altSource.value->ContentsToString(XLIFF_NAMESPACES);
190}
191
192int
193validate_config(const string& settingsFile, const map<string,Settings>& settings,
194        const string& config)
195{
196    if (settings.find(config) == settings.end()) {
197        SourcePos(settingsFile, -1).Error("settings file does not contain setting: %s\n",
198                config.c_str());
199        return 1;
200    }
201    return 0;
202}
203
204int
205validate_configs(const string& settingsFile, const map<string,Settings>& settings,
206        const vector<string>& configs)
207{
208    int err = 0;
209    for (size_t i=0; i<configs.size(); i++) {
210        string config = configs[i];
211        err |= validate_config(settingsFile, settings, config);
212    }
213    return err;
214}
215
216int
217select_files(vector<string> *resFiles, const string& config,
218        const map<string,Settings>& settings, const string& rootDir)
219{
220    int err;
221    vector<vector<string> > allResFiles;
222    vector<string> configs;
223    configs.push_back(config);
224    err = select_files(&allResFiles, configs, settings, rootDir);
225    if (err == 0) {
226        *resFiles = allResFiles[0];
227    }
228    return err;
229}
230
231int
232select_files(vector<vector<string> > *allResFiles, const vector<string>& configs,
233        const map<string,Settings>& settings, const string& rootDir)
234{
235    int err;
236    printf("Selecting files...");
237    fflush(stdout);
238
239    for (size_t i=0; i<configs.size(); i++) {
240        const string& config = configs[i];
241        const Settings& setting = settings.find(config)->second;
242
243        vector<string> resFiles;
244        err = Perforce::GetResourceFileNames(setting.currentVersion, rootDir,
245                                                setting.apps, &resFiles, true);
246        if (err != 0) {
247            fprintf(stderr, "error with perforce.  bailing\n");
248            return err;
249        }
250
251        allResFiles->push_back(resFiles);
252    }
253    return 0;
254}
255
256static int
257do_export(const string& settingsFile, const string& rootDir, const string& outDir,
258            const string& targetLocale, const vector<string>& configs)
259{
260    bool success = true;
261    int err;
262
263    if (false) {
264        printf("settingsFile=%s\n", settingsFile.c_str());
265        printf("rootDir=%s\n", rootDir.c_str());
266        printf("outDir=%s\n", outDir.c_str());
267        for (size_t i=0; i<configs.size(); i++) {
268            printf("config[%zd]=%s\n", i, configs[i].c_str());
269        }
270    }
271
272    map<string,Settings> settings;
273    err = read_settings(settingsFile, &settings, rootDir);
274    if (err != 0) {
275        return err;
276    }
277
278    err = validate_configs(settingsFile, settings, configs);
279    if (err != 0) {
280        return err;
281    }
282
283    vector<vector<string> > allResFiles;
284    err = select_files(&allResFiles, configs, settings, rootDir);
285    if (err != 0) {
286        return err;
287    }
288
289    size_t totalFileCount = 0;
290    for (size_t i=0; i<allResFiles.size(); i++) {
291        totalFileCount += allResFiles[i].size();
292    }
293    totalFileCount *= 3; // we try all 3 versions of the file
294
295    size_t fileProgress = 0;
296    vector<Stats> stats;
297    vector<pair<string,XLIFFFile*> > xliffs;
298
299    for (size_t i=0; i<configs.size(); i++) {
300        const string& config = configs[i];
301        const Settings& setting = settings[config];
302
303        if (false) {
304            fprintf(stderr, "Configuration: %s (%zd of %zd)\n", config.c_str(), i+1,
305                    configs.size());
306            fprintf(stderr, "  Old CL:     %s\n", setting.oldVersion.c_str());
307            fprintf(stderr, "  Current CL: %s\n", setting.currentVersion.c_str());
308        }
309
310        Configuration english;
311            english.locale = "en_US";
312        Configuration translated;
313            translated.locale = targetLocale;
314        XLIFFFile* xliff = XLIFFFile::Create(english, translated, setting.currentVersion);
315
316        const vector<string>& resFiles = allResFiles[i];
317        const size_t J = resFiles.size();
318        for (size_t j=0; j<J; j++) {
319            string resFile = resFiles[j];
320
321            // parse the files into a ValuesFile
322            // pull out the strings and add them to the XLIFFFile
323
324            // current file
325            print_file_status(++fileProgress, totalFileCount);
326            ValuesFile* currentFile = get_values_file(resFile, english, CURRENT_VERSION,
327                                                        setting.currentVersion, true);
328            if (currentFile != NULL) {
329                ValuesFile_to_XLIFFFile(currentFile, xliff, resFile);
330                //printf("currentFile=[%s]\n", currentFile->ToString().c_str());
331            } else {
332                fprintf(stderr, "error reading file %s@%s\n", resFile.c_str(),
333                            setting.currentVersion.c_str());
334                success = false;
335            }
336
337            // old file
338            print_file_status(++fileProgress, totalFileCount);
339            ValuesFile* oldFile = get_values_file(resFile, english, OLD_VERSION,
340                                                        setting.oldVersion, false);
341            if (oldFile != NULL) {
342                ValuesFile_to_XLIFFFile(oldFile, xliff, resFile);
343                //printf("oldFile=[%s]\n", oldFile->ToString().c_str());
344            }
345
346            // translated version
347            // (get the head of the tree for the most recent translation, but it's considered
348            // the old one because the "current" one hasn't been made yet, and this goes into
349            // the <alt-trans> tag if necessary
350            print_file_status(++fileProgress, totalFileCount);
351            string transFilename = translated_file_name(resFile, targetLocale);
352            ValuesFile* transFile = get_values_file(transFilename, translated, OLD_VERSION,
353                                                        setting.currentVersion, false);
354            if (transFile != NULL) {
355                ValuesFile_to_XLIFFFile(transFile, xliff, resFile);
356            }
357
358            delete currentFile;
359            delete oldFile;
360            delete transFile;
361        }
362
363        Stats beforeFilterStats = xliff->GetStats(config);
364
365        // run through the XLIFFFile and strip out TransUnits that have identical
366        // old and current source values and are not in the reject list, or just
367        // old values and no source values
368        xliff->Filter(keep_this_trans_unit, (void*)&setting);
369
370        Stats afterFilterStats = xliff->GetStats(config);
371        afterFilterStats.totalStrings = beforeFilterStats.totalStrings;
372
373        // add the reject comments
374        for (vector<Reject>::const_iterator reject = setting.reject.begin();
375                reject != setting.reject.end(); reject++) {
376            TransUnit* tu = xliff->EditTransUnit(reject->file, reject->name);
377            tu->rejectComment = reject->comment;
378        }
379
380        // config-locale-current_cl.xliff
381        stringstream filename;
382        if (outDir != "") {
383            filename << outDir << '/';
384        }
385        filename << config << '-' << targetLocale << '-' << setting.currentVersion << ".xliff";
386        xliffs.push_back(pair<string,XLIFFFile*>(filename.str(), xliff));
387
388        stats.push_back(afterFilterStats);
389    }
390
391    // today is a good day to die
392    if (!success || SourcePos::HasErrors()) {
393        return 1;
394    }
395
396    // write the XLIFF files
397    printf("\nWriting %zd file%s...\n", xliffs.size(), xliffs.size() == 1 ? "" : "s");
398    for (vector<pair<string,XLIFFFile*> >::iterator it = xliffs.begin(); it != xliffs.end(); it++) {
399        const string& filename = it->first;
400        XLIFFFile* xliff = it->second;
401        string text = xliff->ToString();
402        write_to_file(filename, text);
403    }
404
405    // the stats
406    printf("\n"
407           "                                  to          without     total\n"
408           " config               files       translate   comments    strings\n"
409           "-----------------------------------------------------------------------\n");
410    Stats totals;
411        totals.config = "total";
412        totals.files = 0;
413        totals.toBeTranslated = 0;
414        totals.noComments = 0;
415        totals.totalStrings = 0;
416    for (vector<Stats>::iterator it=stats.begin(); it!=stats.end(); it++) {
417        string cfg = it->config;
418        if (cfg.length() > 20) {
419            cfg.resize(20);
420        }
421        printf(" %-20s  %-9zd   %-9zd   %-9zd   %-19zd\n", cfg.c_str(), it->files,
422                it->toBeTranslated, it->noComments, it->totalStrings);
423        totals.files += it->files;
424        totals.toBeTranslated += it->toBeTranslated;
425        totals.noComments += it->noComments;
426        totals.totalStrings += it->totalStrings;
427    }
428    if (stats.size() > 1) {
429        printf("-----------------------------------------------------------------------\n"
430               " %-20s  %-9zd   %-9zd   %-9zd   %-19zd\n", totals.config.c_str(), totals.files,
431                    totals.toBeTranslated, totals.noComments, totals.totalStrings);
432    }
433    printf("\n");
434    return 0;
435}
436
437struct PseudolocalizeSettings {
438    XLIFFFile* xliff;
439    bool expand;
440};
441
442
443string
444pseudolocalize_string(const string& source, const PseudolocalizeSettings* settings)
445{
446    return pseudolocalize_string(source);
447}
448
449static XMLNode*
450pseudolocalize_xml_node(const XMLNode* source, const PseudolocalizeSettings* settings)
451{
452    if (source->Type() == XMLNode::TEXT) {
453        return XMLNode::NewText(source->Position(), pseudolocalize_string(source->Text(), settings),
454                                source->Pretty());
455    } else {
456        XMLNode* target;
457        if (source->Namespace() == XLIFF_XMLNS && source->Name() == "g") {
458            // XXX don't translate these
459            target = XMLNode::NewElement(source->Position(), source->Namespace(),
460                                    source->Name(), source->Attributes(), source->Pretty());
461        } else {
462            target = XMLNode::NewElement(source->Position(), source->Namespace(),
463                                    source->Name(), source->Attributes(), source->Pretty());
464        }
465
466        const vector<XMLNode*>& children = source->Children();
467        const size_t I = children.size();
468        for (size_t i=0; i<I; i++) {
469            target->EditChildren().push_back(pseudolocalize_xml_node(children[i], settings));
470        }
471
472        return target;
473    }
474}
475
476void
477pseudolocalize_trans_unit(const string&file, TransUnit* unit, void* cookie)
478{
479    const PseudolocalizeSettings* settings = (PseudolocalizeSettings*)cookie;
480
481    const StringResource& source = unit->source;
482    StringResource* target = &unit->target;
483    *target = source;
484
485    target->config = settings->xliff->TargetConfig();
486
487    delete target->value;
488    target->value = pseudolocalize_xml_node(source.value, settings);
489}
490
491int
492pseudolocalize_xliff(XLIFFFile* xliff, bool expand)
493{
494    PseudolocalizeSettings settings;
495
496    settings.xliff = xliff;
497    settings.expand = expand;
498    xliff->Map(pseudolocalize_trans_unit, &settings);
499    return 0;
500}
501
502static int
503do_pseudo(const string& infile, const string& outfile, bool expand)
504{
505    int err;
506
507    XLIFFFile* xliff = XLIFFFile::Parse(infile);
508    if (xliff == NULL) {
509        return 1;
510    }
511
512    pseudolocalize_xliff(xliff, expand);
513
514    err = write_to_file(outfile, xliff->ToString());
515
516    delete xliff;
517
518    return err;
519}
520
521void
522log_printf(const char *fmt, ...)
523{
524    int ret;
525    va_list ap;
526
527    if (g_logFile != NULL) {
528        va_start(ap, fmt);
529        ret = vfprintf(g_logFile, fmt, ap);
530        va_end(ap);
531        fflush(g_logFile);
532    }
533}
534
535void
536close_log_file()
537{
538    if (g_logFile != NULL) {
539        fclose(g_logFile);
540    }
541}
542
543void
544open_log_file(const char* file)
545{
546    g_logFile = fopen(file, "w");
547    printf("log file: %s -- %p\n", file, g_logFile);
548    atexit(close_log_file);
549}
550
/**
 * Write the command-line help for all sub-commands to stderr.
 * Always returns 1 so callers can use it directly as a failing exit status.
 */
static int
usage()
{
    static const char* const kHelpText =
            "usage: localize export OPTIONS CONFIGS...\n"
            "   REQUIRED OPTIONS\n"
            "     --settings SETTINGS   The settings file to use.  See CONFIGS below.\n"
            "     --root TREE_ROOT      The location in Perforce of the files.  e.g. //device\n"
            "     --target LOCALE       The target locale.  See LOCALES below.\n"
            "\n"
            "   OPTIONAL OPTIONS\n"
            "      --out DIR            Directory to put the output files.  Defaults to the\n"
            "                           current directory if not supplied.  Files are\n"
            "                           named as follows:\n"
            "                               CONFIG-LOCALE-CURRENT_CL.xliff\n"
            "\n"
            "\n"
            "usage: localize import XLIFF_FILE...\n"
            "\n"
            "Import a translated XLIFF file back into the tree.\n"
            "\n"
            "\n"
            "usage: localize xlb XMB_FILE VALUES_FILES...\n"
            "\n"
            "Read resource files from the tree file and write the corresponding XLB file\n"
            "\n"
            "Supply all of the android resource files (values files) to export after that.\n"
            "\n"
            "\n"
            "\n"
            "CONFIGS\n"
            "\n"
            "LOCALES\n"
            "Locales are specified in the form en_US  They will be processed correctly\n"
            "to locate the resouce files in the tree.\n"
            "\n"
            "\n"
            "usage: localize pseudo OPTIONS INFILE [OUTFILE]\n"
            "   OPTIONAL OPTIONS\n"
            "     --big                 Pad strings so they get longer.\n"
            "\n"
            "Read INFILE, an XLIFF file, and output a pseudotranslated version of that file.  If\n"
            "OUTFILE is specified, the results are written there; otherwise, the results are\n"
            "written back to INFILE.\n"
            "\n"
            "\n"
            "usage: localize rescheck FILES...\n"
            "\n"
            "Reads the base strings and prints warnings about bad resources from the given files.\n"
            "\n";

    // The text has no conversion specifications, so it can be written verbatim.
    fputs(kHelpText, stderr);
    return 1;
}
603
604int
605main(int argc, const char** argv)
606{
607    //open_log_file("log.txt");
608    //g_logFile = stdout;
609
610    if (argc == 2 && 0 == strcmp(argv[1], "--test")) {
611        return test();
612    }
613
614    if (argc < 2) {
615        return usage();
616    }
617
618    int index = 1;
619
620    if (0 == strcmp("export", argv[index])) {
621        string settingsFile;
622        string rootDir;
623        string outDir;
624        string baseLocale = "en";
625        string targetLocale;
626        string language, region;
627        vector<string> configs;
628
629        index++;
630        while (index < argc) {
631            if (0 == strcmp("--settings", argv[index])) {
632                settingsFile = argv[index+1];
633                index += 2;
634            }
635            else if (0 == strcmp("--root", argv[index])) {
636                rootDir = argv[index+1];
637                index += 2;
638            }
639            else if (0 == strcmp("--out", argv[index])) {
640                outDir = argv[index+1];
641                index += 2;
642            }
643            else if (0 == strcmp("--target", argv[index])) {
644                targetLocale = argv[index+1];
645                index += 2;
646            }
647            else if (argv[index][0] == '-') {
648                fprintf(stderr, "unknown argument %s\n", argv[index]);
649                return usage();
650            }
651            else {
652                break;
653            }
654        }
655        for (; index<argc; index++) {
656            configs.push_back(argv[index]);
657        }
658
659        if (settingsFile == "" || rootDir == "" || configs.size() == 0 || targetLocale == "") {
660            return usage();
661        }
662        if (!split_locale(targetLocale, &language, &region)) {
663            fprintf(stderr, "illegal --target locale: '%s'\n", targetLocale.c_str());
664            return usage();
665        }
666
667
668        return do_export(settingsFile, rootDir, outDir, targetLocale, configs);
669    }
670    else if (0 == strcmp("import", argv[index])) {
671        vector<string> xliffFilenames;
672
673        index++;
674        for (; index<argc; index++) {
675            xliffFilenames.push_back(argv[index]);
676        }
677
678        return do_merge(xliffFilenames);
679    }
680    else if (0 == strcmp("xlb", argv[index])) {
681        string outfile;
682        vector<string> resFiles;
683
684        index++;
685        if (argc < index+1) {
686            return usage();
687        }
688
689        outfile = argv[index];
690
691        index++;
692        for (; index<argc; index++) {
693            resFiles.push_back(argv[index]);
694        }
695
696        return do_xlb_export(outfile, resFiles);
697    }
698    else if (0 == strcmp("pseudo", argv[index])) {
699        string infile;
700        string outfile;
701        bool big = false;
702
703        index++;
704        while (index < argc) {
705            if (0 == strcmp("--big", argv[index])) {
706                big = true;
707                index += 1;
708            }
709            else if (argv[index][0] == '-') {
710                fprintf(stderr, "unknown argument %s\n", argv[index]);
711                return usage();
712            }
713            else {
714                break;
715            }
716        }
717
718        if (index == argc-1) {
719            infile = argv[index];
720            outfile = argv[index];
721        }
722        else if (index == argc-2) {
723            infile = argv[index];
724            outfile = argv[index+1];
725        }
726        else {
727            fprintf(stderr, "unknown argument %s\n", argv[index]);
728            return usage();
729        }
730
731        return do_pseudo(infile, outfile, big);
732    }
733    else if (0 == strcmp("rescheck", argv[index])) {
734        vector<string> files;
735
736        index++;
737        while (index < argc) {
738            if (argv[index][0] == '-') {
739                fprintf(stderr, "unknown argument %s\n", argv[index]);
740                return usage();
741            }
742            else {
743                break;
744            }
745        }
746        for (; index<argc; index++) {
747            files.push_back(argv[index]);
748        }
749
750        if (files.size() == 0) {
751            return usage();
752        }
753
754        return do_rescheck(files);
755    }
756    else {
757        return usage();
758    }
759
760    if (SourcePos::HasErrors()) {
761        SourcePos::PrintErrors(stderr);
762        return 1;
763    }
764
765    return 0;
766}
767
768