// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This command-line program converts an effective-TLD data file in UTF-8 from
// the format provided by Mozilla to the format expected by Chrome. This
// program generates an intermediate file which is then used by gperf to
// generate a perfect hash map. The benefit of this approach is that no time is
// spent on program initialization to generate the map of this data.
//
// Running this program finds "effective_tld_names.dat" in the expected location
// in the source checkout and generates "effective_tld_names.gperf" next to it.
//
// Any errors or warnings from this program are recorded in tld_cleanup.log.
//
// In particular, it
//  * Strips blank lines and comments, as well as notes for individual rules.
//  * Strips a single leading and/or trailing dot from each rule, if present.
//  * Logs a warning if a rule contains '!' or '*.' other than at the beginning
//    of the rule. (This also catches multiple ! or *. at the start of a rule.)
//  * Logs a warning if GURL reports a rule as invalid, but keeps the rule.
//  * Canonicalizes each rule's domain by converting it to a GURL and back.
//  * Adds explicit rules for true TLDs found in any rule.
//  * Marks entries in the file between "// ===BEGIN PRIVATE DOMAINS==="
//    and "// ===END PRIVATE DOMAINS===" as private.
26 27#include "base/at_exit.h" 28#include "base/command_line.h" 29#include "base/files/file_path.h" 30#include "base/files/file_util.h" 31#include "base/i18n/icu_util.h" 32#include "base/logging.h" 33#include "base/path_service.h" 34#include "base/process/memory.h" 35#include "net/tools/tld_cleanup/tld_cleanup_util.h" 36 37int main(int argc, const char* argv[]) { 38 base::EnableTerminationOnHeapCorruption(); 39 if (argc != 1) { 40 fprintf(stderr, "Normalizes and verifies UTF-8 TLD data files\n"); 41 fprintf(stderr, "Usage: %s\n", argv[0]); 42 return 1; 43 } 44 45 // Manages the destruction of singletons. 46 base::AtExitManager exit_manager; 47 48 // Only use OutputDebugString in debug mode. 49#ifdef NDEBUG 50 logging::LoggingDestination destination = logging::LOG_TO_FILE; 51#else 52 logging::LoggingDestination destination = 53 logging::LOG_TO_ALL; 54#endif 55 56 base::CommandLine::Init(argc, argv); 57 58 base::FilePath log_filename; 59 PathService::Get(base::DIR_EXE, &log_filename); 60 log_filename = log_filename.AppendASCII("tld_cleanup.log"); 61 logging::LoggingSettings settings; 62 settings.logging_dest = destination; 63 settings.log_file = log_filename.value().c_str(); 64 settings.delete_old = logging::DELETE_OLD_LOG_FILE; 65 logging::InitLogging(settings); 66 67 base::i18n::InitializeICU(); 68 69 base::FilePath input_file; 70 PathService::Get(base::DIR_SOURCE_ROOT, &input_file); 71 input_file = input_file.Append(FILE_PATH_LITERAL("net")) 72 .Append(FILE_PATH_LITERAL("base")) 73 .Append(FILE_PATH_LITERAL( 74 "registry_controlled_domains")) 75 .Append(FILE_PATH_LITERAL("effective_tld_names.dat")); 76 base::FilePath output_file; 77 PathService::Get(base::DIR_SOURCE_ROOT, &output_file); 78 output_file = output_file.Append(FILE_PATH_LITERAL("net")) 79 .Append(FILE_PATH_LITERAL("base")) 80 .Append(FILE_PATH_LITERAL( 81 "registry_controlled_domains")) 82 .Append(FILE_PATH_LITERAL( 83 "effective_tld_names.gperf")); 84 net::tld_cleanup::NormalizeResult result = 85 
net::tld_cleanup::NormalizeFile(input_file, output_file); 86 if (result != net::tld_cleanup::kSuccess) { 87 fprintf(stderr, 88 "Errors or warnings processing file. See log in tld_cleanup.log."); 89 } 90 91 if (result == net::tld_cleanup::kError) 92 return 1; 93 return 0; 94} 95