1a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// found in the LICENSE file. 4a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 5a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)#include "net/tools/tld_cleanup/tld_cleanup_util.h" 6a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 71320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "base/files/file_util.h" 8a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)#include "base/logging.h" 97d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "base/strings/string_number_conversions.h" 10868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h" 117dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#include "url/gurl.h" 127dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#include "url/url_parse.h" 13a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 14a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)namespace { 15a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 16a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)const char kBeginPrivateDomainsComment[] = "// ===BEGIN PRIVATE DOMAINS==="; 17a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)const char kEndPrivateDomainsComment[] = "// ===END PRIVATE DOMAINS==="; 187d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 197d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)const int kExceptionRule = 1; 207d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)const int kWildcardRule = 2; 217d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)const int kPrivateRule = 4; 22a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)} 23a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 24a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)namespace net { 25a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)namespace tld_cleanup { 26a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 27a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// Writes the list of domain rules contained in the 'rules' set to the 28a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// 'outfile', with each rule terminated by a LF. The file must already have 29a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// been created with write access. 30a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)bool WriteRules(const RuleMap& rules, const base::FilePath& outfile) { 31a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) std::string data; 327dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch data.append("%{\n" 337dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "// Copyright 2012 The Chromium Authors. All rights reserved.\n" 347dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "// Use of this source code is governed by a BSD-style license " 357dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "that can be\n" 367dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "// found in the LICENSE file.\n\n" 377dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "// This file is generated by net/tools/tld_cleanup/.\n" 387dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "// DO NOT MANUALLY EDIT!\n" 397dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "%}\n" 407dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "struct DomainRule {\n" 417dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch " int name_offset;\n" 427dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch " int type; // flags: 1: exception, 2: wildcard, 4: private\n" 437dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "};\n" 447dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch "%%\n"); 45a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 46a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) for (RuleMap::const_iterator i = rules.begin(); i != rules.end(); ++i) { 47a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) data.append(i->first); 48a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) data.append(", "); 497d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) int type = 0; 50a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (i->second.exception) { 517d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) type = kExceptionRule; 52a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } else if (i->second.wildcard) { 537d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) type = kWildcardRule; 54a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 55a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (i->second.is_private) { 567d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) type += kPrivateRule; 57a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 587d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) data.append(base::IntToString(type)); 59a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) data.append("\n"); 60a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 61a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 62a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) data.append("%%\n"); 63a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 64a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) int written = base::WriteFile(outfile, 65a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) data.data(), 66a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) static_cast<int>(data.size())); 67a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 68a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return written == static_cast<int>(data.size()); 69a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)} 70a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 71a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// Adjusts the rule to a standard form: removes single extraneous dots and 72a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// canonicalizes it using GURL. Returns kSuccess if the rule is interpreted as 73a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// valid; logs a warning and returns kWarning if it is probably invalid; and 74a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)// logs an error and returns kError if the rule is (almost) certainly invalid. 75a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)NormalizeResult NormalizeRule(std::string* domain, Rule* rule) { 76a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) NormalizeResult result = kSuccess; 77a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 78a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Strip single leading and trailing dots. 79a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (domain->at(0) == '.') 80a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) domain->erase(0, 1); 81a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (domain->empty()) { 82a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(WARNING) << "Ignoring empty rule"; 83a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return kWarning; 84a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 85a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (domain->at(domain->size() - 1) == '.') 86a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) domain->erase(domain->size() - 1, 1); 87a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (domain->empty()) { 88a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(WARNING) << "Ignoring empty rule"; 89a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return kWarning; 90a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 91a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 92a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Allow single leading '*.' or '!', saved here so it's not canonicalized. 93a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) size_t start_offset = 0; 94a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (domain->at(0) == '!') { 95a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) domain->erase(0, 1); 96a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) rule->exception = true; 97a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } else if (domain->find("*.") == 0) { 98a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) domain->erase(0, 2); 99a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) rule->wildcard = true; 100a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 101a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (domain->empty()) { 102a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(WARNING) << "Ignoring empty rule"; 103a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return kWarning; 104a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 105a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 106a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Warn about additional '*.' or '!'. 107a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (domain->find("*.", start_offset) != std::string::npos || 108a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) domain->find('!', start_offset) != std::string::npos) { 109a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(WARNING) << "Keeping probably invalid rule: " << *domain; 110a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) result = kWarning; 111a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 112a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 113a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Make a GURL and normalize it, then get the host back out. 114a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) std::string url = "http://"; 115a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) url.append(*domain); 116a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) GURL gurl(url); 117a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) const std::string& spec = gurl.possibly_invalid_spec(); 1185c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu url::Component host = gurl.parsed_for_possibly_invalid_spec().host; 119a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (host.len < 0) { 120a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(ERROR) << "Ignoring rule that couldn't be normalized: " << *domain; 121a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return kError; 122a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 123a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (!gurl.is_valid()) { 124a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(WARNING) << "Keeping rule that GURL says is invalid: " << *domain; 125a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) result = kWarning; 126a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 127a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) domain->assign(spec.substr(host.begin, host.len)); 128a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 129a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return result; 130a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)} 131a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 132a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)NormalizeResult NormalizeDataToRuleMap(const std::string data, 133a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) RuleMap* rules) { 134a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) CHECK(rules); 135a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // We do a lot of string assignment during parsing, but simplicity is more 136a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // important than performance here. 137a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) std::string domain; 138a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) NormalizeResult result = kSuccess; 139a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) size_t line_start = 0; 140a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) size_t line_end = 0; 141a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) bool is_private = false; 142a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) RuleMap extra_rules; 143a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) int begin_private_length = arraysize(kBeginPrivateDomainsComment) - 1; 144a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) int end_private_length = arraysize(kEndPrivateDomainsComment) - 1; 145a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) while (line_start < data.size()) { 146a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (line_start + begin_private_length < data.size() && 147a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) !data.compare(line_start, begin_private_length, 148a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) kBeginPrivateDomainsComment)) { 149a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) is_private = true; 150a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_end = line_start + begin_private_length; 151a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } else if (line_start + end_private_length < data.size() && 152a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) !data.compare(line_start, end_private_length, 153a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) kEndPrivateDomainsComment)) { 154a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) is_private = false; 155a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_end = line_start + end_private_length; 156a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } else if (line_start + 1 < data.size() && 157a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) data[line_start] == '/' && 158a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) data[line_start + 1] == '/') { 159a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Skip comments. 160a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_end = data.find_first_of("\r\n", line_start); 161a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (line_end == std::string::npos) 162a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_end = data.size(); 163a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } else { 164a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Truncate at first whitespace. 165a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_end = data.find_first_of("\r\n \t", line_start); 166a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (line_end == std::string::npos) 167a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_end = data.size(); 168a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) domain.assign(data.data(), line_start, line_end - line_start); 169a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 170a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) Rule rule; 171a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) rule.wildcard = false; 172a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) rule.exception = false; 173a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) rule.is_private = is_private; 174a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) NormalizeResult new_result = NormalizeRule(&domain, &rule); 175a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (new_result != kError) { 176a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Check the existing rules to make sure we don't have an exception and 177a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // wildcard for the same rule, or that the same domain is listed as both 178a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // private and not private. If we did, we'd have to update our 179a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // parsing code to handle this case. 1805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) CHECK(rules->find(domain) == rules->end()) 1815d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) << "Duplicate rule found for " << domain; 182a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 183a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) (*rules)[domain] = rule; 184a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Add true TLD for multi-level rules. We don't add them right now, in 185a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // case there's an exception or wild card that either exists or might be 186a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // added in a later iteration. In those cases, there's no need to add 187a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // it and it would just slow down parsing the data. 188a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) size_t tld_start = domain.find_last_of('.'); 189a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (tld_start != std::string::npos && tld_start + 1 < domain.size()) { 190a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) std::string extra_rule_domain = domain.substr(tld_start + 1); 191a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) RuleMap::const_iterator iter = extra_rules.find(extra_rule_domain); 192a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) Rule extra_rule; 193a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) extra_rule.exception = false; 194a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) extra_rule.wildcard = false; 195a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (iter == extra_rules.end()) { 196a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) extra_rule.is_private = is_private; 197a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } else { 198a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // A rule already exists, so we ensure that if any of the entries is 199a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // not private the result should be that the entry is not private. 200a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // An example is .au which is not listed as a real TLD, but only 201a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // lists second-level domains such as com.au. Subdomains of .au 202a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // (eg. blogspot.com.au) are also listed in the private section, 203a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // which is processed later, so this ensures that the real TLD 204a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // (eg. .au) is listed as public. 205a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) extra_rule.is_private = is_private && iter->second.is_private; 206a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 207a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) extra_rules[extra_rule_domain] = extra_rule; 208a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 209a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 210a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) result = std::max(result, new_result); 211a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 212a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 213a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // Find beginning of next non-empty line. 214a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_start = data.find_first_of("\r\n", line_end); 215a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (line_start == std::string::npos) 216a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_start = data.size(); 217a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_start = data.find_first_not_of("\r\n", line_start); 218a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (line_start == std::string::npos) 219a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) line_start = data.size(); 220a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 221a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 222a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) for (RuleMap::const_iterator iter = extra_rules.begin(); 223a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) iter != extra_rules.end(); 224a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) ++iter) { 225a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (rules->find(iter->first) == rules->end()) { 226a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) (*rules)[iter->first] = iter->second; 227a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 228a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 229a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 230a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return result; 231a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)} 232a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 233a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)NormalizeResult NormalizeFile(const base::FilePath& in_filename, 234a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) const base::FilePath& out_filename) { 235a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) RuleMap rules; 236a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) std::string data; 23758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if (!base::ReadFileToString(in_filename, &data)) { 238a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(ERROR) << "Unable to read file"; 239a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) // We return success since we've already reported the error. 240a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return kSuccess; 241a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 242a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 243a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) NormalizeResult result = NormalizeDataToRuleMap(data, &rules); 244a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 245a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) if (!WriteRules(rules, out_filename)) { 246a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) LOG(ERROR) << "Error(s) writing output file"; 247a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) result = kError; 248a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) } 249a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 250a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) return result; 251a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)} 252a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 253a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles) 254a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)} // namespace tld_cleanup 255a93a17c8d99d686bd4a1511e5504e5e6cc9fcadfTorne (Richard Coles)} // namespace net 256