1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/dns/dns_hosts.h"
6
7#include "base/file_util.h"
8#include "base/logging.h"
9#include "base/metrics/histogram.h"
10#include "base/strings/string_util.h"
11#include "base/strings/string_tokenizer.h"
12
13using base::StringPiece;
14
15namespace net {
16
17// Parses the contents of a hosts file.  Returns one token (IP or hostname) at
18// a time.  Doesn't copy anything; accepts the file as a StringPiece and
19// returns tokens as StringPieces.
20class HostsParser {
21 public:
22  explicit HostsParser(const StringPiece& text)
23      : text_(text),
24        data_(text.data()),
25        end_(text.size()),
26        pos_(0),
27        token_(),
28        token_is_ip_(false) {}
29
30  // Advances to the next token (IP or hostname).  Returns whether another
31  // token was available.  |token_is_ip| and |token| can be used to find out
32  // the type and text of the token.
33  bool Advance() {
34    bool next_is_ip = (pos_ == 0);
35    while (pos_ < end_ && pos_ != std::string::npos) {
36      switch (text_[pos_]) {
37        case ' ':
38        case '\t':
39          SkipWhitespace();
40          break;
41
42        case '\r':
43        case '\n':
44          next_is_ip = true;
45          pos_++;
46          break;
47
48        case '#':
49          SkipRestOfLine();
50          break;
51
52        default: {
53          size_t token_start = pos_;
54          SkipToken();
55          size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
56
57          token_ = StringPiece(data_ + token_start, token_end - token_start);
58          token_is_ip_ = next_is_ip;
59
60          return true;
61        }
62      }
63    }
64
65    text_ = StringPiece();
66    return false;
67  }
68
69  // Fast-forwards the parser to the next line.  Should be called if an IP
70  // address doesn't parse, to avoid wasting time tokenizing hostnames that
71  // will be ignored.
72  void SkipRestOfLine() {
73    pos_ = text_.find("\n", pos_);
74  }
75
76  // Returns whether the last-parsed token is an IP address (true) or a
77  // hostname (false).
78  bool token_is_ip() { return token_is_ip_; }
79
80  // Returns the text of the last-parsed token as a StringPiece referencing
81  // the same underlying memory as the StringPiece passed to the constructor.
82  // Returns an empty StringPiece if no token has been parsed or the end of
83  // the input string has been reached.
84  const StringPiece& token() { return token_; }
85
86 private:
87  void SkipToken() {
88    pos_ = text_.find_first_of(" \t\n\r#", pos_);
89  }
90
91  void SkipWhitespace() {
92    pos_ = text_.find_first_not_of(" \t", pos_);
93  }
94
95  StringPiece text_;
96  const char* data_;
97  const size_t end_;
98
99  size_t pos_;
100  StringPiece token_;
101  bool token_is_ip_;
102
103  DISALLOW_COPY_AND_ASSIGN(HostsParser);
104};
105
106
107
108void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
109  CHECK(dns_hosts);
110  DnsHosts& hosts = *dns_hosts;
111
112  StringPiece ip_text;
113  IPAddressNumber ip;
114  AddressFamily family = ADDRESS_FAMILY_IPV4;
115  HostsParser parser(contents);
116  while (parser.Advance()) {
117    if (parser.token_is_ip()) {
118      StringPiece new_ip_text = parser.token();
119      // Some ad-blocking hosts files contain thousands of entries pointing to
120      // the same IP address (usually 127.0.0.1).  Don't bother parsing the IP
121      // again if it's the same as the one above it.
122      if (new_ip_text != ip_text) {
123        IPAddressNumber new_ip;
124        if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
125          ip_text = new_ip_text;
126          ip.swap(new_ip);
127          family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
128        } else {
129          parser.SkipRestOfLine();
130        }
131      }
132    } else {
133      DnsHostsKey key(parser.token().as_string(), family);
134      StringToLowerASCII(&key.first);
135      IPAddressNumber& mapped_ip = hosts[key];
136      if (mapped_ip.empty())
137        mapped_ip = ip;
138      // else ignore this entry (first hit counts)
139    }
140  }
141}
142
143bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
144  dns_hosts->clear();
145  // Missing file indicates empty HOSTS.
146  if (!base::PathExists(path))
147    return true;
148
149  int64 size;
150  if (!file_util::GetFileSize(path, &size))
151    return false;
152
153  UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
154
155  // Reject HOSTS files larger than |kMaxHostsSize| bytes.
156  const int64 kMaxHostsSize = 1 << 25;  // 32MB
157  if (size > kMaxHostsSize)
158    return false;
159
160  std::string contents;
161  if (!file_util::ReadFileToString(path, &contents))
162    return false;
163
164  ParseHosts(contents, dns_hosts);
165  return true;
166}
167
168}  // namespace net
169
170