protocol_parser.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Parse the data returned from the SafeBrowsing v2.1 protocol response.
6
7#include "chrome/browser/safe_browsing/protocol_parser.h"
8
9#include "build/build_config.h"
10
11#if defined(OS_WIN)
12#include <Winsock2.h>
13#elif defined(OS_POSIX)
14#include <arpa/inet.h>
15#endif
16
17#include "base/format_macros.h"
18#include "base/logging.h"
19#include "base/string_util.h"
20
namespace {

// Extracts the first line of a line-oriented, binary-safe buffer.
//
// Scans at most |input_len| bytes of |input| for a '\n'. If one is found, the
// bytes preceding it (without the '\n') are copied into |line| and true is
// returned. If no '\n' is found within |input_len| bytes, or |input| is NULL,
// false is returned and |line| is left untouched.
//
// The copy uses std::string::assign(const char*, size_type), which takes an
// explicit length instead of calling strlen. This matters because |input| is
// not NULL terminated and may legitimately contain NULL bytes.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input)
    return false;

  for (int i = 0; i < input_len; ++i) {
    if (input[i] == '\n') {
      line->assign(input, i);
      return true;
    }
  }
  return false;
}

}  // namespace
40
41//------------------------------------------------------------------------------
42// SafeBrowsingParser implementation
43
44SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
45}
46
47bool SafeBrowsingProtocolParser::ParseGetHash(
48    const char* chunk_data,
49    int chunk_len,
50    const std::string& key,
51    bool* re_key,
52    std::vector<SBFullHashResult>* full_hashes) {
53  full_hashes->clear();
54  int length = chunk_len;
55  const char* data = chunk_data;
56
57  int offset;
58  std::string line;
59  if (!key.empty()) {
60    if (!GetLine(data, length, &line))
61      return false;  // Error! Bad GetHash result.
62
63    if (line == "e:pleaserekey") {
64      *re_key = true;
65      return true;
66    }
67
68    offset = static_cast<int>(line.size()) + 1;
69    data += offset;
70    length -= offset;
71
72    if (!safe_browsing_util::VerifyMAC(key, line, data, length))
73      return false;
74  }
75
76  while (length > 0) {
77    if (!GetLine(data, length, &line))
78      return false;
79
80    offset = static_cast<int>(line.size()) + 1;
81    data += offset;
82    length -= offset;
83
84    std::vector<std::string> cmd_parts;
85    SplitString(line, ':', &cmd_parts);
86    if (cmd_parts.size() != 3)
87      return false;
88
89    SBFullHashResult full_hash;
90    full_hash.list_name = cmd_parts[0];
91    full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
92    int full_hash_len = atoi(cmd_parts[2].c_str());
93
94    // Ignore hash results from lists we don't recognize.
95    if (safe_browsing_util::GetListId(full_hash.list_name) < 0) {
96      data += full_hash_len;
97      length -= full_hash_len;
98      continue;
99    }
100
101    while (full_hash_len > 0) {
102      DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash));
103      memcpy(&full_hash.hash, data, sizeof(SBFullHash));
104      full_hashes->push_back(full_hash);
105      data += sizeof(SBFullHash);
106      length -= sizeof(SBFullHash);
107      full_hash_len -= sizeof(SBFullHash);
108    }
109  }
110
111  return length == 0;
112}
113
114void SafeBrowsingProtocolParser::FormatGetHash(
115   const std::vector<SBPrefix>& prefixes, std::string* request) {
116  DCHECK(request);
117
118  // Format the request for GetHash.
119  request->append(StringPrintf("%" PRIuS ":%" PRIuS "\n",
120                               sizeof(SBPrefix),
121                               sizeof(SBPrefix) * prefixes.size()));
122  for (size_t i = 0; i < prefixes.size(); ++i) {
123    request->append(reinterpret_cast<const char*>(&prefixes[i]),
124                    sizeof(SBPrefix));
125  }
126}
127
128bool SafeBrowsingProtocolParser::ParseUpdate(
129    const char* chunk_data,
130    int chunk_len,
131    const std::string& key,
132    int* next_update_sec,
133    bool* re_key,
134    bool* reset,
135    std::vector<SBChunkDelete>* deletes,
136    std::vector<ChunkUrl>* chunk_urls) {
137  DCHECK(next_update_sec);
138  DCHECK(deletes);
139  DCHECK(chunk_urls);
140
141  int length = chunk_len;
142  const char* data = chunk_data;
143
144  // Populated below.
145  std::string list_name;
146
147  while (length > 0) {
148    std::string cmd_line;
149    if (!GetLine(data, length, &cmd_line))
150      return false;  // Error: bad list format!
151
152    std::vector<std::string> cmd_parts;
153    SplitString(cmd_line, ':', &cmd_parts);
154    if (cmd_parts.empty())
155      return false;
156    const std::string& command = cmd_parts[0];
157    if (cmd_parts.size() != 2 && command[0] != 'u')
158      return false;
159
160    const int consumed = static_cast<int>(cmd_line.size()) + 1;
161    data += consumed;
162    length -= consumed;
163    if (length < 0)
164      return false;  // Parsing error.
165
166    // Differentiate on the first character of the command (which is usually
167    // only one character, with the exception of the 'ad' and 'sd' commands).
168    switch (command[0]) {
169      case 'a':
170      case 's': {
171        // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
172        // have also parsed the list name before getting here, or the add-del
173        // or sub-del will have no context.
174        if (command.size() != 2 || command[1] != 'd' || list_name.empty())
175          return false;
176        SBChunkDelete chunk_delete;
177        chunk_delete.is_sub_del = command[0] == 's';
178        StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
179        chunk_delete.list_name = list_name;
180        deletes->push_back(chunk_delete);
181        break;
182      }
183
184      case 'e':
185        if (cmd_parts[1] != "pleaserekey")
186          return false;
187        *re_key = true;
188        break;
189
190      case 'i':
191        // The line providing the name of the list (i.e. 'goog-phish-shavar').
192        list_name = cmd_parts[1];
193        break;
194
195      case 'm':
196        // Verify that the MAC of the remainer of this chunk is what we expect.
197        if (!key.empty() &&
198            !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length))
199          return false;
200        break;
201
202      case 'n':
203        // The line providing the next earliest time (in seconds) to re-query.
204        *next_update_sec = atoi(cmd_parts[1].c_str());
205        break;
206
207      case 'u': {
208        // The redirect command is of the form: u:<url>,<mac> where <url> can
209        // contain multiple colons, commas or any valid URL characters. We scan
210        // backwards in the string looking for the first ',' we encounter and
211        // assume that everything before that is the URL and everything after
212        // is the MAC (if the MAC was requested).
213        std::string mac;
214        std::string redirect_url(cmd_line, 2);  // Skip the initial "u:".
215        if (!key.empty()) {
216          std::string::size_type mac_pos = redirect_url.rfind(',');
217          if (mac_pos == std::string::npos)
218            return false;
219          mac = redirect_url.substr(mac_pos + 1);
220          redirect_url = redirect_url.substr(0, mac_pos);
221        }
222
223        ChunkUrl chunk_url;
224        chunk_url.url = redirect_url;
225        chunk_url.list_name = list_name;
226        if (!key.empty())
227          chunk_url.mac = mac;
228        chunk_urls->push_back(chunk_url);
229        break;
230      }
231
232      case 'r':
233        if (cmd_parts[1] != "pleasereset")
234          return false;
235        *reset = true;
236        break;
237
238      default:
239        // According to the spec, we ignore commands we don't understand.
240        break;
241    }
242  }
243
244  return true;
245}
246
247bool SafeBrowsingProtocolParser::ParseChunk(const char* data,
248                                            int length,
249                                            const std::string& key,
250                                            const std::string& mac,
251                                            bool* re_key,
252                                            SBChunkList* chunks) {
253  int remaining = length;
254  const char* chunk_data = data;
255
256  if (!key.empty() &&
257      !safe_browsing_util::VerifyMAC(key, mac, data, length)) {
258    return false;
259  }
260
261  while (remaining > 0) {
262    std::string cmd_line;
263    if (!GetLine(chunk_data, length, &cmd_line))
264      return false;  // Error: bad chunk format!
265
266    const int line_len = static_cast<int>(cmd_line.length()) + 1;
267    std::vector<std::string> cmd_parts;
268    SplitString(cmd_line, ':', &cmd_parts);
269
270    // Handle a possible re-key command.
271    if (cmd_parts.size() != 4) {
272      if (cmd_parts.size() == 2 &&
273          cmd_parts[0] == "e" &&
274          cmd_parts[1] == "pleaserekey") {
275        *re_key = true;
276        chunk_data += line_len;
277        remaining -= line_len;
278        continue;
279      }
280      return false;
281    }
282
283    // Process the chunk data.
284    const int chunk_number = atoi(cmd_parts[1].c_str());
285    const int hash_len = atoi(cmd_parts[2].c_str());
286    if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
287      SB_DLOG(INFO) << "ParseChunk got unknown hashlen " << hash_len;
288      return false;
289    }
290
291    const int chunk_len = atoi(cmd_parts[3].c_str());
292    chunk_data += line_len;
293    remaining -= line_len;
294
295    chunks->push_back(SBChunk());
296    chunks->back().chunk_number = chunk_number;
297
298    if (cmd_parts[0] == "a") {
299      chunks->back().is_add = true;
300      if (!ParseAddChunk(chunk_data, chunk_len, hash_len,
301                         &chunks->back().hosts))
302        return false;  // Parse error.
303    } else if (cmd_parts[0] == "s") {
304      chunks->back().is_add = false;
305      if (!ParseSubChunk(chunk_data, chunk_len, hash_len,
306                         &chunks->back().hosts))
307        return false;  // Parse error.
308    } else {
309      NOTREACHED();
310      return false;
311    }
312
313    chunk_data += chunk_len;
314    remaining -= chunk_len;
315    if (remaining < 0)
316      return false;  // Parse error.
317  }
318
319  DCHECK(remaining == 0);
320
321  return true;
322}
323
324bool SafeBrowsingProtocolParser::ParseAddChunk(
325    const char* data, int data_len, int hash_len,
326    std::deque<SBChunkHost>* hosts) {
327
328  int remaining = data_len;
329  const char* chunk_data = data;
330  const int min_size = sizeof(SBPrefix) + 1;
331
332  while (remaining >= min_size) {
333    SBPrefix host;
334    int prefix_count;
335    ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
336    SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
337        SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
338    SBEntry* entry;
339    int index_start = 0;
340
341    // If a host has more than 255 prefixes, then subsequent entries are used.
342    // Check if this is the case, and if so put them in one SBEntry since the
343    // database code assumes that all prefixes from the same host and chunk are
344    // in one SBEntry.
345    if (!hosts->empty() && hosts->back().host == host &&
346        hosts->back().entry->HashLen() == hash_len) {
347      // Reuse the SBChunkHost, but need to create a new SBEntry since we have
348      // more prefixes.
349      index_start = hosts->back().entry->prefix_count();
350      entry = hosts->back().entry->Enlarge(prefix_count);
351      hosts->back().entry = entry;
352    } else {
353      entry = SBEntry::Create(type, prefix_count);
354      SBChunkHost chunk_host;
355      chunk_host.host = host;
356      chunk_host.entry = entry;
357      hosts->push_back(chunk_host);
358    }
359
360    if (!ReadPrefixes(&chunk_data, &remaining, entry, prefix_count,
361                      index_start))
362      return false;
363  }
364
365  return remaining == 0;
366}
367
368bool SafeBrowsingProtocolParser::ParseSubChunk(
369    const char* data, int data_len, int hash_len,
370    std::deque<SBChunkHost>* hosts) {
371
372  int remaining = data_len;
373  const char* chunk_data = data;
374  const int min_size = 2 * sizeof(SBPrefix) + 1;
375
376  while (remaining >= min_size) {
377    SBPrefix host;
378    int prefix_count;
379    ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
380    SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
381        SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
382    SBEntry* entry = SBEntry::Create(type, prefix_count);
383
384    SBChunkHost chunk_host;
385    chunk_host.host = host;
386    chunk_host.entry = entry;
387    hosts->push_back(chunk_host);
388
389    if (prefix_count == 0) {
390      // There is only an add chunk number (no prefixes).
391      entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining));
392      continue;
393    }
394
395    if (!ReadPrefixes(&chunk_data, &remaining, entry, prefix_count, 0))
396      return false;
397  }
398
399  return remaining == 0;
400}
401
402
403void SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
404    const char** data, int* remaining, SBPrefix* host, int* count) {
405  // Next 4 bytes are the host prefix.
406  memcpy(host, *data, sizeof(SBPrefix));
407  *data += sizeof(SBPrefix);
408  *remaining -= sizeof(SBPrefix);
409
410  // Next 1 byte is the prefix count (could be zero, but never negative).
411  *count = static_cast<unsigned char>(**data);
412  *data += 1;
413  *remaining -= 1;
414}
415
416int SafeBrowsingProtocolParser::ReadChunkId(
417    const char** data, int* remaining) {
418  int chunk_number;
419  memcpy(&chunk_number, *data, sizeof(chunk_number));
420  *data += sizeof(chunk_number);
421  *remaining -= sizeof(chunk_number);
422  return htonl(chunk_number);
423}
424
425bool SafeBrowsingProtocolParser::ReadPrefixes(
426    const char** data, int* remaining, SBEntry* entry, int count,
427    int index_start) {
428  int hash_len = entry->HashLen();
429  for (int i = 0; i < count; ++i) {
430    if (entry->IsSub()) {
431      entry->SetChunkIdAtPrefix(index_start + i, ReadChunkId(data, remaining));
432      if (*remaining <= 0)
433        return false;
434    }
435
436    if (entry->IsPrefix()) {
437      entry->SetPrefixAt(index_start + i,
438                         *reinterpret_cast<const SBPrefix*>(*data));
439    } else {
440      entry->SetFullHashAt(index_start + i,
441                           *reinterpret_cast<const SBFullHash*>(*data));
442    }
443    *data += hash_len;
444    *remaining -= hash_len;
445    if (*remaining < 0)
446      return false;
447  }
448
449  return true;
450}
451
452bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data,
453                                             int chunk_length,
454                                             std::string* client_key,
455                                             std::string* wrapped_key) {
456  DCHECK(client_key && wrapped_key);
457  client_key->clear();
458  wrapped_key->clear();
459
460  const char* data = chunk_data;
461  int remaining = chunk_length;
462
463  while (remaining > 0) {
464    std::string line;
465    if (!GetLine(data, remaining, &line))
466      return false;
467
468    std::vector<std::string> cmd_parts;
469    SplitString(line, ':', &cmd_parts);
470    if (cmd_parts.size() != 3)
471      return false;
472
473    if (static_cast<int>(cmd_parts[2].size()) != atoi(cmd_parts[1].c_str()))
474      return false;
475
476    if (cmd_parts[0] == "clientkey") {
477      client_key->assign(cmd_parts[2]);
478    } else if (cmd_parts[0] == "wrappedkey") {
479      wrapped_key->assign(cmd_parts[2]);
480    } else {
481      return false;
482    }
483
484    data += line.size() + 1;
485    remaining -= static_cast<int>(line.size()) + 1;
486  }
487
488  if (client_key->empty() || wrapped_key->empty())
489    return false;
490
491  return true;
492}
493