1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// Parse the data returned from the SafeBrowsing v2.1 protocol response.
6
7// TODOv3(shess): Review these changes carefully.
8
9#include <stdlib.h>
10
11#include "base/format_macros.h"
12#include "base/logging.h"
13#include "base/strings/string_number_conversions.h"
14#include "base/strings/string_split.h"
15#include "base/strings/stringprintf.h"
16#include "base/sys_byteorder.h"
17#include "base/time/time.h"
18#include "build/build_config.h"
19#include "chrome/browser/safe_browsing/protocol_parser.h"
20#include "chrome/browser/safe_browsing/safe_browsing_util.h"
21
22namespace {
23
24// Helper class for scanning a buffer.
25class BufferReader {
26 public:
27  BufferReader(const char* data, size_t length)
28      : data_(data),
29        length_(length) {
30  }
31
32  // Return info about remaining buffer data.
33  size_t length() const {
34    return length_;
35  }
36  const char* data() const {
37    return data_;
38  }
39  bool empty() const {
40    return length_ == 0;
41  }
42
43  // Remove |l| characters from the buffer.
44  void Advance(size_t l) {
45    DCHECK_LE(l, length());
46    data_ += l;
47    length_ -= l;
48  }
49
50  // Get a reference to data in the buffer.
51  // TODO(shess): I'm not sure I like this.  Fill out a StringPiece instead?
52  bool RefData(const void** pptr, size_t l) {
53    if (length() < l) {
54      Advance(length());  // poison
55      return false;
56    }
57
58    *pptr = data();
59    Advance(l);
60    return true;
61  }
62
63  // Copy data out of the buffer.
64  bool GetData(void* ptr, size_t l) {
65    const void* buf_ptr;
66    if (!RefData(&buf_ptr, l))
67      return false;
68
69    memcpy(ptr, buf_ptr, l);
70    return true;
71  }
72
73  // Read a 32-bit integer in network byte order into a local uint32.
74  bool GetNet32(uint32* i) {
75    if (!GetData(i, sizeof(*i)))
76      return false;
77
78    *i = base::NetToHost32(*i);
79    return true;
80  }
81
82  // Returns false if there is no data, otherwise fills |*line| with a reference
83  // to the next line of data in the buffer.
84  bool GetLine(base::StringPiece* line) {
85    if (!length_)
86      return false;
87
88    // Find the end of the line, or the end of the input.
89    size_t eol = 0;
90    while (eol < length_ && data_[eol] != '\n') {
91      ++eol;
92    }
93    line->set(data_, eol);
94    Advance(eol);
95
96    // Skip the newline if present.
97    if (length_ && data_[0] == '\n')
98      Advance(1);
99
100    return true;
101  }
102
103  // Read out |c| colon-separated pieces from the next line.  The resulting
104  // pieces point into the original data buffer.
105  bool GetPieces(size_t c, std::vector<base::StringPiece>* pieces) {
106    base::StringPiece line;
107    if (!GetLine(&line))
108      return false;
109
110    // Find the parts separated by ':'.
111    while (pieces->size() + 1 < c) {
112      size_t colon_ofs = line.find(':');
113      if (colon_ofs == base::StringPiece::npos) {
114        Advance(length_);
115        return false;
116      }
117
118      pieces->push_back(line.substr(0, colon_ofs));
119      line.remove_prefix(colon_ofs + 1);
120    }
121
122    // The last piece runs to the end of the line.
123    pieces->push_back(line);
124    return true;
125  }
126
127 private:
128  const char* data_;
129  size_t length_;
130
131  DISALLOW_COPY_AND_ASSIGN(BufferReader);
132};
133
134bool ParseGetHashMetadata(size_t hash_count,
135                          BufferReader* reader,
136                          std::vector<SBFullHashResult>* full_hashes) {
137  for (size_t i = 0; i < hash_count; ++i) {
138    base::StringPiece line;
139    if (!reader->GetLine(&line))
140      return false;
141
142    size_t meta_data_len;
143    if (!base::StringToSizeT(line, &meta_data_len))
144      return false;
145
146    const void* meta_data;
147    if (!reader->RefData(&meta_data, meta_data_len))
148      return false;
149
150    if (full_hashes) {
151      (*full_hashes)[full_hashes->size() - hash_count + i].metadata.assign(
152          reinterpret_cast<const char*>(meta_data), meta_data_len);
153    }
154  }
155  return true;
156}
157
158}  // namespace
159
160namespace safe_browsing {
161
162// BODY          = CACHELIFETIME LF HASHENTRY* EOF
163// CACHELIFETIME = DIGIT+
164// HASHENTRY     = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
165//                 HASHDATA (METADATALEN LF METADATA)*
166// HASHSIZE      = DIGIT+                  # Length of each full hash
167// NUMRESPONSES  = DIGIT+                  # Number of full hashes in HASHDATA
168// HASHDATA      = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
169// METADATALEN   = DIGIT+
170// METADATA      = <METADATALEN number of unsigned bytes>
171bool ParseGetHash(const char* chunk_data,
172                  size_t chunk_len,
173                  base::TimeDelta* cache_lifetime,
174                  std::vector<SBFullHashResult>* full_hashes) {
175  full_hashes->clear();
176  BufferReader reader(chunk_data, chunk_len);
177
178  // Parse out cache lifetime.
179  {
180    base::StringPiece line;
181    if (!reader.GetLine(&line))
182      return false;
183
184    int64_t cache_lifetime_seconds;
185    if (!base::StringToInt64(line, &cache_lifetime_seconds))
186      return false;
187
188    // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
189    // either.  Maybe there should be a threshold involved.
190    if (cache_lifetime_seconds < 0)
191      return false;
192
193    *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds);
194  }
195
196  while (!reader.empty()) {
197    std::vector<base::StringPiece> cmd_parts;
198    if (!reader.GetPieces(3, &cmd_parts))
199      return false;
200
201    SBFullHashResult full_hash;
202    full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]);
203
204    size_t hash_len;
205    if (!base::StringToSizeT(cmd_parts[1], &hash_len))
206      return false;
207
208    // TODO(shess): Is this possible?  If not, why the length present?
209    if (hash_len != sizeof(SBFullHash))
210      return false;
211
212    // Metadata is indicated by an optional ":m" at the end of the line.
213    bool has_metadata = false;
214    base::StringPiece hash_count_string = cmd_parts[2];
215    size_t optional_colon = hash_count_string.find(':', 0);
216    if (optional_colon != base::StringPiece::npos) {
217      if (hash_count_string.substr(optional_colon) != ":m")
218        return false;
219      has_metadata = true;
220      hash_count_string.remove_suffix(2);
221    }
222
223    size_t hash_count;
224    if (!base::StringToSizeT(hash_count_string, &hash_count))
225      return false;
226
227    if (hash_len * hash_count > reader.length())
228      return false;
229
230    // Ignore hash results from lists we don't recognize.
231    if (full_hash.list_id < 0) {
232      reader.Advance(hash_len * hash_count);
233      if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, NULL))
234        return false;
235      continue;
236    }
237
238    for (size_t i = 0; i < hash_count; ++i) {
239      if (!reader.GetData(&full_hash.hash, hash_len))
240        return false;
241      full_hashes->push_back(full_hash);
242    }
243
244    if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, full_hashes))
245      return false;
246  }
247
248  return reader.empty();
249}
250
251// BODY       = HEADER LF PREFIXES EOF
252// HEADER     = PREFIXSIZE ":" LENGTH
253// PREFIXSIZE = DIGIT+         # Size of each prefix in bytes
254// LENGTH     = DIGIT+         # Size of PREFIXES in bytes
255std::string FormatGetHash(const std::vector<SBPrefix>& prefixes) {
256  std::string request;
257  request.append(base::Uint64ToString(sizeof(SBPrefix)));
258  request.append(":");
259  request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size()));
260  request.append("\n");
261
262  // SBPrefix values are read without concern for byte order, so write back the
263  // same way.
264  for (size_t i = 0; i < prefixes.size(); ++i) {
265    request.append(reinterpret_cast<const char*>(&prefixes[i]),
266                   sizeof(SBPrefix));
267  }
268
269  return request;
270}
271
272bool ParseUpdate(const char* chunk_data,
273                 size_t chunk_len,
274                 size_t* next_update_sec,
275                 bool* reset,
276                 std::vector<SBChunkDelete>* deletes,
277                 std::vector<ChunkUrl>* chunk_urls) {
278  DCHECK(next_update_sec);
279  DCHECK(deletes);
280  DCHECK(chunk_urls);
281
282  BufferReader reader(chunk_data, chunk_len);
283
284  // Populated below.
285  std::string list_name;
286
287  while (!reader.empty()) {
288    std::vector<base::StringPiece> pieces;
289    if (!reader.GetPieces(2, &pieces))
290      return false;
291
292    base::StringPiece& command = pieces[0];
293
294    // Differentiate on the first character of the command (which is usually
295    // only one character, with the exception of the 'ad' and 'sd' commands).
296    switch (command[0]) {
297      case 'a':
298      case 's': {
299        // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
300        // have also parsed the list name before getting here, or the add-del
301        // or sub-del will have no context.
302        if (list_name.empty() || (command != "ad" && command != "sd"))
303          return false;
304        SBChunkDelete chunk_delete;
305        chunk_delete.is_sub_del = command[0] == 's';
306        StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del);
307        chunk_delete.list_name = list_name;
308        deletes->push_back(chunk_delete);
309        break;
310      }
311
312      case 'i':
313        // The line providing the name of the list (i.e. 'goog-phish-shavar').
314        list_name = pieces[1].as_string();
315        break;
316
317      case 'n':
318        // The line providing the next earliest time (in seconds) to re-query.
319        if (!base::StringToSizeT(pieces[1], next_update_sec))
320          return false;
321        break;
322
323      case 'u': {
324        ChunkUrl chunk_url;
325        chunk_url.url = pieces[1].as_string();  // Skip the initial "u:".
326        chunk_url.list_name = list_name;
327        chunk_urls->push_back(chunk_url);
328        break;
329      }
330
331      case 'r':
332        if (pieces[1] != "pleasereset")
333          return false;
334        *reset = true;
335        break;
336
337      default:
338        // According to the spec, we ignore commands we don't understand.
339        // TODO(shess): Does this apply to r:unknown or n:not-integer?
340        break;
341    }
342  }
343
344  return true;
345}
346
347// BODY      = (UINT32 CHUNKDATA)+
348// UINT32    = Unsigned 32-bit integer in network byte order
349// CHUNKDATA = Encoded ChunkData protocol message
350bool ParseChunk(const char* data,
351                size_t length,
352                ScopedVector<SBChunkData>* chunks) {
353  BufferReader reader(data, length);
354
355  while (!reader.empty()) {
356    uint32 l = 0;
357    if (!reader.GetNet32(&l) || l == 0 || l > reader.length())
358      return false;
359
360    const void* p = NULL;
361    if (!reader.RefData(&p, l))
362      return false;
363
364    scoped_ptr<SBChunkData> chunk(new SBChunkData());
365    if (!chunk->ParseFrom(reinterpret_cast<const unsigned char*>(p), l))
366      return false;
367
368    chunks->push_back(chunk.release());
369  }
370
371  DCHECK(reader.empty());
372  return true;
373}
374
375// LIST      = LISTNAME ";" LISTINFO (":" LISTINFO)*
376// LISTINFO  = CHUNKTYPE ":" CHUNKLIST
377// CHUNKTYPE = "a" | "s"
378// CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
379// NUMBER    = DIGIT+
380// RANGE     = NUMBER "-" NUMBER
381std::string FormatList(const SBListChunkRanges& list) {
382  std::string formatted_results = list.name;
383  formatted_results.append(";");
384
385  if (!list.adds.empty())
386    formatted_results.append("a:").append(list.adds);
387  if (!list.adds.empty() && !list.subs.empty())
388    formatted_results.append(":");
389  if (!list.subs.empty())
390    formatted_results.append("s:").append(list.subs);
391  formatted_results.append("\n");
392
393  return formatted_results;
394}
395
396}  // namespace safe_browsing
397