protocol_parser.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// Parse the data returned from the SafeBrowsing v2.1 protocol response. 6 7#include "chrome/browser/safe_browsing/protocol_parser.h" 8 9#include "build/build_config.h" 10 11#if defined(OS_WIN) 12#include <Winsock2.h> 13#elif defined(OS_POSIX) 14#include <arpa/inet.h> 15#endif 16 17#include "base/format_macros.h" 18#include "base/logging.h" 19#include "base/string_util.h" 20 21namespace { 22// Helper function for quick scans of a line oriented protocol. Note that we use 23// std::string::assign(const charT* s, size_type n) 24// to copy data into 'line'. This form of 'assign' does not call strlen on 25// 'input', which is binary data and is not NULL terminated. 'input' may also 26// contain valid NULL bytes in the payload, which a strlen based copy would 27// truncate. 28bool GetLine(const char* input, int input_len, std::string* line) { 29 const char* pos = input; 30 while (pos && (pos - input < input_len)) { 31 if (*pos == '\n') { 32 line->assign(input, pos - input); 33 return true; 34 } 35 ++pos; 36 } 37 return false; 38} 39} 40 41//------------------------------------------------------------------------------ 42// SafeBrowsingParser implementation 43 44SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() { 45} 46 47bool SafeBrowsingProtocolParser::ParseGetHash( 48 const char* chunk_data, 49 int chunk_len, 50 const std::string& key, 51 bool* re_key, 52 std::vector<SBFullHashResult>* full_hashes) { 53 full_hashes->clear(); 54 int length = chunk_len; 55 const char* data = chunk_data; 56 57 int offset; 58 std::string line; 59 if (!key.empty()) { 60 if (!GetLine(data, length, &line)) 61 return false; // Error! Bad GetHash result. 62 63 if (line == "e:pleaserekey") { 64 *re_key = true; 65 return true; 66 } 67 68 offset = static_cast<int>(line.size()) + 1; 69 data += offset; 70 length -= offset; 71 72 if (!safe_browsing_util::VerifyMAC(key, line, data, length)) 73 return false; 74 } 75 76 while (length > 0) { 77 if (!GetLine(data, length, &line)) 78 return false; 79 80 offset = static_cast<int>(line.size()) + 1; 81 data += offset; 82 length -= offset; 83 84 std::vector<std::string> cmd_parts; 85 SplitString(line, ':', &cmd_parts); 86 if (cmd_parts.size() != 3) 87 return false; 88 89 SBFullHashResult full_hash; 90 full_hash.list_name = cmd_parts[0]; 91 full_hash.add_chunk_id = atoi(cmd_parts[1].c_str()); 92 int full_hash_len = atoi(cmd_parts[2].c_str()); 93 94 // Ignore hash results from lists we don't recognize. 95 if (safe_browsing_util::GetListId(full_hash.list_name) < 0) { 96 data += full_hash_len; 97 length -= full_hash_len; 98 continue; 99 } 100 101 while (full_hash_len > 0) { 102 DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash)); 103 memcpy(&full_hash.hash, data, sizeof(SBFullHash)); 104 full_hashes->push_back(full_hash); 105 data += sizeof(SBFullHash); 106 length -= sizeof(SBFullHash); 107 full_hash_len -= sizeof(SBFullHash); 108 } 109 } 110 111 return length == 0; 112} 113 114void SafeBrowsingProtocolParser::FormatGetHash( 115 const std::vector<SBPrefix>& prefixes, std::string* request) { 116 DCHECK(request); 117 118 // Format the request for GetHash. 119 request->append(StringPrintf("%" PRIuS ":%" PRIuS "\n", 120 sizeof(SBPrefix), 121 sizeof(SBPrefix) * prefixes.size())); 122 for (size_t i = 0; i < prefixes.size(); ++i) { 123 request->append(reinterpret_cast<const char*>(&prefixes[i]), 124 sizeof(SBPrefix)); 125 } 126} 127 128bool SafeBrowsingProtocolParser::ParseUpdate( 129 const char* chunk_data, 130 int chunk_len, 131 const std::string& key, 132 int* next_update_sec, 133 bool* re_key, 134 bool* reset, 135 std::vector<SBChunkDelete>* deletes, 136 std::vector<ChunkUrl>* chunk_urls) { 137 DCHECK(next_update_sec); 138 DCHECK(deletes); 139 DCHECK(chunk_urls); 140 141 int length = chunk_len; 142 const char* data = chunk_data; 143 144 // Populated below. 145 std::string list_name; 146 147 while (length > 0) { 148 std::string cmd_line; 149 if (!GetLine(data, length, &cmd_line)) 150 return false; // Error: bad list format! 151 152 std::vector<std::string> cmd_parts; 153 SplitString(cmd_line, ':', &cmd_parts); 154 if (cmd_parts.empty()) 155 return false; 156 const std::string& command = cmd_parts[0]; 157 if (cmd_parts.size() != 2 && command[0] != 'u') 158 return false; 159 160 const int consumed = static_cast<int>(cmd_line.size()) + 1; 161 data += consumed; 162 length -= consumed; 163 if (length < 0) 164 return false; // Parsing error. 165 166 // Differentiate on the first character of the command (which is usually 167 // only one character, with the exception of the 'ad' and 'sd' commands). 168 switch (command[0]) { 169 case 'a': 170 case 's': { 171 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must 172 // have also parsed the list name before getting here, or the add-del 173 // or sub-del will have no context. 174 if (command.size() != 2 || command[1] != 'd' || list_name.empty()) 175 return false; 176 SBChunkDelete chunk_delete; 177 chunk_delete.is_sub_del = command[0] == 's'; 178 StringToRanges(cmd_parts[1], &chunk_delete.chunk_del); 179 chunk_delete.list_name = list_name; 180 deletes->push_back(chunk_delete); 181 break; 182 } 183 184 case 'e': 185 if (cmd_parts[1] != "pleaserekey") 186 return false; 187 *re_key = true; 188 break; 189 190 case 'i': 191 // The line providing the name of the list (i.e. 'goog-phish-shavar'). 192 list_name = cmd_parts[1]; 193 break; 194 195 case 'm': 196 // Verify that the MAC of the remainer of this chunk is what we expect. 197 if (!key.empty() && 198 !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length)) 199 return false; 200 break; 201 202 case 'n': 203 // The line providing the next earliest time (in seconds) to re-query. 204 *next_update_sec = atoi(cmd_parts[1].c_str()); 205 break; 206 207 case 'u': { 208 // The redirect command is of the form: u:<url>,<mac> where <url> can 209 // contain multiple colons, commas or any valid URL characters. We scan 210 // backwards in the string looking for the first ',' we encounter and 211 // assume that everything before that is the URL and everything after 212 // is the MAC (if the MAC was requested). 213 std::string mac; 214 std::string redirect_url(cmd_line, 2); // Skip the initial "u:". 215 if (!key.empty()) { 216 std::string::size_type mac_pos = redirect_url.rfind(','); 217 if (mac_pos == std::string::npos) 218 return false; 219 mac = redirect_url.substr(mac_pos + 1); 220 redirect_url = redirect_url.substr(0, mac_pos); 221 } 222 223 ChunkUrl chunk_url; 224 chunk_url.url = redirect_url; 225 chunk_url.list_name = list_name; 226 if (!key.empty()) 227 chunk_url.mac = mac; 228 chunk_urls->push_back(chunk_url); 229 break; 230 } 231 232 case 'r': 233 if (cmd_parts[1] != "pleasereset") 234 return false; 235 *reset = true; 236 break; 237 238 default: 239 // According to the spec, we ignore commands we don't understand. 240 break; 241 } 242 } 243 244 return true; 245} 246 247bool SafeBrowsingProtocolParser::ParseChunk(const char* data, 248 int length, 249 const std::string& key, 250 const std::string& mac, 251 bool* re_key, 252 SBChunkList* chunks) { 253 int remaining = length; 254 const char* chunk_data = data; 255 256 if (!key.empty() && 257 !safe_browsing_util::VerifyMAC(key, mac, data, length)) { 258 return false; 259 } 260 261 while (remaining > 0) { 262 std::string cmd_line; 263 if (!GetLine(chunk_data, length, &cmd_line)) 264 return false; // Error: bad chunk format! 265 266 const int line_len = static_cast<int>(cmd_line.length()) + 1; 267 std::vector<std::string> cmd_parts; 268 SplitString(cmd_line, ':', &cmd_parts); 269 270 // Handle a possible re-key command. 271 if (cmd_parts.size() != 4) { 272 if (cmd_parts.size() == 2 && 273 cmd_parts[0] == "e" && 274 cmd_parts[1] == "pleaserekey") { 275 *re_key = true; 276 chunk_data += line_len; 277 remaining -= line_len; 278 continue; 279 } 280 return false; 281 } 282 283 // Process the chunk data. 284 const int chunk_number = atoi(cmd_parts[1].c_str()); 285 const int hash_len = atoi(cmd_parts[2].c_str()); 286 if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) { 287 SB_DLOG(INFO) << "ParseChunk got unknown hashlen " << hash_len; 288 return false; 289 } 290 291 const int chunk_len = atoi(cmd_parts[3].c_str()); 292 chunk_data += line_len; 293 remaining -= line_len; 294 295 chunks->push_back(SBChunk()); 296 chunks->back().chunk_number = chunk_number; 297 298 if (cmd_parts[0] == "a") { 299 chunks->back().is_add = true; 300 if (!ParseAddChunk(chunk_data, chunk_len, hash_len, 301 &chunks->back().hosts)) 302 return false; // Parse error. 303 } else if (cmd_parts[0] == "s") { 304 chunks->back().is_add = false; 305 if (!ParseSubChunk(chunk_data, chunk_len, hash_len, 306 &chunks->back().hosts)) 307 return false; // Parse error. 308 } else { 309 NOTREACHED(); 310 return false; 311 } 312 313 chunk_data += chunk_len; 314 remaining -= chunk_len; 315 if (remaining < 0) 316 return false; // Parse error. 317 } 318 319 DCHECK(remaining == 0); 320 321 return true; 322} 323 324bool SafeBrowsingProtocolParser::ParseAddChunk( 325 const char* data, int data_len, int hash_len, 326 std::deque<SBChunkHost>* hosts) { 327 328 int remaining = data_len; 329 const char* chunk_data = data; 330 const int min_size = sizeof(SBPrefix) + 1; 331 332 while (remaining >= min_size) { 333 SBPrefix host; 334 int prefix_count; 335 ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); 336 SBEntry::Type type = hash_len == sizeof(SBPrefix) ? 337 SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH; 338 SBEntry* entry; 339 int index_start = 0; 340 341 // If a host has more than 255 prefixes, then subsequent entries are used. 342 // Check if this is the case, and if so put them in one SBEntry since the 343 // database code assumes that all prefixes from the same host and chunk are 344 // in one SBEntry. 345 if (!hosts->empty() && hosts->back().host == host && 346 hosts->back().entry->HashLen() == hash_len) { 347 // Reuse the SBChunkHost, but need to create a new SBEntry since we have 348 // more prefixes. 349 index_start = hosts->back().entry->prefix_count(); 350 entry = hosts->back().entry->Enlarge(prefix_count); 351 hosts->back().entry = entry; 352 } else { 353 entry = SBEntry::Create(type, prefix_count); 354 SBChunkHost chunk_host; 355 chunk_host.host = host; 356 chunk_host.entry = entry; 357 hosts->push_back(chunk_host); 358 } 359 360 if (!ReadPrefixes(&chunk_data, &remaining, entry, prefix_count, 361 index_start)) 362 return false; 363 } 364 365 return remaining == 0; 366} 367 368bool SafeBrowsingProtocolParser::ParseSubChunk( 369 const char* data, int data_len, int hash_len, 370 std::deque<SBChunkHost>* hosts) { 371 372 int remaining = data_len; 373 const char* chunk_data = data; 374 const int min_size = 2 * sizeof(SBPrefix) + 1; 375 376 while (remaining >= min_size) { 377 SBPrefix host; 378 int prefix_count; 379 ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); 380 SBEntry::Type type = hash_len == sizeof(SBPrefix) ? 381 SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH; 382 SBEntry* entry = SBEntry::Create(type, prefix_count); 383 384 SBChunkHost chunk_host; 385 chunk_host.host = host; 386 chunk_host.entry = entry; 387 hosts->push_back(chunk_host); 388 389 if (prefix_count == 0) { 390 // There is only an add chunk number (no prefixes). 391 entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining)); 392 continue; 393 } 394 395 if (!ReadPrefixes(&chunk_data, &remaining, entry, prefix_count, 0)) 396 return false; 397 } 398 399 return remaining == 0; 400} 401 402 403void SafeBrowsingProtocolParser::ReadHostAndPrefixCount( 404 const char** data, int* remaining, SBPrefix* host, int* count) { 405 // Next 4 bytes are the host prefix. 406 memcpy(host, *data, sizeof(SBPrefix)); 407 *data += sizeof(SBPrefix); 408 *remaining -= sizeof(SBPrefix); 409 410 // Next 1 byte is the prefix count (could be zero, but never negative). 411 *count = static_cast<unsigned char>(**data); 412 *data += 1; 413 *remaining -= 1; 414} 415 416int SafeBrowsingProtocolParser::ReadChunkId( 417 const char** data, int* remaining) { 418 int chunk_number; 419 memcpy(&chunk_number, *data, sizeof(chunk_number)); 420 *data += sizeof(chunk_number); 421 *remaining -= sizeof(chunk_number); 422 return htonl(chunk_number); 423} 424 425bool SafeBrowsingProtocolParser::ReadPrefixes( 426 const char** data, int* remaining, SBEntry* entry, int count, 427 int index_start) { 428 int hash_len = entry->HashLen(); 429 for (int i = 0; i < count; ++i) { 430 if (entry->IsSub()) { 431 entry->SetChunkIdAtPrefix(index_start + i, ReadChunkId(data, remaining)); 432 if (*remaining <= 0) 433 return false; 434 } 435 436 if (entry->IsPrefix()) { 437 entry->SetPrefixAt(index_start + i, 438 *reinterpret_cast<const SBPrefix*>(*data)); 439 } else { 440 entry->SetFullHashAt(index_start + i, 441 *reinterpret_cast<const SBFullHash*>(*data)); 442 } 443 *data += hash_len; 444 *remaining -= hash_len; 445 if (*remaining < 0) 446 return false; 447 } 448 449 return true; 450} 451 452bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data, 453 int chunk_length, 454 std::string* client_key, 455 std::string* wrapped_key) { 456 DCHECK(client_key && wrapped_key); 457 client_key->clear(); 458 wrapped_key->clear(); 459 460 const char* data = chunk_data; 461 int remaining = chunk_length; 462 463 while (remaining > 0) { 464 std::string line; 465 if (!GetLine(data, remaining, &line)) 466 return false; 467 468 std::vector<std::string> cmd_parts; 469 SplitString(line, ':', &cmd_parts); 470 if (cmd_parts.size() != 3) 471 return false; 472 473 if (static_cast<int>(cmd_parts[2].size()) != atoi(cmd_parts[1].c_str())) 474 return false; 475 476 if (cmd_parts[0] == "clientkey") { 477 client_key->assign(cmd_parts[2]); 478 } else if (cmd_parts[0] == "wrappedkey") { 479 wrapped_key->assign(cmd_parts[2]); 480 } else { 481 return false; 482 } 483 484 data += line.size() + 1; 485 remaining -= static_cast<int>(line.size()) + 1; 486 } 487 488 if (client_key->empty() || wrapped_key->empty()) 489 return false; 490 491 return true; 492} 493