1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/common/page_state_serialization.h"
6
7#include <algorithm>
8#include <limits>
9
10#include "base/pickle.h"
11#include "base/strings/string_number_conversions.h"
12#include "base/strings/string_util.h"
13#include "base/strings/utf_string_conversions.h"
14#include "ui/gfx/screen.h"
15
16namespace content {
17namespace {
18
19#if defined(OS_ANDROID)
// Device scale factor override installed by tests.  A value of zero means no
// override is active.
float g_device_scale_factor_for_testing = 0.0f;
21#endif
22
23//-----------------------------------------------------------------------------
24
25void AppendDataToHttpBody(ExplodedHttpBody* http_body, const char* data,
26                          int data_length) {
27  ExplodedHttpBodyElement element;
28  element.type = WebKit::WebHTTPBody::Element::TypeData;
29  element.data.assign(data, data_length);
30  http_body->elements.push_back(element);
31}
32
33void AppendFileRangeToHttpBody(ExplodedHttpBody* http_body,
34                               const base::NullableString16& file_path,
35                               int file_start,
36                               int file_length,
37                               double file_modification_time) {
38  ExplodedHttpBodyElement element;
39  element.type = WebKit::WebHTTPBody::Element::TypeFile;
40  element.file_path = file_path;
41  element.file_start = file_start;
42  element.file_length = file_length;
43  element.file_modification_time = file_modification_time;
44  http_body->elements.push_back(element);
45}
46
47void AppendURLRangeToHttpBody(ExplodedHttpBody* http_body,
48                              const GURL& url,
49                              int file_start,
50                              int file_length,
51                              double file_modification_time) {
52  ExplodedHttpBodyElement element;
53  element.type = WebKit::WebHTTPBody::Element::TypeURL;
54  element.url = url;
55  element.file_start = file_start;
56  element.file_length = file_length;
57  element.file_modification_time = file_modification_time;
58  http_body->elements.push_back(element);
59}
60
61void AppendBlobToHttpBody(ExplodedHttpBody* http_body, const GURL& url) {
62  ExplodedHttpBodyElement element;
63  element.type = WebKit::WebHTTPBody::Element::TypeBlob;
64  element.url = url;
65  http_body->elements.push_back(element);
66}
67
68//----------------------------------------------------------------------------
69
70void AppendReferencedFilesFromHttpBody(
71    const std::vector<ExplodedHttpBodyElement>& elements,
72    std::vector<base::NullableString16>* referenced_files) {
73  for (size_t i = 0; i < elements.size(); ++i) {
74    if (elements[i].type == WebKit::WebHTTPBody::Element::TypeFile)
75      referenced_files->push_back(elements[i].file_path);
76  }
77}
78
79bool AppendReferencedFilesFromDocumentState(
80    const std::vector<base::NullableString16>& document_state,
81    std::vector<base::NullableString16>* referenced_files) {
82  if (document_state.empty())
83    return true;
84
85  // This algorithm is adapted from Blink's core/html/FormController.cpp code.
86  // We only care about how that code worked when this code snapshot was taken
87  // as this code is only needed for backwards compat.
88  //
89  // For reference, see FormController::formStatesFromStateVector at:
90  // http://src.chromium.org/viewvc/blink/trunk/Source/core/html/FormController.cpp?pathrev=152274
91
92  size_t index = 0;
93
94  if (document_state.size() < 3)
95    return false;
96
97  index++;  // Skip over magic signature.
98  index++;  // Skip over form key.
99
100  size_t item_count;
101  if (!base::StringToSizeT(document_state[index++].string(), &item_count))
102    return false;
103
104  while (item_count--) {
105    if (index + 1 >= document_state.size())
106      return false;
107
108    index++;  // Skip over name.
109    const base::NullableString16& type = document_state[index++];
110
111    if (index >= document_state.size())
112      return false;
113
114    size_t value_size;
115    if (!base::StringToSizeT(document_state[index++].string(), &value_size))
116      return false;
117
118    if (index + value_size > document_state.size() ||
119        index + value_size < index)  // Check for overflow.
120      return false;
121
122    if (EqualsASCII(type.string(), "file")) {
123      if (value_size != 2)
124        return false;
125
126      referenced_files->push_back(document_state[index++]);
127      index++;  // Skip over display name.
128    } else {
129      index += value_size;
130    }
131  }
132
133  return true;
134}
135
136bool RecursivelyAppendReferencedFiles(
137    const ExplodedFrameState& frame_state,
138    std::vector<base::NullableString16>* referenced_files) {
139  if (!frame_state.http_body.is_null) {
140    AppendReferencedFilesFromHttpBody(frame_state.http_body.elements,
141                                      referenced_files);
142  }
143
144  if (!AppendReferencedFilesFromDocumentState(frame_state.document_state,
145                                              referenced_files))
146    return false;
147
148  for (size_t i = 0; i < frame_state.children.size(); ++i) {
149    if (!RecursivelyAppendReferencedFiles(frame_state.children[i],
150                                          referenced_files))
151      return false;
152  }
153
154  return true;
155}
156
157//----------------------------------------------------------------------------
158
159struct SerializeObject {
160  SerializeObject()
161      : version(0),
162        parse_error(false) {
163  }
164
165  SerializeObject(const char* data, int len)
166      : pickle(data, len),
167        version(0),
168        parse_error(false) {
169    iter = PickleIterator(pickle);
170  }
171
172  std::string GetAsString() {
173    return std::string(static_cast<const char*>(pickle.data()), pickle.size());
174  }
175
176  Pickle pickle;
177  PickleIterator iter;
178  int version;
179  bool parse_error;
180};
181
// Version IDs of the serialized format:
//   11: Oldest version that can still be read.
//   12: Adds support for contains_passwords in HTTP body.
//   13: Adds support for URL (FileSystem URL).
//   14: Adds list of referenced files, version written only for first item.
//
// NOTE: A version of -1 is special: the pickle then contains only a URL
// string.  See ReadPageState.
//
const int kMinVersion = 11;
const int kCurrentVersion = 14;
193
194// A bunch of convenience functions to read/write to SerializeObjects.  The
195// de-serializers assume the input data will be in the correct format and fall
196// back to returning safe defaults when not.
197
198void WriteData(const void* data, int length, SerializeObject* obj) {
199  obj->pickle.WriteData(static_cast<const char*>(data), length);
200}
201
202void ReadData(SerializeObject* obj, const void** data, int* length) {
203  const char* tmp;
204  if (obj->pickle.ReadData(&obj->iter, &tmp, length)) {
205    *data = tmp;
206  } else {
207    obj->parse_error = true;
208    *data = NULL;
209    *length = 0;
210  }
211}
212
213void WriteInteger(int data, SerializeObject* obj) {
214  obj->pickle.WriteInt(data);
215}
216
217int ReadInteger(SerializeObject* obj) {
218  int tmp;
219  if (obj->pickle.ReadInt(&obj->iter, &tmp))
220    return tmp;
221  obj->parse_error = true;
222  return 0;
223}
224
225void ConsumeInteger(SerializeObject* obj) {
226  int unused ALLOW_UNUSED = ReadInteger(obj);
227}
228
229void WriteInteger64(int64 data, SerializeObject* obj) {
230  obj->pickle.WriteInt64(data);
231}
232
233int64 ReadInteger64(SerializeObject* obj) {
234  int64 tmp = 0;
235  if (obj->pickle.ReadInt64(&obj->iter, &tmp))
236    return tmp;
237  obj->parse_error = true;
238  return 0;
239}
240
241void WriteReal(double data, SerializeObject* obj) {
242  WriteData(&data, sizeof(double), obj);
243}
244
245double ReadReal(SerializeObject* obj) {
246  const void* tmp = NULL;
247  int length = 0;
248  double value = 0.0;
249  ReadData(obj, &tmp, &length);
250  if (length == static_cast<int>(sizeof(double))) {
251    // Use memcpy, as tmp may not be correctly aligned.
252    memcpy(&value, tmp, sizeof(double));
253  } else {
254    obj->parse_error = true;
255  }
256  return value;
257}
258
259void WriteBoolean(bool data, SerializeObject* obj) {
260  obj->pickle.WriteInt(data ? 1 : 0);
261}
262
263bool ReadBoolean(SerializeObject* obj) {
264  bool tmp;
265  if (obj->pickle.ReadBool(&obj->iter, &tmp))
266    return tmp;
267  obj->parse_error = true;
268  return false;
269}
270
271void WriteGURL(const GURL& url, SerializeObject* obj) {
272  obj->pickle.WriteString(url.possibly_invalid_spec());
273}
274
275GURL ReadGURL(SerializeObject* obj) {
276  std::string spec;
277  if (obj->pickle.ReadString(&obj->iter, &spec))
278    return GURL(spec);
279  obj->parse_error = true;
280  return GURL();
281}
282
283// WriteString pickles the NullableString16 as <int length><char16* data>.
284// If length == -1, then the NullableString16 itself is null.  Otherwise the
285// length is the number of char16 (not bytes) in the NullableString16.
286void WriteString(const base::NullableString16& str, SerializeObject* obj) {
287  if (str.is_null()) {
288    obj->pickle.WriteInt(-1);
289  } else {
290    const char16* data = str.string().data();
291    size_t length_in_bytes = str.string().length() * sizeof(char16);
292
293    CHECK_LT(length_in_bytes,
294             static_cast<size_t>(std::numeric_limits<int>::max()));
295    obj->pickle.WriteInt(length_in_bytes);
296    obj->pickle.WriteBytes(data, length_in_bytes);
297  }
298}
299
300// This reads a serialized NullableString16 from obj. If a string can't be
301// read, NULL is returned.
302const char16* ReadStringNoCopy(SerializeObject* obj, int* num_chars) {
303  int length_in_bytes;
304  if (!obj->pickle.ReadInt(&obj->iter, &length_in_bytes)) {
305    obj->parse_error = true;
306    return NULL;
307  }
308
309  if (length_in_bytes < 0)
310    return NULL;
311
312  const char* data;
313  if (!obj->pickle.ReadBytes(&obj->iter, &data, length_in_bytes)) {
314    obj->parse_error = true;
315    return NULL;
316  }
317
318  if (num_chars)
319    *num_chars = length_in_bytes / sizeof(char16);
320  return reinterpret_cast<const char16*>(data);
321}
322
323base::NullableString16 ReadString(SerializeObject* obj) {
324  int num_chars;
325  const char16* chars = ReadStringNoCopy(obj, &num_chars);
326  return chars ?
327      base::NullableString16(base::string16(chars, num_chars), false) :
328      base::NullableString16();
329}
330
331void ConsumeString(SerializeObject* obj) {
332  const char16* unused ALLOW_UNUSED = ReadStringNoCopy(obj, NULL);
333}
334
335template <typename T>
336void WriteAndValidateVectorSize(const std::vector<T>& v, SerializeObject* obj) {
337  CHECK_LT(v.size(), std::numeric_limits<int>::max() / sizeof(T));
338  WriteInteger(static_cast<int>(v.size()), obj);
339}
340
341size_t ReadAndValidateVectorSize(SerializeObject* obj, size_t element_size) {
342  size_t num_elements = static_cast<size_t>(ReadInteger(obj));
343
344  // Ensure that resizing a vector to size num_elements makes sense.
345  if (std::numeric_limits<int>::max() / element_size <= num_elements) {
346    obj->parse_error = true;
347    return 0;
348  }
349
350  // Ensure that it is plausible for the pickle to contain num_elements worth
351  // of data.
352  if (obj->pickle.payload_size() <= num_elements) {
353    obj->parse_error = true;
354    return 0;
355  }
356
357  return num_elements;
358}
359
360// Writes a Vector of strings into a SerializeObject for serialization.
361void WriteStringVector(
362    const std::vector<base::NullableString16>& data, SerializeObject* obj) {
363  WriteAndValidateVectorSize(data, obj);
364  for (size_t i = 0; i < data.size(); ++i) {
365    WriteString(data[i], obj);
366  }
367}
368
369void ReadStringVector(SerializeObject* obj,
370                      std::vector<base::NullableString16>* result) {
371  size_t num_elements =
372      ReadAndValidateVectorSize(obj, sizeof(base::NullableString16));
373
374  result->resize(num_elements);
375  for (size_t i = 0; i < num_elements; ++i)
376    (*result)[i] = ReadString(obj);
377}
378
379// Writes an ExplodedHttpBody object into a SerializeObject for serialization.
380void WriteHttpBody(const ExplodedHttpBody& http_body, SerializeObject* obj) {
381  WriteBoolean(!http_body.is_null, obj);
382
383  if (http_body.is_null)
384    return;
385
386  WriteAndValidateVectorSize(http_body.elements, obj);
387  for (size_t i = 0; i < http_body.elements.size(); ++i) {
388    const ExplodedHttpBodyElement& element = http_body.elements[i];
389    WriteInteger(element.type, obj);
390    if (element.type == WebKit::WebHTTPBody::Element::TypeData) {
391      WriteData(element.data.data(), static_cast<int>(element.data.size()),
392                obj);
393    } else if (element.type == WebKit::WebHTTPBody::Element::TypeFile) {
394      WriteString(element.file_path, obj);
395      WriteInteger64(element.file_start, obj);
396      WriteInteger64(element.file_length, obj);
397      WriteReal(element.file_modification_time, obj);
398    } else if (element.type == WebKit::WebHTTPBody::Element::TypeURL) {
399      WriteGURL(element.url, obj);
400      WriteInteger64(element.file_start, obj);
401      WriteInteger64(element.file_length, obj);
402      WriteReal(element.file_modification_time, obj);
403    } else {
404      WriteGURL(element.url, obj);
405    }
406  }
407  WriteInteger64(http_body.identifier, obj);
408  WriteBoolean(http_body.contains_passwords, obj);
409}
410
411void ReadHttpBody(SerializeObject* obj, ExplodedHttpBody* http_body) {
412  // An initial boolean indicates if we have an HTTP body.
413  if (!ReadBoolean(obj))
414    return;
415  http_body->is_null = false;
416
417  int num_elements = ReadInteger(obj);
418
419  for (int i = 0; i < num_elements; ++i) {
420    int type = ReadInteger(obj);
421    if (type == WebKit::WebHTTPBody::Element::TypeData) {
422      const void* data;
423      int length = -1;
424      ReadData(obj, &data, &length);
425      if (length >= 0) {
426        AppendDataToHttpBody(http_body, static_cast<const char*>(data),
427                             length);
428      }
429    } else if (type == WebKit::WebHTTPBody::Element::TypeFile) {
430      base::NullableString16 file_path = ReadString(obj);
431      int64 file_start = ReadInteger64(obj);
432      int64 file_length = ReadInteger64(obj);
433      double file_modification_time = ReadReal(obj);
434      AppendFileRangeToHttpBody(http_body, file_path, file_start, file_length,
435                                file_modification_time);
436    } else if (type == WebKit::WebHTTPBody::Element::TypeURL) {
437      GURL url = ReadGURL(obj);
438      int64 file_start = ReadInteger64(obj);
439      int64 file_length = ReadInteger64(obj);
440      double file_modification_time = ReadReal(obj);
441      AppendURLRangeToHttpBody(http_body, url, file_start, file_length,
442                               file_modification_time);
443    } else if (type == WebKit::WebHTTPBody::Element::TypeBlob) {
444      GURL blob_url = ReadGURL(obj);
445      AppendBlobToHttpBody(http_body, blob_url);
446    }
447  }
448  http_body->identifier = ReadInteger64(obj);
449
450  if (obj->version >= 12)
451    http_body->contains_passwords = ReadBoolean(obj);
452}
453
454// Writes the ExplodedFrameState data into the SerializeObject object for
455// serialization.
456void WriteFrameState(
457    const ExplodedFrameState& state, SerializeObject* obj, bool is_top) {
458  // WARNING: This data may be persisted for later use. As such, care must be
459  // taken when changing the serialized format. If a new field needs to be
460  // written, only adding at the end will make it easier to deal with loading
461  // older versions. Similarly, this should NOT save fields with sensitive
462  // data, such as password fields.
463
464  WriteString(state.url_string, obj);
465  WriteString(state.original_url_string, obj);
466  WriteString(state.target, obj);
467  WriteString(state.parent, obj);
468  WriteString(state.title, obj);
469  WriteString(state.alternate_title, obj);
470  WriteReal(state.visited_time, obj);
471  WriteInteger(state.scroll_offset.x(), obj);
472  WriteInteger(state.scroll_offset.y(), obj);
473  WriteBoolean(state.is_target_item, obj);
474  WriteInteger(state.visit_count, obj);
475  WriteString(state.referrer, obj);
476
477  WriteStringVector(state.document_state, obj);
478
479  WriteReal(state.page_scale_factor, obj);
480  WriteInteger64(state.item_sequence_number, obj);
481  WriteInteger64(state.document_sequence_number, obj);
482
483  bool has_state_object = !state.state_object.is_null();
484  WriteBoolean(has_state_object, obj);
485  if (has_state_object)
486    WriteString(state.state_object, obj);
487
488  WriteHttpBody(state.http_body, obj);
489
490  // NOTE: It is a quirk of the format that we still have to write the
491  // http_content_type field when the HTTP body is null.  That's why this code
492  // is here instead of inside WriteHttpBody.
493  WriteString(state.http_body.http_content_type, obj);
494
495  // Subitems
496  const std::vector<ExplodedFrameState>& children = state.children;
497  WriteAndValidateVectorSize(children, obj);
498  for (size_t i = 0; i < children.size(); ++i)
499    WriteFrameState(children[i], obj, false);
500}
501
502void ReadFrameState(SerializeObject* obj, bool is_top,
503                    ExplodedFrameState* state) {
504  if (obj->version < 14 && !is_top)
505    ConsumeInteger(obj);  // Skip over redundant version field.
506
507  state->url_string = ReadString(obj);
508  state->original_url_string = ReadString(obj);
509  state->target = ReadString(obj);
510  state->parent = ReadString(obj);
511  state->title = ReadString(obj);
512  state->alternate_title = ReadString(obj);
513  state->visited_time = ReadReal(obj);
514
515  int x = ReadInteger(obj);
516  int y = ReadInteger(obj);
517  state->scroll_offset = gfx::Point(x, y);
518
519  state->is_target_item = ReadBoolean(obj);
520  state->visit_count = ReadInteger(obj);
521  state->referrer = ReadString(obj);
522
523  ReadStringVector(obj, &state->document_state);
524
525  state->page_scale_factor = ReadReal(obj);
526  state->item_sequence_number = ReadInteger64(obj);
527  state->document_sequence_number = ReadInteger64(obj);
528
529  bool has_state_object = ReadBoolean(obj);
530  if (has_state_object)
531    state->state_object = ReadString(obj);
532
533  ReadHttpBody(obj, &state->http_body);
534
535  // NOTE: It is a quirk of the format that we still have to read the
536  // http_content_type field when the HTTP body is null.  That's why this code
537  // is here instead of inside ReadHttpBody.
538  state->http_body.http_content_type = ReadString(obj);
539
540  if (obj->version < 14)
541    ConsumeString(obj);  // Skip unused referrer string.
542
543#if defined(OS_ANDROID)
544  if (obj->version == 11) {
545    // Now-unused values that shipped in this version of Chrome for Android when
546    // it was on a private branch.
547    ReadReal(obj);
548    ReadBoolean(obj);
549
550    // In this version, page_scale_factor included device_scale_factor and
551    // scroll offsets were premultiplied by pageScaleFactor.
552    if (state->page_scale_factor) {
553      float device_scale_factor = g_device_scale_factor_for_testing;
554      if (!device_scale_factor) {
555        device_scale_factor =
556            gfx::Screen::GetNativeScreen()->GetPrimaryDisplay().
557                device_scale_factor();
558      }
559      state->scroll_offset =
560          gfx::Point(state->scroll_offset.x() / state->page_scale_factor,
561                     state->scroll_offset.y() / state->page_scale_factor);
562      state->page_scale_factor /= device_scale_factor;
563    }
564  }
565#endif
566
567  // Subitems
568  size_t num_children =
569      ReadAndValidateVectorSize(obj, sizeof(ExplodedFrameState));
570  state->children.resize(num_children);
571  for (size_t i = 0; i < num_children; ++i)
572    ReadFrameState(obj, false, &state->children[i]);
573}
574
575void WritePageState(const ExplodedPageState& state, SerializeObject* obj) {
576  WriteInteger(obj->version, obj);
577  WriteStringVector(state.referenced_files, obj);
578  WriteFrameState(state.top, obj, true);
579}
580
581void ReadPageState(SerializeObject* obj, ExplodedPageState* state) {
582  obj->version = ReadInteger(obj);
583
584  if (obj->version == -1) {
585    GURL url = ReadGURL(obj);
586    // NOTE: GURL::possibly_invalid_spec() always returns valid UTF-8.
587    state->top.url_string = state->top.original_url_string =
588        base::NullableString16(UTF8ToUTF16(url.possibly_invalid_spec()), false);
589    return;
590  }
591
592  if (obj->version > kCurrentVersion || obj->version < kMinVersion) {
593    obj->parse_error = true;
594    return;
595  }
596
597  if (obj->version >= 14)
598    ReadStringVector(obj, &state->referenced_files);
599
600  ReadFrameState(obj, true, &state->top);
601
602  if (obj->version < 14)
603    RecursivelyAppendReferencedFiles(state->top, &state->referenced_files);
604
605  // De-dupe
606  state->referenced_files.erase(
607      std::unique(state->referenced_files.begin(),
608                  state->referenced_files.end()),
609      state->referenced_files.end());
610}
611
612}  // namespace
613
614ExplodedHttpBodyElement::ExplodedHttpBodyElement()
615    : type(WebKit::WebHTTPBody::Element::TypeData),
616      file_start(0),
617      file_length(-1),
618      file_modification_time(std::numeric_limits<double>::quiet_NaN()) {
619}
620
621ExplodedHttpBodyElement::~ExplodedHttpBodyElement() {
622}
623
624ExplodedHttpBody::ExplodedHttpBody()
625    : identifier(0),
626      contains_passwords(false),
627      is_null(true) {
628}
629
630ExplodedHttpBody::~ExplodedHttpBody() {
631}
632
633ExplodedFrameState::ExplodedFrameState()
634    : item_sequence_number(0),
635      document_sequence_number(0),
636      visit_count(0),
637      visited_time(0.0),
638      page_scale_factor(0.0),
639      is_target_item(false) {
640}
641
642ExplodedFrameState::~ExplodedFrameState() {
643}
644
645ExplodedPageState::ExplodedPageState() {
646}
647
648ExplodedPageState::~ExplodedPageState() {
649}
650
651bool DecodePageState(const std::string& encoded, ExplodedPageState* exploded) {
652  *exploded = ExplodedPageState();
653
654  if (encoded.empty())
655    return true;
656
657  SerializeObject obj(encoded.data(), static_cast<int>(encoded.size()));
658  ReadPageState(&obj, exploded);
659  return !obj.parse_error;
660}
661
662bool EncodePageState(const ExplodedPageState& exploded, std::string* encoded) {
663  SerializeObject obj;
664  obj.version = kCurrentVersion;
665  WritePageState(exploded, &obj);
666  *encoded = obj.GetAsString();
667  return true;
668}
669
670#if defined(OS_ANDROID)
671bool DecodePageStateWithDeviceScaleFactorForTesting(
672    const std::string& encoded,
673    float device_scale_factor,
674    ExplodedPageState* exploded) {
675  g_device_scale_factor_for_testing = device_scale_factor;
676  bool rv = DecodePageState(encoded, exploded);
677  g_device_scale_factor_for_testing = 0.0;
678  return rv;
679}
680#endif
681
682}  // namespace content
683