1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/common/page_state_serialization.h"
6
7#include <algorithm>
8#include <limits>
9
10#include "base/pickle.h"
11#include "base/strings/string_number_conversions.h"
12#include "base/strings/string_util.h"
13#include "base/strings/utf_string_conversions.h"
14#include "ui/gfx/screen.h"
15
16namespace content {
17namespace {
18
#if defined(OS_ANDROID)
// Device scale factor override used while decoding in tests. While it is zero
// the decoder queries the primary display for the scale factor instead.
float g_device_scale_factor_for_testing = 0.0f;
#endif
22
23//-----------------------------------------------------------------------------
24
25void AppendDataToHttpBody(ExplodedHttpBody* http_body, const char* data,
26                          int data_length) {
27  ExplodedHttpBodyElement element;
28  element.type = blink::WebHTTPBody::Element::TypeData;
29  element.data.assign(data, data_length);
30  http_body->elements.push_back(element);
31}
32
33void AppendFileRangeToHttpBody(ExplodedHttpBody* http_body,
34                               const base::NullableString16& file_path,
35                               int file_start,
36                               int file_length,
37                               double file_modification_time) {
38  ExplodedHttpBodyElement element;
39  element.type = blink::WebHTTPBody::Element::TypeFile;
40  element.file_path = file_path;
41  element.file_start = file_start;
42  element.file_length = file_length;
43  element.file_modification_time = file_modification_time;
44  http_body->elements.push_back(element);
45}
46
47void AppendURLRangeToHttpBody(ExplodedHttpBody* http_body,
48                              const GURL& url,
49                              int file_start,
50                              int file_length,
51                              double file_modification_time) {
52  ExplodedHttpBodyElement element;
53  element.type = blink::WebHTTPBody::Element::TypeFileSystemURL;
54  element.filesystem_url = url;
55  element.file_start = file_start;
56  element.file_length = file_length;
57  element.file_modification_time = file_modification_time;
58  http_body->elements.push_back(element);
59}
60
61void AppendBlobToHttpBody(ExplodedHttpBody* http_body,
62                          const std::string& uuid) {
63  ExplodedHttpBodyElement element;
64  element.type = blink::WebHTTPBody::Element::TypeBlob;
65  element.blob_uuid = uuid;
66  http_body->elements.push_back(element);
67}
68
69//----------------------------------------------------------------------------
70
71void AppendReferencedFilesFromHttpBody(
72    const std::vector<ExplodedHttpBodyElement>& elements,
73    std::vector<base::NullableString16>* referenced_files) {
74  for (size_t i = 0; i < elements.size(); ++i) {
75    if (elements[i].type == blink::WebHTTPBody::Element::TypeFile)
76      referenced_files->push_back(elements[i].file_path);
77  }
78}
79
80bool AppendReferencedFilesFromDocumentState(
81    const std::vector<base::NullableString16>& document_state,
82    std::vector<base::NullableString16>* referenced_files) {
83  if (document_state.empty())
84    return true;
85
86  // This algorithm is adapted from Blink's core/html/FormController.cpp code.
87  // We only care about how that code worked when this code snapshot was taken
88  // as this code is only needed for backwards compat.
89  //
90  // For reference, see FormController::formStatesFromStateVector at:
91  // http://src.chromium.org/viewvc/blink/trunk/Source/core/html/FormController.cpp?pathrev=152274
92
93  size_t index = 0;
94
95  if (document_state.size() < 3)
96    return false;
97
98  index++;  // Skip over magic signature.
99  index++;  // Skip over form key.
100
101  size_t item_count;
102  if (!base::StringToSizeT(document_state[index++].string(), &item_count))
103    return false;
104
105  while (item_count--) {
106    if (index + 1 >= document_state.size())
107      return false;
108
109    index++;  // Skip over name.
110    const base::NullableString16& type = document_state[index++];
111
112    if (index >= document_state.size())
113      return false;
114
115    size_t value_size;
116    if (!base::StringToSizeT(document_state[index++].string(), &value_size))
117      return false;
118
119    if (index + value_size > document_state.size() ||
120        index + value_size < index)  // Check for overflow.
121      return false;
122
123    if (EqualsASCII(type.string(), "file")) {
124      if (value_size != 2)
125        return false;
126
127      referenced_files->push_back(document_state[index++]);
128      index++;  // Skip over display name.
129    } else {
130      index += value_size;
131    }
132  }
133
134  return true;
135}
136
137bool RecursivelyAppendReferencedFiles(
138    const ExplodedFrameState& frame_state,
139    std::vector<base::NullableString16>* referenced_files) {
140  if (!frame_state.http_body.is_null) {
141    AppendReferencedFilesFromHttpBody(frame_state.http_body.elements,
142                                      referenced_files);
143  }
144
145  if (!AppendReferencedFilesFromDocumentState(frame_state.document_state,
146                                              referenced_files))
147    return false;
148
149  for (size_t i = 0; i < frame_state.children.size(); ++i) {
150    if (!RecursivelyAppendReferencedFiles(frame_state.children[i],
151                                          referenced_files))
152      return false;
153  }
154
155  return true;
156}
157
158//----------------------------------------------------------------------------
159
160struct SerializeObject {
161  SerializeObject()
162      : version(0),
163        parse_error(false) {
164  }
165
166  SerializeObject(const char* data, int len)
167      : pickle(data, len),
168        version(0),
169        parse_error(false) {
170    iter = PickleIterator(pickle);
171  }
172
173  std::string GetAsString() {
174    return std::string(static_cast<const char*>(pickle.data()), pickle.size());
175  }
176
177  Pickle pickle;
178  PickleIterator iter;
179  int version;
180  bool parse_error;
181};
182
// Versions of the serialized format:
//   11: Min version
//   12: Adds support for contains_passwords in HTTP body
//   13: Adds support for URL (FileSystem URL)
//   14: Adds list of referenced files, version written only for first item.
//   15: Removes a bunch of values we defined but never used.
//   16: Switched from blob urls to blob uuids.
//   17: Add a target frame id number.
//
// NOTE: A version of -1 means the pickle contains only a URL string; see
// ReadPageState.

// Oldest version the reader accepts.
const int kMinVersion = 11;
// Version the writer emits and the newest version the reader accepts.
const int kCurrentVersion = 17;
197
198// A bunch of convenience functions to read/write to SerializeObjects.  The
199// de-serializers assume the input data will be in the correct format and fall
200// back to returning safe defaults when not.
201
202void WriteData(const void* data, int length, SerializeObject* obj) {
203  obj->pickle.WriteData(static_cast<const char*>(data), length);
204}
205
206void ReadData(SerializeObject* obj, const void** data, int* length) {
207  const char* tmp;
208  if (obj->pickle.ReadData(&obj->iter, &tmp, length)) {
209    *data = tmp;
210  } else {
211    obj->parse_error = true;
212    *data = NULL;
213    *length = 0;
214  }
215}
216
217void WriteInteger(int data, SerializeObject* obj) {
218  obj->pickle.WriteInt(data);
219}
220
221int ReadInteger(SerializeObject* obj) {
222  int tmp;
223  if (obj->pickle.ReadInt(&obj->iter, &tmp))
224    return tmp;
225  obj->parse_error = true;
226  return 0;
227}
228
229void ConsumeInteger(SerializeObject* obj) {
230  int unused ALLOW_UNUSED = ReadInteger(obj);
231}
232
233void WriteInteger64(int64 data, SerializeObject* obj) {
234  obj->pickle.WriteInt64(data);
235}
236
237int64 ReadInteger64(SerializeObject* obj) {
238  int64 tmp = 0;
239  if (obj->pickle.ReadInt64(&obj->iter, &tmp))
240    return tmp;
241  obj->parse_error = true;
242  return 0;
243}
244
245void WriteReal(double data, SerializeObject* obj) {
246  WriteData(&data, sizeof(double), obj);
247}
248
249double ReadReal(SerializeObject* obj) {
250  const void* tmp = NULL;
251  int length = 0;
252  double value = 0.0;
253  ReadData(obj, &tmp, &length);
254  if (length == static_cast<int>(sizeof(double))) {
255    // Use memcpy, as tmp may not be correctly aligned.
256    memcpy(&value, tmp, sizeof(double));
257  } else {
258    obj->parse_error = true;
259  }
260  return value;
261}
262
263void ConsumeReal(SerializeObject* obj) {
264  double unused ALLOW_UNUSED = ReadReal(obj);
265}
266
267void WriteBoolean(bool data, SerializeObject* obj) {
268  obj->pickle.WriteInt(data ? 1 : 0);
269}
270
271bool ReadBoolean(SerializeObject* obj) {
272  bool tmp;
273  if (obj->pickle.ReadBool(&obj->iter, &tmp))
274    return tmp;
275  obj->parse_error = true;
276  return false;
277}
278
279void ConsumeBoolean(SerializeObject* obj) {
280  bool unused ALLOW_UNUSED = ReadBoolean(obj);
281}
282
283void WriteGURL(const GURL& url, SerializeObject* obj) {
284  obj->pickle.WriteString(url.possibly_invalid_spec());
285}
286
287GURL ReadGURL(SerializeObject* obj) {
288  std::string spec;
289  if (obj->pickle.ReadString(&obj->iter, &spec))
290    return GURL(spec);
291  obj->parse_error = true;
292  return GURL();
293}
294
295void WriteStdString(const std::string& s, SerializeObject* obj) {
296  obj->pickle.WriteString(s);
297}
298
299std::string ReadStdString(SerializeObject* obj) {
300  std::string s;
301  if (obj->pickle.ReadString(&obj->iter, &s))
302    return s;
303  obj->parse_error = true;
304  return std::string();
305}
306
307// WriteString pickles the NullableString16 as <int length><char16* data>.
308// If length == -1, then the NullableString16 itself is null.  Otherwise the
309// length is the number of char16 (not bytes) in the NullableString16.
310void WriteString(const base::NullableString16& str, SerializeObject* obj) {
311  if (str.is_null()) {
312    obj->pickle.WriteInt(-1);
313  } else {
314    const char16* data = str.string().data();
315    size_t length_in_bytes = str.string().length() * sizeof(char16);
316
317    CHECK_LT(length_in_bytes,
318             static_cast<size_t>(std::numeric_limits<int>::max()));
319    obj->pickle.WriteInt(length_in_bytes);
320    obj->pickle.WriteBytes(data, length_in_bytes);
321  }
322}
323
324// This reads a serialized NullableString16 from obj. If a string can't be
325// read, NULL is returned.
326const char16* ReadStringNoCopy(SerializeObject* obj, int* num_chars) {
327  int length_in_bytes;
328  if (!obj->pickle.ReadInt(&obj->iter, &length_in_bytes)) {
329    obj->parse_error = true;
330    return NULL;
331  }
332
333  if (length_in_bytes < 0)
334    return NULL;
335
336  const char* data;
337  if (!obj->pickle.ReadBytes(&obj->iter, &data, length_in_bytes)) {
338    obj->parse_error = true;
339    return NULL;
340  }
341
342  if (num_chars)
343    *num_chars = length_in_bytes / sizeof(char16);
344  return reinterpret_cast<const char16*>(data);
345}
346
347base::NullableString16 ReadString(SerializeObject* obj) {
348  int num_chars;
349  const char16* chars = ReadStringNoCopy(obj, &num_chars);
350  return chars ?
351      base::NullableString16(base::string16(chars, num_chars), false) :
352      base::NullableString16();
353}
354
355void ConsumeString(SerializeObject* obj) {
356  const char16* unused ALLOW_UNUSED = ReadStringNoCopy(obj, NULL);
357}
358
359template <typename T>
360void WriteAndValidateVectorSize(const std::vector<T>& v, SerializeObject* obj) {
361  CHECK_LT(v.size(), std::numeric_limits<int>::max() / sizeof(T));
362  WriteInteger(static_cast<int>(v.size()), obj);
363}
364
365size_t ReadAndValidateVectorSize(SerializeObject* obj, size_t element_size) {
366  size_t num_elements = static_cast<size_t>(ReadInteger(obj));
367
368  // Ensure that resizing a vector to size num_elements makes sense.
369  if (std::numeric_limits<int>::max() / element_size <= num_elements) {
370    obj->parse_error = true;
371    return 0;
372  }
373
374  // Ensure that it is plausible for the pickle to contain num_elements worth
375  // of data.
376  if (obj->pickle.payload_size() <= num_elements) {
377    obj->parse_error = true;
378    return 0;
379  }
380
381  return num_elements;
382}
383
384// Writes a Vector of strings into a SerializeObject for serialization.
385void WriteStringVector(
386    const std::vector<base::NullableString16>& data, SerializeObject* obj) {
387  WriteAndValidateVectorSize(data, obj);
388  for (size_t i = 0; i < data.size(); ++i) {
389    WriteString(data[i], obj);
390  }
391}
392
393void ReadStringVector(SerializeObject* obj,
394                      std::vector<base::NullableString16>* result) {
395  size_t num_elements =
396      ReadAndValidateVectorSize(obj, sizeof(base::NullableString16));
397
398  result->resize(num_elements);
399  for (size_t i = 0; i < num_elements; ++i)
400    (*result)[i] = ReadString(obj);
401}
402
403// Writes an ExplodedHttpBody object into a SerializeObject for serialization.
404void WriteHttpBody(const ExplodedHttpBody& http_body, SerializeObject* obj) {
405  WriteBoolean(!http_body.is_null, obj);
406
407  if (http_body.is_null)
408    return;
409
410  WriteAndValidateVectorSize(http_body.elements, obj);
411  for (size_t i = 0; i < http_body.elements.size(); ++i) {
412    const ExplodedHttpBodyElement& element = http_body.elements[i];
413    WriteInteger(element.type, obj);
414    if (element.type == blink::WebHTTPBody::Element::TypeData) {
415      WriteData(element.data.data(), static_cast<int>(element.data.size()),
416                obj);
417    } else if (element.type == blink::WebHTTPBody::Element::TypeFile) {
418      WriteString(element.file_path, obj);
419      WriteInteger64(element.file_start, obj);
420      WriteInteger64(element.file_length, obj);
421      WriteReal(element.file_modification_time, obj);
422    } else if (element.type ==
423               blink::WebHTTPBody::Element::TypeFileSystemURL) {
424      WriteGURL(element.filesystem_url, obj);
425      WriteInteger64(element.file_start, obj);
426      WriteInteger64(element.file_length, obj);
427      WriteReal(element.file_modification_time, obj);
428    } else {
429      DCHECK(element.type == blink::WebHTTPBody::Element::TypeBlob);
430      WriteStdString(element.blob_uuid, obj);
431    }
432  }
433  WriteInteger64(http_body.identifier, obj);
434  WriteBoolean(http_body.contains_passwords, obj);
435}
436
437void ReadHttpBody(SerializeObject* obj, ExplodedHttpBody* http_body) {
438  // An initial boolean indicates if we have an HTTP body.
439  if (!ReadBoolean(obj))
440    return;
441  http_body->is_null = false;
442
443  int num_elements = ReadInteger(obj);
444
445  for (int i = 0; i < num_elements; ++i) {
446    int type = ReadInteger(obj);
447    if (type == blink::WebHTTPBody::Element::TypeData) {
448      const void* data;
449      int length = -1;
450      ReadData(obj, &data, &length);
451      if (length >= 0) {
452        AppendDataToHttpBody(http_body, static_cast<const char*>(data),
453                             length);
454      }
455    } else if (type == blink::WebHTTPBody::Element::TypeFile) {
456      base::NullableString16 file_path = ReadString(obj);
457      int64 file_start = ReadInteger64(obj);
458      int64 file_length = ReadInteger64(obj);
459      double file_modification_time = ReadReal(obj);
460      AppendFileRangeToHttpBody(http_body, file_path, file_start, file_length,
461                                file_modification_time);
462    } else if (type == blink::WebHTTPBody::Element::TypeFileSystemURL) {
463      GURL url = ReadGURL(obj);
464      int64 file_start = ReadInteger64(obj);
465      int64 file_length = ReadInteger64(obj);
466      double file_modification_time = ReadReal(obj);
467      AppendURLRangeToHttpBody(http_body, url, file_start, file_length,
468                               file_modification_time);
469    } else if (type == blink::WebHTTPBody::Element::TypeBlob) {
470      if (obj->version >= 16) {
471        std::string blob_uuid = ReadStdString(obj);
472        AppendBlobToHttpBody(http_body, blob_uuid);
473      } else {
474        ReadGURL(obj); // Skip the obsolete blob url value.
475      }
476    }
477  }
478  http_body->identifier = ReadInteger64(obj);
479
480  if (obj->version >= 12)
481    http_body->contains_passwords = ReadBoolean(obj);
482}
483
484// Writes the ExplodedFrameState data into the SerializeObject object for
485// serialization.
486void WriteFrameState(
487    const ExplodedFrameState& state, SerializeObject* obj, bool is_top) {
488  // WARNING: This data may be persisted for later use. As such, care must be
489  // taken when changing the serialized format. If a new field needs to be
490  // written, only adding at the end will make it easier to deal with loading
491  // older versions. Similarly, this should NOT save fields with sensitive
492  // data, such as password fields.
493
494  WriteString(state.url_string, obj);
495  WriteString(state.original_url_string, obj);
496  WriteString(state.target, obj);
497  WriteInteger(state.scroll_offset.x(), obj);
498  WriteInteger(state.scroll_offset.y(), obj);
499  WriteString(state.referrer, obj);
500
501  WriteStringVector(state.document_state, obj);
502
503  WriteReal(state.page_scale_factor, obj);
504  WriteInteger64(state.item_sequence_number, obj);
505  WriteInteger64(state.document_sequence_number, obj);
506  WriteInteger64(state.target_frame_id, obj);
507
508  bool has_state_object = !state.state_object.is_null();
509  WriteBoolean(has_state_object, obj);
510  if (has_state_object)
511    WriteString(state.state_object, obj);
512
513  WriteHttpBody(state.http_body, obj);
514
515  // NOTE: It is a quirk of the format that we still have to write the
516  // http_content_type field when the HTTP body is null.  That's why this code
517  // is here instead of inside WriteHttpBody.
518  WriteString(state.http_body.http_content_type, obj);
519
520  // Subitems
521  const std::vector<ExplodedFrameState>& children = state.children;
522  WriteAndValidateVectorSize(children, obj);
523  for (size_t i = 0; i < children.size(); ++i)
524    WriteFrameState(children[i], obj, false);
525}
526
527void ReadFrameState(SerializeObject* obj, bool is_top,
528                    ExplodedFrameState* state) {
529  if (obj->version < 14 && !is_top)
530    ConsumeInteger(obj);  // Skip over redundant version field.
531
532  state->url_string = ReadString(obj);
533  state->original_url_string = ReadString(obj);
534  state->target = ReadString(obj);
535  if (obj->version < 15) {
536    ConsumeString(obj);  // Skip obsolete parent field.
537    ConsumeString(obj);  // Skip obsolete title field.
538    ConsumeString(obj);  // Skip obsolete alternate title field.
539    ConsumeReal(obj);    // Skip obsolete visited time field.
540  }
541
542  int x = ReadInteger(obj);
543  int y = ReadInteger(obj);
544  state->scroll_offset = gfx::Point(x, y);
545
546  if (obj->version < 15) {
547    ConsumeBoolean(obj);  // Skip obsolete target item flag.
548    ConsumeInteger(obj);  // Skip obsolete visit count field.
549  }
550  state->referrer = ReadString(obj);
551
552  ReadStringVector(obj, &state->document_state);
553
554  state->page_scale_factor = ReadReal(obj);
555  state->item_sequence_number = ReadInteger64(obj);
556  state->document_sequence_number = ReadInteger64(obj);
557  if (obj->version >= 17)
558    state->target_frame_id = ReadInteger64(obj);
559
560  bool has_state_object = ReadBoolean(obj);
561  if (has_state_object)
562    state->state_object = ReadString(obj);
563
564  ReadHttpBody(obj, &state->http_body);
565
566  // NOTE: It is a quirk of the format that we still have to read the
567  // http_content_type field when the HTTP body is null.  That's why this code
568  // is here instead of inside ReadHttpBody.
569  state->http_body.http_content_type = ReadString(obj);
570
571  if (obj->version < 14)
572    ConsumeString(obj);  // Skip unused referrer string.
573
574#if defined(OS_ANDROID)
575  if (obj->version == 11) {
576    // Now-unused values that shipped in this version of Chrome for Android when
577    // it was on a private branch.
578    ReadReal(obj);
579    ReadBoolean(obj);
580
581    // In this version, page_scale_factor included device_scale_factor and
582    // scroll offsets were premultiplied by pageScaleFactor.
583    if (state->page_scale_factor) {
584      float device_scale_factor = g_device_scale_factor_for_testing;
585      if (!device_scale_factor) {
586        device_scale_factor =
587            gfx::Screen::GetNativeScreen()->GetPrimaryDisplay().
588                device_scale_factor();
589      }
590      state->scroll_offset =
591          gfx::Point(state->scroll_offset.x() / state->page_scale_factor,
592                     state->scroll_offset.y() / state->page_scale_factor);
593      state->page_scale_factor /= device_scale_factor;
594    }
595  }
596#endif
597
598  // Subitems
599  size_t num_children =
600      ReadAndValidateVectorSize(obj, sizeof(ExplodedFrameState));
601  state->children.resize(num_children);
602  for (size_t i = 0; i < num_children; ++i)
603    ReadFrameState(obj, false, &state->children[i]);
604}
605
606void WritePageState(const ExplodedPageState& state, SerializeObject* obj) {
607  WriteInteger(obj->version, obj);
608  WriteStringVector(state.referenced_files, obj);
609  WriteFrameState(state.top, obj, true);
610}
611
612void ReadPageState(SerializeObject* obj, ExplodedPageState* state) {
613  obj->version = ReadInteger(obj);
614
615  if (obj->version == -1) {
616    GURL url = ReadGURL(obj);
617    // NOTE: GURL::possibly_invalid_spec() always returns valid UTF-8.
618    state->top.url_string = state->top.original_url_string =
619        base::NullableString16(UTF8ToUTF16(url.possibly_invalid_spec()), false);
620    return;
621  }
622
623  if (obj->version > kCurrentVersion || obj->version < kMinVersion) {
624    obj->parse_error = true;
625    return;
626  }
627
628  if (obj->version >= 14)
629    ReadStringVector(obj, &state->referenced_files);
630
631  ReadFrameState(obj, true, &state->top);
632
633  if (obj->version < 14)
634    RecursivelyAppendReferencedFiles(state->top, &state->referenced_files);
635
636  // De-dupe
637  state->referenced_files.erase(
638      std::unique(state->referenced_files.begin(),
639                  state->referenced_files.end()),
640      state->referenced_files.end());
641}
642
643}  // namespace
644
645ExplodedHttpBodyElement::ExplodedHttpBodyElement()
646    : type(blink::WebHTTPBody::Element::TypeData),
647      file_start(0),
648      file_length(-1),
649      file_modification_time(std::numeric_limits<double>::quiet_NaN()) {
650}
651
652ExplodedHttpBodyElement::~ExplodedHttpBodyElement() {
653}
654
655ExplodedHttpBody::ExplodedHttpBody()
656    : identifier(0),
657      contains_passwords(false),
658      is_null(true) {
659}
660
661ExplodedHttpBody::~ExplodedHttpBody() {
662}
663
664ExplodedFrameState::ExplodedFrameState()
665    : item_sequence_number(0),
666      document_sequence_number(0),
667      target_frame_id(0),
668      page_scale_factor(0.0) {
669}
670
671ExplodedFrameState::~ExplodedFrameState() {
672}
673
674ExplodedPageState::ExplodedPageState() {
675}
676
677ExplodedPageState::~ExplodedPageState() {
678}
679
680bool DecodePageState(const std::string& encoded, ExplodedPageState* exploded) {
681  *exploded = ExplodedPageState();
682
683  if (encoded.empty())
684    return true;
685
686  SerializeObject obj(encoded.data(), static_cast<int>(encoded.size()));
687  ReadPageState(&obj, exploded);
688  return !obj.parse_error;
689}
690
691bool EncodePageState(const ExplodedPageState& exploded, std::string* encoded) {
692  SerializeObject obj;
693  obj.version = kCurrentVersion;
694  WritePageState(exploded, &obj);
695  *encoded = obj.GetAsString();
696  return true;
697}
698
#if defined(OS_ANDROID)
// Test-only variant of DecodePageState that decodes with
// |device_scale_factor| installed as the scale factor override. The override
// is reset to zero before returning.
bool DecodePageStateWithDeviceScaleFactorForTesting(
    const std::string& encoded,
    float device_scale_factor,
    ExplodedPageState* exploded) {
  g_device_scale_factor_for_testing = device_scale_factor;
  const bool decoded_ok = DecodePageState(encoded, exploded);
  g_device_scale_factor_for_testing = 0.0;
  return decoded_ok;
}
#endif
710
711}  // namespace content
712