1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
#include "webkit/glue/glue_serialize.h"

#include <string.h>

#include <string>

#include "base/pickle.h"
#include "base/utf_string_conversions.h"
#include "googleurl/src/gurl.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebData.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebHistoryItem.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPBody.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebPoint.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebSerializedScriptValue.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebVector.h"
#include "webkit/glue/webkit_glue.h"
21
22using WebKit::WebData;
23using WebKit::WebHistoryItem;
24using WebKit::WebHTTPBody;
25using WebKit::WebPoint;
26using WebKit::WebSerializedScriptValue;
27using WebKit::WebString;
28using WebKit::WebUChar;
29using WebKit::WebVector;
30
31namespace webkit_glue {
32
33struct SerializeObject {
34  SerializeObject() : iter(NULL) {}
35  SerializeObject(const char* data, int len) : pickle(data, len), iter(NULL) {}
36
37  std::string GetAsString() {
38    return std::string(static_cast<const char*>(pickle.data()), pickle.size());
39  }
40
41  Pickle pickle;
42  mutable void* iter;
43  mutable int version;
44};
45
// TODO(mpcomplete): obsolete versions 1 and 2 after 1/1/2008.
// Version ID used in reading/writing history items.
// 1: Initial revision.
// 2: Added case for NULL string versus "". Version 2 code can read Version 1
//    data, but not vice versa.
// 3: Version 2 was broken, it stored number of WebUChars, not number of bytes.
//    This version checks and reads v1 and v2 correctly.
// 4: Adds support for storing FormData::identifier().
// 5: Adds support for empty FormData.
// 6: Adds support for documentSequenceNumbers.
// 7: Adds support for stateObject.
// 8: Adds support for file range and modification time.
// 9: Adds support for itemSequenceNumbers.
// 10: Adds support for blob.
//
// NOTE: If the version is -1, then the pickle contains only a URL string.
// See CreateHistoryStateForURL.
//
// Should be const, but unit tests may modify it (HistoryItemToVersionedString
// temporarily overrides it while writing).
int kVersion = 10;
66
// A bunch of convenience functions to read/write to SerializeObjects.
// The serializers assume the input data is in the correct format and so do
// no error checking.
70inline void WriteData(const void* data, int length, SerializeObject* obj) {
71  obj->pickle.WriteData(static_cast<const char*>(data), length);
72}
73
74inline void ReadData(const SerializeObject* obj, const void** data,
75                     int* length) {
76  const char* tmp = NULL;
77  obj->pickle.ReadData(&obj->iter, &tmp, length);
78  *data = tmp;
79}
80
81inline bool ReadBytes(const SerializeObject* obj, const void** data,
82                     int length) {
83  const char *tmp;
84  if (!obj->pickle.ReadBytes(&obj->iter, &tmp, length))
85    return false;
86  *data = tmp;
87  return true;
88}
89
90inline void WriteInteger(int data, SerializeObject* obj) {
91  obj->pickle.WriteInt(data);
92}
93
94inline int ReadInteger(const SerializeObject* obj) {
95  int tmp = 0;
96  obj->pickle.ReadInt(&obj->iter, &tmp);
97  return tmp;
98}
99
100inline void WriteInteger64(int64 data, SerializeObject* obj) {
101  obj->pickle.WriteInt64(data);
102}
103
104inline int64 ReadInteger64(const SerializeObject* obj) {
105  int64 tmp = 0;
106  obj->pickle.ReadInt64(&obj->iter, &tmp);
107  return tmp;
108}
109
110inline void WriteReal(double data, SerializeObject* obj) {
111  WriteData(&data, sizeof(double), obj);
112}
113
114inline double ReadReal(const SerializeObject* obj) {
115  const void* tmp = NULL;
116  int length = 0;
117  ReadData(obj, &tmp, &length);
118  if (tmp && length > 0 && length >= static_cast<int>(sizeof(0.0)))
119    return *static_cast<const double*>(tmp);
120  else
121    return 0.0;
122}
123
124inline void WriteBoolean(bool data, SerializeObject* obj) {
125  obj->pickle.WriteInt(data ? 1 : 0);
126}
127
128inline bool ReadBoolean(const SerializeObject* obj) {
129  bool tmp = false;
130  obj->pickle.ReadBool(&obj->iter, &tmp);
131  return tmp;
132}
133
134inline void WriteGURL(const GURL& url, SerializeObject* obj) {
135  obj->pickle.WriteString(url.possibly_invalid_spec());
136}
137
138inline GURL ReadGURL(const SerializeObject* obj) {
139  std::string spec;
140  obj->pickle.ReadString(&obj->iter, &spec);
141  return GURL(spec);
142}
143
144// Read/WriteString pickle the WebString as <int length><WebUChar* data>.
145// If length == -1, then the WebString itself is NULL (WebString()).
146// Otherwise the length is the number of WebUChars (not bytes) in the WebString.
147inline void WriteString(const WebString& str, SerializeObject* obj) {
148  switch (kVersion) {
149    case 1:
150      // Version 1 writes <length in bytes><string data>.
151      // It saves WebString() and "" as "".
152      obj->pickle.WriteInt(str.length() * sizeof(WebUChar));
153      obj->pickle.WriteBytes(str.data(), str.length() * sizeof(WebUChar));
154      break;
155    case 2:
156      // Version 2 writes <length in WebUChar><string data>.
157      // It uses -1 in the length field to mean WebString().
158      if (str.isNull()) {
159        obj->pickle.WriteInt(-1);
160      } else {
161        obj->pickle.WriteInt(str.length());
162        obj->pickle.WriteBytes(str.data(),
163                               str.length() * sizeof(WebUChar));
164      }
165      break;
166    default:
167      // Version 3+ writes <length in bytes><string data>.
168      // It uses -1 in the length field to mean WebString().
169      if (str.isNull()) {
170        obj->pickle.WriteInt(-1);
171      } else {
172        obj->pickle.WriteInt(str.length() * sizeof(WebUChar));
173        obj->pickle.WriteBytes(str.data(),
174                               str.length() * sizeof(WebUChar));
175      }
176      break;
177  }
178}
179
180// This reads a serialized WebString from obj. If a string can't be read,
181// WebString() is returned.
182inline WebString ReadString(const SerializeObject* obj) {
183  int length;
184
185  // Versions 1, 2, and 3 all start with an integer.
186  if (!obj->pickle.ReadInt(&obj->iter, &length))
187    return WebString();
188
189  // Starting with version 2, -1 means WebString().
190  if (length == -1)
191    return WebString();
192
193  // In version 2, the length field was the length in WebUChars.
194  // In version 1 and 3 it is the length in bytes.
195  int bytes = length;
196  if (obj->version == 2)
197    bytes *= sizeof(WebUChar);
198
199  const void* data;
200  if (!ReadBytes(obj, &data, bytes))
201    return WebString();
202  return WebString(static_cast<const WebUChar*>(data),
203                   bytes / sizeof(WebUChar));
204}
205
206// Writes a Vector of Strings into a SerializeObject for serialization.
207static void WriteStringVector(
208    const WebVector<WebString>& data, SerializeObject* obj) {
209  WriteInteger(static_cast<int>(data.size()), obj);
210  for (size_t i = 0, c = data.size(); i < c; ++i) {
211    unsigned ui = static_cast<unsigned>(i);  // sigh
212    WriteString(data[ui], obj);
213  }
214}
215
216static WebVector<WebString> ReadStringVector(const SerializeObject* obj) {
217  int num_elements = ReadInteger(obj);
218  WebVector<WebString> result(static_cast<size_t>(num_elements));
219  for (int i = 0; i < num_elements; ++i)
220    result[i] = ReadString(obj);
221  return result;
222}
223
224// Writes a FormData object into a SerializeObject for serialization.
225static void WriteFormData(const WebHTTPBody& http_body, SerializeObject* obj) {
226  WriteBoolean(!http_body.isNull(), obj);
227
228  if (http_body.isNull())
229    return;
230
231  WriteInteger(static_cast<int>(http_body.elementCount()), obj);
232  WebHTTPBody::Element element;
233  for (size_t i = 0; http_body.elementAt(i, element); ++i) {
234    WriteInteger(element.type, obj);
235    if (element.type == WebHTTPBody::Element::TypeData) {
236      WriteData(element.data.data(), static_cast<int>(element.data.size()),
237                obj);
238    } else if (element.type == WebHTTPBody::Element::TypeFile) {
239      WriteString(element.filePath, obj);
240      WriteInteger64(element.fileStart, obj);
241      WriteInteger64(element.fileLength, obj);
242      WriteReal(element.modificationTime, obj);
243    } else {
244      WriteGURL(element.blobURL, obj);
245    }
246  }
247  WriteInteger64(http_body.identifier(), obj);
248}
249
250static WebHTTPBody ReadFormData(const SerializeObject* obj) {
251  // In newer versions, an initial boolean indicates if we have form data.
252  if (obj->version >= 5 && !ReadBoolean(obj))
253    return WebHTTPBody();
254
255  // In older versions, 0 elements implied no form data.
256  int num_elements = ReadInteger(obj);
257  if (num_elements == 0 && obj->version < 5)
258    return WebHTTPBody();
259
260  WebHTTPBody http_body;
261  http_body.initialize();
262
263  for (int i = 0; i < num_elements; ++i) {
264    int type = ReadInteger(obj);
265    if (type == WebHTTPBody::Element::TypeData) {
266      const void* data;
267      int length = -1;
268      ReadData(obj, &data, &length);
269      if (length >= 0)
270        http_body.appendData(WebData(static_cast<const char*>(data), length));
271    } else if (type == WebHTTPBody::Element::TypeFile) {
272      WebString file_path = ReadString(obj);
273      long long file_start = 0;
274      long long file_length = -1;
275      double modification_time = 0.0;
276      if (obj->version >= 8) {
277        file_start = ReadInteger64(obj);
278        file_length = ReadInteger64(obj);
279        modification_time = ReadReal(obj);
280      }
281      http_body.appendFileRange(file_path, file_start, file_length,
282                                modification_time);
283    } else if (obj->version >= 10) {
284      GURL blob_url = ReadGURL(obj);
285      http_body.appendBlob(blob_url);
286    }
287  }
288  if (obj->version >= 4)
289    http_body.setIdentifier(ReadInteger64(obj));
290
291  return http_body;
292}
293
294// Writes the HistoryItem data into the SerializeObject object for
295// serialization.
296static void WriteHistoryItem(
297    const WebHistoryItem& item, SerializeObject* obj) {
298  // WARNING: This data may be persisted for later use. As such, care must be
299  // taken when changing the serialized format. If a new field needs to be
300  // written, only adding at the end will make it easier to deal with loading
301  // older versions. Similarly, this should NOT save fields with sensitive
302  // data, such as password fields.
303  WriteInteger(kVersion, obj);
304  WriteString(item.urlString(), obj);
305  WriteString(item.originalURLString(), obj);
306  WriteString(item.target(), obj);
307  WriteString(item.parent(), obj);
308  WriteString(item.title(), obj);
309  WriteString(item.alternateTitle(), obj);
310  WriteReal(item.lastVisitedTime(), obj);
311  WriteInteger(item.scrollOffset().x, obj);
312  WriteInteger(item.scrollOffset().y, obj);
313  WriteBoolean(item.isTargetItem(), obj);
314  WriteInteger(item.visitCount(), obj);
315  WriteString(item.referrer(), obj);
316
317  WriteStringVector(item.documentState(), obj);
318
319  if (kVersion >= 9)
320    WriteInteger64(item.itemSequenceNumber(), obj);
321  if (kVersion >= 6)
322    WriteInteger64(item.documentSequenceNumber(), obj);
323  if (kVersion >= 7) {
324    bool has_state_object = !item.stateObject().isNull();
325    WriteBoolean(has_state_object, obj);
326    if (has_state_object)
327      WriteString(item.stateObject().toString(), obj);
328  }
329
330  // Yes, the referrer is written twice.  This is for backwards
331  // compatibility with the format.
332  WriteFormData(item.httpBody(), obj);
333  WriteString(item.httpContentType(), obj);
334  WriteString(item.referrer(), obj);
335
336  // Subitems
337  const WebVector<WebHistoryItem>& children = item.children();
338  WriteInteger(static_cast<int>(children.size()), obj);
339  for (size_t i = 0, c = children.size(); i < c; ++i)
340    WriteHistoryItem(children[i], obj);
341}
342
343// Creates a new HistoryItem tree based on the serialized string.
344// Assumes the data is in the format returned by WriteHistoryItem.
345static WebHistoryItem ReadHistoryItem(
346    const SerializeObject* obj,
347    bool include_form_data,
348    bool include_scroll_offset) {
349  // See note in WriteHistoryItem. on this.
350  obj->version = ReadInteger(obj);
351
352  if (obj->version == -1) {
353    GURL url = ReadGURL(obj);
354    WebHistoryItem item;
355    item.initialize();
356    item.setURLString(WebString::fromUTF8(url.possibly_invalid_spec()));
357    return item;
358  }
359
360  if (obj->version > kVersion || obj->version < 1)
361    return WebHistoryItem();
362
363  WebHistoryItem item;
364  item.initialize();
365
366  item.setURLString(ReadString(obj));
367  item.setOriginalURLString(ReadString(obj));
368  item.setTarget(ReadString(obj));
369  item.setParent(ReadString(obj));
370  item.setTitle(ReadString(obj));
371  item.setAlternateTitle(ReadString(obj));
372  item.setLastVisitedTime(ReadReal(obj));
373
374  int x = ReadInteger(obj);
375  int y = ReadInteger(obj);
376  if (include_scroll_offset)
377    item.setScrollOffset(WebPoint(x, y));
378
379  item.setIsTargetItem(ReadBoolean(obj));
380  item.setVisitCount(ReadInteger(obj));
381  item.setReferrer(ReadString(obj));
382
383  item.setDocumentState(ReadStringVector(obj));
384
385  if (obj->version >= 9)
386    item.setItemSequenceNumber(ReadInteger64(obj));
387  if (obj->version >= 6)
388    item.setDocumentSequenceNumber(ReadInteger64(obj));
389  if (obj->version >= 7) {
390    bool has_state_object = ReadBoolean(obj);
391    if (has_state_object) {
392      item.setStateObject(
393          WebSerializedScriptValue::fromString(ReadString(obj)));
394    }
395  }
396
397  // The extra referrer string is read for backwards compat.
398  const WebHTTPBody& http_body = ReadFormData(obj);
399  const WebString& http_content_type = ReadString(obj);
400  ALLOW_UNUSED const WebString& unused_referrer = ReadString(obj);
401  if (include_form_data) {
402    item.setHTTPBody(http_body);
403    item.setHTTPContentType(http_content_type);
404  }
405
406  // Subitems
407  int num_children = ReadInteger(obj);
408  for (int i = 0; i < num_children; ++i)
409    item.appendToChildren(ReadHistoryItem(obj,
410                                          include_form_data,
411                                          include_scroll_offset));
412
413  return item;
414}
415
416// Serialize a HistoryItem to a string, using our JSON Value serializer.
417std::string HistoryItemToString(const WebHistoryItem& item) {
418  if (item.isNull())
419    return std::string();
420
421  SerializeObject obj;
422  WriteHistoryItem(item, &obj);
423  return obj.GetAsString();
424}
425
426// Reconstruct a HistoryItem from a string, using our JSON Value deserializer.
427// This assumes that the given serialized string has all the required key,value
428// pairs, and does minimal error checking. If |include_form_data| is true,
429// the form data from a post is restored, otherwise the form data is empty.
430// If |include_scroll_offset| is true, the scroll offset is restored.
431static WebHistoryItem HistoryItemFromString(
432    const std::string& serialized_item,
433    bool include_form_data,
434    bool include_scroll_offset) {
435  if (serialized_item.empty())
436    return WebHistoryItem();
437
438  SerializeObject obj(serialized_item.data(),
439                      static_cast<int>(serialized_item.length()));
440  return ReadHistoryItem(&obj, include_form_data, include_scroll_offset);
441}
442
443WebHistoryItem HistoryItemFromString(
444    const std::string& serialized_item) {
445  return HistoryItemFromString(serialized_item, true, true);
446}
447
448// For testing purposes only.
449void HistoryItemToVersionedString(const WebHistoryItem& item, int version,
450                                  std::string* serialized_item) {
451  if (item.isNull()) {
452    serialized_item->clear();
453    return;
454  }
455
456  // Temporarily change the version.
457  int real_version = kVersion;
458  kVersion = version;
459
460  SerializeObject obj;
461  WriteHistoryItem(item, &obj);
462  *serialized_item = obj.GetAsString();
463
464  kVersion = real_version;
465}
466
467std::string CreateHistoryStateForURL(const GURL& url) {
468  // We avoid using the WebKit API here, so that we do not need to have WebKit
469  // initialized before calling this method.  Instead, we write a simple
470  // serialization of the given URL with a dummy version number of -1.  This
471  // will be interpreted by ReadHistoryItem as a request to create a default
472  // WebHistoryItem.
473  SerializeObject obj;
474  WriteInteger(-1, &obj);
475  WriteGURL(url, &obj);
476  return obj.GetAsString();
477}
478
479std::string RemoveFormDataFromHistoryState(const std::string& content_state) {
480  // TODO(darin): We should avoid using the WebKit API here, so that we do not
481  // need to have WebKit initialized before calling this method.
482  const WebHistoryItem& item =
483      HistoryItemFromString(content_state, false, true);
484  if (item.isNull()) {
485    // Couldn't parse the string, return an empty string.
486    return std::string();
487  }
488
489  return HistoryItemToString(item);
490}
491
492std::string RemoveScrollOffsetFromHistoryState(
493    const std::string& content_state) {
494  // TODO(darin): We should avoid using the WebKit API here, so that we do not
495  // need to have WebKit initialized before calling this method.
496  const WebHistoryItem& item =
497      HistoryItemFromString(content_state, true, false);
498  if (item.isNull()) {
499    // Couldn't parse the string, return an empty string.
500    return std::string();
501  }
502
503  return HistoryItemToString(item);
504}
505
506}  // namespace webkit_glue
507