11320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// Copyright 2013 The Chromium Authors. All rights reserved.
21320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// Use of this source code is governed by a BSD-style license that can be
31320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// found in the LICENSE file.
41320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#ifndef CHROME_BROWSER_PROFILE_RESETTER_JTL_FOUNDATION_H_
61320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#define CHROME_BROWSER_PROFILE_RESETTER_JTL_FOUNDATION_H_
71320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
81320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include <map>
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include <string>
101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "base/basictypes.h"
121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "crypto/hmac.h"
131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccinamespace jtl_foundation {
151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// A JTL (JSON Traversal Language) program is composed of one or more
171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// sentences. Each sentence consists of a sequence of operations. The input of
181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// the program is a hierarchical JSON data structure.
191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//
201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// The execution of each sentence starts at the root of an input dictionary. The
211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// operations include navigation in the JSON data structure, as well as
221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// comparing the current (leaf) node to fixed values. The program also has a
231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// separate dictionary as working memory, into which it can memorize data, then
241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// later recall it for comparisons.
251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//
261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// Example program:
271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// NAVIGATE_ANY
281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// NAVIGATE(hash("bar"))
291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// COMPARE_NODE_BOOL(1)
301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// STORE_BOOL(hash("found_foo"), 1)
311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// STOP_EXECUTING_SENTENCE
321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//
331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// Example input:
341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// {
351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//   'key1': 1,
361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//   'key2': {'foo': 0, 'bar': false, 'baz': 2}
371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//   'key3': {'foo': 0, 'bar': true, 'baz': 2}
381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//   'key4': {'foo': 0, 'bar': true, 'baz': 2}
391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// }
401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//
411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// This program navigates from the root of the dictionary to all children
421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// ("key1", "key2", "key3", "key4") and executes the remaining program on each
431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// of the children. The navigation happens in depth-first pre-order. On each of
441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// the children it tries to navigate into the child "bar", which fails for
451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// "key1", so execution stops for this sub-branch. On key2 the program navigates
461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// to "bar" and moves the execution context to this node which has the value
471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// "false". Therefore, the following COMPARE_NODE_BOOL is not fulfilled and the
481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// execution does not continue on this branch, so we back track and proceed with
491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// "key3" and its "bar" child. For this node, COMPARE_NODE_BOOL is fulfilled and
501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// the execution continues to store "found_foo = true" into the working memory
511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// of the interpreter. Next the interpreter executes STOP_EXECUTING_SENTENCE
521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// which prevents the traversal from descending into the "key4" branch from the
531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// NAVIGATE_ANY operation and can therefore speedup the processing.
541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci//
551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// All node names, and node values of type string, integer and double are hashed
561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// before being compared to hash values listed in |program|.
571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// JTL byte code consists of uint8 opcodes followed by parameters. Parameters
591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// are either boolean (uint8 with value \x00 or \x01), uint32 (in little-endian
601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// notation), or hash string of 32 bytes.
611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// The following opcodes are defined:
621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccienum OpCodes {
631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Continues execution with the next operation on the element of a
641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // dictionary that matches the passed key parameter. If no such element
651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // exists, the command execution returns from the current instruction.
661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the target dictionary key.
681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  NAVIGATE = 0x00,
691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Continues execution with the next operation on each element of a
701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // dictionary or list. If no such element exists or the current element is
711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // neither a dictionary or list, the command execution returns from the
721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // current instruction.
731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  NAVIGATE_ANY = 0x01,
741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Continues execution with the next operation on the parent node of the
751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // current node. If the current node is the root of the input dictionary, the
761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // program execution fails with a runtime error.
771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  NAVIGATE_BACK = 0x02,
781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Stores a boolean value in the working memory.
791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the parameter name,
811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - the value to store (\x00 or \x01)
821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  STORE_BOOL = 0x10,
831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Checks whether a boolean stored in working memory matches the expected
841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // value and continues execution with the next operation in case of a match.
851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the parameter name,
871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - the expected value (\x00 or \x01),
881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - the default value in case the working memory contains no corresponding
891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  //   entry (\x00 or\x01).
901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_STORED_BOOL = 0x11,
911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Same as STORE_BOOL but takes a hash instead of a boolean value as
921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // parameter.
931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  STORE_HASH = 0x12,
941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Same as COMPARE_STORED_BOOL but takes a hash instead of two boolean values
951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // as parameters.
961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_STORED_HASH = 0x13,
971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Stores the current node into the working memory. If the current node is not
981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // a boolean value, the program execution returns from the current
991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // instruction.
1001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the parameter name.
1021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  STORE_NODE_BOOL = 0x14,
1031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Stores the hashed value of the current node into the working memory. If
1041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // the current node is not a string, integer or double, the program execution
1051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // returns from the current instruction.
1061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the parameter name.
1081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  STORE_NODE_HASH = 0x15,
1091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parses the value of the current node as a URL, extracts the subcomponent of
1101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // the domain name that is immediately below the registrar-controlled portion,
1111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // and stores the hash of this subcomponent into working memory. In case the
1121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // domain name ends in a suffix not on the Public Suffix List (i.e. is neither
1131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // an ICANN, nor a private domain suffix), the last subcomponent is assumed to
1141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // be the registry, and the second-to-last subcomponent is extracted.
1151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // If the current node is not a string that represents a URL, or if this URL
1161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // does not have a domain component as described above, the program execution
1171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // returns from the current instruction.
1181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // For example, "foo.com", "www.foo.co.uk", "foo.appspot.com", and "foo.bar"
1191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // will all yield (the hash of) "foo" as a result.
1201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // See the unit test for more details.
1211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the parameter name.
1231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  STORE_NODE_REGISTERABLE_DOMAIN_HASH = 0x16,
1241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Compares the current node against a boolean value and continues execution
1251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // with the next operation in case of a match. If the current node does not
1261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // match or is not a boolean value, the program execution returns from the
1271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // current instruction.
1281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a boolean value (\x00 or \x01).
1301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_NODE_BOOL = 0x20,
1311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Compares the hashed value of the current node against the given hash, and
1321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // continues execution with the next operation in case of a match. If the
1331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // current node is not a string, integer or double, or if it is either, but
1341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // its hash does not match, the program execution returns from the current
1351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // instruction.
1361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the expected value.
1381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_NODE_HASH = 0x21,
1391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // The negation of the above.
1401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_NODE_HASH_NOT = 0x22,
1411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Compares the current node against a boolean value stored in working memory,
1421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // and continues with the next operation in case of a match. If the current
1431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // node is not a boolean value, the working memory contains no corresponding
1441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // boolean value, or if they do not match, the program execution returns from
1451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // the current instruction.
1461320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1471320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the parameter name.
1481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_NODE_TO_STORED_BOOL = 0x23,
1491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Compares the hashed value of the current node against a hash value stored
1501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // in working memory, and continues with the next operation in case of a
1511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // match. If the current node is not a string, integer or double, or if the
1521320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // working memory contains no corresponding hash string, or if the hashes do
1531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // not match, the program execution returns from the current instruction.
1541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the parameter name.
1561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_NODE_TO_STORED_HASH = 0x24,
1571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Checks whether the current node is a string value that contains the given
1581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // pattern as a substring, and continues execution with the next operation in
1591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // case it does. If the current node is not a string, or if does not match the
1601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // pattern, the program execution returns from the current instruction.
1611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Parameters:
1621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 32 byte hash of the pattern,
1631320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 4 byte unsigned integer: the length (>0) of the pattern in bytes,
1641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // - a 4 byte unsigned integer: the sum of all bytes in the pattern, serving
1651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  //   as a heuristic to reduce the number of actual SHA-256 calculations.
1661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  COMPARE_NODE_SUBSTRING = 0x25,
1671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Stop execution in this specific sentence.
1681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  STOP_EXECUTING_SENTENCE = 0x30,
1691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Separator between sentences, starts a new sentence.
1701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  END_OF_SENTENCE = 0x31
1711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
1721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciconst size_t kHashSizeInBytes = 32u;
1741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci// A class that provides SHA256 hash values for strings using a fixed hash seed.
1761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciclass Hasher {
1771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci public:
1781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  explicit Hasher(const std::string& seed);
1791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  ~Hasher();
1801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  std::string GetHash(const std::string& input) const;
1821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  static bool IsHash(const std::string& maybe_hash);
1841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci private:
1861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  crypto::HMAC hmac_;
1871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  mutable std::map<std::string, std::string> cached_hashes_;
1881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  DISALLOW_COPY_AND_ASSIGN(Hasher);
1891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci};
1901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}  // namespace jtl_foundation
1921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#endif  // CHROME_BROWSER_PROFILE_RESETTER_JTL_FOUNDATION_H_
194