#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5
# Generates the hashed_ad_networks.[h,cc] files. Takes as input the name of a
# file with all ad network host patterns, newline-separated. If given an
# optional root output file name, generates the files <root_output_name>.h and
# <root_output_name>.cc. If no output name is given, the output name is
# 'hashed_ad_networks'.
#
# These are found at chrome/browser/extensions/activity_log/hashed_ad_networks.*
# and are used by the ActivityLog and HashedAdNetworkDatabase for recognizing
# ad injection.
15
import sys
from argparse import ArgumentParser
from hashlib import sha256
19
# C++ comment header substituted for the %(license)s placeholder at the top of
# each generated file. It is emitted verbatim, so every non-blank line must be
# a valid C++ '//' comment.
_LICENSE = '''\
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is automatically generated from the script in
// chrome/browser/extensions/activity_log/generate_ad_network_hashes.py.
// DO NOT MODIFY BY HAND!
'''
29
# %-format template for the generated C++ header. The only placeholder is
# %(license)s; the rest is emitted verbatim and declares the hash array and
# its element count.
_H_TEMPLATE = '''\
%(license)s
#ifndef CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_HASHED_AD_NETWORKS_H_
#define CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_HASHED_AD_NETWORKS_H_

#include "base/basictypes.h"

namespace extensions {

extern const char* kHashedAdNetworks[];
extern const int kNumHashedAdNetworks;

}  // namespace extensions

#endif  // CHROME_BROWSER_EXTENSIONS_ACTIVITY_LOG_HASHED_AD_NETWORKS_H_
'''
46
# %-format template for the generated C++ source file. Placeholders:
#   %(license)s      the comment header placed at the top of the file.
#   %(ad_networks)s  the array initializer body; the placeholder is already
#                    indented two spaces, so only continuation lines of the
#                    substituted text need their own indent.
_CC_TEMPLATE = '''\
%(license)s
#include "chrome/browser/extensions/activity_log/hashed_ad_networks.h"

#include "base/basictypes.h"

namespace extensions {

const char* kHashedAdNetworks[] = {
  %(ad_networks)s
};

const int kNumHashedAdNetworks = arraysize(kHashedAdNetworks);

}  // namespace extensions
'''
63
64
def Generate(input_filename, output_root_filename):
  '''Generate the .h and .cc files for the hashed_ad_network source files.

  Each non-blank input line is stripped of surrounding whitespace, hashed with
  SHA-256 and reduced to the first 16 hex digits of the digest, upper-cased and
  wrapped in double quotes. The sorted entries are substituted into the C++
  templates and written to |output_root_filename|.h and
  |output_root_filename|.cc, overwriting any existing files.

  |input_filename|
      The name of the input file, which should have one host to be hashed per
      line. Blank lines are ignored.
  |output_root_filename|
      The root name of the output files. This will generate a .h and .cc file,
      like |output_root_filename|.[h,cc].
  '''
  # Read raw bytes: sha256() only accepts bytes, so hashing text-mode lines
  # raises TypeError on Python 3. Binary mode behaves the same on Python 2
  # and 3, and strip() also removes a trailing '\r' from CRLF input.
  with open(input_filename, 'rb') as hosts_file:
    hosts = [line.strip() for line in hosts_file]

  # A blank line is not a host; hashing it would add a bogus array entry.
  hashes = ['"%s"' % sha256(host).hexdigest()[:16].upper()
            for host in hosts if host]

  # Hashes should be sorted in C++ so we can do a binary search over them.
  hashes.sort()
  # The separator's two-space indent lines entries up with the first one,
  # which picks up its indent from the template itself.
  ad_networks = ',\n  '.join(hashes)
  for ext, template in (('.h', _H_TEMPLATE),
                        ('.cc', _CC_TEMPLATE)):
    with open(output_root_filename + ext, 'w') as out:
      # The header template has no %(ad_networks)s placeholder; extra keys in
      # the mapping are ignored by %-formatting.
      out.write(template % {
        'license': _LICENSE,
        'ad_networks': ad_networks
      })
88
89
def _ParseArgs(argv=None):
  '''Parse the command line and return the argparse namespace.

  |argv|
      List of argument strings, excluding the program name. When None,
      argparse reads sys.argv[1:].

  The returned namespace has |input_file| (required positional) and |out|
  (set by -o/--out, defaulting to 'hashed_ad_networks').
  '''
  parser = ArgumentParser(
      description='Generate hashed_ad_networks.[h,cc] source files')
  parser.add_argument(
      'input_file',
      help='The name of the input file with the hosts to be hashed')
  parser.add_argument(
      '-o', '--out',
      help='The root name of the output source file',
      default='hashed_ad_networks')
  return parser.parse_args(argv)


def main(argv=None):
  '''Script entry point: parse |argv| and generate the .h/.cc files.

  Returns 0 on success so the result can be handed to sys.exit(). Invalid
  arguments make argparse print usage and exit on its own.
  '''
  args = _ParseArgs(argv)
  Generate(args.input_file, args.out)
  return 0


if __name__ == '__main__':
  sys.exit(main())
103