1#!/usr/bin/env python
2# Copyright (c) 2014 Google Inc. All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following disclaimer
12# in the documentation and/or other materials provided with the
13# distribution.
14#     * Neither the name of Google Inc. nor the names of its
15# contributors may be used to endorse or promote products derived from
16# this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30import hashlib
31import re
32
33try:
34    import json
35except ImportError:
36    import simplejson as json
37
38
def save_hashes(hashes_file_path, hashes):
    """Serialize `hashes` as indented JSON into the file at `hashes_file_path`.

    Args:
        hashes_file_path: Path of the file to create or overwrite.
        hashes: JSON-serializable object to dump (callers in this module pass
            a dict of file name -> hex digest).

    Raises:
        Whatever exception opening the file or dumping the JSON raised; an
        error line naming the path is printed before it is re-raised.
    """
    try:
        with open(hashes_file_path, "wt") as hashes_file:
            json.dump(hashes, hashes_file, indent=4, separators=(",", ": "))
    except Exception:
        # The parenthesized single-argument form produces the same output
        # under the Python 2 print statement and the Python 3 print function.
        print("ERROR: Failed to write %s" % hashes_file_path)
        raise
46
47
def load_hashes(hashes_file_path):
    """Load the JSON hash mapping stored at `hashes_file_path`.

    Loading is best-effort: if the file cannot be opened or parsed, or its
    top-level JSON value is not an object, an empty dict is returned so that
    callers can always use dict lookups on the result.

    Args:
        hashes_file_path: Path of the JSON file to read.

    Returns:
        The decoded dict, or {} when no usable mapping could be loaded.
    """
    try:
        with open(hashes_file_path, "r") as hashes_file:
            hashes = json.load(hashes_file)
    except Exception:
        # Deliberately broad (missing file, unreadable file, malformed JSON),
        # but unlike a bare `except:` this lets KeyboardInterrupt and
        # SystemExit propagate.
        return {}
    if not isinstance(hashes, dict):
        # Valid JSON that is not an object (e.g. a list) cannot serve as a
        # name -> hash mapping; treat it the same as a missing file.
        return {}
    return hashes
55
56
def calculate_file_hash(file_path):
    """Return the hexadecimal MD5 digest of the contents of `file_path`.

    The file is read in binary mode so the digest is computed over the exact
    bytes on disk: no newline translation or text decoding is applied, and
    hashlib receives bytes as it requires on Python 3.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The MD5 digest as a string of hexadecimal digits.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as input_file:
        # Feed the digest in fixed-size chunks so the whole file never has to
        # be held in memory at once; the b"" sentinel marks end of file.
        for chunk in iter(lambda: input_file.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
62
63
def files_with_invalid_hashes(hash_file_path, file_paths):
    """Return the paths whose current hash differs from the recorded one.

    Recorded hashes are loaded from `hash_file_path` and looked up by each
    path's name with everything up to the last "/" removed. A file with no
    recorded entry is compared against "" and is therefore always reported.

    Args:
        hash_file_path: Path of the JSON file holding the recorded hashes.
        file_paths: Iterable of file paths to check.

    Returns:
        A list of the entries of `file_paths` whose hash does not match, in
        their original order.
    """
    recorded = load_hashes(hash_file_path)

    def hash_mismatch(path):
        # Recorded hashes are keyed by the name after the directory prefix.
        base_name = re.sub(".*/", "", path)
        return calculate_file_hash(path) != recorded.get(base_name, "")

    return [path for path in file_paths if hash_mismatch(path)]
72
73
def update_file_hashes(hash_file_path, file_paths):
    """Recompute the hashes of `file_paths` and store them in `hash_file_path`.

    The stored mapping is built from scratch and keyed by each path's name
    with everything up to the last "/" removed; when two paths share such a
    name, the later one in `file_paths` wins.

    Args:
        hash_file_path: Path of the JSON file to write the mapping to.
        file_paths: Iterable of file paths to hash.
    """
    fresh_hashes = dict(
        (re.sub(".*/", "", path), calculate_file_hash(path))
        for path in file_paths
    )
    save_hashes(hash_file_path, fresh_hashes)
80