1#!/usr/bin/env python
2# Run with directory arguments from any directory, with no special setup required.
3# Or:
4# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
5
6import ftplib
7import hashlib
8import os
9import re
10import shutil
11import string
12import subprocess
13import sys
14import tarfile
15import tempfile
16
# When True, warn_verbose() messages are emitted; otherwise they are dropped.
VERBOSE = False


def warn(s):
    """Write *s* to stderr as one line prefixed with "warning: "."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Forward *s* to warn(), but only while the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
25
def is_interesting(path):
    """Return True if the file at *path* should be scanned for copyrights.

    The comparison is case-insensitive. Build files, scripts, editor swap
    files and plain-text files are skipped by extension, as are files named
    "notice" or "readme" that sit inside some directory.
    """
    lowered = path.lower()
    extension = os.path.splitext(lowered)[1]
    if extension in (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt"):
        return False
    # Only matches when there is a directory component in front of the name.
    return not lowered.endswith(("/notice", "/readme"))
42
def is_auto_generated(content):
    """Return True if *content* carries one of the known generator banners."""
    banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in banners)
49
# Every distinct copyright notice extracted so far, across all input files.
copyrights = set()

# Substrings that end a copyright header: the C comment terminator plus the
# revision-control ID tags that follow the license text in BSD-derived files.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Decorative lines that may trail a header and carry no text of their own.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def extract_copyright_at(lines, i):
    """Extract the copyright header surrounding lines[i] into ``copyrights``.

    Args:
        lines: The file content as a list of lines without newlines.
        i: Index of a line that contains "Copyright".

    Returns:
        The index at which the caller should resume scanning. It is always
        greater than the *i* passed in, so a caller looping on the result
        is guaranteed to make progress.
    """
    first = i
    is_hash_comment = lines[first].startswith("#")

    # For C-style comments, back up to the line after the one that opens the
    # comment. If the comment opens on the Copyright line itself there is
    # nothing to back up over.
    start = first
    if not is_hash_comment:
        while (start > 0 and "/*" not in lines[start]
               and "/*" not in lines[start - 1]):
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        line = lines[i]
        if is_hash_comment and len(line) == 0:
            break
        if any(marker in line for marker in _HEADER_TERMINATORS):
            break
        i += 1

    header = lines[start:i]
    if i == first and "*/" in lines[first]:
        # The comment closes on the Copyright line itself; keep the text in
        # front of the terminator instead of dropping the notice.
        header.append(lines[first].split("*/")[0])

    # Trim trailing cruft.
    while header and header[-1] in _TRAILING_CRUFT:
        header.pop()

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in header:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line == "#" or line == " *" or line == "**" or line == "-":
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail; the list may end up empty.
    while clean_lines and clean_lines[0] == "":
        clean_lines.pop(0)
    while clean_lines and clean_lines[-1] == "":
        clean_lines.pop()

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    # Always move past the Copyright line, even if the header ended on it.
    return max(i, first + 1)
118
119
def do_file(path):
    """Scan the file at *path* and record every copyright header it contains.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a warning)
    when it is not valid UTF-8. Files of four lines or fewer, auto-generated
    files, and files without the word "Copyright" are skipped; the last two
    cases produce a warning.
    """
    # Read the raw bytes once and close the handle straight away. Decoding
    # bytes explicitly behaves the same on Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Guard against a returned index that does not advance, which
            # would otherwise make this loop spin forever on the same line.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
152
153
def do_dir(path):
    """Run do_file() on every interesting file below the directory *path*.

    ".git" directories are not descended into. Sub-directories and files are
    visited in sorted order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to the list object it yielded,
        # so prune and order it in place rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
164
165
# Each command-line argument is a directory to walk or a single file to scan;
# with no arguments the current directory is scanned.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Write UTF-8 bytes directly: Python 3 exposes the byte stream as
# sys.stdout.buffer, while on Python 2 sys.stdout already accepts bytes.
# The output is byte-for-byte what the old print statements produced.
out = getattr(sys.stdout, "buffer", sys.stdout)
separator = b"\n\n-------------------------------------------------------------------\n\n"
for notice_text in sorted(copyrights):
    out.write(notice_text.encode("utf-8") + separator)

sys.exit(0)
183