#!/usr/bin/perl -w
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Use: echo filename1.cc ... | find_copyrights.pl
#   or: find_copyrights.pl list_file
#   or: find_files.pl ... | find_copyrights.pl
#
# Reads file names, one per line, and prints one line per file of the form
# "<file>\t<result>", where <result> is "GENERATED FILE", the copyright
# holders found in the file joined with " / ", or "*No copyright*".

use strict;
use warnings;
use File::Basename;

sub check_is_generated_file($);
sub start_copyright_parsing();

my $progname = basename($0);

# Only this many leading lines of a file are inspected for the markers that
# identify a generated file.
my $generated_file_scan_boundary = 25;

# Phrases that mark a file as generated. The alternatives are joined into a
# single alternation; the text is matched case-insensitively.
my $generated_marker_regex = do {
    my $alternatives = join '|',
        'All changes made in this file will be lost',
        'DO NOT (?:EDIT|delete this file)',
        'Generated (?:at|automatically|data)',
        'Automatically generated',
        '\Wgenerated\s+(?:\w+\s+)*file\W';
    qr/(?:$alternatives)/i;
};

# Tokens that introduce a copyright statement. The copyright sign is accepted
# both as the single byte 0xA9 and as its UTF-8 encoding 0xC2 0xA9, because
# files are read as raw bytes.
my $copyright_indicator_regex =
    qr/(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))/i;

# An indicator followed by the statement itself; $1 captures the statement.
my $full_copyright_indicator_regex =
    qr/(?:\W|^)${copyright_indicator_regex}(?::\s*|\s+)(\w.*)$/i;

# Words which, when they directly follow an indicator, show that the line
# only talks about copyright (e.g. "copyright notice") instead of stating one.
my $copyright_disindicator_regex =
    qr/\b(?:info(?:rmation)?|notice|and|or)\b/i;

# Processes every file named on the input (standard input, or the list files
# given on the command line) and prints one result line per file.
# Dies if a listed file cannot be opened.
sub main {
    while (my $file = <>) {
        chomp $file;
        # Two-argument open() used to trim a trailing CR implicitly; keep
        # accepting CRLF-terminated file lists.
        $file =~ s/\r\z//;

        my $file_header = '';
        my %copyrights;
        # Three-argument open: the file name is data and must never be
        # interpreted as an open mode or a command.
        open(my $fh, '<', $file)
            or die "$progname: Unable to access $file: $!\n";
        my $parse_copyright = start_copyright_parsing();
        while (my $line = <$fh>) {
            my $line_number = $.;
            if ($line_number <= $generated_file_scan_boundary) {
                $file_header .= $line;
            }
            my $copyright_match = $parse_copyright->($line, $line_number);
            if ($copyright_match) {
                # Key by lower-cased text so that statements differing only
                # in case are reported once.
                $copyrights{lc $copyright_match} = $copyright_match;
            }
        }
        close($fh);

        my $copyright = join(' / ', sort values %copyrights);
        my $result = check_is_generated_file($file_header)
            ? 'GENERATED FILE'
            : ($copyright || '*No copyright*');
        print "$file\t$result\n";
    }
    return;
}

# Returns 1 if the given header text carries a "generated file" marker,
# otherwise 0.
#   $_[0] - the leading lines of a file as a single string.
sub check_is_generated_file($) {
    my $license = uc($_[0]);
    # Remove Python multiline comments to avoid false positives.
    if (index($license, '"""') != -1) {
        $license =~ s/"""[^"]*(?:"""|$)//mg;
    }
    if (index($license, "'''") != -1) {
        $license =~ s/'''[^']*(?:'''|$)//mg;
    }
    # Quick checks using index.
    if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
        return 1;
    }
    if (index($license, 'DO NOT EDIT') != -1 ||
        index($license, 'DO NOT DELETE') != -1 ||
        index($license, 'GENERATED') != -1) {
        return ($license =~ $generated_marker_regex) ? 1 : 0;
    }
    return 0;
}

# Returns true when the first value is not smaller than the second and
# exceeds it by at most the third value.
#   $_[0] - the later value, $_[1] - the earlier value,
#   $_[2] - the largest accepted difference.
sub are_within_increasing_progression($$$) {
    my ($later, $earlier, $max_distance) = @_;
    my $delta = $later - $earlier;
    return $delta >= 0 && $delta <= $max_distance;
}

# Creates a parser for the lines of one file. The returned code reference
# takes (line text, line number) and returns the copyright statement found on
# that line, or '' if there is none. It remembers where '(a)' and '(b)' list
# items were last seen, so a fresh parser must be created for every file.
sub start_copyright_parsing() {
    my $max_line_numbers_proximity = 3;
    # Set up the defaults the way that proximity checks will not succeed.
    my $last_a_item_line_number = -200;
    my $last_b_item_line_number = -100;

    return sub {
        my ($line, $line_number) = @_;

        # Remove C / C++ strings to avoid false positives.
        if (index($line, '"') != -1) {
            $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
        }

        my $uc_line = uc($line);

        # Record '(a)' and '(b)' last occurrences in C++ comments.
        my $cpp_comment_idx = index($uc_line, '//');
        if ($cpp_comment_idx != -1) {
            if (index($uc_line, '(A)') > $cpp_comment_idx) {
                $last_a_item_line_number = $line_number;
            }
            if (index($uc_line, '(B)') > $cpp_comment_idx) {
                $last_b_item_line_number = $line_number;
            }
        }

        # Fast bailout, uses the same patterns as the regexp. The copyright
        # sign literals are double-quoted so that the escapes denote the
        # actual bytes rather than backslash sequences.
        if (index($uc_line, 'COPYRIGHT') == -1 &&
            index($uc_line, 'COPR.') == -1 &&
            index($uc_line, "\x{00a9}") == -1 &&
            index($uc_line, "\xc2\xa9") == -1) {

            my $c_item_index = index($uc_line, '(C)');
            return '' if ($c_item_index == -1);
            # Filter out 'c' used as a list item inside C++ comments.
            # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
            if ($c_item_index > $cpp_comment_idx &&
                are_within_increasing_progression(
                    $line_number,
                    $last_b_item_line_number,
                    $max_line_numbers_proximity) &&
                are_within_increasing_progression(
                    $last_b_item_line_number,
                    $last_a_item_line_number,
                    $max_line_numbers_proximity)) {
                return '';
            }
        }

        my $copyright = '';
        if ($line =~ $full_copyright_indicator_regex) {
            my $match = $1;
            if ($match !~ /^\s*$copyright_disindicator_regex/) {
                # Drop one trailing comma or period and trailing whitespace.
                $match =~ s/([,.])?\s*$//;
                # Drop repeated indicators, e.g. "Copyright (c) 2013 ...".
                $match =~ s/$copyright_indicator_regex//g;
                $match =~ s/^\s+//;
                $match =~ s/\s{2,}/ /g;
                # Unescape '@' written as '\@'.
                $match =~ s{\\\@}{\@}g;
                $copyright = $match;
            }
        }

        return $copyright;
    }
}

# Run only when executed as a script, so that the subs above can be loaded
# (e.g. by a test) without consuming any input.
main() unless caller;

1;