#!/usr/bin/perl
# Copyright 2008 The RE2 Authors.  All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Generate table entries giving character ranges
# for POSIX/Perl character classes.  Rather than
# work out each class definition by hand, it is easier
# to ask Perl about each character code from 0 through
# 128 and write down its answer.
112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
# POSIX bracket-class names for which tables are generated.  Each name is
# later wrapped in an outer [...] to form the regexp used for probing.
@posixclasses = qw(
	[:alnum:]
	[:alpha:]
	[:ascii:]
	[:blank:]
	[:cntrl:]
	[:digit:]
	[:graph:]
	[:lower:]
	[:print:]
	[:punct:]
	[:space:]
	[:upper:]
	[:word:]
	[:xdigit:]
);
282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
# Perl shorthand classes for which tables are generated.  Each element is a
# two-character string: a backslash followed by the class letter.
@perlclasses = qw( \d \s \w );
342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
# Ask Perl which character codes belong to a character class.
#
# Argument:
#   $class - text that is valid inside a bracket expression, such as
#            "[:alpha:]" or "\d"; it is wrapped in [...] to build the regexp.
#
# Returns a list of [lo, hi] array refs, one per maximal run of consecutive
# matching codes, in ascending order.
sub ComputeClass($) {
  my ($class) = @_;
  my $pattern = "[$class]";
  my $matcher = qr/$pattern/;
  my @ranges;
  my $run_start;  # first code of the run currently open; undef when none

  # Codes 0..128 are actually tested.  The final probe, 256, never matches;
  # it only closes a run that is still open, so a run that includes code 128
  # is recorded as ending at 255.
  foreach my $code (0 .. 128, 256) {
    if ($code <= 128 && chr($code) =~ $matcher) {
      $run_start = $code unless defined $run_start;
      next;
    }
    push @ranges, [$run_start, $code - 1] if defined $run_start;
    undef $run_start;
  }
  return @ranges;
}
552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
# Emit the C range table for one character class and build its group entries.
#
# Arguments:
#   $cname  - suffix appended to "code" to name the generated C array
#   $name   - class name as written in a regexp, e.g. "[:alpha:]" or "\d"
#   @ranges - list of [lo, hi] array refs
#
# Prints a "static URange16 code<cname>[]" definition to the currently
# selected output handle.  Returns two strings: the initializer for the class
# itself (sign +1) and the initializer for its negated form (sign -1).  Both
# refer to the same code<cname> array and the same range count.
sub PrintClass($$@) {
  my ($cname, $name, @ranges) = @_;
  print "static URange16 code${cname}[] = {  /* $name */\n";
  foreach my $range (@ranges) {
    my ($lo, $hi) = @{$range};
    printf "\t{ 0x%x, 0x%x },\n", $lo, $hi;
  }
  print "};\n";
  my $n = @ranges;

  # Double every backslash so the name survives inside a C string literal.
  my $escname = $name;
  $escname =~ s/\\/\\\\/g;

  # Declared lexically so it does not leak into a package global.
  my $negname = $escname;
  if ($negname =~ /:/) {
    # Bracket-class form: insert ^ after the first colon only,
    # e.g. "[:alpha:]" becomes "[:^alpha:]".
    $negname =~ s/:/:^/;
  } else {
    # Backslash form: upper-case the letter, e.g. \d becomes \D.
    $negname =~ tr/a-z/A-Z/;
  }
  return "{ \"$escname\", +1, code$cname, $n }", "{ \"$negname\", -1, code$cname, $n }";
}
752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
# Running counter shared by every PrintClasses call.  It supplies the numeric
# suffix of each generated code<N> array, so the array names stay unique
# across all groups.
my $gen = 0;

# Emit every range table for one family of classes, then the group table
# that indexes them.
#
# Arguments:
#   $cname   - family name used in the generated identifiers
#              "<cname>_groups" and "num_<cname>_groups"
#   @classes - class names handed to ComputeClass and PrintClass
#
# Prints to the currently selected output handle: first one range table per
# class, then the UGroup array holding a positive and a negated entry for
# each class, then the entry count.
sub PrintClasses($@) {
  my ($cname, @classes) = @_;

  # Each PrintClass call prints its table immediately and hands back the
  # two group entries, which are collected in class order.
  my @entries;
  for my $class (@classes) {
    my @ranges = ComputeClass($class);
    my @pair = PrintClass(++$gen, $class, @ranges);
    push @entries, @pair;
  }

  print "UGroup ${cname}_groups[] = {\n";
  print "\t$_,\n" for @entries;
  print "};\n";

  my $count = scalar @entries;
  print "int num_${cname}_groups = $count;\n";
}
932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
# Banner, include, and namespace opener of the generated C++ file.  The text
# contains nothing to interpolate, so a literal heredoc is used.
print <<'HEADER';
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

HEADER

# Perl shorthand classes come first, then the POSIX bracket classes; the
# array suffixes are numbered in this order.
PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);

# Close the namespace opened in the header.
print <<'FOOTER';

}  // namespace re2
FOOTER
111