Text file src/regexp/syntax/make_perl_groups.pl

     1  #!/usr/bin/perl
     2  # Copyright 2008 The Go Authors. All rights reserved.
     3  # Use of this source code is governed by a BSD-style
     4  # license that can be found in the LICENSE file.
     5  
     6  # Modified version of RE2's make_perl_groups.pl.
     7  
     8  # Generate table entries giving character ranges
     9  # for POSIX/Perl character classes.  Rather than
    10  # figure out what the definition is, it is easier to ask
    11  # Perl about each letter from 0-128 and write down
    12  # its answer.
    13  
    14  @posixclasses = (
    15  	"[:alnum:]",
    16  	"[:alpha:]",
    17  	"[:ascii:]",
    18  	"[:blank:]",
    19  	"[:cntrl:]",
    20  	"[:digit:]",
    21  	"[:graph:]",
    22  	"[:lower:]",
    23  	"[:print:]",
    24  	"[:punct:]",
    25  	"[:space:]",
    26  	"[:upper:]",
    27  	"[:word:]",
    28  	"[:xdigit:]",
    29  );
    30  
    31  @perlclasses = (
    32  	"\\d",
    33  	"\\s",
    34  	"\\w",
    35  );
    36  
    37  %overrides = (
    38  	# Prior to Perl 5.18, \s did not match vertical tab.
    39  	# RE2 preserves that original behaviour.
    40  	"\\s:11" => 0,
    41  );
    42  
    43  sub ComputeClass($) {
    44    my @ranges;
    45    my ($class) = @_;
    46    my $regexp = "[$class]";
    47    my $start = -1;
    48    for (my $i=0; $i<=129; $i++) {
    49      if ($i == 129) { $i = 256; }
    50      if ($i <= 128 && ($overrides{"$class:$i"} // chr($i) =~ $regexp)) {
    51        if ($start < 0) {
    52          $start = $i;
    53        }
    54      } else {
    55        if ($start >= 0) {
    56          push @ranges, [$start, $i-1];
    57        }
    58        $start = -1;
    59      }
    60    }
    61    return @ranges;
    62  }
    63  
    64  sub PrintClass($$@) {
    65    my ($cname, $name, @ranges) = @_;
    66    print "var code$cname = []rune{  /* $name */\n";
    67    for (my $i=0; $i<@ranges; $i++) {
    68      my @a = @{$ranges[$i]};
    69      printf "\t0x%x, 0x%x,\n", $a[0], $a[1];
    70    }
    71    print "}\n\n";
    72    my $n = @ranges;
    73    $negname = $name;
    74    if ($negname =~ /:/) {
    75      $negname =~ s/:/:^/;
    76    } else {
    77      $negname =~ y/a-z/A-Z/;
    78    }
    79    return "\t`$name`: {+1, code$cname},\n" .
    80    	"\t`$negname`: {-1, code$cname},\n";
    81  }
    82  
    83  my $gen = 0;
    84  
    85  sub PrintClasses($@) {
    86    my ($cname, @classes) = @_;
    87    my @entries;
    88    foreach my $cl (@classes) {
    89      my @ranges = ComputeClass($cl);
    90      push @entries, PrintClass(++$gen, $cl, @ranges);
    91    }
    92    print "var ${cname}Group = map[string]charGroup{\n";
    93    foreach my $e (@entries) {
    94      print $e;
    95    }
    96    print "}\n";
    97    my $count = @entries;
    98  }
    99  
   100  print <<EOF;
   101  // Copyright 2013 The Go Authors. All rights reserved.
   102  // Use of this source code is governed by a BSD-style
   103  // license that can be found in the LICENSE file.
   104  
   105  // GENERATED BY make_perl_groups.pl; DO NOT EDIT.
   106  // make_perl_groups.pl >perl_groups.go
   107  
   108  package syntax
   109  
   110  EOF
   111  
   112  PrintClasses("perl", @perlclasses);
   113  PrintClasses("posix", @posixclasses);
   114  

View as plain text