-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcanonindices
executable file
·51 lines (43 loc) · 1.8 KB
/
canonindices
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/local/bin/perl -w
# Author: Jason Eisner, University of Pennsylvania
# Usage: canonindices [-f] [files ...]
#
# Canonicalizes the indices on each line, so that
# the index mentioned first (farthest to the left) is
# changed to -1 throughout the line, the index mentioned
# second is changed to -2, etc.
#
# Typically applied to the output of rules2frames, but may be applied
# to the output of listrules, oneline, etc.
#
# The -f flag says to assume that each line has been produced by
# rules2frames, and to strip any (unmatched) index from the main
# category of the LHS of the frame. (That is, an index on the whole,
# finished constituent is not considered part of the frame -- unless
# for some reason it coindexes the constituent with a trace inside
# itself, as is the Treebank convention for parenthetical "he says.")
use strict;

require("stamp.inc"); &stamp;   # modify $0 and @INC, and print timestamp

# Command line: an optional leading -f, then input files (stdin if none).
# @ARGV is tested before $ARGV[0] is examined so that running with no
# arguments does not trigger an "uninitialized value" warning under -w.
my $framestriplhs = 0;          # true if -f was given
if (@ARGV && $ARGV[0] eq "-f") {
    $framestriplhs = 1;
    shift(@ARGV);
}
die "$0: bad command line flags" if @ARGV && $ARGV[0] =~ /^-./;

my $ind = "-[0-9]+\\b";         # index on null or overt element
my $indices = 0;                # sum over lines of the distinct indices seen on each line
my $indexmentions = 0;          # total number of index occurrences rewritten

while (<>) {
    # chomp, not chop: a final line that lacks a trailing newline must not
    # lose its last real character.
    chomp;

    # Peel off an optional leading "<non-space>:<digits>:<TAB>" prefix and
    # remember it, so it is printed back verbatim and never canonicalized.
    # The pattern is optional, so it always matches and $& is always defined.
    s/^(\S+:[0-9]+:\t)?//;
    my $location = $&;

    unless (/^\#/) {            # unless a comment
        my %translate;          # original index text => replacement, for this line only

        if ($framestriplhs && /^[^\t]*\t[^ \t\n\\\/]+($ind)/o) {   # handle -f flag
            # $1 is an index inside the first token after the first tab.
            my $stripind = $1;
            # Check that stripind doesn't appear later before deleting it;
            # mapping it to "" makes the substitution below remove it.
            $translate{$stripind} = "" unless $' =~ /$stripind\b/;
        }

        # Number the remaining indices -1, -2, ... in order of first mention.
        my $newindex = 0;       # we'll treat indices as negative numbers
        while (/$ind/go) {
            $translate{$&} = --$newindex unless defined $translate{$&};
        }

        $indices += keys %translate;    # also counts an index stripped by -f
        $indexmentions += s/$ind/$translate{$&}/geo;
    }

    print "$location$_\n";
}

print STDERR "$0: canonicalized $indexmentions mentions of $indices indices\n";