Front page | perl.perl6.internals |
Postings from July 2002
Re: Perl6 grammar (take III)
Thread Previous
|
Thread Next
From:
Sean O'Rourke
Date:
July 3, 2002 18:42
Subject:
Re: Perl6 grammar (take III)
Message ID:
Pine.GSO.4.33.0207031831360.5195-200000@beowulf.ucsd.edu
use Data::Dumper;
use Getopt::Long;
use strict;
$Data::Dumper::Terse = 1;
$Data::Dumper::Indent = 1;
use Term::ReadLine;
######################################################################
# Argument context for functions and control structures
######################################################################
my %WANT;
sub ::find_want {
my $f = shift;
$f = $f->{__VALUE__} || $f->{__PATTERN1__} if ref $f;
# print STDERR "find_want $f: $WANT{$f}\n";
$WANT{$f} || '__fail__';
}
##############################
# Functions (list operators):
# XXX: many of these need their own special want_* rules
my $FUNCTION_ARGS = 'maybe_comma';
my @builtin_funcs = qw(crypt index pack rindex sprintf substr
join unpack split
push unshift splice
warn die print printf read select syscall sysread
sysseek syswrite truncate write
vec
chmod chown fcntl ioctl link open opendir
rename symlink sysopen unlink
return fail
not);
@WANT{@builtin_funcs} = ($FUNCTION_ARGS) x @builtin_funcs;
sub ::add_function {
my $fname = shift->{__VALUE__};
$WANT{$fname} = shift || $FUNCTION_ARGS;
}
##############################
# Loop control
my @loop_control = qw(redo last next continue);
@WANT{@loop_control} = ('maybe_namepart') x @loop_control;
##############################
# Unary operators
# XXX: need to handle default $_
my @unary_ops = qw(chop chomp chr hex lc lcfirst length
ord reverse uc ucfirst
abs atan2 cos exp hex int log oct rand sin sqrt srand
pop shift
delete each exists keys values
defined undef
chdir chroot glob mkdir rmdir stat umask
close);
@WANT{@unary_ops} = ('prefix') x @unary_ops;
##############################
# Control operators
my @control = qw(for given when default if elsif else grep map);
@WANT{@control} = map { "want_for_$_" } @control;
##############################
# Named blocks
my @special_blocks = qw(CATCH BEGIN END INIT AUTOLOAD
PRE POST NEXT LAST FIRST
try do);
@WANT{@special_blocks} = ('closure') x @special_blocks;
##############################
# Classes (builtin and otherwise)
my %CLASSES;
my @builtin_types = qw(int real str HASH ARRAY SCALAR
true false);# XXX: these are really constants
@CLASSES{@builtin_types} = @builtin_types;
sub ::add_class { # seen class.
my $c = shift->{__VALUE__};
$CLASSES{$c} = $c;
}
sub ::find_class { # seen class?
my $c = shift;
return $CLASSES{$c->{__VALUE__}};
}
# HACK to distinguish between "my ($a, $b) ..." and "foo ($a, $b)".
# Don't need all keywords in here, but only the ones that cause
# problems.
my %KEYWORDS;
@KEYWORDS{qw(my our temp)} = 1;
sub ::not_keyword {
my $f = shift->{__VALUE__};
exists $KEYWORDS{$f} ? undef : 1;
}
# (see Parse::RecDescent::Consumer)
sub ::consumer {
my $t = shift;
my $old_len = length $t;
return sub {
my $len = length($_[0]);
return substr($t, 0, ($old_len - $len));
};
}
my $since_block;
sub ::saw_end_block {
my $text = shift;
if ($since_block) {
local $_ = $since_block->($text);
return m/\A[\s\n]+\z/ || undef;
}
return undef;
}
sub ::end_block {
$since_block = ::consumer(shift);
}
######################################################################
my $literals = <<'END';
sv_literal: lit_real
| lit_string
| hv_literal_ref
| av_literal_ref
lit_real: /(?:\d+(?:\.\d+)?|\.\d+)(?:[Ee]-?\d+)?/
lit_string: <perl_quotelike>
av_seq: semi /[;,]?/
av_literal: '(' av_seq(?) ')'
av_literal_ref: '[' av_seq(?) ']'
hv_seq: <leftop: pair ',' pair> /,?/
hv_literal: '(' hv_seq ')'
hv_literal_ref: '{' hv_seq '}'
END
######################################################################
$::NAMEPART = qr/[a-zA-Z_][\w_]*/;
my $variables = <<'END';
variable: sigil <skip:''> varname
sigil: /[\@\%\$\&]/
sv: '$' <skip:''> varname
av: '@' <skip:''> varname
hv: '%' <skip:''> varname
cv: '&' <skip:''> varname
varname: ('*')(?) '{' <commit> <skip:'\s*'> (scalar_expr | name) '}'
| name
| /\d+/
| /[\!_]/
| '^' <skip:''> namepart
name: /(?:::|\.|\*)?$::NAMEPART(::$::NAMEPART)*/
namepart: /$::NAMEPART/
END
######################################################################
$::COMPARE = qr{cmp | eq | [gnl]e | [gl]t
| <=> | [<>=!]= | < | > }x;
$::CONTEXT = qr{[\%\@\$\&*_?] | \+(?!\+)}x;
$::MULDIV = qr{[\%*x] | /(?!/)}x;
$::PREFIX = qr{[!~\\] | -(?![->])}x;
$::ADDSUB = qr{[-+_]};
$::LOG_OR = qr{x?or | //(?!=)}x;
my $operators = <<'END';
hype: '^' <skip:''> <matchrule:$arg[0]>
| <matchrule:$arg[0]>
maybe_comma: comma[$arg[0]] |
maybe_namepart: namepart |
hv_index: <skip:''> '{' <skip:$item[1]> hv_indices '}'
hv_indices: /[\w_]+/
| comma
av_index: <skip:''> '[' <skip:$item[1]> av_seq ']'
arglist: '(' maybe_comma ')'
access: '.' <skip:''> namepart
subscript: <skip:''> '{' <commit> <skip:$item[1]> hv_indices '}'
| <skip:''> '[' <commit> <skip:$item[1]> av_seq ']'
| '(' maybe_comma ')'
subscriptable: name { ::not_keyword($item[1]) } <commit> arglist
| '.' <commit> <skip:''> namepart
| '(' <commit> av_seq(?) ')'
| variable
context: /$::CONTEXT/o
# context: '%' | '@' | '$' | '&' | '*' | '_' | '?'
# | /\+(?!\+)/ # numeric context...
term: '<' <commit> expr(?) '>'
| subscriptable <commit> subscript(s?)
| context <commit> term
| class
| sv_literal
| closure
apply_rhs: namepart <commit> subscript(s?)
| subscript(s)
apply: <leftop: term hype['apply_op'] apply_rhs>
apply_op: '.'
incr: hype['incr_op'] <commit> apply
| apply hype['incr_op'](?)
incr_op: '++' | '--'
pow: incr (hype['pow_op'] prefix)(s?)
pow_op: '**'
prefix: filetest_op <commit> prefix
| hype['prefix_op'] <commit> prefix
| name <matchrule:@{[::find_want($item[1])]}>
| pow
# prefix_op: '!' | '~' | '\\' | /-(?![->])/
prefix_op: /$::PREFIX/o
filetest_op: /-[rwxoRWXOezsfdlpSbctugkTBMAC]+/
pair: namepart '=>' <commit> prefix
| prefix '=>' prefix
maybe_pair: namepart '=>' <commit> prefix
| prefix ('=>' prefix)(?)
match: maybe_pair (hype['match_op'] maybe_pair)(s?)
match_op: '=~' | '!~'
muldiv: <leftop: match hype['muldiv_op'] match>
# muldiv_op: '*' | '/' | '%' | 'x'
muldiv_op: /$::MULDIV/o
addsub: <leftop: muldiv hype['addsub_op'] muldiv>
# addsub_op: '+' | '-' | '_'
addsub_op: /$::ADDSUB/o
bitshift: <leftop: addsub hype['bitshift_op'] addsub>
bitshift_op: '<<' | '>>'
compare: <leftop: bitshift hype['compare_op'] bitshift>
compare_op: /$::COMPARE/
# compare_op: '<=>' | '<=' | '==' | '>=' | '<' | '>' | '!='
# | 'eq' | 'ge' | 'ne' | 'le' | 'lt' | 'gt' | 'cmp'
bitand: <leftop: compare hype['bitand_op'] compare>
bitand_op: '&'
bitor: <leftop: bitand hype['bitor_op'] bitand>
bitor_op: '|' | '~'
logand: <leftop: bitor hype['logand_op'] bitor>
logand_op: '&&'
logor: <leftop: logand hype['logor_op'] logand>
logor_op: '||' | '~~'
range: logor (range_op logor)(?)
range_op: '..'
ternary: range ('??' ternary '::' ternary)(?)
scope: 'my' | 'temp' | 'our'
class: name { ::find_class($item{name}) }
scope_class: scope class(?)
| class
property: ("$arg[0]")(?) name ( '(' comma ')' )(?)
props: (property[$arg[0]])(s?)
decl: '(' <commit> <leftop: variable ',' variable> ')' props['are']
| variable props['is']
assign: assign_lhs assign_rhs(s?)
assign_lhs: scope_class decl
| ternary
assign_rhs: hype['assign_op'] scalar_expr
assign_op: /[!:]?=/
| assignable_op <skip:''> '='
assignable_op: '//'
| logand_op | logor_op
| bitand_op | bitor_op | bitshift_op
| addsub_op | muldiv_op | pow_op
scalar_expr: assign but(s?)
but: but_word assign
but_word: 'but' | 'err'
comma: <leftop: <matchrule:@{[$arg[0] || 'scalar_expr']}>
comma_op <matchrule:@{[$arg[0] || 'scalar_expr']}> >
comma_op: ','
semi: <leftop: comma semi_op comma>
semi_op: ';'
adverb: scalar_expr adv_clause
adv_clause: /:(?!:)/ <commit> comma['scalar_expr']
| # nothing
log_AND: <leftop: adverb hype['log_AND_op'] adverb>
log_AND_op: 'and'
log_OR: <leftop: log_AND hype['log_OR_op'] log_AND>
# log_OR_op: 'or' | 'xor' | m{//(?!=)}
log_OR_op: /$::LOG_OR/o
expr: log_OR
END
######################################################################
my $declarations = <<'END';
sub_def: scope(?) 'sub' name { ::add_function($item{name}) }
params props['is'] block
class_def: scope(?) 'class' name { ::add_class($item{name}) }
props['is'] block
method_def: 'method' name params props['is'] block
params: '(' (_params ',')(?) '*' <commit> '@' namepart ')'
| '(' <commit> _params(?) (';' _params)(?) ')'
| # nothing
_params: <leftop: _param ',' _param>
_param: maybe_scope_class variable props['is'] initializer(?)
initializer: hype['assign_op'] expr
maybe_scope_class: scope_class |
END
######################################################################
my $statements = <<'END';
prog: /\A/ stmts /\z/
| <error>
stmts: <leftop: stmt stmt_sep stmt> stmt_sep(?)
| # nothing
stmt_sep: ';'
| { ::saw_end_block($text) }
stmt: directive <commit> name
| sub_def
| class_def
| method_def
| expr guard(?)
directive: 'package' | 'module' | 'use'
guard: ('if' | 'unless' | 'while') <commit> scalar_expr
| 'for' expr
block: start_block '...' <commit> '}' { ::end_block($text) } ''
| start_block stmts '}' { ::end_block($text) } ''
start_block: <skip:''> /\s*(?<![^\n\s]){\s*/m
closure: closure_args(?) block
| <error>
closure_args: '->' '(' <commit> _closure_args(?) ')'
| '->' _closure_args(?)
| <error>
_closure_args: <leftop: maybe_comma['variable'] ';' comma['variable']>
END
######################################################################
my $wants = <<'END';
__fail__: <reject>
want_for_for: av_seq closure
want_for_given: scalar_expr closure
want_for_when: comma closure
want_for_default: closure
want_for_if: scalar_expr closure elsif(s?) else(?)
elsif: 'elsif' scalar_expr closure
else: 'else' closure
want_for_grep: scalar_expr comma
want_for_map: scalar_expr comma
END
######################################################################
# Pretty-printing:
sub pretty_sexp {
'('.join(' ', @_).')';
}
sub pretty { # don't die on literals
my $self = shift;
if (!ref $self) {
return qq{"$self"};
}
if (UNIVERSAL::can($self, '_pretty')) {
return $self->_pretty;
}
if (UNIVERSAL::isa($self, 'ARRAY')) {
if (@$self == 0) {
return '';
}
if (@$self == 1) {
return pretty($self->[0]);
}
return pretty_sexp ref($self), map { pretty($_) } @$self;
}
# We're a hash.
if ($self->{__done__}++) {
# warn "Already seen $self\n";
return '';
}
# try to do something intelligent...
if (exists $self->{__VALUE__}) {
if ($self->{__VALUE__} =~ /\S/) {
return pretty_sexp ref($self), $self->{__VALUE__};
} else {
return '';
}
}
my @things = grep /\S/, map { pretty($self->{$_}) }
grep !/__(?:RULE|done)__/, keys %$self;
if (@things == 0) {
return '';
} elsif (@things == 1) {
return $things[0];
} else {
return pretty_sexp $self->{__RULE__}, @things;
}
}
sub pretty_hard_to_see { '' }
for my $pkg (qw(start_block)) {
no strict 'refs';
*{"$pkg\::_pretty"} = \&pretty_hard_to_see;
}
sub hype::_pretty {
my $h = shift;
my $op = pretty($h->{'$arg[0]'});
if (exists $h->{__STRING1__}) {
return pretty_sexp 'hyped', $op;
}
return $op;
}
sub subscript::_pretty {
my $x = shift;
if (exists $x->{maybe_comma}) {
return pretty_sexp 'call', pretty($x->{maybe_comma});
}
if (exists $x->{av_seq}) {
return pretty_sexp '[]', pretty($x->{av_seq});
}
if (exists $x->{hv_indices}) {
return pretty($x->{hv_indices});
}
die "subscript:".Dumper($x);
}
sub adverb::_pretty {
my $x = shift;
if (ref $x->{adv_clause}) {
return pretty_sexp 'adverb:', pretty($x->{scalar_expr}),
pretty($x->{adv_clause});
}
return pretty($x->{scalar_expr});
}
sub adv_clause::_pretty {
my $x = shift;
if (exists $x->{comma}) {
return pretty($x->{comma});
}
return '';
}
sub maybe_namepart::_pretty {
my $x = shift;
if (exists $x->{namepart}) {
return pretty($x->{namepart});
}
return '';
}
sub lit_string::_pretty {
my $self = shift;
join '', @{$self->{__DIRECTIVE1__}}[1..3];
}
sub property::_pretty {
my $x = shift;
return pretty_sexp 'property', pretty($x->{namepart});
}
sub subscriptable::_pretty {
my $x = shift;
foreach my $k (qw(variable)) {
if (exists $x->{$k}) {
return pretty($x->{$k});
}
}
if (exists $x->{name}) {
return pretty_sexp 'call', pretty($x->{name}), pretty($x->{arglist});
}
if (exists $x->{namepart}) {
return pretty_sexp 'prop', pretty($x->{property});
}
if (exists $x->{__STRING1__}) {
return pretty_sexp 'array', pretty($x->{av_seq});
}
die "What's this subscriptable:\n".Dumper($x);
}
sub context::_pretty {
my $x = shift;
return $x->{__VALUE__} || $x->{__PATTERN1__};
}
sub term::_pretty {
my $x = shift;
if (exists $x->{context}) {
return pretty_sexp 'context', pretty($x->{context}),
pretty($x->{term});
}
foreach my $k (qw(class closure sv_literal)) {
if (exists $x->{$k}) {
return pretty($x->{$k});
}
}
if (exists $x->{subscript}) {
if (@{$x->{subscript}} > 0) {
return pretty_sexp 'subscript', pretty($x->{subscriptable}),
pretty($x->{subscript});
}
return pretty($x->{subscriptable});
}
if ($x->{__STRING1__} eq '<') {
# <BLAH>
return pretty_sexp 'readline', pretty($x->{expr});
}
if ($x->{__STRING1__} eq '(') {
return pretty_sexp 'expr', pretty($x->{av_seq});
}
die "what's this term:\n".Dumper($x);
}
sub variable::_pretty {
my $x = shift;
return pretty_sexp 'variable', pretty($x->{sigil}),
pretty($x->{varname});
}
sub apply_rhs::_pretty {
my $x = shift;
if (exists $x->{namepart}) {
return pretty_sexp '.', pretty($x->{namepart}),
pretty($x->{arglist}), pretty($x->{subscript});
}
if (exists $x->{subscript}) {
return pretty($x->{subscript});
}
die "apply_rhs:\n".Dumper($x);
}
sub but::_pretty {
my $x = shift;
return pretty_sexp 'but', pretty($x->{assign});
}
sub adv::_pretty {
my $x = shift;
return pretty_sexp ':', pretty($x->{comma});
}
sub block::_pretty {
my $x = shift;
if (exists $x->{stmts}) {
return pretty_sexp 'block', pretty($x->{stmts});
}
return pretty_sexp 'block', '...';
}
######################################################################
# Interaction
my %o;
(GetOptions(\%o, qw(dumper rule=s batch help cache trace))
&& !$o{help})
|| die <<END;
Usage: $0 [options]
--batch read batch on STDIN, write to STDOUT
--dumper use Data::Dumper to generate output
--rule NAME start with rule NAME (default = 'stmts')
--cache use precompiled grammar
In interactive mode, output is terminated by a blank line.
END
$::RD_TRACE = $o{trace};
$::rule = $o{rule} || 'prog';
my $parser;
my $gname = 'Perl6grammar';
if ($o{cache} && eval("require $gname")) {
$parser = eval "new $gname" or die "$gname: $@";
} else {
print STDERR "Constructing parser...";
use Parse::RecDescent;
my $header = <<'END';
{
$SIG{__DIE__} = sub { use Carp 'confess'; confess @_ };
}
<autotree>
END
if ($o{cache}) {
Parse::RecDescent->Precompile($header
.$variables
.$literals
.$operators
.$declarations
.$statements
.$wants,
# .$directives,
$gname);
eval "require $gname";
$parser = eval "new $gname";
} else {
$parser = new Parse::RecDescent($header
.$variables
.$literals
.$operators
.$declarations
.$statements
.$wants
# .$directives
);
}
print STDERR "done\n";
}
my $in = '';
if ($o{batch}) {
local $/ = undef;
$in = <STDIN>;
my $result = $parser->$::rule($in);
print pretty($result);
exit;
}
my $term = new Term::ReadLine;
my $prompt = '> ';
while (defined(my $l = $term->readline($prompt))) {
if ($in =~ /^:(.*)/) {
print eval $1, "\n";
$in = '';
next;
}
unless ($l =~ /^$/) {
$in .= "$l\n";
$prompt = '? ';
next;
}
print "as $::rule:\n";
my $result = $parser->$::rule($in);
print STDERR "done\n";
if ($o{dumper}) {
print Dumper $result;
} else {
if ($result) {
print pretty $result, "\n";
} else {
print "parse error\n";
}
}
print "\n";
$in = '';
$prompt = '> ';
}
Thread Previous
|
Thread Next