develooper Front page | perl.perl6.internals | Postings from June 2002

Re: Perl6 grammar (take 2)

Thread Previous | Thread Next
From:
Sean O'Rourke
Date:
June 25, 2002 22:26
Subject:
Re: Perl6 grammar (take 2)
Message ID:
Pine.GSO.4.33.0206252158450.27184-200000@beowulf.ucsd.edu
use Parse::RecDescent;
use Data::Dumper;
use strict;
$Data::Dumper::Terse = 1;
$Data::Dumper::Indent = 1;
use Term::ReadLine;
use vars qw(%FUNCS %CLASSES %KEYWORDS %BLOCKS %UNARY_OPS);

######################################################################
# primitive symbol table stuff

##############################
# Functions (list operators):
my @builtin_funcs = qw(warn die return fail true false);
@FUNCS{@builtin_funcs} = @builtin_funcs;

sub ::add_function {
    my $fname = shift->{__VALUE__};
    $FUNCS{$fname} = $fname;
}

sub ::find_function {
    my $f = shift;
    if (exists $f->{cv}) {
	return 'cv';
    }
    return $FUNCS{$f->{__VALUE__}};
}

##############################
# Unary operators
# XXX: Can't handle implicit $_ to a named unary 
my @unary_ops = qw(rand chdir chop chomp exp pop shift);
@UNARY_OPS{@unary_ops} = @unary_ops;
sub ::find_named_unary {
    my $o = shift;
    return $UNARY_OPS{$o->{__VALUE__}};
}

##############################
# Classes (builtin and otherwise)
my @builtin_types = qw(int real HASH ARRAY SCALAR);
@CLASSES{@builtin_types} = @builtin_types;

sub ::add_class {		# seen class.
    my $c = shift->{__VALUE__};
    $CLASSES{$c} = $c;
}
sub ::find_class {		# seen class?
    my $c = shift;
    return $CLASSES{$c->{__VALUE__}};
}

##############################
# Named blocks
my @special_blocks = qw(CATCH BEGIN END INIT NEXT LAST AUTOLOAD);
@BLOCKS{@special_blocks} = @special_blocks;

sub ::find_special_block {	# known special block
    my $b = shift;
    return $BLOCKS{$b->{__VALUE__}};
}

##############################
# HACK to distinguish between "my ($a, $b) ..." and "foo ($a, $b)".
# Don't need all keywords in here, but only the ones that cause
# problems.
@KEYWORDS{qw(my our temp for while default given when)} = 1;

sub ::not_keyword {
    my $f = shift->{__VALUE__};
    exists $KEYWORDS{$f} ? undef : 1;
}

my $parser = new Parse::RecDescent <<'END';
{
    use Regexp::Common;
}
<autotree>
END

######################################################################
my $literals = <<'END';
literal:  '(' <commit> (hv_seq | av_seq) ')'
	| sv_literal

sv_literal:	  lit_real | lit_string
		| hv_literal_ref | av_literal_ref

lit_real:	/(?:\d+(?:\.\d+)?|\.\d+)(?:[Ee]-?\d+)?/
lit_string:	<perl_quotelike>

av_seq:		semi['maybe_pair'] /[;,]?/
av_literal:	'(' av_seq ')'
av_literal_ref: '[' av_seq ']'

hv_seq:		<leftop: pair ',' pair> /,?/
hv_literal:	'(' hv_seq ')'
hv_literal_ref:	'{' hv_seq '}'
END

######################################################################
$::NAMEPART = qr/[a-zA-Z_][\w_]*/;
my $variables = <<'END';
variable: sigil varname

sigil: /[\@\%\$\&]/

sv: '$' varname
av: '@' varname
hv: '%' varname
cv: '&' varname

varname:  <skip:''> '{' <commit> <skip:$item[1]> varname '}'
	| name
	| sv
	| <skip:''> /[%^&*\$\#@!_\d]/

name:	  /(?:::)?$::NAMEPART(::$::NAMEPART)*/

namepart: /$::NAMEPART/

END

######################################################################
my $operators = <<'END';
hype:		  ('^')(?) <matchrule:$arg[0]>

subscript:	  av_index | hv_index
hv_index:	  <skip:''> '{' <skip:$item[1]> (/[\w_]+/ | expr) '}'
av_index:	  '[' expr ']'

term:		  '<' <commit> expr(?) '>'
		| /do|try/ <commit> block
		| closure
		| sv_literal
		| '(' <commit> hv_seq ')'

left_list_lhs:	  left_list_op '(' <commit> expr(?) ')'
		| '(' <commit> av_seq ')' subscript(s?)
		| variable <commit> subscript(s?)
		| class

left_list_rhs:	  av_index
		| hv_index
		| left_list_op '(' <commit> expr(?) ')'
		| namepart

left_list_op:	  name { ::not_keyword($item{name}) }
		| cv

apply:		  <leftop: left_list_lhs hype['apply_op'] left_list_rhs>
		| term

apply_op:	  '.'

incr:		  hype['incr_op'] apply
		| apply hype['incr_op'](?)
incr_op:	  '++' | '--'

pow:		  incr (hype['pow_op'] misc_unary)(?)
pow_op:		  '**'

misc_unary:	  hype['misc_unary_op'](s?) pow
misc_unary_op:	  '!' | '~' | '\\' | '*' | '_' | '?' | '.'
		| /\+(?!\+)/ | /-(?![->])/

match:		  <leftop: misc_unary hype['match_op'] misc_unary>
match_op:	  '=~' | '!~'

muldiv:		  <leftop: match hype['muldiv_op'] match>
muldiv_op:	  '*' | '/' | '%' | 'x'

addsub:		  <leftop: muldiv hype['addsub_op'] muldiv>
addsub_op:	  '+' | '-' | '_'

bitshift:	  <leftop: addsub hype['bitshift_op'] addsub>
bitshift_op:	  '<<' | '>>'

named_unary:	  named_unary_op(s?) bitshift
named_unary_op:   /-[rwxoRWXOezsfdlpSbctugkTBMAC]+/
		| namepart { ::find_named_unary($item[1]) }

compare:	  <leftop: named_unary hype['compare_op'] named_unary>
compare_op:	  '==' | '!=' | '<=>' | '<=' | '>=' | '<' | '>'
		| 'eq' | 'ne' | 'lt' | 'gt' | 'le' | 'ge' | 'cmp'

bitand:		  <leftop: compare hype['bitand_op'] compare>
bitand_op:	  '&'

bitor:		  <leftop: bitand hype['bitor_op'] bitand>
bitor_op:	  '|' | '~'

logand:		  <leftop: bitor hype['logand_op'] bitor>
logand_op:	  '&&'

logor:		  <leftop: logand hype['logor_op'] logand>
logor_op:	  '||' | '~~'

range:		  logor (range_op logor)(?)
range_op:	  '...' | '..'

ternary:	  range ('??' ternary '::' ternary)(?)

assign:		  <rightop: ternary hype['assign_op'] ternary> but(?)
assign_op:	  /[!:]?=/
		| assignable_op(?) <skip:''> '='
assignable_op:	  '//'
		| logand_op | logor_op
		| bitand_op | bitor_op | bitshift_op
		| addsub_op | muldiv_op | pow_op
but:		  'but' ternary

pair:		  name pair_op assign
		| logor pair_op assign
pair_op:	  '=>'

maybe_pair:	  pair | assign

comma:		  <leftop: <matchrule:$arg[0]> comma_op <matchrule:$arg[0]> >
comma_op:	  ','

semi:		  <leftop: comma[$arg[0]] semi_op comma[$arg[0]]>
semi_op:	  ';'

right_list:	  right_list_op ...!'(' <commit> comma['maybe_pair']
		| comma['maybe_pair']
right_list_op:	  cv
		| name { ::find_function($item{name}) }

adverb:		  ('not')(?) right_list (':' right_list)(?)

log_AND:	  <leftop: adverb hype['log_AND_op'] adverb>
log_AND_op:	  'and'

log_OR:		  <leftop: log_AND hype['log_OR_op'] log_AND>
log_OR_op:	  'or' | 'xor' | '//'

expr:		  log_OR

scalar_expr:	  logor

END

######################################################################
my $declarations = <<'END';
sub_def:	  scope(?) 'sub' name params(?) (property['is'])(s?) block
			{ ::add_function($item{name}) }

class_def:	  scope(?) 'class' name { ::add_class($item{name}) }
			(property['is'])(s?) block

method_def:	  'method' name params(?) (property['is'])(s?) block

var_def:	  scope(?) class(?) variable (property['is'])(s?)
			initializer(?)

vars_def:	  scope(?) class(?) '(' <leftop: variable ',' variable> ')'
			(property['are'])(s?) initializer(?)

scope:		  'my' | 'temp' | 'our'
class:		  name { ::find_class($item{name}) }

property:	  ("$arg[0]")(?) name ( '(' expr ')' )(?)

initializer:	  hype['assign_op'] ternary but(?)

params:		  '(' (_params ',')(?) '*' <commit> '@' namepart ')'
		| '(' _params(?) (';' _params)(?) ')'

_params:	  <leftop: var_def ',' var_def>

END

######################################################################
# XXX: completely incomplete
my $directives = <<'END';
directive:	  dirname name
dirname:	  'use' | 'package' | 'module'
nonblock_stmt:	  directive
END

######################################################################
my $statements = <<'END';
stmts:		  terminated_stmt(s?) unterminated_stmt
		| # nothing

terminated_stmt:  nonblock_stmt <commit> ';'
		| block_stmt block_terminator

unterminated_stmt: block_stmt <commit> block_terminator(?)
		| nonblock_stmt (';')(?)
block_terminator: ';' | <skip:''> /\s*$/

block_stmt:	  sub_def | class_def | method_def | block_control

nonblock_stmt:	  expr FOR <commit> expr
		| directive
		| <leftop: expr guard scalar_expr>
		| vars_def
		| var_def
guard:		  'if' | 'unless' | 'while'

block:		  start_block '...' <commit> '}'
		| start_block stmts '}'
start_block:	  <skip:''> /\s*(?<!\w){\s*/m
END

######################################################################
my $control = <<'END';
closure:	  '->' '(' <commit> <leftop: variable ',' variable>(?) ')'
			block
		| '->' <leftop: variable ',' variable> block

block_control: for | given | when | default | if_seq | while | named_block

for:		  FOR list_bind block
FOR:		  'for' | 'foreach'
list_bind:	  semi['maybe_pair'] ('->' semi['variable'])(?)

given:		  'given' scalar_expr (closure | block)
when:		  'when' comma['maybe_pair'] block
default:	  'default' block

if_seq:		  if elsif(s?) else(?)
if:		  'if' scalar_expr block
elsif:		  'elsif' scalar_expr block
else:		  'else' block

while:		  'while' scalar_expr block

named_block:	  namepart { ::find_special_block($item[1]) } block
END

$parser->Extend($variables
		.$literals
		.$operators
		.$declarations
		.$statements
		.$control
		.$directives);


######################################################################
# Pretty-printing:

sub pretty {			# don't die on literals
    my $self = shift;
    if (!ref $self) {
	return qq{"$self"};
    }
    if (UNIVERSAL::can($self, '_pretty')) {
	return $self->_pretty;
    }
    if (UNIVERSAL::isa($self, 'ARRAY')) {
	if (@$self == 0) {
	    return '';
	}
	if (@$self == 1) {
	    return pretty($self->[0]);
	}
	return '('.join(' ', ref($self), map { pretty($_) } @$self).')';
    }
    # We're a hash.
    if ($self->{__done__}++) {
	return '';
    }
    # try to do something intelligent...
    if (exists $self->{__VALUE__}) {
	if ($self->{__VALUE__} =~ /\S/) {
	    return '('.ref($self)." $self->{__VALUE__})";
	} else {
	    return '';
	}
    }
    my @things = grep /\S/, map { pretty($self->{$_}) }
	grep !/__(?:RULE|done)__/, keys %$self;
    if (@things == 0) {
	return '';
    } elsif (@things == 1) {
	return $things[0];
    } else {
	return "($self->{__RULE__} ".(join ' ', @things).')';
    }
}

sub pretty_hard_to_see { '' }

for my $pkg (qw(block_terminator start_block)) {
    no strict 'refs';
    *{"$pkg\::_pretty"} = \&pretty_hard_to_see;
}

sub hype::_pretty {
    my $h = shift;
    $h->{"'^'"}[0].pretty($h->{'$arg[0]'});
}
sub lit_string::_pretty {
    my $self = shift;
    join '', @{$self->{__DIRECTIVE1__}}[1..3];
}

sub left_list::_pretty {
    my $x = shift;
    foreach my $k (qw(sv_literal closure)) {
	if (exists $x->{$k}) {
	    return pretty($x->{$k});
	}
    }
    if (exists $x->{hv_seq}) {
	return '(hash '.pretty($x->{hv_seq}).')';
    }
    foreach my $k (qw(av_seq variable)) {
	if (exists $x->{$k}) {
	    my $ret = pretty($x->{$k});
	    if (exists ($x->{subscript})) {
		my $tmp = pretty($x->{subscript});
		if ($tmp =~ /\S/) {
		    return "(subscript $ret $tmp)";
		}
	    }
	    return $ret;
	}
    }
    if ($x->{left_list_op}) {
	return '(apply '.pretty($x->{left_list_op})
	    .pretty($x->{expr} || $x->{comma}).')';
    }
    if (exists $x->{class}) {
	return pretty($x->{class});
    }
    if (exists $x->{expr}) {
	# <BLAH>
	return "(readline ".pretty($x->{expr}).')';
    }
    die "left_list:\n".Dumper($x);
}

# sub namepart::_pretty { shift->{__DIRECTIVE1__} }

######################################################################
# Interaction

my $term = new Term::ReadLine;
my $rule = 'unterminated_stmt';
while (defined(local $_ = $term->readline('> '))) {
    if (/^:(.*)/) {
	print eval $1;
    } else {
	print "as $rule:\n";
	my $result = $parser->$rule($_);
	if ($::USE_DUMPER) {
	    print Dumper $result;
	} else {
	    if ($result) {
		print pretty $result, "\n";
	    } else {
		print "parse error\n";
	    }
	}
    }
    print "\n";
}

Thread Previous | Thread Next


nntp.perl.org: Perl Programming lists via nntp and http.
Comments to Ask Bjørn Hansen at ask@perl.org | Group listing | About