Mercurial > pub > Impl
view Lib/BNFCompiler.pm @ 40:ac21a032e7a9
bnf parser in progress
author | Sergey |
---|---|
date | Thu, 10 Dec 2009 17:43:39 +0300 |
parents | 03e58a454b20 |
children | 16ada169ca75 |
line wrap: on
line source
package BNFCompiler; package BNFCompiler::DOM; package BNFCompiler::DOM::Builder; package BNFCompiler::DOM::Node; use strict; package BNFCompiler::EventParser; use strict; use lib '.'; use Common; our @ISA = qw(Object); our $EventMapSchema = { Description => 'Parser events', Type => 'HASH', Values => 'SCALAR' }; BEGIN { DeclareProperty(EventMap => ACCESS_READ); DeclareProperty(CompiledEvents => ACCESS_NONE); DeclareProperty(Handler => ACCESS_ALL); } sub CTOR { my ($this,%args) = @_; $this->SUPER::CTOR(%args); } sub Compile { my ($this) = @_; delete $this->{$CompiledEvents}; while (my ($key,$val) = each %{$this->{$EventMap}}) { $this->{$CompiledEvents}{$key} = qr/\G$val/; } 1; } sub Parse { my ($this,$data) = @_; my $StateData; OUTER: while(pos($data) < length($data)) { keys %{$this->{$CompiledEvents}}; while (my ($event,$match) = each %{$this->{$CompiledEvents}}) { if ($data =~ m/($match)/gc) { $StateData .= $1; eval { undef $StateData if $this->{$Handler}->($event,$StateData); }; if ($@) { die ["Invalid syntax","unexpected $event: $1",pos($data)]; } next OUTER; } } die ["Invalid syntax",substr($data,pos($data),10),pos($data)]; } return 1; } # небольшая уловка, поскольку ref от регулярного выражения есть Regexp, можно поставить хуки package Regexp; use Data::Dumper; sub STORABLE_freeze { my ($obj,$cloning) = @_; return $obj; } sub STORABLE_attach { my($class, $cloning, $serialized) = @_; return qr/$serialized/; } package BNFCompiler; use Common; use Storable; use Data::Dumper; our @ISA = qw(Object); our $BNFSchema; my $ParseAgainstSchema; my $TransformDOMToBNF; BEGIN { DeclareProperty(Schema => ACCESS_NONE); DeclareProperty(SchemaCache => ACCESS_NONE); DeclareProperty(Transform => ACCESS_NONE); } sub CTOR { my $this = shift; $this->SUPER::CTOR(@_); $this->{$SchemaCache} .= '/' if ($this->{$SchemaCache} and not $this->{$SchemaCache} =~ /\/$/); } { my $compiledBNFSchema; sub LoadBNFSchema { my ($this,%args) = @_; my $CompileBNFText = sub { my ($this,$text) = @_; my %SchemaDOM; foreach my $item (split /\n{2,}/, $text) { next if not $item; $compiledBNFSchema = CompileBNFSchema($BNFSchema) if not $compiledBNFSchema; my $context = new BNFCompiler::DOM::Builder(); eval { my $expr = &$ParseAgainstSchema($compiledBNFSchema,$item,$context); die ["Unexpected expression", $expr] if $expr; }; if ($@) { if (ref $@ eq 'ARRAY') { die new Exception(@{$@}); } else { die $@; } } $SchemaDOM{$context->Document->selectNodes('name')->text()} = &$TransformDOMToBNF($context->Document->selectNodes('def')); } $SchemaDOM{'separator'} = ['re:\\s+']; $this->{$Schema} = CompileBNFSchema(\%SchemaDOM); }; my $text; if ($args{'file'}) { my $fnameCached; if ($this->{$SchemaCache}) { my $fname = $args{'file'}; $fname =~ tr/\//_/; $fnameCached = $this->{$SchemaCache}.$fname.'.cbs'; if ( -e $fnameCached && -f $fnameCached && ( -M $args{'file'} >= -M $fnameCached )) { my $compiledSchema = retrieve($fnameCached); if ($compiledSchema) { $this->{$Schema} = $compiledSchema; return 1; } else { unlink $fnameCached; } } } open my $hFile, '<', $args{'file'} or die new Exception("Failed to open file",$args{'file'},$!); local $/ = undef; my $text = <$hFile>; $this->$CompileBNFText($text); if ($fnameCached) { store($this->{$Schema},$fnameCached); } } elsif ($args{'Schema'}) { $this->{$Schema} = CompileBNFSchema($args{'Schema'}); return 1; } elsif ($args{'text'}) { $this->$CompileBNFText( $args{'text'} ); } else { die new Exception("'file', 'text' or 'Schema' parameter required"); } } } sub Parse { my ($this, $string, %flags) = @_; my $context = new BNFCompiler::DOM::Builder; eval { my $err; $err = &$ParseAgainstSchema($this->{$Schema},$string,$context,\%flags) and die new Exception('Failed to parse',substr($err,0,80).' ...'); }; if ($@) { if (ref $@ eq 'ARRAY') { die new Exception(@{$@}); } else { die $@; } } if (not $this->{$Transform}) { return $context->Document; } else { return $this->{$Transform}->($context->Document); } } sub Dispose { my ($this) = shift; CleanSchema($this->{$Schema}); delete @$this{$Schema, $Transform}; $this->SUPER::Dispose; } sub CleanSchema { my ($schema,$table) = @_; UNIVERSAL::isa($schema,'ARRAY') or return; $table or $table = { $schema, 1}; for(my $i=0; $i<@$schema;$i++) { my $item = $schema->[$i]; if (ref $item) { next if $table->{$item}; $table->{$item} = 1; if (UNIVERSAL::isa($item,'ARRAY')) { CleanSchema($item,$table); } elsif( UNIVERSAL::isa($item,'HASH')) { CleanSchema($item->{'syntax'},$table); } undef $schema->[$i]; } } } sub OPT { return bless [@_], 'OPT'; } sub SWITCH { return bless [@_], 'SWITCH'; } sub REPEAT { return bless [@_], 'REPEAT'; } $TransformDOMToBNF = sub { my ($nodeRoot) = @_; return [grep $_, map { my $nodeName = $_->nodeName; if (not $nodeName ){ my $obj = $_; $obj->text() if (not( grep { $obj->text() eq $_} ('{', '}', '[', ']') ) ); }elsif($nodeName eq 'name') { $_->text(); } elsif ($nodeName eq 'separator') { OPT('separator'); } elsif ($nodeName eq 'or_sep') { # nothing } elsif ($nodeName eq 'switch_part') { &$TransformDOMToBNF($_); } elsif ($nodeName eq 'class') { my $class = $_->childNodes->[0]->text(); $class =~ s{(^<|>$|\\.|[\]\[])}{ my $char = { '>' => '', '<' => '', '[' => '\\[', ']' => '\\]', '\\\\' => '\\\\'}->{$1}; defined $char ? $char : ($1 =~ tr/\\// && $1); }ge; $class = '['.$class.']'; $class .= $_->childNodes->[1]->text() if $_->childNodes->[1]; 're:'.$class; } elsif ($nodeName eq 'symbol') { $_->text(); } elsif ($nodeName eq 'simple') { @{&$TransformDOMToBNF($_)}; } elsif ($nodeName eq 'multi_def') { @{&$TransformDOMToBNF($_)}; } elsif ($nodeName eq 'optional') { my $multi_def = &$TransformDOMToBNF($_); if ($multi_def->[scalar(@{$multi_def})-1] eq '...') { pop @{$multi_def}; OPT(REPEAT(@{$multi_def})); } else { OPT(@{$multi_def}); } } elsif ($nodeName eq 'switch') { SWITCH(@{&$TransformDOMToBNF($_)}); } elsif ($nodeName eq 'def') { @{&$TransformDOMToBNF($_)}; } else{ die "unknown nodeName: $nodeName"; } } @{$nodeRoot->childNodes}]; }; $BNFSchema = { syntax => ['name',OPT('separator'),'::=',OPT('separator'),'def'], name => ['re:\\w+'], class => ['re:<([^<>\\\\]|\\\\.)+>',OPT('re:\\*|\\+|\\?|\\{\\d+\\}')], symbol => ['re:[^\\w\\d\\s\\[\\]{}<>\\\\|]+'], separator => ['re:\\s+'], simple => [ SWITCH( 'name', 'class', 'symbol' ) ], multi_def => [ OPT('separator'), SWITCH('...',[SWITCH('simple','optional','switch'),OPT('multi_def')]) ], optional => [ '[','multi_def', OPT('separator') ,']' ], keyword => [], or_sep => ['|'], switch_part => [OPT('separator'),SWITCH('simple','optional','switch'),OPT(REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))),OPT('separator')], switch => [ '{','switch_part',OPT(REPEAT('or_sep','switch_part')),'}' ], def => [REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))] }; my $CompileTerm; $CompileTerm = sub { my ($term,$Schema,$cache,$ref) = @_; my $compiled = ref $term eq 'ARRAY' ? ($ref or []) : bless (($ref or []), ref $term); die new Exception("Invalid term type $term", $term, ref $term) if not grep ref $term eq $_, qw(ARRAY REPEAT SWITCH OPT); foreach my $element (@{$term}) { if (ref $element) { push @{$compiled}, &$CompileTerm($element,$Schema,$cache); } else { if($element =~/^\w+$/) { if (exists $Schema->{$element}) { # reference my $compiledUnit; if (exists $cache->{$element}) { $compiledUnit = $cache->{$element}; } else { $compiledUnit = []; $cache->{$element} = $compiledUnit; &$CompileTerm($Schema->{$element},$Schema,$cache,$compiledUnit); } push @{$compiled},{ name => $element, syntax => $compiledUnit}; } else { # simple word push @{$compiled}, $element; } } elsif ($element =~ /^re:(.*)/){ # regexp push @{$compiled},qr/\G(?:$1)/; } else { # char sequence push @{$compiled},$element; } } } return $compiled; }; sub CompileBNFSchema { my($Schema) = @_; my %Cache; return &$CompileTerm($Schema->{'syntax'},$Schema,\%Cache); } my $CompiledSchema = CompileBNFSchema($BNFSchema); $ParseAgainstSchema = sub { my ($Schema,$expression,$context,$flags,$level) = @_; $level = 0 if not defined $level; my $dbgPrint = $flags->{debug} ? sub { print "\t" x $level, @_,"\n"; } : sub {}; foreach my $elem (@{$Schema}) { my $type = ref $elem; $expression = substr $expression,pos($expression) if $type ne 'Regexp' and pos($expression); if ($type eq 'HASH') { $context->NewContext($elem->{'name'}); &$dbgPrint("$elem->{name} ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem->{'syntax'}})); eval { $expression = &$ParseAgainstSchema($elem->{'syntax'},$expression,$context,$flags,$level+1); }; if ($@) { $context->EndContext(0); &$dbgPrint("/$elem->{name} ", "0"); die $@; } else { &$dbgPrint("/$elem->{name} ", "1"); $context->EndContext(1); } } elsif ($type eq 'ARRAY') { &$dbgPrint("entering ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); &$dbgPrint("success"); } elsif ($type eq 'OPT') { if (defined $expression) { &$dbgPrint("optional ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); eval { $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); }; if ($@) { &$dbgPrint("failed"); undef $@; } else { &$dbgPrint("success"); } } } elsif ($type eq 'SWITCH') { my $success = 0; &$dbgPrint("switch"); LOOP_SWITCH: foreach my $subelem (@{$elem}) { eval { &$dbgPrint("\ttry ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ } @{(grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem]})); $expression = &$ParseAgainstSchema((grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem],$expression,$context,$flags,$level+1); $success = 1; }; if ($@) { undef $@; } else { last LOOP_SWITCH; } } if ($success) { &$dbgPrint("success"); } else { &$dbgPrint("failed"); die ["syntax error",$expression,$elem]; } } elsif ($type eq 'REPEAT') { my $copy = [@{$elem}]; my $i = 0; &$dbgPrint("repeat ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); while (1) { eval { $expression = &$ParseAgainstSchema($copy,$expression,$context,$flags,$level+1); $i++; }; if ($@) { if (not $i) { &$dbgPrint("failed"); die $@; } &$dbgPrint("found $i matches"); undef $@; last; } } } elsif ($type eq 'Regexp') { my $posPrev = pos($expression) || 0; if ( $expression =~ m/($elem)/ ) { $context->Data($1); pos($expression) = $posPrev+length($1); &$dbgPrint("Regexp: $1 $elem ", pos($expression)); } else { &$dbgPrint("Regexp: $elem failed"); die ["syntax error",$expression,$elem,$posPrev]; pos($expression) = $posPrev; } } else { if ((my $val = substr($expression, 0, length($elem),'')) eq $elem) { &$dbgPrint("Scalar: $val"); $context->Data($elem); } else { &$dbgPrint("Scalar: failed $val expected $elem"); die ["syntax error",$val.$expression,$elem]; } } } if (pos $expression) { return substr $expression,(pos($expression) || 0); } else { return $expression; } }; package BNFCompiler::DOM::Node; use Common; our @ISA = qw(Object); sub NODE_TEXT { 1 } sub NODE_ELEM { 2 } BEGIN { DeclareProperty(nodeName => ACCESS_READ); DeclareProperty(nodeType => ACCESS_READ); DeclareProperty(nodeValue => ACCESS_READ); DeclareProperty(childNodes => ACCESS_READ); DeclareProperty(isComplex => ACCESS_READ); } sub CTOR { my ($this,%args) = @_; $args{'nodeType'} = NODE_ELEM if not $args{'nodeType'}; die new Exception("Invalid args. nodeName reqired.") if $args{'nodeType'} == NODE_ELEM and not $args{nodeName}; #for speed reason #$this->SUPER::CTOR(%args); $this->{$nodeName} = $args{'nodeName'} if $args{'nodeName'}; $this->{$nodeType} = $args{'nodeType'}; $this->{$nodeValue} = $args{'nodeValue'} if exists $args{'nodeValue'}; $this->{$isComplex} = 0; } sub insertNode { my ($this,$node,$pos) = @_; die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; die new Exception("Invalid node type",ref $node) if ref $node ne __PACKAGE__; $this->{$childNodes} = [] if not $this->{$childNodes}; $pos = scalar(@{$this->{$childNodes}}) if not defined $pos; die new Exception("Index out of range",$pos) if $pos > scalar(@{$this->{$childNodes}}) or $pos < 0; splice @{$this->{$childNodes}},$pos,0,$node; $this->{$isComplex} = 1 if not $this->{$isComplex} and $node->{$nodeType} == NODE_ELEM; return $node; } sub removeNode { my ($this,$node) = @_; die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; @{$this->{$childNodes}} = grep { $_ != $node } @{$this->{$childNodes}}; return $node; } sub removeAt { my ($this,$pos) = @_; die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; die new Exception("Index out of range",$pos) if $pos >= scalar(@{$this->{$childNodes}}) or $pos < 0; return splice @{$this->{$childNodes}},$pos,1; } sub selectNodes { my ($this,$name) = @_; die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; my @nodes = grep { $_->{$nodeType} == NODE_ELEM and $_->{$nodeName} eq $name } @{$this->{$childNodes}}; if (wantarray) { return @nodes; } else { return shift @nodes; } } sub text { my $this = shift; if ($this->{$nodeType} == NODE_TEXT) { return $this->{$nodeValue}; } else { my @texts; foreach my $node (@{$this->{$childNodes}}) { push @texts, $node->{$nodeValue} if ($node->{$nodeType}==NODE_TEXT); } if (wantarray) { return @texts; } else { return join '',@texts; } } } package BNFCompiler::DOM::Builder; use Common; our @ISA=qw(Object); BEGIN { DeclareProperty(Document => ACCESS_READ); DeclareProperty(currentNode => ACCESS_NONE); DeclareProperty(stackNodes => ACCESS_NONE); } sub CTOR { my $this = shift; $this->{$Document} = new BNFCompiler::DOM::Node(nodeName => 'Document', nodeType => BNFCompiler::DOM::Node::NODE_ELEM); $this->{$currentNode} = $this->{$Document}; } sub NewContext { my ($this,$contextName) = @_; push @{$this->{$stackNodes}},$this->{$currentNode}; $this->{$currentNode} = new BNFCompiler::DOM::Node(nodeName => $contextName, nodeType=> BNFCompiler::DOM::Node::NODE_ELEM); return 1; } sub EndContext{ my ($this,$isNotEmpty) = @_; if ($isNotEmpty) { my $child = $this->{$currentNode}; $this->{$currentNode} = pop @{$this->{$stackNodes}}; $this->{$currentNode}->insertNode($child); } else { $this->{$currentNode} = pop @{$this->{$stackNodes}}; } } sub Data { my ($this,$data) = @_; $this->{$currentNode}->insertNode(new BNFCompiler::DOM::Node(nodeType=> BNFCompiler::DOM::Node::NODE_TEXT, nodeValue => $data)); } package BNFCompiler::DOM; sub TransformDOMToHash { my ($root,$options) = @_; my %content; if (not $root->childNodes) { die; } foreach my $child (@{$root->childNodes}) { if ($child->nodeType == BNFCompiler::DOM::Node::NODE_ELEM) { my @newValue; my $nodeName = $child->nodeName; next if $nodeName eq 'separator' and $options->{'skip_spaces'}; if ($child->isComplex) { $newValue[0] = TransformDOMToHash($child,$options); } else { @newValue = $child->text() } if ($options->{'use_arrays'}) { push @{$content{$nodeName}},@newValue; } if (exists $content{$nodeName}) { if (ref $content{$nodeName} eq 'ARRAY') { push @{$content{$nodeName}}, @newValue; } else { $content{$nodeName} = [$content{$nodeName},@newValue]; } } else { $content{$nodeName} = $newValue[0] if scalar(@newValue) == 1; $content{$nodeName} = \@newValue if scalar(@newValue) > 1; } } else { next if $options->{'skip_text'}; push @{$content{'_text'}},$child->nodeValue(); } } return \%content; } 1;