Mercurial > pub > Impl
diff Lib/BNFCompiler.pm @ 0:03e58a454b20
Создан репозитарий
author | Sergey |
---|---|
date | Tue, 14 Jul 2009 12:54:37 +0400 |
parents | |
children | 16ada169ca75 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/BNFCompiler.pm Tue Jul 14 12:54:37 2009 +0400 @@ -0,0 +1,666 @@ +package BNFCompiler; +package BNFCompiler::DOM; +package BNFCompiler::DOM::Builder; +package BNFCompiler::DOM::Node; +use strict; + +package BNFCompiler::EventParser; +use strict; +use lib '.'; +use Common; +our @ISA = qw(Object); + +our $EventMapSchema = { + Description => 'Parser events', + Type => 'HASH', + Values => 'SCALAR' +}; + +BEGIN { + DeclareProperty(EventMap => ACCESS_READ); + DeclareProperty(CompiledEvents => ACCESS_NONE); + DeclareProperty(Handler => ACCESS_ALL); +} + +sub CTOR { + my ($this,%args) = @_; + $this->SUPER::CTOR(%args); +} + +sub Compile { + my ($this) = @_; + + delete $this->{$CompiledEvents}; + while (my ($key,$val) = each %{$this->{$EventMap}}) { + $this->{$CompiledEvents}{$key} = qr/\G$val/; + } + 1; +} + +sub Parse { + my ($this,$data) = @_; + + my $StateData; + OUTER: while(pos($data) < length($data)) { + keys %{$this->{$CompiledEvents}}; + while (my ($event,$match) = each %{$this->{$CompiledEvents}}) { + if ($data =~ m/($match)/gc) { + $StateData .= $1; + eval { + undef $StateData if $this->{$Handler}->($event,$StateData); + }; + if ($@) { + die ["Invalid syntax","unexpected $event: $1",pos($data)]; + } + next OUTER; + } + } + die ["Invalid syntax",substr($data,pos($data),10),pos($data)]; + } + + return 1; +} + +# íåáîëüøàÿ óëîâêà, ïîñêîëüêó ref îò ðåãóëÿðíîãî âûðàæåíèÿ åñòü Regexp, ìîæíî ïîñòàâèòü õóêè +package Regexp; +use Data::Dumper; + +sub STORABLE_freeze { + my ($obj,$cloning) = @_; + + return $obj; +} + +sub STORABLE_attach { + my($class, $cloning, $serialized) = @_; + return qr/$serialized/; +} + +package BNFCompiler; +use Common; +use Storable; +use Data::Dumper; +our @ISA = qw(Object); + +our $BNFSchema; +my $ParseAgainstSchema; +my $TransformDOMToBNF; + +BEGIN { + DeclareProperty(Schema => ACCESS_NONE); + DeclareProperty(SchemaCache => ACCESS_NONE); + DeclareProperty(Transform => ACCESS_NONE); +} + +sub CTOR { + my $this = shift; + $this->SUPER::CTOR(@_); + + $this->{$SchemaCache} .= '/' if ($this->{$SchemaCache} and not $this->{$SchemaCache} =~ /\/$/); +} +{ + my $compiledBNFSchema; + sub LoadBNFSchema { + my ($this,%args) = @_; + + my $CompileBNFText = sub { + my ($this,$text) = @_; + + my %SchemaDOM; + foreach my $item (split /\n{2,}/, $text) { + next if not $item; + $compiledBNFSchema = CompileBNFSchema($BNFSchema) if not $compiledBNFSchema; + my $context = new BNFCompiler::DOM::Builder(); + eval { + my $expr = &$ParseAgainstSchema($compiledBNFSchema,$item,$context); + die ["Unexpected expression", $expr] if $expr; + }; + if ($@) { + if (ref $@ eq 'ARRAY') { + die new Exception(@{$@}); + } else { + die $@; + } + } + + $SchemaDOM{$context->Document->selectNodes('name')->text()} = &$TransformDOMToBNF($context->Document->selectNodes('def')); + + } + + $SchemaDOM{'separator'} = ['re:\\s+']; + $this->{$Schema} = CompileBNFSchema(\%SchemaDOM); + }; + + my $text; + if ($args{'file'}) { + + my $fnameCached; + if ($this->{$SchemaCache}) { + my $fname = $args{'file'}; + $fname =~ tr/\//_/; + $fnameCached = $this->{$SchemaCache}.$fname.'.cbs'; + if ( -e $fnameCached && -f $fnameCached && ( -M $args{'file'} >= -M $fnameCached )) { + my $compiledSchema = retrieve($fnameCached); + if ($compiledSchema) { + $this->{$Schema} = $compiledSchema; + return 1; + } else { + unlink $fnameCached; + } + } + } + open my $hFile, '<', $args{'file'} or die new Exception("Failed to open file",$args{'file'},$!); + local $/ = undef; + my $text = <$hFile>; + + $this->$CompileBNFText($text); + + if ($fnameCached) { + store($this->{$Schema},$fnameCached); + } + } elsif ($args{'Schema'}) { + $this->{$Schema} = CompileBNFSchema($args{'Schema'}); + return 1; + } elsif ($args{'text'}) { + $this->$CompileBNFText( $args{'text'} ); + } else { + die new Exception("'file', 'text' or 'Schema' parameter required"); + } + + } +} + +sub Parse { + my ($this, $string, %flags) = @_; + + my $context = new BNFCompiler::DOM::Builder; + + eval { + my $err; + $err = &$ParseAgainstSchema($this->{$Schema},$string,$context,\%flags) and die new Exception('Failed to parse',substr($err,0,80).' ...'); + }; + if ($@) { + if (ref $@ eq 'ARRAY') { + die new Exception(@{$@}); + } else { + die $@; + } + } + if (not $this->{$Transform}) { + return $context->Document; + } else { + return $this->{$Transform}->($context->Document); + } +} + +sub Dispose { + my ($this) = shift; + CleanSchema($this->{$Schema}); + delete @$this{$Schema, $Transform}; + $this->SUPER::Dispose; +} + +sub CleanSchema { + my ($schema,$table) = @_; + + UNIVERSAL::isa($schema,'ARRAY') or return; + $table or $table = { $schema, 1}; + + for(my $i=0; $i<@$schema;$i++) { + my $item = $schema->[$i]; + if (ref $item) { + next if $table->{$item}; + $table->{$item} = 1; + if (UNIVERSAL::isa($item,'ARRAY')) { + CleanSchema($item,$table); + } elsif( UNIVERSAL::isa($item,'HASH')) { + CleanSchema($item->{'syntax'},$table); + } + undef $schema->[$i]; + } + } +} + + +sub OPT { + return bless [@_], 'OPT'; +} + +sub SWITCH { + return bless [@_], 'SWITCH'; +} + +sub REPEAT { + return bless [@_], 'REPEAT'; +} + +$TransformDOMToBNF = sub { + my ($nodeRoot) = @_; + + return [grep $_, map { + my $nodeName = $_->nodeName; + if (not $nodeName ){ + my $obj = $_; + $obj->text() if (not( grep { $obj->text() eq $_} ('{', '}', '[', ']') ) ); + }elsif($nodeName eq 'name') { + $_->text(); + } elsif ($nodeName eq 'separator') { + OPT('separator'); + } elsif ($nodeName eq 'or_sep') { + # nothing + } elsif ($nodeName eq 'switch_part') { + &$TransformDOMToBNF($_); + } elsif ($nodeName eq 'class') { + my $class = $_->childNodes->[0]->text(); + + $class =~ s{(^<|>$|\\.|[\]\[])}{ + my $char = { '>' => '', '<' => '', '[' => '\\[', ']' => '\\]', '\\\\' => '\\\\'}->{$1}; + defined $char ? $char : ($1 =~ tr/\\// && $1); + }ge; + $class = '['.$class.']'; + $class .= $_->childNodes->[1]->text() if $_->childNodes->[1]; + 're:'.$class; + } elsif ($nodeName eq 'symbol') { + $_->text(); + } elsif ($nodeName eq 'simple') { + @{&$TransformDOMToBNF($_)}; + } elsif ($nodeName eq 'multi_def') { + @{&$TransformDOMToBNF($_)}; + } elsif ($nodeName eq 'optional') { + my $multi_def = &$TransformDOMToBNF($_); + if ($multi_def->[scalar(@{$multi_def})-1] eq '...') { + pop @{$multi_def}; + OPT(REPEAT(@{$multi_def})); + } else { + OPT(@{$multi_def}); + } + } elsif ($nodeName eq 'switch') { + SWITCH(@{&$TransformDOMToBNF($_)}); + } elsif ($nodeName eq 'def') { + @{&$TransformDOMToBNF($_)}; + } else{ + die "unknown nodeName: $nodeName"; + } + } @{$nodeRoot->childNodes}]; +}; + +$BNFSchema = { + syntax => ['name',OPT('separator'),'::=',OPT('separator'),'def'], + name => ['re:\\w+'], + class => ['re:<([^<>\\\\]|\\\\.)+>',OPT('re:\\*|\\+|\\?|\\{\\d+\\}')], + symbol => ['re:[^\\w\\d\\s\\[\\]{}<>\\\\|]+'], + separator => ['re:\\s+'], + simple => [ + SWITCH( + 'name', + 'class', + 'symbol' + ) + ], + multi_def => [ + OPT('separator'), SWITCH('...',[SWITCH('simple','optional','switch'),OPT('multi_def')]) + ], + optional => [ + '[','multi_def', OPT('separator') ,']' + + ], + keyword => [], + or_sep => ['|'], + switch_part => [OPT('separator'),SWITCH('simple','optional','switch'),OPT(REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))),OPT('separator')], + switch => [ + '{','switch_part',OPT(REPEAT('or_sep','switch_part')),'}' + ], + def => [REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))] +}; + +my $CompileTerm; +$CompileTerm = sub { + my ($term,$Schema,$cache,$ref) = @_; + + my $compiled = ref $term eq 'ARRAY' ? ($ref or []) : bless (($ref or []), ref $term); + + die new Exception("Invalid term type $term", $term, ref $term) if not grep ref $term eq $_, qw(ARRAY REPEAT SWITCH OPT); + + foreach my $element (@{$term}) { + if (ref $element) { + push @{$compiled}, &$CompileTerm($element,$Schema,$cache); + } else { + if($element =~/^\w+$/) { + if (exists $Schema->{$element}) { + # reference + my $compiledUnit; + if (exists $cache->{$element}) { + $compiledUnit = $cache->{$element}; + } else { + $compiledUnit = []; + $cache->{$element} = $compiledUnit; + &$CompileTerm($Schema->{$element},$Schema,$cache,$compiledUnit); + } + + push @{$compiled},{ name => $element, syntax => $compiledUnit}; + } else { + # simple word + push @{$compiled}, $element; + } + } elsif ($element =~ /^re:(.*)/){ + # regexp + push @{$compiled},qr/\G(?:$1)/; + } else { + # char sequence + push @{$compiled},$element; + } + } + } + + return $compiled; +}; + +sub CompileBNFSchema { + my($Schema) = @_; + + my %Cache; + return &$CompileTerm($Schema->{'syntax'},$Schema,\%Cache); +} + +my $CompiledSchema = CompileBNFSchema($BNFSchema); + +$ParseAgainstSchema = sub { + my ($Schema,$expression,$context,$flags,$level) = @_; + + $level = 0 if not defined $level; + my $dbgPrint = $flags->{debug} ? sub { + print "\t" x $level, @_,"\n"; + } : sub {}; + + foreach my $elem (@{$Schema}) { + my $type = ref $elem; + $expression = substr $expression,pos($expression) if $type ne 'Regexp' and pos($expression); + + if ($type eq 'HASH') { + $context->NewContext($elem->{'name'}); + &$dbgPrint("$elem->{name} ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem->{'syntax'}})); + eval { + $expression = &$ParseAgainstSchema($elem->{'syntax'},$expression,$context,$flags,$level+1); + }; + if ($@) { + $context->EndContext(0); + &$dbgPrint("/$elem->{name} ", "0"); + die $@; + } else { + &$dbgPrint("/$elem->{name} ", "1"); + $context->EndContext(1); + } + } elsif ($type eq 'ARRAY') { + &$dbgPrint("entering ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); + $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); + &$dbgPrint("success"); + } elsif ($type eq 'OPT') { + if (defined $expression) { + &$dbgPrint("optional ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); + eval { + $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); + }; + if ($@) { + &$dbgPrint("failed"); + undef $@; + } else { + &$dbgPrint("success"); + } + } + } elsif ($type eq 'SWITCH') { + my $success = 0; + &$dbgPrint("switch"); + LOOP_SWITCH: foreach my $subelem (@{$elem}) { + eval { + &$dbgPrint("\ttry ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ } @{(grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem]})); + $expression = &$ParseAgainstSchema((grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem],$expression,$context,$flags,$level+1); + $success = 1; + }; + if ($@) { + undef $@; + } else { + last LOOP_SWITCH; + } + } + if ($success) { + &$dbgPrint("success"); + } else { + &$dbgPrint("failed"); + die ["syntax error",$expression,$elem]; + } + } elsif ($type eq 'REPEAT') { + my $copy = [@{$elem}]; + my $i = 0; + &$dbgPrint("repeat ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); + while (1) { + eval { + $expression = &$ParseAgainstSchema($copy,$expression,$context,$flags,$level+1); + $i++; + }; + if ($@) { + if (not $i) { + &$dbgPrint("failed"); + die $@; + } + &$dbgPrint("found $i matches"); + undef $@; + last; + } + } + } elsif ($type eq 'Regexp') { + my $posPrev = pos($expression) || 0; + if ( $expression =~ m/($elem)/ ) { + $context->Data($1); + pos($expression) = $posPrev+length($1); + &$dbgPrint("Regexp: $1 $elem ", pos($expression)); + } else { + &$dbgPrint("Regexp: $elem failed"); + die ["syntax error",$expression,$elem,$posPrev]; + pos($expression) = $posPrev; + } + } else { + if ((my $val = substr($expression, 0, length($elem),'')) eq $elem) { + &$dbgPrint("Scalar: $val"); + $context->Data($elem); + } else { + &$dbgPrint("Scalar: failed $val expected $elem"); + die ["syntax error",$val.$expression,$elem]; + } + } + + } + + if (pos $expression) { + return substr $expression,(pos($expression) || 0); + } else { + return $expression; + } + +}; + +package BNFCompiler::DOM::Node; +use Common; +our @ISA = qw(Object); + +sub NODE_TEXT { 1 } +sub NODE_ELEM { 2 } + +BEGIN { + DeclareProperty(nodeName => ACCESS_READ); + DeclareProperty(nodeType => ACCESS_READ); + DeclareProperty(nodeValue => ACCESS_READ); + DeclareProperty(childNodes => ACCESS_READ); + DeclareProperty(isComplex => ACCESS_READ); +} + +sub CTOR { + my ($this,%args) = @_; + $args{'nodeType'} = NODE_ELEM if not $args{'nodeType'}; + die new Exception("Invalid args. nodeName reqired.") if $args{'nodeType'} == NODE_ELEM and not $args{nodeName}; + + #for speed reason + #$this->SUPER::CTOR(%args); + + $this->{$nodeName} = $args{'nodeName'} if $args{'nodeName'}; + $this->{$nodeType} = $args{'nodeType'}; + $this->{$nodeValue} = $args{'nodeValue'} if exists $args{'nodeValue'}; + + $this->{$isComplex} = 0; +} + +sub insertNode { + my ($this,$node,$pos) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + die new Exception("Invalid node type",ref $node) if ref $node ne __PACKAGE__; + + $this->{$childNodes} = [] if not $this->{$childNodes}; + + $pos = scalar(@{$this->{$childNodes}}) if not defined $pos; + die new Exception("Index out of range",$pos) if $pos > scalar(@{$this->{$childNodes}}) or $pos < 0; + + splice @{$this->{$childNodes}},$pos,0,$node; + $this->{$isComplex} = 1 if not $this->{$isComplex} and $node->{$nodeType} == NODE_ELEM; + + return $node; +} + +sub removeNode { + my ($this,$node) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + @{$this->{$childNodes}} = grep { $_ != $node } @{$this->{$childNodes}}; + + return $node; +} + +sub removeAt { + my ($this,$pos) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + die new Exception("Index out of range",$pos) if $pos >= scalar(@{$this->{$childNodes}}) or $pos < 0; + + return splice @{$this->{$childNodes}},$pos,1; +} + +sub selectNodes { + my ($this,$name) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + + my @nodes = grep { $_->{$nodeType} == NODE_ELEM and $_->{$nodeName} eq $name } @{$this->{$childNodes}}; + + if (wantarray) { + return @nodes; + } else { + return shift @nodes; + } +} + +sub text { + my $this = shift; + + if ($this->{$nodeType} == NODE_TEXT) { + return $this->{$nodeValue}; + } else { + my @texts; + + foreach my $node (@{$this->{$childNodes}}) { + push @texts, $node->{$nodeValue} if ($node->{$nodeType}==NODE_TEXT); + } + + if (wantarray) { + return @texts; + } else { + return join '',@texts; + } + } +} + +package BNFCompiler::DOM::Builder; +use Common; +our @ISA=qw(Object); + +BEGIN { + DeclareProperty(Document => ACCESS_READ); + DeclareProperty(currentNode => ACCESS_NONE); + DeclareProperty(stackNodes => ACCESS_NONE); +} + +sub CTOR { + my $this = shift; + + $this->{$Document} = new BNFCompiler::DOM::Node(nodeName => 'Document', nodeType => BNFCompiler::DOM::Node::NODE_ELEM); + $this->{$currentNode} = $this->{$Document}; +} + +sub NewContext { + my ($this,$contextName) = @_; + + push @{$this->{$stackNodes}},$this->{$currentNode}; + $this->{$currentNode} = new BNFCompiler::DOM::Node(nodeName => $contextName, nodeType=> BNFCompiler::DOM::Node::NODE_ELEM); + + return 1; +} +sub EndContext{ + my ($this,$isNotEmpty) = @_; + + if ($isNotEmpty) { + my $child = $this->{$currentNode}; + $this->{$currentNode} = pop @{$this->{$stackNodes}}; + $this->{$currentNode}->insertNode($child); + } else { + $this->{$currentNode} = pop @{$this->{$stackNodes}}; + } +} +sub Data { + my ($this,$data) = @_; + $this->{$currentNode}->insertNode(new BNFCompiler::DOM::Node(nodeType=> BNFCompiler::DOM::Node::NODE_TEXT, nodeValue => $data)); +} + +package BNFCompiler::DOM; + +sub TransformDOMToHash { + my ($root,$options) = @_; + + my %content; + + if (not $root->childNodes) { + die; + } + + foreach my $child (@{$root->childNodes}) { + if ($child->nodeType == BNFCompiler::DOM::Node::NODE_ELEM) { + my @newValue; + my $nodeName = $child->nodeName; + next if $nodeName eq 'separator' and $options->{'skip_spaces'}; + if ($child->isComplex) { + $newValue[0] = TransformDOMToHash($child,$options); + } else { + @newValue = $child->text() + } + + if ($options->{'use_arrays'}) { + push @{$content{$nodeName}},@newValue; + } + + if (exists $content{$nodeName}) { + if (ref $content{$nodeName} eq 'ARRAY') { + push @{$content{$nodeName}}, @newValue; + } else { + $content{$nodeName} = [$content{$nodeName},@newValue]; + } + } else { + $content{$nodeName} = $newValue[0] if scalar(@newValue) == 1; + $content{$nodeName} = \@newValue if scalar(@newValue) > 1; + } + } else { + next if $options->{'skip_text'}; + push @{$content{'_text'}},$child->nodeValue(); + } + } + + return \%content; +} + +1;