Mercurial > pub > Impl
diff Lib/BNFCompiler.pm @ 49:16ada169ca75
migrating to the Eclipse IDE
author | wizard@linux-odin.local |
---|---|
date | Fri, 26 Feb 2010 10:49:21 +0300 |
parents | 03e58a454b20 |
children |
line wrap: on
line diff
--- a/Lib/BNFCompiler.pm Fri Feb 26 01:43:42 2010 +0300 +++ b/Lib/BNFCompiler.pm Fri Feb 26 10:49:21 2010 +0300 @@ -1,666 +1,666 @@ -package BNFCompiler; -package BNFCompiler::DOM; -package BNFCompiler::DOM::Builder; -package BNFCompiler::DOM::Node; -use strict; - -package BNFCompiler::EventParser; -use strict; -use lib '.'; -use Common; -our @ISA = qw(Object); - -our $EventMapSchema = { - Description => 'Parser events', - Type => 'HASH', - Values => 'SCALAR' -}; - -BEGIN { - DeclareProperty(EventMap => ACCESS_READ); - DeclareProperty(CompiledEvents => ACCESS_NONE); - DeclareProperty(Handler => ACCESS_ALL); -} - -sub CTOR { - my ($this,%args) = @_; - $this->SUPER::CTOR(%args); -} - -sub Compile { - my ($this) = @_; - - delete $this->{$CompiledEvents}; - while (my ($key,$val) = each %{$this->{$EventMap}}) { - $this->{$CompiledEvents}{$key} = qr/\G$val/; - } - 1; -} - -sub Parse { - my ($this,$data) = @_; - - my $StateData; - OUTER: while(pos($data) < length($data)) { - keys %{$this->{$CompiledEvents}}; - while (my ($event,$match) = each %{$this->{$CompiledEvents}}) { - if ($data =~ m/($match)/gc) { - $StateData .= $1; - eval { - undef $StateData if $this->{$Handler}->($event,$StateData); - }; - if ($@) { - die ["Invalid syntax","unexpected $event: $1",pos($data)]; - } - next OUTER; - } - } - die ["Invalid syntax",substr($data,pos($data),10),pos($data)]; - } - - return 1; -} - -# небольшая уловка, поскольку ref от регулярного выражения есть Regexp, можно поставить хуки -package Regexp; -use Data::Dumper; - -sub STORABLE_freeze { - my ($obj,$cloning) = @_; - - return $obj; -} - -sub STORABLE_attach { - my($class, $cloning, $serialized) = @_; - return qr/$serialized/; -} - -package BNFCompiler; -use Common; -use Storable; -use Data::Dumper; -our @ISA = qw(Object); - -our $BNFSchema; -my $ParseAgainstSchema; -my $TransformDOMToBNF; - -BEGIN { - DeclareProperty(Schema => ACCESS_NONE); - DeclareProperty(SchemaCache => ACCESS_NONE); - DeclareProperty(Transform => ACCESS_NONE); -} - -sub CTOR { - my $this = shift; - $this->SUPER::CTOR(@_); - - $this->{$SchemaCache} .= '/' if ($this->{$SchemaCache} and not $this->{$SchemaCache} =~ /\/$/); -} -{ - my $compiledBNFSchema; - sub LoadBNFSchema { - my ($this,%args) = @_; - - my $CompileBNFText = sub { - my ($this,$text) = @_; - - my %SchemaDOM; - foreach my $item (split /\n{2,}/, $text) { - next if not $item; - $compiledBNFSchema = CompileBNFSchema($BNFSchema) if not $compiledBNFSchema; - my $context = new BNFCompiler::DOM::Builder(); - eval { - my $expr = &$ParseAgainstSchema($compiledBNFSchema,$item,$context); - die ["Unexpected expression", $expr] if $expr; - }; - if ($@) { - if (ref $@ eq 'ARRAY') { - die new Exception(@{$@}); - } else { - die $@; - } - } - - $SchemaDOM{$context->Document->selectNodes('name')->text()} = &$TransformDOMToBNF($context->Document->selectNodes('def')); - - } - - $SchemaDOM{'separator'} = ['re:\\s+']; - $this->{$Schema} = CompileBNFSchema(\%SchemaDOM); - }; - - my $text; - if ($args{'file'}) { - - my $fnameCached; - if ($this->{$SchemaCache}) { - my $fname = $args{'file'}; - $fname =~ tr/\//_/; - $fnameCached = $this->{$SchemaCache}.$fname.'.cbs'; - if ( -e $fnameCached && -f $fnameCached && ( -M $args{'file'} >= -M $fnameCached )) { - my $compiledSchema = retrieve($fnameCached); - if ($compiledSchema) { - $this->{$Schema} = $compiledSchema; - return 1; - } else { - unlink $fnameCached; - } - } - } - open my $hFile, '<', $args{'file'} or die new Exception("Failed to open file",$args{'file'},$!); - local $/ = undef; - my $text = <$hFile>; - - $this->$CompileBNFText($text); - - if ($fnameCached) { - store($this->{$Schema},$fnameCached); - } - } elsif ($args{'Schema'}) { - $this->{$Schema} = CompileBNFSchema($args{'Schema'}); - return 1; - } elsif ($args{'text'}) { - $this->$CompileBNFText( $args{'text'} ); - } else { - die new Exception("'file', 'text' or 'Schema' parameter required"); - } - - } -} - -sub Parse { - my ($this, $string, %flags) = @_; - - my $context = new BNFCompiler::DOM::Builder; - - eval { - my $err; - $err = &$ParseAgainstSchema($this->{$Schema},$string,$context,\%flags) and die new Exception('Failed to parse',substr($err,0,80).' ...'); - }; - if ($@) { - if (ref $@ eq 'ARRAY') { - die new Exception(@{$@}); - } else { - die $@; - } - } - if (not $this->{$Transform}) { - return $context->Document; - } else { - return $this->{$Transform}->($context->Document); - } -} - -sub Dispose { - my ($this) = shift; - CleanSchema($this->{$Schema}); - delete @$this{$Schema, $Transform}; - $this->SUPER::Dispose; -} - -sub CleanSchema { - my ($schema,$table) = @_; - - UNIVERSAL::isa($schema,'ARRAY') or return; - $table or $table = { $schema, 1}; - - for(my $i=0; $i<@$schema;$i++) { - my $item = $schema->[$i]; - if (ref $item) { - next if $table->{$item}; - $table->{$item} = 1; - if (UNIVERSAL::isa($item,'ARRAY')) { - CleanSchema($item,$table); - } elsif( UNIVERSAL::isa($item,'HASH')) { - CleanSchema($item->{'syntax'},$table); - } - undef $schema->[$i]; - } - } -} - - -sub OPT { - return bless [@_], 'OPT'; -} - -sub SWITCH { - return bless [@_], 'SWITCH'; -} - -sub REPEAT { - return bless [@_], 'REPEAT'; -} - -$TransformDOMToBNF = sub { - my ($nodeRoot) = @_; - - return [grep $_, map { - my $nodeName = $_->nodeName; - if (not $nodeName ){ - my $obj = $_; - $obj->text() if (not( grep { $obj->text() eq $_} ('{', '}', '[', ']') ) ); - }elsif($nodeName eq 'name') { - $_->text(); - } elsif ($nodeName eq 'separator') { - OPT('separator'); - } elsif ($nodeName eq 'or_sep') { - # nothing - } elsif ($nodeName eq 'switch_part') { - &$TransformDOMToBNF($_); - } elsif ($nodeName eq 'class') { - my $class = $_->childNodes->[0]->text(); - - $class =~ s{(^<|>$|\\.|[\]\[])}{ - my $char = { '>' => '', '<' => '', '[' => '\\[', ']' => '\\]', '\\\\' => '\\\\'}->{$1}; - defined $char ? $char : ($1 =~ tr/\\// && $1); - }ge; - $class = '['.$class.']'; - $class .= $_->childNodes->[1]->text() if $_->childNodes->[1]; - 're:'.$class; - } elsif ($nodeName eq 'symbol') { - $_->text(); - } elsif ($nodeName eq 'simple') { - @{&$TransformDOMToBNF($_)}; - } elsif ($nodeName eq 'multi_def') { - @{&$TransformDOMToBNF($_)}; - } elsif ($nodeName eq 'optional') { - my $multi_def = &$TransformDOMToBNF($_); - if ($multi_def->[scalar(@{$multi_def})-1] eq '...') { - pop @{$multi_def}; - OPT(REPEAT(@{$multi_def})); - } else { - OPT(@{$multi_def}); - } - } elsif ($nodeName eq 'switch') { - SWITCH(@{&$TransformDOMToBNF($_)}); - } elsif ($nodeName eq 'def') { - @{&$TransformDOMToBNF($_)}; - } else{ - die "unknown nodeName: $nodeName"; - } - } @{$nodeRoot->childNodes}]; -}; - -$BNFSchema = { - syntax => ['name',OPT('separator'),'::=',OPT('separator'),'def'], - name => ['re:\\w+'], - class => ['re:<([^<>\\\\]|\\\\.)+>',OPT('re:\\*|\\+|\\?|\\{\\d+\\}')], - symbol => ['re:[^\\w\\d\\s\\[\\]{}<>\\\\|]+'], - separator => ['re:\\s+'], - simple => [ - SWITCH( - 'name', - 'class', - 'symbol' - ) - ], - multi_def => [ - OPT('separator'), SWITCH('...',[SWITCH('simple','optional','switch'),OPT('multi_def')]) - ], - optional => [ - '[','multi_def', OPT('separator') ,']' - - ], - keyword => [], - or_sep => ['|'], - switch_part => [OPT('separator'),SWITCH('simple','optional','switch'),OPT(REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))),OPT('separator')], - switch => [ - '{','switch_part',OPT(REPEAT('or_sep','switch_part')),'}' - ], - def => [REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))] -}; - -my $CompileTerm; -$CompileTerm = sub { - my ($term,$Schema,$cache,$ref) = @_; - - my $compiled = ref $term eq 'ARRAY' ? ($ref or []) : bless (($ref or []), ref $term); - - die new Exception("Invalid term type $term", $term, ref $term) if not grep ref $term eq $_, qw(ARRAY REPEAT SWITCH OPT); - - foreach my $element (@{$term}) { - if (ref $element) { - push @{$compiled}, &$CompileTerm($element,$Schema,$cache); - } else { - if($element =~/^\w+$/) { - if (exists $Schema->{$element}) { - # reference - my $compiledUnit; - if (exists $cache->{$element}) { - $compiledUnit = $cache->{$element}; - } else { - $compiledUnit = []; - $cache->{$element} = $compiledUnit; - &$CompileTerm($Schema->{$element},$Schema,$cache,$compiledUnit); - } - - push @{$compiled},{ name => $element, syntax => $compiledUnit}; - } else { - # simple word - push @{$compiled}, $element; - } - } elsif ($element =~ /^re:(.*)/){ - # regexp - push @{$compiled},qr/\G(?:$1)/; - } else { - # char sequence - push @{$compiled},$element; - } - } - } - - return $compiled; -}; - -sub CompileBNFSchema { - my($Schema) = @_; - - my %Cache; - return &$CompileTerm($Schema->{'syntax'},$Schema,\%Cache); -} - -my $CompiledSchema = CompileBNFSchema($BNFSchema); - -$ParseAgainstSchema = sub { - my ($Schema,$expression,$context,$flags,$level) = @_; - - $level = 0 if not defined $level; - my $dbgPrint = $flags->{debug} ? sub { - print "\t" x $level, @_,"\n"; - } : sub {}; - - foreach my $elem (@{$Schema}) { - my $type = ref $elem; - $expression = substr $expression,pos($expression) if $type ne 'Regexp' and pos($expression); - - if ($type eq 'HASH') { - $context->NewContext($elem->{'name'}); - &$dbgPrint("$elem->{name} ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem->{'syntax'}})); - eval { - $expression = &$ParseAgainstSchema($elem->{'syntax'},$expression,$context,$flags,$level+1); - }; - if ($@) { - $context->EndContext(0); - &$dbgPrint("/$elem->{name} ", "0"); - die $@; - } else { - &$dbgPrint("/$elem->{name} ", "1"); - $context->EndContext(1); - } - } elsif ($type eq 'ARRAY') { - &$dbgPrint("entering ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); - $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); - &$dbgPrint("success"); - } elsif ($type eq 'OPT') { - if (defined $expression) { - &$dbgPrint("optional ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); - eval { - $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); - }; - if ($@) { - &$dbgPrint("failed"); - undef $@; - } else { - &$dbgPrint("success"); - } - } - } elsif ($type eq 'SWITCH') { - my $success = 0; - &$dbgPrint("switch"); - LOOP_SWITCH: foreach my $subelem (@{$elem}) { - eval { - &$dbgPrint("\ttry ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ } @{(grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem]})); - $expression = &$ParseAgainstSchema((grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem],$expression,$context,$flags,$level+1); - $success = 1; - }; - if ($@) { - undef $@; - } else { - last LOOP_SWITCH; - } - } - if ($success) { - &$dbgPrint("success"); - } else { - &$dbgPrint("failed"); - die ["syntax error",$expression,$elem]; - } - } elsif ($type eq 'REPEAT') { - my $copy = [@{$elem}]; - my $i = 0; - &$dbgPrint("repeat ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); - while (1) { - eval { - $expression = &$ParseAgainstSchema($copy,$expression,$context,$flags,$level+1); - $i++; - }; - if ($@) { - if (not $i) { - &$dbgPrint("failed"); - die $@; - } - &$dbgPrint("found $i matches"); - undef $@; - last; - } - } - } elsif ($type eq 'Regexp') { - my $posPrev = pos($expression) || 0; - if ( $expression =~ m/($elem)/ ) { - $context->Data($1); - pos($expression) = $posPrev+length($1); - &$dbgPrint("Regexp: $1 $elem ", pos($expression)); - } else { - &$dbgPrint("Regexp: $elem failed"); - die ["syntax error",$expression,$elem,$posPrev]; - pos($expression) = $posPrev; - } - } else { - if ((my $val = substr($expression, 0, length($elem),'')) eq $elem) { - &$dbgPrint("Scalar: $val"); - $context->Data($elem); - } else { - &$dbgPrint("Scalar: failed $val expected $elem"); - die ["syntax error",$val.$expression,$elem]; - } - } - - } - - if (pos $expression) { - return substr $expression,(pos($expression) || 0); - } else { - return $expression; - } - -}; - -package BNFCompiler::DOM::Node; -use Common; -our @ISA = qw(Object); - -sub NODE_TEXT { 1 } -sub NODE_ELEM { 2 } - -BEGIN { - DeclareProperty(nodeName => ACCESS_READ); - DeclareProperty(nodeType => ACCESS_READ); - DeclareProperty(nodeValue => ACCESS_READ); - DeclareProperty(childNodes => ACCESS_READ); - DeclareProperty(isComplex => ACCESS_READ); -} - -sub CTOR { - my ($this,%args) = @_; - $args{'nodeType'} = NODE_ELEM if not $args{'nodeType'}; - die new Exception("Invalid args. nodeName reqired.") if $args{'nodeType'} == NODE_ELEM and not $args{nodeName}; - - #for speed reason - #$this->SUPER::CTOR(%args); - - $this->{$nodeName} = $args{'nodeName'} if $args{'nodeName'}; - $this->{$nodeType} = $args{'nodeType'}; - $this->{$nodeValue} = $args{'nodeValue'} if exists $args{'nodeValue'}; - - $this->{$isComplex} = 0; -} - -sub insertNode { - my ($this,$node,$pos) = @_; - - die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; - die new Exception("Invalid node type",ref $node) if ref $node ne __PACKAGE__; - - $this->{$childNodes} = [] if not $this->{$childNodes}; - - $pos = scalar(@{$this->{$childNodes}}) if not defined $pos; - die new Exception("Index out of range",$pos) if $pos > scalar(@{$this->{$childNodes}}) or $pos < 0; - - splice @{$this->{$childNodes}},$pos,0,$node; - $this->{$isComplex} = 1 if not $this->{$isComplex} and $node->{$nodeType} == NODE_ELEM; - - return $node; -} - -sub removeNode { - my ($this,$node) = @_; - - die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; - @{$this->{$childNodes}} = grep { $_ != $node } @{$this->{$childNodes}}; - - return $node; -} - -sub removeAt { - my ($this,$pos) = @_; - - die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; - die new Exception("Index out of range",$pos) if $pos >= scalar(@{$this->{$childNodes}}) or $pos < 0; - - return splice @{$this->{$childNodes}},$pos,1; -} - -sub selectNodes { - my ($this,$name) = @_; - - die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; - - my @nodes = grep { $_->{$nodeType} == NODE_ELEM and $_->{$nodeName} eq $name } @{$this->{$childNodes}}; - - if (wantarray) { - return @nodes; - } else { - return shift @nodes; - } -} - -sub text { - my $this = shift; - - if ($this->{$nodeType} == NODE_TEXT) { - return $this->{$nodeValue}; - } else { - my @texts; - - foreach my $node (@{$this->{$childNodes}}) { - push @texts, $node->{$nodeValue} if ($node->{$nodeType}==NODE_TEXT); - } - - if (wantarray) { - return @texts; - } else { - return join '',@texts; - } - } -} - -package BNFCompiler::DOM::Builder; -use Common; -our @ISA=qw(Object); - -BEGIN { - DeclareProperty(Document => ACCESS_READ); - DeclareProperty(currentNode => ACCESS_NONE); - DeclareProperty(stackNodes => ACCESS_NONE); -} - -sub CTOR { - my $this = shift; - - $this->{$Document} = new BNFCompiler::DOM::Node(nodeName => 'Document', nodeType => BNFCompiler::DOM::Node::NODE_ELEM); - $this->{$currentNode} = $this->{$Document}; -} - -sub NewContext { - my ($this,$contextName) = @_; - - push @{$this->{$stackNodes}},$this->{$currentNode}; - $this->{$currentNode} = new BNFCompiler::DOM::Node(nodeName => $contextName, nodeType=> BNFCompiler::DOM::Node::NODE_ELEM); - - return 1; -} -sub EndContext{ - my ($this,$isNotEmpty) = @_; - - if ($isNotEmpty) { - my $child = $this->{$currentNode}; - $this->{$currentNode} = pop @{$this->{$stackNodes}}; - $this->{$currentNode}->insertNode($child); - } else { - $this->{$currentNode} = pop @{$this->{$stackNodes}}; - } -} -sub Data { - my ($this,$data) = @_; - $this->{$currentNode}->insertNode(new BNFCompiler::DOM::Node(nodeType=> BNFCompiler::DOM::Node::NODE_TEXT, nodeValue => $data)); -} - -package BNFCompiler::DOM; - -sub TransformDOMToHash { - my ($root,$options) = @_; - - my %content; - - if (not $root->childNodes) { - die; - } - - foreach my $child (@{$root->childNodes}) { - if ($child->nodeType == BNFCompiler::DOM::Node::NODE_ELEM) { - my @newValue; - my $nodeName = $child->nodeName; - next if $nodeName eq 'separator' and $options->{'skip_spaces'}; - if ($child->isComplex) { - $newValue[0] = TransformDOMToHash($child,$options); - } else { - @newValue = $child->text() - } - - if ($options->{'use_arrays'}) { - push @{$content{$nodeName}},@newValue; - } - - if (exists $content{$nodeName}) { - if (ref $content{$nodeName} eq 'ARRAY') { - push @{$content{$nodeName}}, @newValue; - } else { - $content{$nodeName} = [$content{$nodeName},@newValue]; - } - } else { - $content{$nodeName} = $newValue[0] if scalar(@newValue) == 1; - $content{$nodeName} = \@newValue if scalar(@newValue) > 1; - } - } else { - next if $options->{'skip_text'}; - push @{$content{'_text'}},$child->nodeValue(); - } - } - - return \%content; -} - -1; +package BNFCompiler; +package BNFCompiler::DOM; +package BNFCompiler::DOM::Builder; +package BNFCompiler::DOM::Node; +use strict; + +package BNFCompiler::EventParser; +use strict; +use lib '.'; +use Common; +our @ISA = qw(Object); + +our $EventMapSchema = { + Description => 'Parser events', + Type => 'HASH', + Values => 'SCALAR' +}; + +BEGIN { + DeclareProperty(EventMap => ACCESS_READ); + DeclareProperty(CompiledEvents => ACCESS_NONE); + DeclareProperty(Handler => ACCESS_ALL); +} + +sub CTOR { + my ($this,%args) = @_; + $this->SUPER::CTOR(%args); +} + +sub Compile { + my ($this) = @_; + + delete $this->{$CompiledEvents}; + while (my ($key,$val) = each %{$this->{$EventMap}}) { + $this->{$CompiledEvents}{$key} = qr/\G$val/; + } + 1; +} + +sub Parse { + my ($this,$data) = @_; + + my $StateData; + OUTER: while(pos($data) < length($data)) { + keys %{$this->{$CompiledEvents}}; + while (my ($event,$match) = each %{$this->{$CompiledEvents}}) { + if ($data =~ m/($match)/gc) { + $StateData .= $1; + eval { + undef $StateData if $this->{$Handler}->($event,$StateData); + }; + if ($@) { + die ["Invalid syntax","unexpected $event: $1",pos($data)]; + } + next OUTER; + } + } + die ["Invalid syntax",substr($data,pos($data),10),pos($data)]; + } + + return 1; +} + +# небольшая уловка, поскольку ref от регулярного выражения есть Regexp, можно поставить хуки +package Regexp; +use Data::Dumper; + +sub STORABLE_freeze { + my ($obj,$cloning) = @_; + + return $obj; +} + +sub STORABLE_attach { + my($class, $cloning, $serialized) = @_; + return qr/$serialized/; +} + +package BNFCompiler; +use Common; +use Storable; +use Data::Dumper; +our @ISA = qw(Object); + +our $BNFSchema; +my $ParseAgainstSchema; +my $TransformDOMToBNF; + +BEGIN { + DeclareProperty(Schema => ACCESS_NONE); + DeclareProperty(SchemaCache => ACCESS_NONE); + DeclareProperty(Transform => ACCESS_NONE); +} + +sub CTOR { + my $this = shift; + $this->SUPER::CTOR(@_); + + $this->{$SchemaCache} .= '/' if ($this->{$SchemaCache} and not $this->{$SchemaCache} =~ /\/$/); +} +{ + my $compiledBNFSchema; + sub LoadBNFSchema { + my ($this,%args) = @_; + + my $CompileBNFText = sub { + my ($this,$text) = @_; + + my %SchemaDOM; + foreach my $item (split /\n{2,}/, $text) { + next if not $item; + $compiledBNFSchema = CompileBNFSchema($BNFSchema) if not $compiledBNFSchema; + my $context = new BNFCompiler::DOM::Builder(); + eval { + my $expr = &$ParseAgainstSchema($compiledBNFSchema,$item,$context); + die ["Unexpected expression", $expr] if $expr; + }; + if ($@) { + if (ref $@ eq 'ARRAY') { + die new Exception(@{$@}); + } else { + die $@; + } + } + + $SchemaDOM{$context->Document->selectNodes('name')->text()} = &$TransformDOMToBNF($context->Document->selectNodes('def')); + + } + + $SchemaDOM{'separator'} = ['re:\\s+']; + $this->{$Schema} = CompileBNFSchema(\%SchemaDOM); + }; + + my $text; + if ($args{'file'}) { + + my $fnameCached; + if ($this->{$SchemaCache}) { + my $fname = $args{'file'}; + $fname =~ tr/\//_/; + $fnameCached = $this->{$SchemaCache}.$fname.'.cbs'; + if ( -e $fnameCached && -f $fnameCached && ( -M $args{'file'} >= -M $fnameCached )) { + my $compiledSchema = retrieve($fnameCached); + if ($compiledSchema) { + $this->{$Schema} = $compiledSchema; + return 1; + } else { + unlink $fnameCached; + } + } + } + open my $hFile, '<', $args{'file'} or die new Exception("Failed to open file",$args{'file'},$!); + local $/ = undef; + my $text = <$hFile>; + + $this->$CompileBNFText($text); + + if ($fnameCached) { + store($this->{$Schema},$fnameCached); + } + } elsif ($args{'Schema'}) { + $this->{$Schema} = CompileBNFSchema($args{'Schema'}); + return 1; + } elsif ($args{'text'}) { + $this->$CompileBNFText( $args{'text'} ); + } else { + die new Exception("'file', 'text' or 'Schema' parameter required"); + } + + } +} + +sub Parse { + my ($this, $string, %flags) = @_; + + my $context = new BNFCompiler::DOM::Builder; + + eval { + my $err; + $err = &$ParseAgainstSchema($this->{$Schema},$string,$context,\%flags) and die new Exception('Failed to parse',substr($err,0,80).' ...'); + }; + if ($@) { + if (ref $@ eq 'ARRAY') { + die new Exception(@{$@}); + } else { + die $@; + } + } + if (not $this->{$Transform}) { + return $context->Document; + } else { + return $this->{$Transform}->($context->Document); + } +} + +sub Dispose { + my ($this) = shift; + CleanSchema($this->{$Schema}); + delete @$this{$Schema, $Transform}; + $this->SUPER::Dispose; +} + +sub CleanSchema { + my ($schema,$table) = @_; + + UNIVERSAL::isa($schema,'ARRAY') or return; + $table or $table = { $schema, 1}; + + for(my $i=0; $i<@$schema;$i++) { + my $item = $schema->[$i]; + if (ref $item) { + next if $table->{$item}; + $table->{$item} = 1; + if (UNIVERSAL::isa($item,'ARRAY')) { + CleanSchema($item,$table); + } elsif( UNIVERSAL::isa($item,'HASH')) { + CleanSchema($item->{'syntax'},$table); + } + undef $schema->[$i]; + } + } +} + + +sub OPT { + return bless [@_], 'OPT'; +} + +sub SWITCH { + return bless [@_], 'SWITCH'; +} + +sub REPEAT { + return bless [@_], 'REPEAT'; +} + +$TransformDOMToBNF = sub { + my ($nodeRoot) = @_; + + return [grep $_, map { + my $nodeName = $_->nodeName; + if (not $nodeName ){ + my $obj = $_; + $obj->text() if (not( grep { $obj->text() eq $_} ('{', '}', '[', ']') ) ); + }elsif($nodeName eq 'name') { + $_->text(); + } elsif ($nodeName eq 'separator') { + OPT('separator'); + } elsif ($nodeName eq 'or_sep') { + # nothing + } elsif ($nodeName eq 'switch_part') { + &$TransformDOMToBNF($_); + } elsif ($nodeName eq 'class') { + my $class = $_->childNodes->[0]->text(); + + $class =~ s{(^<|>$|\\.|[\]\[])}{ + my $char = { '>' => '', '<' => '', '[' => '\\[', ']' => '\\]', '\\\\' => '\\\\'}->{$1}; + defined $char ? $char : ($1 =~ tr/\\// && $1); + }ge; + $class = '['.$class.']'; + $class .= $_->childNodes->[1]->text() if $_->childNodes->[1]; + 're:'.$class; + } elsif ($nodeName eq 'symbol') { + $_->text(); + } elsif ($nodeName eq 'simple') { + @{&$TransformDOMToBNF($_)}; + } elsif ($nodeName eq 'multi_def') { + @{&$TransformDOMToBNF($_)}; + } elsif ($nodeName eq 'optional') { + my $multi_def = &$TransformDOMToBNF($_); + if ($multi_def->[scalar(@{$multi_def})-1] eq '...') { + pop @{$multi_def}; + OPT(REPEAT(@{$multi_def})); + } else { + OPT(@{$multi_def}); + } + } elsif ($nodeName eq 'switch') { + SWITCH(@{&$TransformDOMToBNF($_)}); + } elsif ($nodeName eq 'def') { + @{&$TransformDOMToBNF($_)}; + } else{ + die "unknown nodeName: $nodeName"; + } + } @{$nodeRoot->childNodes}]; +}; + +$BNFSchema = { + syntax => ['name',OPT('separator'),'::=',OPT('separator'),'def'], + name => ['re:\\w+'], + class => ['re:<([^<>\\\\]|\\\\.)+>',OPT('re:\\*|\\+|\\?|\\{\\d+\\}')], + symbol => ['re:[^\\w\\d\\s\\[\\]{}<>\\\\|]+'], + separator => ['re:\\s+'], + simple => [ + SWITCH( + 'name', + 'class', + 'symbol' + ) + ], + multi_def => [ + OPT('separator'), SWITCH('...',[SWITCH('simple','optional','switch'),OPT('multi_def')]) + ], + optional => [ + '[','multi_def', OPT('separator') ,']' + + ], + keyword => [], + or_sep => ['|'], + switch_part => [OPT('separator'),SWITCH('simple','optional','switch'),OPT(REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))),OPT('separator')], + switch => [ + '{','switch_part',OPT(REPEAT('or_sep','switch_part')),'}' + ], + def => [REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))] +}; + +my $CompileTerm; +$CompileTerm = sub { + my ($term,$Schema,$cache,$ref) = @_; + + my $compiled = ref $term eq 'ARRAY' ? ($ref or []) : bless (($ref or []), ref $term); + + die new Exception("Invalid term type $term", $term, ref $term) if not grep ref $term eq $_, qw(ARRAY REPEAT SWITCH OPT); + + foreach my $element (@{$term}) { + if (ref $element) { + push @{$compiled}, &$CompileTerm($element,$Schema,$cache); + } else { + if($element =~/^\w+$/) { + if (exists $Schema->{$element}) { + # reference + my $compiledUnit; + if (exists $cache->{$element}) { + $compiledUnit = $cache->{$element}; + } else { + $compiledUnit = []; + $cache->{$element} = $compiledUnit; + &$CompileTerm($Schema->{$element},$Schema,$cache,$compiledUnit); + } + + push @{$compiled},{ name => $element, syntax => $compiledUnit}; + } else { + # simple word + push @{$compiled}, $element; + } + } elsif ($element =~ /^re:(.*)/){ + # regexp + push @{$compiled},qr/\G(?:$1)/; + } else { + # char sequence + push @{$compiled},$element; + } + } + } + + return $compiled; +}; + +sub CompileBNFSchema { + my($Schema) = @_; + + my %Cache; + return &$CompileTerm($Schema->{'syntax'},$Schema,\%Cache); +} + +my $CompiledSchema = CompileBNFSchema($BNFSchema); + +$ParseAgainstSchema = sub { + my ($Schema,$expression,$context,$flags,$level) = @_; + + $level = 0 if not defined $level; + my $dbgPrint = $flags->{debug} ? sub { + print "\t" x $level, @_,"\n"; + } : sub {}; + + foreach my $elem (@{$Schema}) { + my $type = ref $elem; + $expression = substr $expression,pos($expression) if $type ne 'Regexp' and pos($expression); + + if ($type eq 'HASH') { + $context->NewContext($elem->{'name'}); + &$dbgPrint("$elem->{name} ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem->{'syntax'}})); + eval { + $expression = &$ParseAgainstSchema($elem->{'syntax'},$expression,$context,$flags,$level+1); + }; + if ($@) { + $context->EndContext(0); + &$dbgPrint("/$elem->{name} ", "0"); + die $@; + } else { + &$dbgPrint("/$elem->{name} ", "1"); + $context->EndContext(1); + } + } elsif ($type eq 'ARRAY') { + &$dbgPrint("entering ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); + $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); + &$dbgPrint("success"); + } elsif ($type eq 'OPT') { + if (defined $expression) { + &$dbgPrint("optional ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); + eval { + $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1); + }; + if ($@) { + &$dbgPrint("failed"); + undef $@; + } else { + &$dbgPrint("success"); + } + } + } elsif ($type eq 'SWITCH') { + my $success = 0; + &$dbgPrint("switch"); + LOOP_SWITCH: foreach my $subelem (@{$elem}) { + eval { + &$dbgPrint("\ttry ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ } @{(grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem]})); + $expression = &$ParseAgainstSchema((grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem],$expression,$context,$flags,$level+1); + $success = 1; + }; + if ($@) { + undef $@; + } else { + last LOOP_SWITCH; + } + } + if ($success) { + &$dbgPrint("success"); + } else { + &$dbgPrint("failed"); + die ["syntax error",$expression,$elem]; + } + } elsif ($type eq 'REPEAT') { + my $copy = [@{$elem}]; + my $i = 0; + &$dbgPrint("repeat ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem})); + while (1) { + eval { + $expression = &$ParseAgainstSchema($copy,$expression,$context,$flags,$level+1); + $i++; + }; + if ($@) { + if (not $i) { + &$dbgPrint("failed"); + die $@; + } + &$dbgPrint("found $i matches"); + undef $@; + last; + } + } + } elsif ($type eq 'Regexp') { + my $posPrev = pos($expression) || 0; + if ( $expression =~ m/($elem)/ ) { + $context->Data($1); + pos($expression) = $posPrev+length($1); + &$dbgPrint("Regexp: $1 $elem ", pos($expression)); + } else { + &$dbgPrint("Regexp: $elem failed"); + die ["syntax error",$expression,$elem,$posPrev]; + pos($expression) = $posPrev; + } + } else { + if ((my $val = substr($expression, 0, length($elem),'')) eq $elem) { + &$dbgPrint("Scalar: $val"); + $context->Data($elem); + } else { + &$dbgPrint("Scalar: failed $val expected $elem"); + die ["syntax error",$val.$expression,$elem]; + } + } + + } + + if (pos $expression) { + return substr $expression,(pos($expression) || 0); + } else { + return $expression; + } + +}; + +package BNFCompiler::DOM::Node; +use Common; +our @ISA = qw(Object); + +sub NODE_TEXT { 1 } +sub NODE_ELEM { 2 } + +BEGIN { + DeclareProperty(nodeName => ACCESS_READ); + DeclareProperty(nodeType => ACCESS_READ); + DeclareProperty(nodeValue => ACCESS_READ); + DeclareProperty(childNodes => ACCESS_READ); + DeclareProperty(isComplex => ACCESS_READ); +} + +sub CTOR { + my ($this,%args) = @_; + $args{'nodeType'} = NODE_ELEM if not $args{'nodeType'}; + die new Exception("Invalid args. nodeName reqired.") if $args{'nodeType'} == NODE_ELEM and not $args{nodeName}; + + #for speed reason + #$this->SUPER::CTOR(%args); + + $this->{$nodeName} = $args{'nodeName'} if $args{'nodeName'}; + $this->{$nodeType} = $args{'nodeType'}; + $this->{$nodeValue} = $args{'nodeValue'} if exists $args{'nodeValue'}; + + $this->{$isComplex} = 0; +} + +sub insertNode { + my ($this,$node,$pos) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + die new Exception("Invalid node type",ref $node) if ref $node ne __PACKAGE__; + + $this->{$childNodes} = [] if not $this->{$childNodes}; + + $pos = scalar(@{$this->{$childNodes}}) if not defined $pos; + die new Exception("Index out of range",$pos) if $pos > scalar(@{$this->{$childNodes}}) or $pos < 0; + + splice @{$this->{$childNodes}},$pos,0,$node; + $this->{$isComplex} = 1 if not $this->{$isComplex} and $node->{$nodeType} == NODE_ELEM; + + return $node; +} + +sub removeNode { + my ($this,$node) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + @{$this->{$childNodes}} = grep { $_ != $node } @{$this->{$childNodes}}; + + return $node; +} + +sub removeAt { + my ($this,$pos) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + die new Exception("Index out of range",$pos) if $pos >= scalar(@{$this->{$childNodes}}) or $pos < 0; + + return splice @{$this->{$childNodes}},$pos,1; +} + +sub selectNodes { + my ($this,$name) = @_; + + die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM; + + my @nodes = grep { $_->{$nodeType} == NODE_ELEM and $_->{$nodeName} eq $name } @{$this->{$childNodes}}; + + if (wantarray) { + return @nodes; + } else { + return shift @nodes; + } +} + +sub text { + my $this = shift; + + if ($this->{$nodeType} == NODE_TEXT) { + return $this->{$nodeValue}; + } else { + my @texts; + + foreach my $node (@{$this->{$childNodes}}) { + push @texts, $node->{$nodeValue} if ($node->{$nodeType}==NODE_TEXT); + } + + if (wantarray) { + return @texts; + } else { + return join '',@texts; + } + } +} + +package BNFCompiler::DOM::Builder; +use Common; +our @ISA=qw(Object); + +BEGIN { + DeclareProperty(Document => ACCESS_READ); + DeclareProperty(currentNode => ACCESS_NONE); + DeclareProperty(stackNodes => ACCESS_NONE); +} + +sub CTOR { + my $this = shift; + + $this->{$Document} = new BNFCompiler::DOM::Node(nodeName => 'Document', nodeType => BNFCompiler::DOM::Node::NODE_ELEM); + $this->{$currentNode} = $this->{$Document}; +} + +sub NewContext { + my ($this,$contextName) = @_; + + push @{$this->{$stackNodes}},$this->{$currentNode}; + $this->{$currentNode} = new BNFCompiler::DOM::Node(nodeName => $contextName, nodeType=> BNFCompiler::DOM::Node::NODE_ELEM); + + return 1; +} +sub EndContext{ + my ($this,$isNotEmpty) = @_; + + if ($isNotEmpty) { + my $child = $this->{$currentNode}; + $this->{$currentNode} = pop @{$this->{$stackNodes}}; + $this->{$currentNode}->insertNode($child); + } else { + $this->{$currentNode} = pop @{$this->{$stackNodes}}; + } +} +sub Data { + my ($this,$data) = @_; + $this->{$currentNode}->insertNode(new BNFCompiler::DOM::Node(nodeType=> BNFCompiler::DOM::Node::NODE_TEXT, nodeValue => $data)); +} + +package BNFCompiler::DOM; + +sub TransformDOMToHash { + my ($root,$options) = @_; + + my %content; + + if (not $root->childNodes) { + die; + } + + foreach my $child (@{$root->childNodes}) { + if ($child->nodeType == BNFCompiler::DOM::Node::NODE_ELEM) { + my @newValue; + my $nodeName = $child->nodeName; + next if $nodeName eq 'separator' and $options->{'skip_spaces'}; + if ($child->isComplex) { + $newValue[0] = TransformDOMToHash($child,$options); + } else { + @newValue = $child->text() + } + + if ($options->{'use_arrays'}) { + push @{$content{$nodeName}},@newValue; + } + + if (exists $content{$nodeName}) { + if (ref $content{$nodeName} eq 'ARRAY') { + push @{$content{$nodeName}}, @newValue; + } else { + $content{$nodeName} = [$content{$nodeName},@newValue]; + } + } else { + $content{$nodeName} = $newValue[0] if scalar(@newValue) == 1; + $content{$nodeName} = \@newValue if scalar(@newValue) > 1; + } + } else { + next if $options->{'skip_text'}; + push @{$content{'_text'}},$child->nodeValue(); + } + } + + return \%content; +} + +1;