diff Lib/BNFCompiler.pm @ 0:03e58a454b20

Создан репозитарий
author Sergey
date Tue, 14 Jul 2009 12:54:37 +0400
parents
children 16ada169ca75
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/BNFCompiler.pm	Tue Jul 14 12:54:37 2009 +0400
@@ -0,0 +1,666 @@
+package BNFCompiler;
+package BNFCompiler::DOM;
+package BNFCompiler::DOM::Builder;
+package BNFCompiler::DOM::Node;
+use strict;
+
+package BNFCompiler::EventParser;
+use strict;
+use lib '.';
+use Common;
+our @ISA = qw(Object);
+
+our $EventMapSchema = {
+    Description => 'Parser events',
+    Type => 'HASH',
+    Values => 'SCALAR'
+};
+
+BEGIN {
+    DeclareProperty(EventMap => ACCESS_READ);
+    DeclareProperty(CompiledEvents => ACCESS_NONE);
+    DeclareProperty(Handler => ACCESS_ALL);
+}
+
+sub CTOR {
+    my ($this,%args) = @_;
+    $this->SUPER::CTOR(%args);
+}
+
+sub Compile {
+    my ($this) = @_;
+    
+    delete $this->{$CompiledEvents};
+    while (my ($key,$val) = each %{$this->{$EventMap}}) {
+        $this->{$CompiledEvents}{$key} = qr/\G$val/;
+    }
+    1;
+}
+
+sub Parse {
+    my ($this,$data) = @_;
+    
+    my $StateData;
+    OUTER: while(pos($data) < length($data)) {
+        keys %{$this->{$CompiledEvents}};
+        while (my ($event,$match) = each %{$this->{$CompiledEvents}}) {
+            if ($data =~ m/($match)/gc) {
+                $StateData .= $1;
+                eval {
+                    undef $StateData if $this->{$Handler}->($event,$StateData);
+                };
+                if ($@) {
+                    die ["Invalid syntax","unexpected $event: $1",pos($data)];
+                }
+                next OUTER;
+            }
+        }
+        die ["Invalid syntax",substr($data,pos($data),10),pos($data)];
+    }
+    
+    return 1;
+}
+
+# íåáîëüøàÿ óëîâêà, ïîñêîëüêó ref îò ðåãóëÿðíîãî âûðàæåíèÿ åñòü Regexp, ìîæíî ïîñòàâèòü õóêè
+package Regexp;
+use Data::Dumper;
+
+sub STORABLE_freeze {
+    my ($obj,$cloning) = @_;
+    
+    return $obj;
+}
+
+sub STORABLE_attach {
+    my($class, $cloning, $serialized) = @_;
+    return qr/$serialized/;
+}
+
+package BNFCompiler;
+use Common;
+use Storable;
+use Data::Dumper;
+our @ISA = qw(Object);
+
+our $BNFSchema;
+my $ParseAgainstSchema;
+my $TransformDOMToBNF;
+
+BEGIN {
+    DeclareProperty(Schema => ACCESS_NONE);
+    DeclareProperty(SchemaCache => ACCESS_NONE);
+    DeclareProperty(Transform => ACCESS_NONE);
+}
+
+sub CTOR {
+    my $this = shift;
+    $this->SUPER::CTOR(@_);
+    
+    $this->{$SchemaCache} .= '/' if ($this->{$SchemaCache} and not $this->{$SchemaCache} =~ /\/$/);
+}
+{
+    my $compiledBNFSchema;
+    sub LoadBNFSchema {
+        my ($this,%args) = @_;
+        
+        my $CompileBNFText = sub {
+            my ($this,$text) = @_;
+            
+            my %SchemaDOM;
+            foreach my $item (split /\n{2,}/, $text) {
+                next if not $item;
+                $compiledBNFSchema = CompileBNFSchema($BNFSchema) if not $compiledBNFSchema;
+                my $context = new BNFCompiler::DOM::Builder();
+                eval {
+                    my $expr = &$ParseAgainstSchema($compiledBNFSchema,$item,$context);
+                    die ["Unexpected expression", $expr] if $expr;
+                };
+                if ($@) {
+                    if (ref $@ eq 'ARRAY') {
+                        die new Exception(@{$@});
+                    } else {
+                        die $@;
+                    }
+                }
+                
+                $SchemaDOM{$context->Document->selectNodes('name')->text()} = &$TransformDOMToBNF($context->Document->selectNodes('def'));
+                
+            }
+            
+            $SchemaDOM{'separator'} = ['re:\\s+'];
+            $this->{$Schema} = CompileBNFSchema(\%SchemaDOM);
+        };
+        
+        my $text;
+        if ($args{'file'}) {
+            
+            my $fnameCached;
+            if ($this->{$SchemaCache}) {
+                my $fname = $args{'file'};
+                $fname =~ tr/\//_/;
+                $fnameCached = $this->{$SchemaCache}.$fname.'.cbs';
+                if ( -e $fnameCached && -f $fnameCached && ( -M $args{'file'} >= -M $fnameCached )) {
+                    my $compiledSchema = retrieve($fnameCached);
+                    if ($compiledSchema) {
+                        $this->{$Schema} = $compiledSchema;
+                        return 1;
+                    } else {
+                        unlink $fnameCached;
+                    }
+                }
+            }
+            open my $hFile, '<', $args{'file'} or die new Exception("Failed to open file",$args{'file'},$!);
+            local $/ = undef;
+            my $text = <$hFile>;
+            
+            $this->$CompileBNFText($text);
+            
+            if ($fnameCached) {
+                store($this->{$Schema},$fnameCached);
+            }
+        } elsif ($args{'Schema'}) {
+            $this->{$Schema} = CompileBNFSchema($args{'Schema'});
+            return 1;
+        } elsif ($args{'text'}) {
+            $this->$CompileBNFText( $args{'text'} );
+        } else {
+            die new Exception("'file', 'text' or 'Schema' parameter required");
+        }
+        
+    }
+}
+
+sub Parse {
+    my ($this, $string, %flags) = @_;
+    
+    my $context = new BNFCompiler::DOM::Builder;
+    
+    eval {
+        my $err;
+        $err = &$ParseAgainstSchema($this->{$Schema},$string,$context,\%flags) and die new Exception('Failed to parse',substr($err,0,80).' ...');
+    };
+    if ($@) {
+        if (ref $@ eq 'ARRAY') {
+            die new Exception(@{$@});
+        } else {
+            die $@;
+        }
+    }
+    if (not $this->{$Transform}) {
+        return $context->Document;
+    } else {
+        return $this->{$Transform}->($context->Document);
+    }
+}
+
+sub Dispose {
+    my ($this) = shift;
+    CleanSchema($this->{$Schema});
+    delete @$this{$Schema, $Transform};
+    $this->SUPER::Dispose;
+}
+
+sub CleanSchema {
+    my ($schema,$table) = @_;
+    
+    UNIVERSAL::isa($schema,'ARRAY') or return;
+    $table or $table = { $schema, 1};
+    
+    for(my $i=0; $i<@$schema;$i++) {
+        my $item = $schema->[$i];
+        if (ref $item) {
+            next if $table->{$item};
+            $table->{$item} = 1;
+            if (UNIVERSAL::isa($item,'ARRAY')) {
+                CleanSchema($item,$table);
+            } elsif( UNIVERSAL::isa($item,'HASH')) {
+                CleanSchema($item->{'syntax'},$table);
+            }
+            undef $schema->[$i];
+        }
+    }
+}
+
+
+sub OPT {
+    return bless [@_], 'OPT';
+}
+
+sub SWITCH {
+    return bless [@_], 'SWITCH';
+}
+
+sub REPEAT {
+    return bless [@_], 'REPEAT';
+}
+
+$TransformDOMToBNF = sub {
+    my ($nodeRoot) = @_;
+    
+    return [grep $_, map {
+        my $nodeName = $_->nodeName;
+        if (not $nodeName ){
+            my $obj = $_;
+            $obj->text() if (not( grep { $obj->text() eq $_} ('{', '}', '[', ']') ) );
+        }elsif($nodeName eq 'name') {
+            $_->text();
+        } elsif ($nodeName eq 'separator') {
+            OPT('separator');
+        } elsif ($nodeName eq 'or_sep') {
+            # nothing
+        } elsif ($nodeName eq 'switch_part') {
+            &$TransformDOMToBNF($_);
+        } elsif ($nodeName eq 'class') {
+            my $class = $_->childNodes->[0]->text();
+            
+            $class =~ s{(^<|>$|\\.|[\]\[])}{
+                my $char = { '>' => '', '<' => '', '[' => '\\[', ']' => '\\]', '\\\\' => '\\\\'}->{$1};
+                defined $char ? $char : ($1 =~ tr/\\// && $1);
+            }ge;
+            $class = '['.$class.']';
+            $class .= $_->childNodes->[1]->text() if $_->childNodes->[1];
+            're:'.$class;
+        } elsif ($nodeName eq 'symbol') {
+            $_->text();
+        } elsif ($nodeName eq 'simple') {
+            @{&$TransformDOMToBNF($_)};
+        } elsif ($nodeName eq 'multi_def') {
+            @{&$TransformDOMToBNF($_)};
+        } elsif ($nodeName eq 'optional') {
+            my $multi_def = &$TransformDOMToBNF($_);
+            if ($multi_def->[scalar(@{$multi_def})-1] eq '...') {
+                pop @{$multi_def};
+                OPT(REPEAT(@{$multi_def}));
+            } else {
+                OPT(@{$multi_def});
+            }
+        } elsif ($nodeName eq 'switch') {
+            SWITCH(@{&$TransformDOMToBNF($_)});
+        } elsif ($nodeName eq 'def') {
+            @{&$TransformDOMToBNF($_)};
+        } else{
+            die "unknown nodeName: $nodeName";
+        }
+    } @{$nodeRoot->childNodes}];
+};
+
+$BNFSchema = {
+    syntax => ['name',OPT('separator'),'::=',OPT('separator'),'def'],    
+    name => ['re:\\w+'],    
+    class => ['re:<([^<>\\\\]|\\\\.)+>',OPT('re:\\*|\\+|\\?|\\{\\d+\\}')],
+    symbol => ['re:[^\\w\\d\\s\\[\\]{}<>\\\\|]+'],
+    separator => ['re:\\s+'],
+    simple => [
+                SWITCH(
+                    'name',
+                    'class',
+                    'symbol'
+                )
+    ],
+    multi_def => [
+        OPT('separator'), SWITCH('...',[SWITCH('simple','optional','switch'),OPT('multi_def')])
+    ],
+    optional => [
+        '[','multi_def', OPT('separator') ,']'
+
+    ],
+    keyword => [],
+    or_sep => ['|'],
+    switch_part => [OPT('separator'),SWITCH('simple','optional','switch'),OPT(REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))),OPT('separator')],
+    switch => [
+        '{','switch_part',OPT(REPEAT('or_sep','switch_part')),'}'
+    ],
+    def => [REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))]
+};
+
+my $CompileTerm;
+$CompileTerm = sub {
+    my ($term,$Schema,$cache,$ref) = @_;
+    
+    my $compiled = ref $term eq 'ARRAY' ? ($ref or []) : bless (($ref or []), ref $term);
+    
+    die new Exception("Invalid term type $term", $term, ref $term) if not grep ref $term eq $_, qw(ARRAY REPEAT SWITCH OPT);
+    
+    foreach my $element (@{$term}) {
+        if (ref $element) {
+            push @{$compiled}, &$CompileTerm($element,$Schema,$cache);
+        } else {
+            if($element =~/^\w+$/) {
+                if (exists $Schema->{$element}) {
+                    # reference
+                    my $compiledUnit;
+                    if (exists $cache->{$element}) {
+                        $compiledUnit = $cache->{$element};
+                    } else {
+                        $compiledUnit = [];
+                        $cache->{$element} = $compiledUnit;
+                        &$CompileTerm($Schema->{$element},$Schema,$cache,$compiledUnit);
+                    }
+                    
+                    push @{$compiled},{ name => $element, syntax => $compiledUnit};
+                } else {
+                    # simple word
+                    push @{$compiled}, $element;
+                }
+            } elsif ($element =~ /^re:(.*)/){
+                # regexp
+                push @{$compiled},qr/\G(?:$1)/;
+            } else {
+                # char sequence
+                push @{$compiled},$element;
+            }            
+        }
+    }
+    
+    return $compiled;
+};
+
+sub CompileBNFSchema {
+    my($Schema) = @_;
+    
+    my %Cache;
+    return &$CompileTerm($Schema->{'syntax'},$Schema,\%Cache);
+}
+
+my $CompiledSchema = CompileBNFSchema($BNFSchema);
+
+$ParseAgainstSchema = sub {
+    my ($Schema,$expression,$context,$flags,$level) = @_;
+    
+    $level = 0 if not defined $level;
+    my $dbgPrint = $flags->{debug} ? sub {
+        print "\t" x $level, @_,"\n";
+    } : sub {};
+    
+    foreach my $elem (@{$Schema}) {
+        my $type = ref $elem;
+        $expression = substr $expression,pos($expression) if $type ne 'Regexp' and pos($expression);
+        
+        if ($type eq 'HASH') {
+            $context->NewContext($elem->{'name'});
+            &$dbgPrint("$elem->{name} ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem->{'syntax'}}));
+            eval {
+                $expression = &$ParseAgainstSchema($elem->{'syntax'},$expression,$context,$flags,$level+1);
+            };
+            if ($@) {
+                $context->EndContext(0);
+                &$dbgPrint("/$elem->{name} ", "0");
+                die $@;
+            } else {
+                &$dbgPrint("/$elem->{name} ", "1");
+                $context->EndContext(1);
+            }
+        } elsif ($type eq 'ARRAY') {
+            &$dbgPrint("entering ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
+            $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1);
+            &$dbgPrint("success");
+        } elsif ($type eq 'OPT') {
+            if (defined $expression) {
+                &$dbgPrint("optional ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
+                eval {
+                    $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1);
+                };
+                if ($@) {
+                    &$dbgPrint("failed");
+                    undef $@;
+                } else {
+                    &$dbgPrint("success");
+                }
+            }
+        } elsif ($type eq 'SWITCH') {
+            my $success = 0;
+            &$dbgPrint("switch");
+            LOOP_SWITCH: foreach my $subelem (@{$elem}) {
+                eval {
+                    &$dbgPrint("\ttry ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ } @{(grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem]}));
+                    $expression = &$ParseAgainstSchema((grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem],$expression,$context,$flags,$level+1);
+                    $success = 1;
+                };
+                if ($@) {
+                    undef $@;
+                } else {
+                    last LOOP_SWITCH;
+                }
+            }
+            if ($success) {
+                &$dbgPrint("success");
+            } else {
+                &$dbgPrint("failed");
+                die ["syntax error",$expression,$elem];
+            }
+        } elsif ($type eq 'REPEAT') {
+            my $copy = [@{$elem}];
+            my $i = 0;
+            &$dbgPrint("repeat ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
+            while (1) {
+                eval {
+                    $expression = &$ParseAgainstSchema($copy,$expression,$context,$flags,$level+1);
+                    $i++;
+                };
+                if ($@) {
+                    if (not $i) {
+                        &$dbgPrint("failed");
+                        die $@;
+                    }
+                    &$dbgPrint("found $i matches");
+                    undef $@;
+                    last;
+                }
+            }
+        } elsif ($type eq 'Regexp') {
+                my $posPrev = pos($expression) || 0;
+                if ( $expression =~ m/($elem)/ ) {
+                    $context->Data($1);
+                    pos($expression) = $posPrev+length($1);
+                    &$dbgPrint("Regexp: $1 $elem ", pos($expression));
+                } else {
+                    &$dbgPrint("Regexp: $elem failed");
+                    die ["syntax error",$expression,$elem,$posPrev];
+                    pos($expression) = $posPrev;
+                }
+        } else {
+            if ((my $val = substr($expression, 0, length($elem),'')) eq $elem) {
+                &$dbgPrint("Scalar: $val");
+                $context->Data($elem);
+            } else {
+                &$dbgPrint("Scalar: failed $val expected $elem");
+                die ["syntax error",$val.$expression,$elem];
+            }
+        }
+        
+    }
+    
+    if (pos $expression) {
+        return substr $expression,(pos($expression) || 0);
+    } else {
+        return $expression;
+    }
+    
+};
+
+package BNFCompiler::DOM::Node;
+use Common;
+our @ISA = qw(Object);
+
+sub NODE_TEXT { 1 }
+sub NODE_ELEM { 2 }
+
+BEGIN {
+    DeclareProperty(nodeName => ACCESS_READ);
+    DeclareProperty(nodeType => ACCESS_READ);
+    DeclareProperty(nodeValue => ACCESS_READ);
+    DeclareProperty(childNodes => ACCESS_READ);
+    DeclareProperty(isComplex => ACCESS_READ);
+}
+
+sub CTOR {
+    my ($this,%args) = @_;
+    $args{'nodeType'} = NODE_ELEM if not $args{'nodeType'};
+    die new Exception("Invalid args. nodeName reqired.") if $args{'nodeType'} == NODE_ELEM and not $args{nodeName};
+    
+    #for speed reason
+    #$this->SUPER::CTOR(%args);
+    
+    $this->{$nodeName} = $args{'nodeName'} if $args{'nodeName'};
+    $this->{$nodeType} = $args{'nodeType'};
+    $this->{$nodeValue} = $args{'nodeValue'} if exists $args{'nodeValue'};
+    
+    $this->{$isComplex} = 0;
+}
+
+sub insertNode {
+    my ($this,$node,$pos) = @_;
+    
+    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
+    die new Exception("Invalid node type",ref $node) if ref $node ne __PACKAGE__;
+    
+    $this->{$childNodes} = [] if not $this->{$childNodes};
+    
+    $pos = scalar(@{$this->{$childNodes}}) if not defined $pos;
+    die new Exception("Index out of range",$pos) if $pos > scalar(@{$this->{$childNodes}}) or $pos < 0;
+    
+    splice @{$this->{$childNodes}},$pos,0,$node;
+    $this->{$isComplex} = 1 if not $this->{$isComplex} and $node->{$nodeType} == NODE_ELEM;
+    
+    return $node;
+}
+
+sub removeNode {
+    my ($this,$node) = @_;
+    
+    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
+    @{$this->{$childNodes}} = grep { $_ != $node } @{$this->{$childNodes}};
+    
+    return $node;
+}
+
+sub removeAt {
+    my ($this,$pos) = @_;
+    
+    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
+    die new Exception("Index out of range",$pos) if $pos >= scalar(@{$this->{$childNodes}}) or $pos < 0;
+    
+    return splice @{$this->{$childNodes}},$pos,1;
+}
+
+sub selectNodes {
+    my ($this,$name) = @_;
+    
+    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
+    
+    my @nodes = grep { $_->{$nodeType} == NODE_ELEM and $_->{$nodeName} eq $name } @{$this->{$childNodes}};
+    
+    if (wantarray) {
+        return @nodes;
+    } else {
+        return shift @nodes;
+    }
+}
+
+sub text {
+    my $this = shift;
+    
+    if ($this->{$nodeType} == NODE_TEXT) {
+        return $this->{$nodeValue};
+    } else {
+        my @texts;
+        
+        foreach my $node (@{$this->{$childNodes}}) {
+            push @texts, $node->{$nodeValue} if ($node->{$nodeType}==NODE_TEXT);
+        }
+        
+        if (wantarray) {
+            return @texts;
+        } else {
+            return join '',@texts;
+        }
+    }
+}
+
+package BNFCompiler::DOM::Builder;
+use Common;
+our @ISA=qw(Object);
+
+BEGIN {
+    DeclareProperty(Document => ACCESS_READ);
+    DeclareProperty(currentNode => ACCESS_NONE);
+    DeclareProperty(stackNodes => ACCESS_NONE);
+}
+
+sub CTOR {
+    my $this = shift;
+    
+    $this->{$Document} = new BNFCompiler::DOM::Node(nodeName => 'Document', nodeType => BNFCompiler::DOM::Node::NODE_ELEM);
+    $this->{$currentNode} = $this->{$Document};
+}
+
+sub NewContext {
+    my ($this,$contextName) = @_;
+        
+    push @{$this->{$stackNodes}},$this->{$currentNode};
+    $this->{$currentNode} = new BNFCompiler::DOM::Node(nodeName => $contextName, nodeType=> BNFCompiler::DOM::Node::NODE_ELEM);
+
+    return 1;
+}
+sub EndContext{
+    my ($this,$isNotEmpty) = @_;
+    
+    if ($isNotEmpty) {
+        my $child = $this->{$currentNode};
+        $this->{$currentNode} = pop @{$this->{$stackNodes}};
+        $this->{$currentNode}->insertNode($child);
+    } else {
+        $this->{$currentNode} = pop @{$this->{$stackNodes}};
+    }
+}
+sub Data {
+    my ($this,$data) = @_;
+    $this->{$currentNode}->insertNode(new BNFCompiler::DOM::Node(nodeType=> BNFCompiler::DOM::Node::NODE_TEXT, nodeValue => $data));
+}
+
+package BNFCompiler::DOM;
+
+sub TransformDOMToHash {
+    my ($root,$options) = @_;
+    
+    my %content;
+    
+    if (not $root->childNodes) {
+        die;
+    }
+    
+    foreach my $child (@{$root->childNodes}) {
+        if ($child->nodeType == BNFCompiler::DOM::Node::NODE_ELEM) {
+            my @newValue;
+            my $nodeName = $child->nodeName;
+            next if $nodeName eq 'separator' and $options->{'skip_spaces'};
+            if ($child->isComplex) {
+                $newValue[0] = TransformDOMToHash($child,$options);
+            } else {
+                @newValue = $child->text()
+            }
+            
+            if ($options->{'use_arrays'}) {
+                push @{$content{$nodeName}},@newValue;
+            }
+            
+            if (exists $content{$nodeName}) {
+                if (ref $content{$nodeName} eq 'ARRAY') {
+                    push @{$content{$nodeName}}, @newValue;
+                } else {
+                    $content{$nodeName} = [$content{$nodeName},@newValue];
+                }
+            } else {
+                $content{$nodeName} = $newValue[0] if scalar(@newValue) == 1;
+                $content{$nodeName} = \@newValue if scalar(@newValue) > 1;
+            }
+        } else {
+            next if $options->{'skip_text'};
+            push @{$content{'_text'}},$child->nodeValue();
+        }
+    }
+    
+    return \%content;
+}
+
+1;