view Lib/BNFCompiler.pm @ 134:44977efed303

Significant performance optimizations Fixed recursion problems due converting objects to JSON Added cache support for the templates Added discovery feature for the web methods
author wizard
date Mon, 21 Jun 2010 02:39:53 +0400
parents 16ada169ca75
children
line wrap: on
line source

package BNFCompiler;
package BNFCompiler::DOM;
package BNFCompiler::DOM::Builder;
package BNFCompiler::DOM::Node;
use strict;

package BNFCompiler::EventParser;
use strict;
use lib '.';
use Common;
our @ISA = qw(Object);

our $EventMapSchema = {
    Description => 'Parser events',
    Type => 'HASH',
    Values => 'SCALAR'
};

BEGIN {
    DeclareProperty(EventMap => ACCESS_READ);
    DeclareProperty(CompiledEvents => ACCESS_NONE);
    DeclareProperty(Handler => ACCESS_ALL);
}

sub CTOR {
    my ($this,%args) = @_;
    $this->SUPER::CTOR(%args);
}

sub Compile {
    my ($this) = @_;
    
    delete $this->{$CompiledEvents};
    while (my ($key,$val) = each %{$this->{$EventMap}}) {
        $this->{$CompiledEvents}{$key} = qr/\G$val/;
    }
    1;
}

sub Parse {
    my ($this,$data) = @_;
    
    my $StateData;
    OUTER: while(pos($data) < length($data)) {
        keys %{$this->{$CompiledEvents}};
        while (my ($event,$match) = each %{$this->{$CompiledEvents}}) {
            if ($data =~ m/($match)/gc) {
                $StateData .= $1;
                eval {
                    undef $StateData if $this->{$Handler}->($event,$StateData);
                };
                if ($@) {
                    die ["Invalid syntax","unexpected $event: $1",pos($data)];
                }
                next OUTER;
            }
        }
        die ["Invalid syntax",substr($data,pos($data),10),pos($data)];
    }
    
    return 1;
}

# небольшая уловка, поскольку ref от регулярного выражения есть Regexp, можно поставить хуки
package Regexp;
use Data::Dumper;

sub STORABLE_freeze {
    my ($obj,$cloning) = @_;
    
    return $obj;
}

sub STORABLE_attach {
    my($class, $cloning, $serialized) = @_;
    return qr/$serialized/;
}

package BNFCompiler;
use Common;
use Storable;
use Data::Dumper;
our @ISA = qw(Object);

our $BNFSchema;
my $ParseAgainstSchema;
my $TransformDOMToBNF;

BEGIN {
    DeclareProperty(Schema => ACCESS_NONE);
    DeclareProperty(SchemaCache => ACCESS_NONE);
    DeclareProperty(Transform => ACCESS_NONE);
}

sub CTOR {
    my $this = shift;
    $this->SUPER::CTOR(@_);
    
    $this->{$SchemaCache} .= '/' if ($this->{$SchemaCache} and not $this->{$SchemaCache} =~ /\/$/);
}
{
    my $compiledBNFSchema;
    sub LoadBNFSchema {
        my ($this,%args) = @_;
        
        my $CompileBNFText = sub {
            my ($this,$text) = @_;
            
            my %SchemaDOM;
            foreach my $item (split /\n{2,}/, $text) {
                next if not $item;
                $compiledBNFSchema = CompileBNFSchema($BNFSchema) if not $compiledBNFSchema;
                my $context = new BNFCompiler::DOM::Builder();
                eval {
                    my $expr = &$ParseAgainstSchema($compiledBNFSchema,$item,$context);
                    die ["Unexpected expression", $expr] if $expr;
                };
                if ($@) {
                    if (ref $@ eq 'ARRAY') {
                        die new Exception(@{$@});
                    } else {
                        die $@;
                    }
                }
                
                $SchemaDOM{$context->Document->selectNodes('name')->text()} = &$TransformDOMToBNF($context->Document->selectNodes('def'));
                
            }
            
            $SchemaDOM{'separator'} = ['re:\\s+'];
            $this->{$Schema} = CompileBNFSchema(\%SchemaDOM);
        };
        
        my $text;
        if ($args{'file'}) {
            
            my $fnameCached;
            if ($this->{$SchemaCache}) {
                my $fname = $args{'file'};
                $fname =~ tr/\//_/;
                $fnameCached = $this->{$SchemaCache}.$fname.'.cbs';
                if ( -e $fnameCached && -f $fnameCached && ( -M $args{'file'} >= -M $fnameCached )) {
                    my $compiledSchema = retrieve($fnameCached);
                    if ($compiledSchema) {
                        $this->{$Schema} = $compiledSchema;
                        return 1;
                    } else {
                        unlink $fnameCached;
                    }
                }
            }
            open my $hFile, '<', $args{'file'} or die new Exception("Failed to open file",$args{'file'},$!);
            local $/ = undef;
            my $text = <$hFile>;
            
            $this->$CompileBNFText($text);
            
            if ($fnameCached) {
                store($this->{$Schema},$fnameCached);
            }
        } elsif ($args{'Schema'}) {
            $this->{$Schema} = CompileBNFSchema($args{'Schema'});
            return 1;
        } elsif ($args{'text'}) {
            $this->$CompileBNFText( $args{'text'} );
        } else {
            die new Exception("'file', 'text' or 'Schema' parameter required");
        }
        
    }
}

sub Parse {
    my ($this, $string, %flags) = @_;
    
    my $context = new BNFCompiler::DOM::Builder;
    
    eval {
        my $err;
        $err = &$ParseAgainstSchema($this->{$Schema},$string,$context,\%flags) and die new Exception('Failed to parse',substr($err,0,80).' ...');
    };
    if ($@) {
        if (ref $@ eq 'ARRAY') {
            die new Exception(@{$@});
        } else {
            die $@;
        }
    }
    if (not $this->{$Transform}) {
        return $context->Document;
    } else {
        return $this->{$Transform}->($context->Document);
    }
}

sub Dispose {
    my ($this) = shift;
    CleanSchema($this->{$Schema});
    delete @$this{$Schema, $Transform};
    $this->SUPER::Dispose;
}

sub CleanSchema {
    my ($schema,$table) = @_;
    
    UNIVERSAL::isa($schema,'ARRAY') or return;
    $table or $table = { $schema, 1};
    
    for(my $i=0; $i<@$schema;$i++) {
        my $item = $schema->[$i];
        if (ref $item) {
            next if $table->{$item};
            $table->{$item} = 1;
            if (UNIVERSAL::isa($item,'ARRAY')) {
                CleanSchema($item,$table);
            } elsif( UNIVERSAL::isa($item,'HASH')) {
                CleanSchema($item->{'syntax'},$table);
            }
            undef $schema->[$i];
        }
    }
}


sub OPT {
    return bless [@_], 'OPT';
}

sub SWITCH {
    return bless [@_], 'SWITCH';
}

sub REPEAT {
    return bless [@_], 'REPEAT';
}

$TransformDOMToBNF = sub {
    my ($nodeRoot) = @_;
    
    return [grep $_, map {
        my $nodeName = $_->nodeName;
        if (not $nodeName ){
            my $obj = $_;
            $obj->text() if (not( grep { $obj->text() eq $_} ('{', '}', '[', ']') ) );
        }elsif($nodeName eq 'name') {
            $_->text();
        } elsif ($nodeName eq 'separator') {
            OPT('separator');
        } elsif ($nodeName eq 'or_sep') {
            # nothing
        } elsif ($nodeName eq 'switch_part') {
            &$TransformDOMToBNF($_);
        } elsif ($nodeName eq 'class') {
            my $class = $_->childNodes->[0]->text();
            
            $class =~ s{(^<|>$|\\.|[\]\[])}{
                my $char = { '>' => '', '<' => '', '[' => '\\[', ']' => '\\]', '\\\\' => '\\\\'}->{$1};
                defined $char ? $char : ($1 =~ tr/\\// && $1);
            }ge;
            $class = '['.$class.']';
            $class .= $_->childNodes->[1]->text() if $_->childNodes->[1];
            're:'.$class;
        } elsif ($nodeName eq 'symbol') {
            $_->text();
        } elsif ($nodeName eq 'simple') {
            @{&$TransformDOMToBNF($_)};
        } elsif ($nodeName eq 'multi_def') {
            @{&$TransformDOMToBNF($_)};
        } elsif ($nodeName eq 'optional') {
            my $multi_def = &$TransformDOMToBNF($_);
            if ($multi_def->[scalar(@{$multi_def})-1] eq '...') {
                pop @{$multi_def};
                OPT(REPEAT(@{$multi_def}));
            } else {
                OPT(@{$multi_def});
            }
        } elsif ($nodeName eq 'switch') {
            SWITCH(@{&$TransformDOMToBNF($_)});
        } elsif ($nodeName eq 'def') {
            @{&$TransformDOMToBNF($_)};
        } else{
            die "unknown nodeName: $nodeName";
        }
    } @{$nodeRoot->childNodes}];
};

$BNFSchema = {
    syntax => ['name',OPT('separator'),'::=',OPT('separator'),'def'],    
    name => ['re:\\w+'],    
    class => ['re:<([^<>\\\\]|\\\\.)+>',OPT('re:\\*|\\+|\\?|\\{\\d+\\}')],
    symbol => ['re:[^\\w\\d\\s\\[\\]{}<>\\\\|]+'],
    separator => ['re:\\s+'],
    simple => [
                SWITCH(
                    'name',
                    'class',
                    'symbol'
                )
    ],
    multi_def => [
        OPT('separator'), SWITCH('...',[SWITCH('simple','optional','switch'),OPT('multi_def')])
    ],
    optional => [
        '[','multi_def', OPT('separator') ,']'

    ],
    keyword => [],
    or_sep => ['|'],
    switch_part => [OPT('separator'),SWITCH('simple','optional','switch'),OPT(REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))),OPT('separator')],
    switch => [
        '{','switch_part',OPT(REPEAT('or_sep','switch_part')),'}'
    ],
    def => [REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))]
};

my $CompileTerm;
$CompileTerm = sub {
    my ($term,$Schema,$cache,$ref) = @_;
    
    my $compiled = ref $term eq 'ARRAY' ? ($ref or []) : bless (($ref or []), ref $term);
    
    die new Exception("Invalid term type $term", $term, ref $term) if not grep ref $term eq $_, qw(ARRAY REPEAT SWITCH OPT);
    
    foreach my $element (@{$term}) {
        if (ref $element) {
            push @{$compiled}, &$CompileTerm($element,$Schema,$cache);
        } else {
            if($element =~/^\w+$/) {
                if (exists $Schema->{$element}) {
                    # reference
                    my $compiledUnit;
                    if (exists $cache->{$element}) {
                        $compiledUnit = $cache->{$element};
                    } else {
                        $compiledUnit = [];
                        $cache->{$element} = $compiledUnit;
                        &$CompileTerm($Schema->{$element},$Schema,$cache,$compiledUnit);
                    }
                    
                    push @{$compiled},{ name => $element, syntax => $compiledUnit};
                } else {
                    # simple word
                    push @{$compiled}, $element;
                }
            } elsif ($element =~ /^re:(.*)/){
                # regexp
                push @{$compiled},qr/\G(?:$1)/;
            } else {
                # char sequence
                push @{$compiled},$element;
            }            
        }
    }
    
    return $compiled;
};

sub CompileBNFSchema {
    my($Schema) = @_;
    
    my %Cache;
    return &$CompileTerm($Schema->{'syntax'},$Schema,\%Cache);
}

my $CompiledSchema = CompileBNFSchema($BNFSchema);

$ParseAgainstSchema = sub {
    my ($Schema,$expression,$context,$flags,$level) = @_;
    
    $level = 0 if not defined $level;
    my $dbgPrint = $flags->{debug} ? sub {
        print "\t" x $level, @_,"\n";
    } : sub {};
    
    foreach my $elem (@{$Schema}) {
        my $type = ref $elem;
        $expression = substr $expression,pos($expression) if $type ne 'Regexp' and pos($expression);
        
        if ($type eq 'HASH') {
            $context->NewContext($elem->{'name'});
            &$dbgPrint("$elem->{name} ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem->{'syntax'}}));
            eval {
                $expression = &$ParseAgainstSchema($elem->{'syntax'},$expression,$context,$flags,$level+1);
            };
            if ($@) {
                $context->EndContext(0);
                &$dbgPrint("/$elem->{name} ", "0");
                die $@;
            } else {
                &$dbgPrint("/$elem->{name} ", "1");
                $context->EndContext(1);
            }
        } elsif ($type eq 'ARRAY') {
            &$dbgPrint("entering ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
            $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1);
            &$dbgPrint("success");
        } elsif ($type eq 'OPT') {
            if (defined $expression) {
                &$dbgPrint("optional ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
                eval {
                    $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1);
                };
                if ($@) {
                    &$dbgPrint("failed");
                    undef $@;
                } else {
                    &$dbgPrint("success");
                }
            }
        } elsif ($type eq 'SWITCH') {
            my $success = 0;
            &$dbgPrint("switch");
            LOOP_SWITCH: foreach my $subelem (@{$elem}) {
                eval {
                    &$dbgPrint("\ttry ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ } @{(grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem]}));
                    $expression = &$ParseAgainstSchema((grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem],$expression,$context,$flags,$level+1);
                    $success = 1;
                };
                if ($@) {
                    undef $@;
                } else {
                    last LOOP_SWITCH;
                }
            }
            if ($success) {
                &$dbgPrint("success");
            } else {
                &$dbgPrint("failed");
                die ["syntax error",$expression,$elem];
            }
        } elsif ($type eq 'REPEAT') {
            my $copy = [@{$elem}];
            my $i = 0;
            &$dbgPrint("repeat ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
            while (1) {
                eval {
                    $expression = &$ParseAgainstSchema($copy,$expression,$context,$flags,$level+1);
                    $i++;
                };
                if ($@) {
                    if (not $i) {
                        &$dbgPrint("failed");
                        die $@;
                    }
                    &$dbgPrint("found $i matches");
                    undef $@;
                    last;
                }
            }
        } elsif ($type eq 'Regexp') {
                my $posPrev = pos($expression) || 0;
                if ( $expression =~ m/($elem)/ ) {
                    $context->Data($1);
                    pos($expression) = $posPrev+length($1);
                    &$dbgPrint("Regexp: $1 $elem ", pos($expression));
                } else {
                    &$dbgPrint("Regexp: $elem failed");
                    die ["syntax error",$expression,$elem,$posPrev];
                    pos($expression) = $posPrev;
                }
        } else {
            if ((my $val = substr($expression, 0, length($elem),'')) eq $elem) {
                &$dbgPrint("Scalar: $val");
                $context->Data($elem);
            } else {
                &$dbgPrint("Scalar: failed $val expected $elem");
                die ["syntax error",$val.$expression,$elem];
            }
        }
        
    }
    
    if (pos $expression) {
        return substr $expression,(pos($expression) || 0);
    } else {
        return $expression;
    }
    
};

package BNFCompiler::DOM::Node;
use Common;
our @ISA = qw(Object);

sub NODE_TEXT { 1 }
sub NODE_ELEM { 2 }

BEGIN {
    DeclareProperty(nodeName => ACCESS_READ);
    DeclareProperty(nodeType => ACCESS_READ);
    DeclareProperty(nodeValue => ACCESS_READ);
    DeclareProperty(childNodes => ACCESS_READ);
    DeclareProperty(isComplex => ACCESS_READ);
}

sub CTOR {
    my ($this,%args) = @_;
    $args{'nodeType'} = NODE_ELEM if not $args{'nodeType'};
    die new Exception("Invalid args. nodeName reqired.") if $args{'nodeType'} == NODE_ELEM and not $args{nodeName};
    
    #for speed reason
    #$this->SUPER::CTOR(%args);
    
    $this->{$nodeName} = $args{'nodeName'} if $args{'nodeName'};
    $this->{$nodeType} = $args{'nodeType'};
    $this->{$nodeValue} = $args{'nodeValue'} if exists $args{'nodeValue'};
    
    $this->{$isComplex} = 0;
}

sub insertNode {
    my ($this,$node,$pos) = @_;
    
    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
    die new Exception("Invalid node type",ref $node) if ref $node ne __PACKAGE__;
    
    $this->{$childNodes} = [] if not $this->{$childNodes};
    
    $pos = scalar(@{$this->{$childNodes}}) if not defined $pos;
    die new Exception("Index out of range",$pos) if $pos > scalar(@{$this->{$childNodes}}) or $pos < 0;
    
    splice @{$this->{$childNodes}},$pos,0,$node;
    $this->{$isComplex} = 1 if not $this->{$isComplex} and $node->{$nodeType} == NODE_ELEM;
    
    return $node;
}

sub removeNode {
    my ($this,$node) = @_;
    
    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
    @{$this->{$childNodes}} = grep { $_ != $node } @{$this->{$childNodes}};
    
    return $node;
}

sub removeAt {
    my ($this,$pos) = @_;
    
    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
    die new Exception("Index out of range",$pos) if $pos >= scalar(@{$this->{$childNodes}}) or $pos < 0;
    
    return splice @{$this->{$childNodes}},$pos,1;
}

sub selectNodes {
    my ($this,$name) = @_;
    
    die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
    
    my @nodes = grep { $_->{$nodeType} == NODE_ELEM and $_->{$nodeName} eq $name } @{$this->{$childNodes}};
    
    if (wantarray) {
        return @nodes;
    } else {
        return shift @nodes;
    }
}

sub text {
    my $this = shift;
    
    if ($this->{$nodeType} == NODE_TEXT) {
        return $this->{$nodeValue};
    } else {
        my @texts;
        
        foreach my $node (@{$this->{$childNodes}}) {
            push @texts, $node->{$nodeValue} if ($node->{$nodeType}==NODE_TEXT);
        }
        
        if (wantarray) {
            return @texts;
        } else {
            return join '',@texts;
        }
    }
}

package BNFCompiler::DOM::Builder;
use Common;
our @ISA=qw(Object);

BEGIN {
    DeclareProperty(Document => ACCESS_READ);
    DeclareProperty(currentNode => ACCESS_NONE);
    DeclareProperty(stackNodes => ACCESS_NONE);
}

sub CTOR {
    my $this = shift;
    
    $this->{$Document} = new BNFCompiler::DOM::Node(nodeName => 'Document', nodeType => BNFCompiler::DOM::Node::NODE_ELEM);
    $this->{$currentNode} = $this->{$Document};
}

sub NewContext {
    my ($this,$contextName) = @_;
        
    push @{$this->{$stackNodes}},$this->{$currentNode};
    $this->{$currentNode} = new BNFCompiler::DOM::Node(nodeName => $contextName, nodeType=> BNFCompiler::DOM::Node::NODE_ELEM);

    return 1;
}
sub EndContext{
    my ($this,$isNotEmpty) = @_;
    
    if ($isNotEmpty) {
        my $child = $this->{$currentNode};
        $this->{$currentNode} = pop @{$this->{$stackNodes}};
        $this->{$currentNode}->insertNode($child);
    } else {
        $this->{$currentNode} = pop @{$this->{$stackNodes}};
    }
}
sub Data {
    my ($this,$data) = @_;
    $this->{$currentNode}->insertNode(new BNFCompiler::DOM::Node(nodeType=> BNFCompiler::DOM::Node::NODE_TEXT, nodeValue => $data));
}

package BNFCompiler::DOM;

sub TransformDOMToHash {
    my ($root,$options) = @_;
    
    my %content;
    
    if (not $root->childNodes) {
        die;
    }
    
    foreach my $child (@{$root->childNodes}) {
        if ($child->nodeType == BNFCompiler::DOM::Node::NODE_ELEM) {
            my @newValue;
            my $nodeName = $child->nodeName;
            next if $nodeName eq 'separator' and $options->{'skip_spaces'};
            if ($child->isComplex) {
                $newValue[0] = TransformDOMToHash($child,$options);
            } else {
                @newValue = $child->text()
            }
            
            if ($options->{'use_arrays'}) {
                push @{$content{$nodeName}},@newValue;
            }
            
            if (exists $content{$nodeName}) {
                if (ref $content{$nodeName} eq 'ARRAY') {
                    push @{$content{$nodeName}}, @newValue;
                } else {
                    $content{$nodeName} = [$content{$nodeName},@newValue];
                }
            } else {
                $content{$nodeName} = $newValue[0] if scalar(@newValue) == 1;
                $content{$nodeName} = \@newValue if scalar(@newValue) > 1;
            }
        } else {
            next if $options->{'skip_text'};
            push @{$content{'_text'}},$child->nodeValue();
        }
    }
    
    return \%content;
}

1;