comparison Lib/BNFCompiler.pm @ 0:03e58a454b20

Создан репозитарий
author Sergey
date Tue, 14 Jul 2009 12:54:37 +0400
parents
children 16ada169ca75
comparison
equal deleted inserted replaced
-1:000000000000 0:03e58a454b20
1 package BNFCompiler;
2 package BNFCompiler::DOM;
3 package BNFCompiler::DOM::Builder;
4 package BNFCompiler::DOM::Node;
5 use strict;
6
7 package BNFCompiler::EventParser;
8 use strict;
9 use lib '.';
10 use Common;
11 our @ISA = qw(Object);
12
13 our $EventMapSchema = {
14 Description => 'Parser events',
15 Type => 'HASH',
16 Values => 'SCALAR'
17 };
18
19 BEGIN {
20 DeclareProperty(EventMap => ACCESS_READ);
21 DeclareProperty(CompiledEvents => ACCESS_NONE);
22 DeclareProperty(Handler => ACCESS_ALL);
23 }
24
25 sub CTOR {
26 my ($this,%args) = @_;
27 $this->SUPER::CTOR(%args);
28 }
29
30 sub Compile {
31 my ($this) = @_;
32
33 delete $this->{$CompiledEvents};
34 while (my ($key,$val) = each %{$this->{$EventMap}}) {
35 $this->{$CompiledEvents}{$key} = qr/\G$val/;
36 }
37 1;
38 }
39
40 sub Parse {
41 my ($this,$data) = @_;
42
43 my $StateData;
44 OUTER: while(pos($data) < length($data)) {
45 keys %{$this->{$CompiledEvents}};
46 while (my ($event,$match) = each %{$this->{$CompiledEvents}}) {
47 if ($data =~ m/($match)/gc) {
48 $StateData .= $1;
49 eval {
50 undef $StateData if $this->{$Handler}->($event,$StateData);
51 };
52 if ($@) {
53 die ["Invalid syntax","unexpected $event: $1",pos($data)];
54 }
55 next OUTER;
56 }
57 }
58 die ["Invalid syntax",substr($data,pos($data),10),pos($data)];
59 }
60
61 return 1;
62 }
63
64 # íåáîëüøàÿ óëîâêà, ïîñêîëüêó ref îò ðåãóëÿðíîãî âûðàæåíèÿ åñòü Regexp, ìîæíî ïîñòàâèòü õóêè
65 package Regexp;
66 use Data::Dumper;
67
68 sub STORABLE_freeze {
69 my ($obj,$cloning) = @_;
70
71 return $obj;
72 }
73
74 sub STORABLE_attach {
75 my($class, $cloning, $serialized) = @_;
76 return qr/$serialized/;
77 }
78
79 package BNFCompiler;
80 use Common;
81 use Storable;
82 use Data::Dumper;
83 our @ISA = qw(Object);
84
85 our $BNFSchema;
86 my $ParseAgainstSchema;
87 my $TransformDOMToBNF;
88
89 BEGIN {
90 DeclareProperty(Schema => ACCESS_NONE);
91 DeclareProperty(SchemaCache => ACCESS_NONE);
92 DeclareProperty(Transform => ACCESS_NONE);
93 }
94
95 sub CTOR {
96 my $this = shift;
97 $this->SUPER::CTOR(@_);
98
99 $this->{$SchemaCache} .= '/' if ($this->{$SchemaCache} and not $this->{$SchemaCache} =~ /\/$/);
100 }
101 {
102 my $compiledBNFSchema;
103 sub LoadBNFSchema {
104 my ($this,%args) = @_;
105
106 my $CompileBNFText = sub {
107 my ($this,$text) = @_;
108
109 my %SchemaDOM;
110 foreach my $item (split /\n{2,}/, $text) {
111 next if not $item;
112 $compiledBNFSchema = CompileBNFSchema($BNFSchema) if not $compiledBNFSchema;
113 my $context = new BNFCompiler::DOM::Builder();
114 eval {
115 my $expr = &$ParseAgainstSchema($compiledBNFSchema,$item,$context);
116 die ["Unexpected expression", $expr] if $expr;
117 };
118 if ($@) {
119 if (ref $@ eq 'ARRAY') {
120 die new Exception(@{$@});
121 } else {
122 die $@;
123 }
124 }
125
126 $SchemaDOM{$context->Document->selectNodes('name')->text()} = &$TransformDOMToBNF($context->Document->selectNodes('def'));
127
128 }
129
130 $SchemaDOM{'separator'} = ['re:\\s+'];
131 $this->{$Schema} = CompileBNFSchema(\%SchemaDOM);
132 };
133
134 my $text;
135 if ($args{'file'}) {
136
137 my $fnameCached;
138 if ($this->{$SchemaCache}) {
139 my $fname = $args{'file'};
140 $fname =~ tr/\//_/;
141 $fnameCached = $this->{$SchemaCache}.$fname.'.cbs';
142 if ( -e $fnameCached && -f $fnameCached && ( -M $args{'file'} >= -M $fnameCached )) {
143 my $compiledSchema = retrieve($fnameCached);
144 if ($compiledSchema) {
145 $this->{$Schema} = $compiledSchema;
146 return 1;
147 } else {
148 unlink $fnameCached;
149 }
150 }
151 }
152 open my $hFile, '<', $args{'file'} or die new Exception("Failed to open file",$args{'file'},$!);
153 local $/ = undef;
154 my $text = <$hFile>;
155
156 $this->$CompileBNFText($text);
157
158 if ($fnameCached) {
159 store($this->{$Schema},$fnameCached);
160 }
161 } elsif ($args{'Schema'}) {
162 $this->{$Schema} = CompileBNFSchema($args{'Schema'});
163 return 1;
164 } elsif ($args{'text'}) {
165 $this->$CompileBNFText( $args{'text'} );
166 } else {
167 die new Exception("'file', 'text' or 'Schema' parameter required");
168 }
169
170 }
171 }
172
173 sub Parse {
174 my ($this, $string, %flags) = @_;
175
176 my $context = new BNFCompiler::DOM::Builder;
177
178 eval {
179 my $err;
180 $err = &$ParseAgainstSchema($this->{$Schema},$string,$context,\%flags) and die new Exception('Failed to parse',substr($err,0,80).' ...');
181 };
182 if ($@) {
183 if (ref $@ eq 'ARRAY') {
184 die new Exception(@{$@});
185 } else {
186 die $@;
187 }
188 }
189 if (not $this->{$Transform}) {
190 return $context->Document;
191 } else {
192 return $this->{$Transform}->($context->Document);
193 }
194 }
195
196 sub Dispose {
197 my ($this) = shift;
198 CleanSchema($this->{$Schema});
199 delete @$this{$Schema, $Transform};
200 $this->SUPER::Dispose;
201 }
202
203 sub CleanSchema {
204 my ($schema,$table) = @_;
205
206 UNIVERSAL::isa($schema,'ARRAY') or return;
207 $table or $table = { $schema, 1};
208
209 for(my $i=0; $i<@$schema;$i++) {
210 my $item = $schema->[$i];
211 if (ref $item) {
212 next if $table->{$item};
213 $table->{$item} = 1;
214 if (UNIVERSAL::isa($item,'ARRAY')) {
215 CleanSchema($item,$table);
216 } elsif( UNIVERSAL::isa($item,'HASH')) {
217 CleanSchema($item->{'syntax'},$table);
218 }
219 undef $schema->[$i];
220 }
221 }
222 }
223
224
225 sub OPT {
226 return bless [@_], 'OPT';
227 }
228
229 sub SWITCH {
230 return bless [@_], 'SWITCH';
231 }
232
233 sub REPEAT {
234 return bless [@_], 'REPEAT';
235 }
236
237 $TransformDOMToBNF = sub {
238 my ($nodeRoot) = @_;
239
240 return [grep $_, map {
241 my $nodeName = $_->nodeName;
242 if (not $nodeName ){
243 my $obj = $_;
244 $obj->text() if (not( grep { $obj->text() eq $_} ('{', '}', '[', ']') ) );
245 }elsif($nodeName eq 'name') {
246 $_->text();
247 } elsif ($nodeName eq 'separator') {
248 OPT('separator');
249 } elsif ($nodeName eq 'or_sep') {
250 # nothing
251 } elsif ($nodeName eq 'switch_part') {
252 &$TransformDOMToBNF($_);
253 } elsif ($nodeName eq 'class') {
254 my $class = $_->childNodes->[0]->text();
255
256 $class =~ s{(^<|>$|\\.|[\]\[])}{
257 my $char = { '>' => '', '<' => '', '[' => '\\[', ']' => '\\]', '\\\\' => '\\\\'}->{$1};
258 defined $char ? $char : ($1 =~ tr/\\// && $1);
259 }ge;
260 $class = '['.$class.']';
261 $class .= $_->childNodes->[1]->text() if $_->childNodes->[1];
262 're:'.$class;
263 } elsif ($nodeName eq 'symbol') {
264 $_->text();
265 } elsif ($nodeName eq 'simple') {
266 @{&$TransformDOMToBNF($_)};
267 } elsif ($nodeName eq 'multi_def') {
268 @{&$TransformDOMToBNF($_)};
269 } elsif ($nodeName eq 'optional') {
270 my $multi_def = &$TransformDOMToBNF($_);
271 if ($multi_def->[scalar(@{$multi_def})-1] eq '...') {
272 pop @{$multi_def};
273 OPT(REPEAT(@{$multi_def}));
274 } else {
275 OPT(@{$multi_def});
276 }
277 } elsif ($nodeName eq 'switch') {
278 SWITCH(@{&$TransformDOMToBNF($_)});
279 } elsif ($nodeName eq 'def') {
280 @{&$TransformDOMToBNF($_)};
281 } else{
282 die "unknown nodeName: $nodeName";
283 }
284 } @{$nodeRoot->childNodes}];
285 };
286
287 $BNFSchema = {
288 syntax => ['name',OPT('separator'),'::=',OPT('separator'),'def'],
289 name => ['re:\\w+'],
290 class => ['re:<([^<>\\\\]|\\\\.)+>',OPT('re:\\*|\\+|\\?|\\{\\d+\\}')],
291 symbol => ['re:[^\\w\\d\\s\\[\\]{}<>\\\\|]+'],
292 separator => ['re:\\s+'],
293 simple => [
294 SWITCH(
295 'name',
296 'class',
297 'symbol'
298 )
299 ],
300 multi_def => [
301 OPT('separator'), SWITCH('...',[SWITCH('simple','optional','switch'),OPT('multi_def')])
302 ],
303 optional => [
304 '[','multi_def', OPT('separator') ,']'
305
306 ],
307 keyword => [],
308 or_sep => ['|'],
309 switch_part => [OPT('separator'),SWITCH('simple','optional','switch'),OPT(REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))),OPT('separator')],
310 switch => [
311 '{','switch_part',OPT(REPEAT('or_sep','switch_part')),'}'
312 ],
313 def => [REPEAT(OPT('separator'),SWITCH('simple','optional','switch'))]
314 };
315
316 my $CompileTerm;
317 $CompileTerm = sub {
318 my ($term,$Schema,$cache,$ref) = @_;
319
320 my $compiled = ref $term eq 'ARRAY' ? ($ref or []) : bless (($ref or []), ref $term);
321
322 die new Exception("Invalid term type $term", $term, ref $term) if not grep ref $term eq $_, qw(ARRAY REPEAT SWITCH OPT);
323
324 foreach my $element (@{$term}) {
325 if (ref $element) {
326 push @{$compiled}, &$CompileTerm($element,$Schema,$cache);
327 } else {
328 if($element =~/^\w+$/) {
329 if (exists $Schema->{$element}) {
330 # reference
331 my $compiledUnit;
332 if (exists $cache->{$element}) {
333 $compiledUnit = $cache->{$element};
334 } else {
335 $compiledUnit = [];
336 $cache->{$element} = $compiledUnit;
337 &$CompileTerm($Schema->{$element},$Schema,$cache,$compiledUnit);
338 }
339
340 push @{$compiled},{ name => $element, syntax => $compiledUnit};
341 } else {
342 # simple word
343 push @{$compiled}, $element;
344 }
345 } elsif ($element =~ /^re:(.*)/){
346 # regexp
347 push @{$compiled},qr/\G(?:$1)/;
348 } else {
349 # char sequence
350 push @{$compiled},$element;
351 }
352 }
353 }
354
355 return $compiled;
356 };
357
358 sub CompileBNFSchema {
359 my($Schema) = @_;
360
361 my %Cache;
362 return &$CompileTerm($Schema->{'syntax'},$Schema,\%Cache);
363 }
364
365 my $CompiledSchema = CompileBNFSchema($BNFSchema);
366
367 $ParseAgainstSchema = sub {
368 my ($Schema,$expression,$context,$flags,$level) = @_;
369
370 $level = 0 if not defined $level;
371 my $dbgPrint = $flags->{debug} ? sub {
372 print "\t" x $level, @_,"\n";
373 } : sub {};
374
375 foreach my $elem (@{$Schema}) {
376 my $type = ref $elem;
377 $expression = substr $expression,pos($expression) if $type ne 'Regexp' and pos($expression);
378
379 if ($type eq 'HASH') {
380 $context->NewContext($elem->{'name'});
381 &$dbgPrint("$elem->{name} ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem->{'syntax'}}));
382 eval {
383 $expression = &$ParseAgainstSchema($elem->{'syntax'},$expression,$context,$flags,$level+1);
384 };
385 if ($@) {
386 $context->EndContext(0);
387 &$dbgPrint("/$elem->{name} ", "0");
388 die $@;
389 } else {
390 &$dbgPrint("/$elem->{name} ", "1");
391 $context->EndContext(1);
392 }
393 } elsif ($type eq 'ARRAY') {
394 &$dbgPrint("entering ", join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
395 $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1);
396 &$dbgPrint("success");
397 } elsif ($type eq 'OPT') {
398 if (defined $expression) {
399 &$dbgPrint("optional ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
400 eval {
401 $expression = &$ParseAgainstSchema($elem,$expression,$context,$flags,$level+1);
402 };
403 if ($@) {
404 &$dbgPrint("failed");
405 undef $@;
406 } else {
407 &$dbgPrint("success");
408 }
409 }
410 } elsif ($type eq 'SWITCH') {
411 my $success = 0;
412 &$dbgPrint("switch");
413 LOOP_SWITCH: foreach my $subelem (@{$elem}) {
414 eval {
415 &$dbgPrint("\ttry ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ } @{(grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem]}));
416 $expression = &$ParseAgainstSchema((grep ref $subelem eq $_, qw(ARRAY SWITCH OPT REPEAT)) ? $subelem : [$subelem],$expression,$context,$flags,$level+1);
417 $success = 1;
418 };
419 if ($@) {
420 undef $@;
421 } else {
422 last LOOP_SWITCH;
423 }
424 }
425 if ($success) {
426 &$dbgPrint("success");
427 } else {
428 &$dbgPrint("failed");
429 die ["syntax error",$expression,$elem];
430 }
431 } elsif ($type eq 'REPEAT') {
432 my $copy = [@{$elem}];
433 my $i = 0;
434 &$dbgPrint("repeat ",join(',',map { ref $_ eq 'HASH' ? $_->{name} : $_ }@{$elem}));
435 while (1) {
436 eval {
437 $expression = &$ParseAgainstSchema($copy,$expression,$context,$flags,$level+1);
438 $i++;
439 };
440 if ($@) {
441 if (not $i) {
442 &$dbgPrint("failed");
443 die $@;
444 }
445 &$dbgPrint("found $i matches");
446 undef $@;
447 last;
448 }
449 }
450 } elsif ($type eq 'Regexp') {
451 my $posPrev = pos($expression) || 0;
452 if ( $expression =~ m/($elem)/ ) {
453 $context->Data($1);
454 pos($expression) = $posPrev+length($1);
455 &$dbgPrint("Regexp: $1 $elem ", pos($expression));
456 } else {
457 &$dbgPrint("Regexp: $elem failed");
458 die ["syntax error",$expression,$elem,$posPrev];
459 pos($expression) = $posPrev;
460 }
461 } else {
462 if ((my $val = substr($expression, 0, length($elem),'')) eq $elem) {
463 &$dbgPrint("Scalar: $val");
464 $context->Data($elem);
465 } else {
466 &$dbgPrint("Scalar: failed $val expected $elem");
467 die ["syntax error",$val.$expression,$elem];
468 }
469 }
470
471 }
472
473 if (pos $expression) {
474 return substr $expression,(pos($expression) || 0);
475 } else {
476 return $expression;
477 }
478
479 };
480
481 package BNFCompiler::DOM::Node;
482 use Common;
483 our @ISA = qw(Object);
484
485 sub NODE_TEXT { 1 }
486 sub NODE_ELEM { 2 }
487
488 BEGIN {
489 DeclareProperty(nodeName => ACCESS_READ);
490 DeclareProperty(nodeType => ACCESS_READ);
491 DeclareProperty(nodeValue => ACCESS_READ);
492 DeclareProperty(childNodes => ACCESS_READ);
493 DeclareProperty(isComplex => ACCESS_READ);
494 }
495
496 sub CTOR {
497 my ($this,%args) = @_;
498 $args{'nodeType'} = NODE_ELEM if not $args{'nodeType'};
499 die new Exception("Invalid args. nodeName reqired.") if $args{'nodeType'} == NODE_ELEM and not $args{nodeName};
500
501 #for speed reason
502 #$this->SUPER::CTOR(%args);
503
504 $this->{$nodeName} = $args{'nodeName'} if $args{'nodeName'};
505 $this->{$nodeType} = $args{'nodeType'};
506 $this->{$nodeValue} = $args{'nodeValue'} if exists $args{'nodeValue'};
507
508 $this->{$isComplex} = 0;
509 }
510
511 sub insertNode {
512 my ($this,$node,$pos) = @_;
513
514 die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
515 die new Exception("Invalid node type",ref $node) if ref $node ne __PACKAGE__;
516
517 $this->{$childNodes} = [] if not $this->{$childNodes};
518
519 $pos = scalar(@{$this->{$childNodes}}) if not defined $pos;
520 die new Exception("Index out of range",$pos) if $pos > scalar(@{$this->{$childNodes}}) or $pos < 0;
521
522 splice @{$this->{$childNodes}},$pos,0,$node;
523 $this->{$isComplex} = 1 if not $this->{$isComplex} and $node->{$nodeType} == NODE_ELEM;
524
525 return $node;
526 }
527
528 sub removeNode {
529 my ($this,$node) = @_;
530
531 die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
532 @{$this->{$childNodes}} = grep { $_ != $node } @{$this->{$childNodes}};
533
534 return $node;
535 }
536
537 sub removeAt {
538 my ($this,$pos) = @_;
539
540 die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
541 die new Exception("Index out of range",$pos) if $pos >= scalar(@{$this->{$childNodes}}) or $pos < 0;
542
543 return splice @{$this->{$childNodes}},$pos,1;
544 }
545
546 sub selectNodes {
547 my ($this,$name) = @_;
548
549 die new Exception("Invalid operation on text node.") if $this->{$nodeType} != NODE_ELEM;
550
551 my @nodes = grep { $_->{$nodeType} == NODE_ELEM and $_->{$nodeName} eq $name } @{$this->{$childNodes}};
552
553 if (wantarray) {
554 return @nodes;
555 } else {
556 return shift @nodes;
557 }
558 }
559
560 sub text {
561 my $this = shift;
562
563 if ($this->{$nodeType} == NODE_TEXT) {
564 return $this->{$nodeValue};
565 } else {
566 my @texts;
567
568 foreach my $node (@{$this->{$childNodes}}) {
569 push @texts, $node->{$nodeValue} if ($node->{$nodeType}==NODE_TEXT);
570 }
571
572 if (wantarray) {
573 return @texts;
574 } else {
575 return join '',@texts;
576 }
577 }
578 }
579
580 package BNFCompiler::DOM::Builder;
581 use Common;
582 our @ISA=qw(Object);
583
584 BEGIN {
585 DeclareProperty(Document => ACCESS_READ);
586 DeclareProperty(currentNode => ACCESS_NONE);
587 DeclareProperty(stackNodes => ACCESS_NONE);
588 }
589
590 sub CTOR {
591 my $this = shift;
592
593 $this->{$Document} = new BNFCompiler::DOM::Node(nodeName => 'Document', nodeType => BNFCompiler::DOM::Node::NODE_ELEM);
594 $this->{$currentNode} = $this->{$Document};
595 }
596
597 sub NewContext {
598 my ($this,$contextName) = @_;
599
600 push @{$this->{$stackNodes}},$this->{$currentNode};
601 $this->{$currentNode} = new BNFCompiler::DOM::Node(nodeName => $contextName, nodeType=> BNFCompiler::DOM::Node::NODE_ELEM);
602
603 return 1;
604 }
605 sub EndContext{
606 my ($this,$isNotEmpty) = @_;
607
608 if ($isNotEmpty) {
609 my $child = $this->{$currentNode};
610 $this->{$currentNode} = pop @{$this->{$stackNodes}};
611 $this->{$currentNode}->insertNode($child);
612 } else {
613 $this->{$currentNode} = pop @{$this->{$stackNodes}};
614 }
615 }
616 sub Data {
617 my ($this,$data) = @_;
618 $this->{$currentNode}->insertNode(new BNFCompiler::DOM::Node(nodeType=> BNFCompiler::DOM::Node::NODE_TEXT, nodeValue => $data));
619 }
620
621 package BNFCompiler::DOM;
622
623 sub TransformDOMToHash {
624 my ($root,$options) = @_;
625
626 my %content;
627
628 if (not $root->childNodes) {
629 die;
630 }
631
632 foreach my $child (@{$root->childNodes}) {
633 if ($child->nodeType == BNFCompiler::DOM::Node::NODE_ELEM) {
634 my @newValue;
635 my $nodeName = $child->nodeName;
636 next if $nodeName eq 'separator' and $options->{'skip_spaces'};
637 if ($child->isComplex) {
638 $newValue[0] = TransformDOMToHash($child,$options);
639 } else {
640 @newValue = $child->text()
641 }
642
643 if ($options->{'use_arrays'}) {
644 push @{$content{$nodeName}},@newValue;
645 }
646
647 if (exists $content{$nodeName}) {
648 if (ref $content{$nodeName} eq 'ARRAY') {
649 push @{$content{$nodeName}}, @newValue;
650 } else {
651 $content{$nodeName} = [$content{$nodeName},@newValue];
652 }
653 } else {
654 $content{$nodeName} = $newValue[0] if scalar(@newValue) == 1;
655 $content{$nodeName} = \@newValue if scalar(@newValue) > 1;
656 }
657 } else {
658 next if $options->{'skip_text'};
659 push @{$content{'_text'}},$child->nodeValue();
660 }
661 }
662
663 return \%content;
664 }
665
666 1;