-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrammar.ml
1959 lines (1758 loc) · 72.8 KB
/
grammar.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(* camlp5r *)
(* grammar.ml,v *)
(* Copyright (c) INRIA 2007-2017 *)
open Gramext
open Format
open Util
exception Error of string
(** Raised by parsers when the first component of a stream pattern is
accepted, but one of the following components is rejected. *)
(* Functorial interface *)
type norec
type mayrec
module type S = sig
type keyword_state
type te
type 'c pattern
type ty_pattern = TPattern : 'a pattern -> ty_pattern
(** Type combinators to factor the module type between explicit
state passing in Grammar and global state in Pcoq *)
type 'a with_gstate
(** Reader of grammar state *)
type 'a with_kwstate
(** Read keyword state *)
type 'a with_estate
(** Read entry state *)
type 'a mod_estate
(** Read/write entry state *)
module Parsable : sig
type t
(** [Parsable.t] Stream tokenizers with Coq-specific funcitonality *)
val make : ?loc:Loc.t -> (unit,char) Stream.t -> t
(** [make ?loc strm] Build a parsable from stream [strm], resuming
at position [?loc] *)
val comments : t -> ((int * int) * string) list
val loc : t -> Loc.t
(** [loc pa] Return parsing position for [pa] *)
val consume : t -> int -> unit with_kwstate
(** [consume pa n] Discard [n] tokens from [pa], updating the
parsing position *)
end
module Entry : sig
type 'a t
val make : string -> 'a t mod_estate
val parse : 'a t -> Parsable.t -> 'a with_gstate
val name : 'a t -> string
type 'a parser_fun = { parser_fun : keyword_state -> (keyword_state,te) LStream.t -> 'a }
val of_parser : string -> 'a parser_fun -> 'a t mod_estate
val parse_token_stream : 'a t -> (keyword_state,te) LStream.t -> 'a with_gstate
val print : Format.formatter -> 'a t -> unit with_estate
val is_empty : 'a t -> bool with_estate
type any_t = Any : 'a t -> any_t
val accumulate_in : any_t list -> any_t list CString.Map.t with_estate
end
module rec Symbol : sig
type ('self, 'trec, 'a) t
val nterm : 'a Entry.t -> ('self, norec, 'a) t
val nterml : 'a Entry.t -> string -> ('self, norec, 'a) t
val list0 : ('self, 'trec, 'a) t -> ('self, 'trec, 'a list) t
val list0sep :
('self, 'trec, 'a) t -> ('self, norec, unit) t -> bool ->
('self, 'trec, 'a list) t
val list1 : ('self, 'trec, 'a) t -> ('self, 'trec, 'a list) t
val list1sep :
('self, 'trec, 'a) t -> ('self, norec, unit) t -> bool ->
('self, 'trec, 'a list) t
val opt : ('self, 'trec, 'a) t -> ('self, 'trec, 'a option) t
val self : ('self, mayrec, 'self) t
val next : ('self, mayrec, 'self) t
val token : 'c pattern -> ('self, norec, 'c) t
val tokens : ty_pattern list -> ('self, norec, unit) t
val rules : 'a Rules.t list -> ('self, norec, 'a) t
end and Rule : sig
type ('self, 'trec, 'f, 'r) t
val stop : ('self, norec, 'r, 'r) t
val next :
('self, _, 'a, 'r) t -> ('self, _, 'b) Symbol.t ->
('self, mayrec, 'b -> 'a, 'r) t
val next_norec :
('self, norec, 'a, 'r) Rule.t -> ('self, norec, 'b) Symbol.t ->
('self, norec, 'b -> 'a, 'r) t
end and Rules : sig
type 'a t
val make : (_, norec, 'f, Loc.t -> 'a) Rule.t -> 'f -> 'a t
end
module Production : sig
type 'a t
val make : ('a, _, 'f, Loc.t -> 'a) Rule.t -> 'f -> 'a t
end
type 'a single_extend_statement =
string option * Gramext.g_assoc option * 'a Production.t list
type 'a extend_statement =
| Reuse of string option * 'a Production.t list
| Fresh of Gramext.position * 'a single_extend_statement list
val generalize_symbol : ('a, 'tr, 'c) Symbol.t -> ('b, norec, 'c) Symbol.t option
(* Used in custom entries, should tweak? *)
val level_of_nonterm : ('a, norec, 'c) Symbol.t -> string option
end
module type ExtS = sig
type keyword_state
module EState : sig
type t
val empty : t
end
module GState : sig
type t = {
estate : EState.t;
kwstate : keyword_state;
}
end
include S
with type keyword_state := keyword_state
and type 'a with_gstate := GState.t -> 'a
and type 'a with_kwstate := keyword_state -> 'a
and type 'a with_estate := EState.t -> 'a
and type 'a mod_estate := EState.t -> EState.t * 'a
type 's add_kw = { add_kw : 'c. 's -> 'c pattern -> 's }
val safe_extend : 's add_kw -> EState.t -> 's -> 'a Entry.t -> 'a extend_statement -> EState.t * 's
val safe_delete_rule : EState.t -> 'a Entry.t -> 'a Production.t -> EState.t
module Unsafe : sig
val clear_entry : EState.t -> 'a Entry.t -> EState.t
end
end
(* Implementation *)
module GMake (L : Plexing.S) : ExtS
with type keyword_state := L.keyword_state
and type te := L.te
and type 'c pattern := 'c L.pattern
= struct
type te = L.te
type 'c pattern = 'c L.pattern
type ty_pattern = TPattern : 'a pattern -> ty_pattern
type 'a parser_t = (L.keyword_state,L.te) LStream.t -> 'a
(** Used to propagate possible presence of SELF/NEXT in a rule (binary and) *)
type ('a, 'b, 'c) ty_and_rec =
| NoRec2 : (norec, norec, norec) ty_and_rec
| MayRec2 : ('a, 'b, mayrec) ty_and_rec
(** Used to propagate possible presence of SELF/NEXT in a tree (ternary and) *)
type ('a, 'b, 'c, 'd) ty_and_rec3 =
| NoRec3 : (norec, norec, norec, norec) ty_and_rec3
| MayRec3 : ('a, 'b, 'c, mayrec) ty_and_rec3
type _ tag = ..
module DMap = PolyMap.Make (struct type nonrec 'a tag = 'a tag = .. end)
type 'a ty_entry = {
ename : string;
etag : 'a DMap.onetag;
}
and 'a ty_desc =
| Dlevels of 'a ty_level list
| Dparser of (L.keyword_state -> 'a parser_t)
and 'a ty_level = Level : (_, _, 'a) ty_rec_level -> 'a ty_level
and ('trecs, 'trecp, 'a) ty_rec_level = {
assoc : g_assoc;
lname : string option;
lsuffix : ('a, 'trecs, 'a -> Loc.t -> 'a) ty_tree;
lprefix : ('a, 'trecp, Loc.t -> 'a) ty_tree;
}
and ('self, 'trec, 'a) ty_symbol =
| Stoken : 'c pattern -> ('self, norec, 'c) ty_symbol
| Stokens : ty_pattern list -> ('self, norec, unit) ty_symbol
| Slist1 : ('self, 'trec, 'a) ty_symbol -> ('self, 'trec, 'a list) ty_symbol
| Slist1sep : ('self, 'trec, 'a) ty_symbol * ('self, norec, unit) ty_symbol * bool -> ('self, 'trec, 'a list) ty_symbol
| Slist0 : ('self, 'trec, 'a) ty_symbol -> ('self, 'trec, 'a list) ty_symbol
| Slist0sep : ('self, 'trec, 'a) ty_symbol * ('self, norec, unit) ty_symbol * bool -> ('self, 'trec, 'a list) ty_symbol
| Sopt : ('self, 'trec, 'a) ty_symbol -> ('self, 'trec, 'a option) ty_symbol
| Sself : ('self, mayrec, 'self) ty_symbol
| Snext : ('self, mayrec, 'self) ty_symbol
| Snterm : 'a ty_entry -> ('self, norec, 'a) ty_symbol
(* norec but the entry can nevertheless introduce a loop with the current entry*)
| Snterml : 'a ty_entry * string -> ('self, norec, 'a) ty_symbol
| Stree : ('self, 'trec, Loc.t -> 'a) ty_tree -> ('self, 'trec, 'a) ty_symbol
and ('self, _, _, 'r) ty_rule =
| TStop : ('self, norec, 'r, 'r) ty_rule
| TNext : ('trr, 'trs, 'tr) ty_and_rec * ('self, 'trr, 'a, 'r) ty_rule * ('self, 'trs, 'b) ty_symbol -> ('self, 'tr, 'b -> 'a, 'r) ty_rule
and ('self, 'trec, 'a) ty_tree =
| Node : ('trn, 'trs, 'trb, 'tr) ty_and_rec3 * ('self, 'trn, 'trs, 'trb, 'b, 'a) ty_node -> ('self, 'tr, 'a) ty_tree
| LocAct : 'k * 'k list -> ('self, norec, 'k) ty_tree
| DeadEnd : ('self, norec, 'k) ty_tree
and ('self, 'trec, 'trecs, 'trecb, 'a, 'r) ty_node = {
node : ('self, 'trec, 'a) ty_symbol;
son : ('self, 'trecs, 'a -> 'r) ty_tree;
brother : ('self, 'trecb, 'r) ty_tree;
}
(** The closures are built by partially applying the parsing functions
to [edesc] but without depending on the state (so when we update
an entry we don't need to update closures in unrelated entries).
This is an important optimisation, see eg https://gitlab.com/coq/coq/-/jobs/3585529623
(+40% on mathcomp-ssreflect, +15% on stdlib without this,
significant slowdowns on most developments)
*)
type ('t,'a) entry_data = {
edesc : 'a ty_desc;
estart : 't -> int -> 'a parser_t;
econtinue : 't -> int -> int -> 'a -> 'a parser_t;
}
module rec EState : DMap.MapS
with type 'a value := (GState.t, 'a) entry_data
= DMap.Map(struct type 'a t = (GState.t, 'a) entry_data end)
and GState : sig
type t = {
estate : EState.t;
kwstate : L.keyword_state;
}
end = struct
type t = {
estate : EState.t;
kwstate : L.keyword_state;
}
end
open GState
let get_entry estate e = try EState.find (DMap.tag_of_onetag e.etag) estate
with Not_found -> assert false
type 'a ty_rules =
| TRules : (_, norec, 'act, Loc.t -> 'a) ty_rule * 'act -> 'a ty_rules
type 'a ty_production =
| TProd : ('a, _, 'act, Loc.t -> 'a) ty_rule * 'act -> 'a ty_production
let rec derive_eps : type s r a. (s, r, a) ty_symbol -> bool =
function
Slist0 _ -> true
| Slist0sep (_, _, _) -> true
| Sopt _ -> true
| Stree t -> tree_derive_eps t
| Slist1 _ -> false
| Slist1sep (_, _, _) -> false
| Snterm _ -> false | Snterml (_, _) -> false
| Snext -> false
| Sself -> false
| Stoken _ -> false
| Stokens _ -> false
and tree_derive_eps : type s tr a. (s, tr, a) ty_tree -> bool =
function
LocAct (_, _) -> true
| Node (_, {node = s; brother = bro; son = son}) ->
derive_eps s && tree_derive_eps son || tree_derive_eps bro
| DeadEnd -> false
let eq_entry : type a1 a2. a1 ty_entry -> a2 ty_entry -> (a1, a2) eq option = fun e1 e2 ->
DMap.eq_onetag e1.etag (DMap.tag_of_onetag e2.etag)
let tok_pattern_eq_list pl1 pl2 =
let f (TPattern p1) (TPattern p2) = Option.has_some (L.tok_pattern_eq p1 p2) in
if List.for_all2eq f pl1 pl2 then Some Refl else None
let rec eq_symbol : type s r1 r2 a1 a2. (s, r1, a1) ty_symbol -> (s, r2, a2) ty_symbol -> (a1, a2) eq option = fun s1 s2 ->
match s1, s2 with
Snterm e1, Snterm e2 -> eq_entry e1 e2
| Snterml (e1, l1), Snterml (e2, l2) ->
if String.equal l1 l2 then eq_entry e1 e2 else None
| Slist0 s1, Slist0 s2 ->
begin match eq_symbol s1 s2 with None -> None | Some Refl -> Some Refl end
| Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) ->
if b1 = b2 then match eq_symbol s1 s2 with
| None -> None
| Some Refl ->
match eq_symbol sep1 sep2 with
| None -> None
| Some Refl -> Some Refl
else None
| Slist1 s1, Slist1 s2 ->
begin match eq_symbol s1 s2 with None -> None | Some Refl -> Some Refl end
| Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) ->
if b1 = b2 then match eq_symbol s1 s2 with
| None -> None
| Some Refl ->
match eq_symbol sep1 sep2 with
| None -> None
| Some Refl -> Some Refl
else None
| Sopt s1, Sopt s2 ->
begin match eq_symbol s1 s2 with None -> None | Some Refl -> Some Refl end
| Stree _, Stree _ -> None
| Sself, Sself -> Some Refl
| Snext, Snext -> Some Refl
| Stoken p1, Stoken p2 -> L.tok_pattern_eq p1 p2
| Stokens pl1, Stokens pl2 -> tok_pattern_eq_list pl1 pl2
| _ -> None
let is_before : type s1 s2 r1 r2 a1 a2. (s1, r1, a1) ty_symbol -> (s2, r2, a2) ty_symbol -> bool = fun s1 s2 ->
match s1, s2 with
| Stoken p1, Stoken p2 ->
snd (L.tok_pattern_strings p1) <> None
&& snd (L.tok_pattern_strings p2) = None
| Stoken _, _ -> true
| _ -> false
(** Ancillary datatypes *)
type 'a ty_rec = MayRec : mayrec ty_rec | NoRec : norec ty_rec
type ('a, 'b, 'c) ty_and_ex =
| NR00 : (mayrec, mayrec, mayrec) ty_and_ex
| NR01 : (mayrec, norec, mayrec) ty_and_ex
| NR10 : (norec, mayrec, mayrec) ty_and_ex
| NR11 : (norec, norec, norec) ty_and_ex
type ('a, 'b) ty_mayrec_and_ex =
| MayRecNR : ('a, 'b, _) ty_and_ex -> ('a, 'b) ty_mayrec_and_ex
type ('s, 'a) ty_mayrec_symbol =
| MayRecSymbol : ('s, _, 'a) ty_symbol -> ('s, 'a) ty_mayrec_symbol
type ('s, 'a) ty_mayrec_tree =
| MayRecTree : ('s, 'tr, 'a) ty_tree -> ('s, 'a) ty_mayrec_tree
type ('s, 'a, 'r) ty_mayrec_rule =
| MayRecRule : ('s, _, 'a, 'r) ty_rule -> ('s, 'a, 'r) ty_mayrec_rule
type ('self, 'trec, _) ty_symbols =
| TNil : ('self, norec, unit) ty_symbols
| TCns : ('trh, 'trt, 'tr) ty_and_rec * ('self, 'trh, 'a) ty_symbol * ('self, 'trt, 'b) ty_symbols -> ('self, 'tr, 'a * 'b) ty_symbols
(** ('i, 'p, 'f, 'r) rel_prod0 ~
∃ α₁ ... αₙ.
p ≡ αₙ * ... α₁ * 'i ∧
f ≡ α₁ -> ... -> αₙ -> 'r
*)
type ('i, _, 'f, _) rel_prod0 =
| Rel0 : ('i, 'i, 'f, 'f) rel_prod0
| RelS : ('i, 'p, 'f, 'a -> 'r) rel_prod0 -> ('i, 'a * 'p, 'f, 'r) rel_prod0
type ('p, 'k, 'r) rel_prod = (unit, 'p, 'k, 'r) rel_prod0
type ('s, 'tr, 'i, 'k, 'r) any_symbols =
| AnyS : ('s, 'tr, 'p) ty_symbols * ('i, 'p, 'k, 'r) rel_prod0 -> ('s, 'tr, 'i, 'k, 'r) any_symbols
type ('s, 'tr, 'k, 'r) ty_belast_rule =
| Belast : ('trr, 'trs, 'tr) ty_and_rec * ('s, 'trr, 'k, 'a -> 'r) ty_rule * ('s, 'trs, 'a) ty_symbol -> ('s, 'tr, 'k, 'r) ty_belast_rule
(* unfortunately, this is quadratic, but ty_rules aren't too long
* (99% of the time of length less or equal 10 and maximum is 22
* when compiling Coq and its standard library) *)
let rec get_symbols : type s trec k r. (s, trec, k, r) ty_rule -> (s, trec, unit, k, r) any_symbols =
let rec belast_rule : type s trr trs tr a k r. (trr, trs, tr) ty_and_rec -> (s, trr, k, r) ty_rule -> (s, trs, a) ty_symbol -> (s, tr, a -> k, r) ty_belast_rule =
fun ar r s -> match ar, r with
| NoRec2, TStop -> Belast (NoRec2, TStop, s)
| MayRec2, TStop -> Belast (MayRec2, TStop, s)
| NoRec2, TNext (NoRec2, r, s') ->
let Belast (NoRec2, r, s') = belast_rule NoRec2 r s' in
Belast (NoRec2, TNext (NoRec2, r, s), s')
| MayRec2, TNext (_, r, s') ->
let Belast (_, r, s') = belast_rule MayRec2 r s' in
Belast (MayRec2, TNext (MayRec2, r, s), s') in
function
| TStop -> AnyS (TNil, Rel0)
| TNext (MayRec2, r, s) ->
let Belast (MayRec2, r, s) = belast_rule MayRec2 r s in
let AnyS (r, pf) = get_symbols r in
AnyS (TCns (MayRec2, s, r), RelS pf)
| TNext (NoRec2, r, s) ->
let Belast (NoRec2, r, s) = belast_rule NoRec2 r s in
let AnyS (r, pf) = get_symbols r in
AnyS (TCns (NoRec2, s, r), RelS pf)
let get_rec_symbols (type s tr p) (s : (s, tr, p) ty_symbols) : tr ty_rec =
match s with TCns (MayRec2, _, _) -> MayRec
| TCns (NoRec2, _, _) -> NoRec | TNil -> NoRec
let get_rec_tree (type s tr f) (s : (s, tr, f) ty_tree) : tr ty_rec =
match s with Node (MayRec3, _) -> MayRec
| Node (NoRec3, _) -> NoRec | LocAct _ -> NoRec | DeadEnd -> NoRec
let and_symbols_tree (type s trs trt p f) (s : (s, trs, p) ty_symbols) (t : (s, trt, f) ty_tree) : (trs, trt) ty_mayrec_and_ex =
match get_rec_symbols s, get_rec_tree t with
| MayRec, MayRec -> MayRecNR NR00 | MayRec, NoRec -> MayRecNR NR01
| NoRec, MayRec -> MayRecNR NR10 | NoRec, NoRec -> MayRecNR NR11
let and_and_tree (type s tr' trt tr trn trs trb f) (ar : (tr', trt, tr) ty_and_rec) (arn : (trn, trs, trb, trt) ty_and_rec3) (t : (s, trb, f) ty_tree) : (tr', trb, tr) ty_and_rec =
match ar, arn, get_rec_tree t with
| MayRec2, _, MayRec -> MayRec2 | MayRec2, _, NoRec -> MayRec2
| NoRec2, NoRec3, NoRec -> NoRec2
let insert_tree (type s trs trt tr p k a) entry_name (ar : (trs, trt, tr) ty_and_ex) (gsymbols : (s, trs, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, trt, a) ty_tree) : (s, tr, a) ty_tree =
let rec insert : type trs trt tr p f k. (trs, trt, tr) ty_and_ex -> (s, trs, p) ty_symbols -> (p, k, f) rel_prod -> (s, trt, f) ty_tree -> k -> (s, tr, f) ty_tree =
fun ar symbols pf tree action ->
match symbols, pf with
TCns (ars, s, sl), RelS pf ->
(* descent in tree at symbol [s] *)
insert_in_tree ar ars s sl pf tree action
| TNil, Rel0 ->
(* insert the action *)
let node (type tb) ({node = s; son = son; brother = bro} : (_, _, _, tb, _, _) ty_node) =
let ar : (norec, tb, tb) ty_and_ex =
match get_rec_tree bro with MayRec -> NR10 | NoRec -> NR11 in
{node = s; son = son; brother = insert ar TNil Rel0 bro action} in
match ar, tree with
| NR10, Node (_, n) -> Node (MayRec3, node n)
| NR11, Node (NoRec3, n) -> Node (NoRec3, node n)
| NR11, LocAct (old_action, action_list) ->
(* What to do about this warning? For now it is disabled *)
if false then
begin
let msg =
"<W> Grammar extension: " ^
(if entry_name = "" then "" else "in ["^entry_name^"%s], ") ^
"some rule has been masked" in
Feedback.msg_warning (Pp.str msg)
end;
LocAct (action, old_action :: action_list)
| NR11, DeadEnd -> LocAct (action, [])
and insert_in_tree : type trs trs' trs'' trt tr a p f k. (trs'', trt, tr) ty_and_ex -> (trs, trs', trs'') ty_and_rec -> (s, trs, a) ty_symbol -> (s, trs', p) ty_symbols -> (p, k, a -> f) rel_prod -> (s, trt, f) ty_tree -> k -> (s, tr, f) ty_tree =
fun ar ars s sl pf tree action ->
let ar : (trs'', trt, tr) ty_and_rec = match ar with NR11 -> NoRec2
| NR00 -> MayRec2 | NR01 -> MayRec2 | NR10 -> MayRec2 in
match try_insert ar ars s sl pf tree action with
Some t -> t
| None ->
let node ar =
{node = s; son = insert ar sl pf DeadEnd action; brother = tree} in
match ar, ars, get_rec_symbols sl with
| MayRec2, MayRec2, MayRec -> Node (MayRec3, node NR01)
| MayRec2, _, NoRec -> Node (MayRec3, node NR11)
| NoRec2, NoRec2, NoRec -> Node (NoRec3, node NR11)
and try_insert : type trs trs' trs'' trt tr a p f k. (trs'', trt, tr) ty_and_rec -> (trs, trs', trs'') ty_and_rec -> (s, trs, a) ty_symbol -> (s, trs', p) ty_symbols -> (p, k, a -> f) rel_prod -> (s, trt, f) ty_tree -> k -> (s, tr, f) ty_tree option =
fun ar ars symb symbl pf tree action ->
match tree with
Node (arn, {node = symb1; son = son; brother = bro}) ->
(* merging rule [symb; symbl -> action] in tree [symb1; son | bro] *)
begin match eq_symbol symb symb1 with
| Some Refl ->
(* reducing merge of [symb; symbl -> action] with [symb1; son] to merge of [symbl -> action] with [son] *)
let MayRecNR arss = and_symbols_tree symbl son in
let son = insert arss symbl pf son action in
let node = {node = symb1; son = son; brother = bro} in
(* propagate presence of SELF/NEXT *)
begin match ar, ars, arn, arss with
| MayRec2, _, _, _ -> Some (Node (MayRec3, node))
| NoRec2, NoRec2, NoRec3, NR11 -> Some (Node (NoRec3, node)) end
| None ->
let ar' = and_and_tree ar arn bro in
if is_before symb1 symb || derive_eps symb && not (derive_eps symb1) then
(* inserting new rule after current rule, i.e. in [bro] *)
let bro =
match try_insert ar' ars symb symbl pf bro action with
Some bro ->
(* could insert in [bro] *)
bro
| None ->
(* not ok to insert in [bro] or after; we insert now *)
let MayRecNR arss = and_symbols_tree symbl DeadEnd in
let son = insert arss symbl pf DeadEnd action in
let node = {node = symb; son = son; brother = bro} in
(* propagate presence of SELF/NEXT *)
match ar, ars, arn, arss with
| MayRec2, _, _, _ -> Node (MayRec3, node)
| NoRec2, NoRec2, NoRec3, NR11 -> Node (NoRec3, node)
in
let node = {node = symb1; son = son; brother = bro} in
(* propagate presence of SELF/NEXT *)
match ar, arn with
| MayRec2, _ -> Some (Node (MayRec3, node))
| NoRec2, NoRec3 -> Some (Node (NoRec3, node))
else
(* should insert in [bro] or before the tree [symb1; son | bro] *)
match try_insert ar' ars symb symbl pf bro action with
Some bro ->
(* could insert in [bro] *)
let node = {node = symb1; son = son; brother = bro} in
begin match ar, arn with
| MayRec2, _ -> Some (Node (MayRec3, node))
| NoRec2, NoRec3 -> Some (Node (NoRec3, node)) end
| None ->
(* should insert before [symb1; son | bro] *)
None
end
| LocAct (_, _) -> None | DeadEnd -> None
in
insert ar gsymbols pf tree action
let insert_tree_norec (type s p k a) entry_name (gsymbols : (s, norec, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, norec, a) ty_tree) : (s, norec, a) ty_tree =
insert_tree entry_name NR11 gsymbols pf action tree
let insert_tree (type s trs trt p k a) entry_name (gsymbols : (s, trs, p) ty_symbols) (pf : (p, k, a) rel_prod) (action : k) (tree : (s, trt, a) ty_tree) : (s, a) ty_mayrec_tree =
let MayRecNR ar = and_symbols_tree gsymbols tree in
MayRecTree (insert_tree entry_name ar gsymbols pf action tree)
let srules (type self a) (rl : a ty_rules list) : (self, norec, a) ty_symbol =
let rec retype_tree : type s a. (s, norec, a) ty_tree -> (self, norec, a) ty_tree =
function
| Node (NoRec3, {node = s; son = son; brother = bro}) ->
Node (NoRec3, {node = retype_symbol s; son = retype_tree son; brother = retype_tree bro})
| LocAct (k, kl) -> LocAct (k, kl)
| DeadEnd -> DeadEnd
and retype_symbol : type s a. (s, norec, a) ty_symbol -> (self, norec, a) ty_symbol =
function
| Stoken p -> Stoken p
| Stokens l -> Stokens l
| Slist1 s -> Slist1 (retype_symbol s)
| Slist1sep (s, sep, b) -> Slist1sep (retype_symbol s, retype_symbol sep, b)
| Slist0 s -> Slist0 (retype_symbol s)
| Slist0sep (s, sep, b) -> Slist0sep (retype_symbol s, retype_symbol sep, b)
| Sopt s -> Sopt (retype_symbol s)
| Snterm e -> Snterm e
| Snterml (e, l) -> Snterml (e, l)
| Stree t -> Stree (retype_tree t) in
let rec retype_rule : type s k r. (s, norec, k, r) ty_rule -> (self, norec, k, r) ty_rule =
function
| TStop -> TStop
| TNext (NoRec2, r, s) -> TNext (NoRec2, retype_rule r, retype_symbol s) in
let t =
List.fold_left
(fun tree (TRules (symbols, action)) ->
let symbols = retype_rule symbols in
let AnyS (symbols, pf) = get_symbols symbols in
insert_tree_norec "" symbols pf action tree)
DeadEnd rl
in
Stree t
let is_level_labelled n (Level lev) =
match lev.lname with
Some n1 -> n = n1
| None -> false
let insert_level (type s tr p k) entry_name (symbols : (s, tr, p) ty_symbols) (pf : (p, k, Loc.t -> s) rel_prod) (action : k) (slev : s ty_level) : s ty_level =
match symbols with
| TCns (_, Sself, symbols) ->
(* Insert a rule of the form "SELF; ...." *)
let Level slev = slev in
let RelS pf = pf in
let MayRecTree lsuffix = insert_tree entry_name symbols pf action slev.lsuffix in
Level
{assoc = slev.assoc; lname = slev.lname;
lsuffix = lsuffix;
lprefix = slev.lprefix}
| _ ->
(* Insert a rule not starting with SELF *)
let Level slev = slev in
let MayRecTree lprefix = insert_tree entry_name symbols pf action slev.lprefix in
Level
{assoc = slev.assoc; lname = slev.lname; lsuffix = slev.lsuffix;
lprefix = lprefix}
let empty_lev lname assoc =
let assoc =
match assoc with
Some a -> a
| None -> LeftA
in
Level
{assoc = assoc; lname = lname; lsuffix = DeadEnd; lprefix = DeadEnd}
let err_no_level lev e =
let msg = sprintf "Grammar.extend: No level labelled \"%s\" in entry \"%s\"" lev e in
failwith msg
let get_position entry position levs =
match position with
First -> [], levs
| Last -> levs, []
| Before n ->
let rec get =
function
[] -> err_no_level n entry.ename
| lev :: levs ->
if is_level_labelled n lev then [], lev :: levs
else
let (levs1, levs2) = get levs in lev :: levs1, levs2
in
get levs
| After n ->
let rec get =
function
[] -> err_no_level n entry.ename
| lev :: levs ->
if is_level_labelled n lev then [lev], levs
else
let (levs1, levs2) = get levs in lev :: levs1, levs2
in
get levs
let get_level entry name levs = match name with
| Some n ->
let rec get =
function
[] -> err_no_level n entry.ename
| lev :: levs ->
if is_level_labelled n lev then [], lev, levs
else
let (levs1, rlev, levs2) = get levs in lev :: levs1, rlev, levs2
in
get levs
| None ->
begin match levs with
lev :: levs -> [], lev, levs
| [] ->
let msg = sprintf "Grammar.extend: No top level in entry \"%s\"" entry.ename in
failwith msg
end
let change_to_self0 (type s) (type trec) (type a) (entry : s ty_entry) : (s, trec, a) ty_symbol -> (s, a) ty_mayrec_symbol =
function
| Snterm e ->
begin match eq_entry e entry with
| None -> MayRecSymbol (Snterm e)
| Some Refl -> MayRecSymbol (Sself)
end
| x -> MayRecSymbol x
let rec change_to_self : type s trec a r. s ty_entry -> (s, trec, a, r) ty_rule -> (s, a, r) ty_mayrec_rule = fun e r -> match r with
| TStop -> MayRecRule TStop
| TNext (_, r, t) ->
let MayRecRule r = change_to_self e r in
let MayRecSymbol t = change_to_self0 e t in
MayRecRule (TNext (MayRec2, r, t))
type 's add_kw = { add_kw : 'c. 's -> 'c pattern -> 's }
let insert_tokens {add_kw} lstate symbols =
let rec insert : type s trec a. _ -> (s, trec, a) ty_symbol -> _ =
fun lstate -> function
| Slist0 s -> insert lstate s
| Slist1 s -> insert lstate s
| Slist0sep (s, t, _) -> let lstate = insert lstate s in insert lstate t
| Slist1sep (s, t, _) -> let lstate = insert lstate s in insert lstate t
| Sopt s -> insert lstate s
| Stree t -> tinsert lstate t
| Stoken tok -> add_kw lstate tok
| Stokens (TPattern tok::_) ->
(* Only the first token is liable to trigger a keyword effect *)
add_kw lstate tok
| Stokens [] -> assert false
| Snterm _
| Snterml _
| Snext
| Sself -> lstate
and tinsert : type s tr a. _ -> (s, tr, a) ty_tree -> _ =
fun lstate -> function
Node (_, {node = s; brother = bro; son = son}) ->
let lstate = insert lstate s in
let lstate = tinsert lstate bro in
tinsert lstate son
| LocAct _ | DeadEnd -> lstate
and linsert : type s tr p. _ -> (s, tr, p) ty_symbols -> _ =
fun lstate -> function
| TNil -> lstate
| TCns (_, s, r) -> let lstate = insert lstate s in linsert lstate r
in
linsert lstate symbols
type 'a single_extend_statement =
string option * Gramext.g_assoc option * 'a ty_production list
type 'a extend_statement =
| Reuse of string option * 'a ty_production list
| Fresh of Gramext.position * 'a single_extend_statement list
let add_prod add_kw entry (lstate, lev) (TProd (symbols, action)) =
let MayRecRule symbols = change_to_self entry symbols in
let AnyS (symbols, pf) = get_symbols symbols in
let lstate = insert_tokens add_kw lstate symbols in
lstate, insert_level entry.ename symbols pf action lev
let levels_of_rules add_kw lstate entry edata st =
let elev =
match edata.edesc with
Dlevels elev -> elev
| Dparser _ ->
let msg = sprintf "Grammar.extend: entry not extensible: \"%s\"" entry.ename in
failwith msg
in
match st with
| Reuse (name, []) -> lstate, elev
| Reuse (name, prods) ->
let (levs1, lev, levs2) = get_level entry name elev in
let lstate, lev = List.fold_left (fun lev prod -> add_prod add_kw entry lev prod) (lstate, lev) prods in
lstate, levs1 @ [lev] @ levs2
| Fresh (position, rules) ->
let (levs1, levs2) = get_position entry position elev in
let fold (lstate, levs) (lname, assoc, prods) =
let lev = empty_lev lname assoc in
let lstate, lev = List.fold_left (fun lev prod -> add_prod add_kw entry lev prod) (lstate, lev) prods in
lstate, lev :: levs
in
let lstate, levs = List.fold_left fold (lstate, []) rules in
lstate, levs1 @ List.rev levs @ levs2
let logically_eq_symbols entry =
let rec eq_symbols : type s1 s2 trec1 trec2 a1 a2. (s1, trec1, a1) ty_symbol -> (s2, trec2, a2) ty_symbol -> bool = fun s1 s2 ->
match s1, s2 with
Snterm e1, Snterm e2 -> e1.ename = e2.ename
| Snterm e1, Sself -> e1.ename = entry.ename
| Sself, Snterm e2 -> entry.ename = e2.ename
| Snterml (e1, l1), Snterml (e2, l2) -> e1.ename = e2.ename && l1 = l2
| Slist0 s1, Slist0 s2 -> eq_symbols s1 s2
| Slist0sep (s1, sep1, b1), Slist0sep (s2, sep2, b2) ->
eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2
| Slist1 s1, Slist1 s2 -> eq_symbols s1 s2
| Slist1sep (s1, sep1, b1), Slist1sep (s2, sep2, b2) ->
eq_symbols s1 s2 && eq_symbols sep1 sep2 && b1 = b2
| Sopt s1, Sopt s2 -> eq_symbols s1 s2
| Stree t1, Stree t2 -> eq_trees t1 t2
| Stoken p1, Stoken p2 -> L.tok_pattern_eq p1 p2 <> None
| Stokens pl1, Stokens pl2 -> tok_pattern_eq_list pl1 pl2 <> None
| Sself, Sself -> true
| Snext, Snext -> true
| _ -> false
and eq_trees : type s1 s2 tr1 tr2 a1 a2. (s1, tr1, a1) ty_tree -> (s2, tr2, a2) ty_tree -> bool = fun t1 t2 ->
match t1, t2 with
Node (_, n1), Node (_, n2) ->
eq_symbols n1.node n2.node && eq_trees n1.son n2.son &&
eq_trees n1.brother n2.brother
| LocAct _, LocAct _ -> true
| LocAct _, DeadEnd -> true
| DeadEnd, LocAct _ -> true
| DeadEnd, DeadEnd -> true
| _ -> false
in
eq_symbols
(* [delete_rule_in_tree] returns
[Some (dsl, t)] if success
[dsl] =
Some (list of deleted nodes) if branch deleted
None if action replaced by previous version of action
[t] = remaining tree
[None] if failure *)
type 's ex_symbols =
| ExS : ('s, 'tr, 'p) ty_symbols -> 's ex_symbols
let delete_rule_in_tree entry =
let rec delete_in_tree :
type s tr tr' p r. (s, tr, p) ty_symbols -> (s, tr', r) ty_tree -> (s ex_symbols option * (s, r) ty_mayrec_tree) option =
fun symbols tree ->
match symbols, tree with
| TCns (_, s, sl), Node (_, n) ->
if logically_eq_symbols entry s n.node then delete_son sl n
else
begin match delete_in_tree symbols n.brother with
Some (dsl, MayRecTree t) ->
Some (dsl, MayRecTree (Node (MayRec3, {node = n.node; son = n.son; brother = t})))
| None -> None
end
| TCns (_, s, sl), _ -> None
| TNil, Node (_, n) ->
begin match delete_in_tree TNil n.brother with
Some (dsl, MayRecTree t) ->
Some (dsl, MayRecTree (Node (MayRec3, {node = n.node; son = n.son; brother = t})))
| None -> None
end
| TNil, DeadEnd -> None
| TNil, LocAct (_, []) -> Some (Some (ExS TNil), MayRecTree DeadEnd)
| TNil, LocAct (_, action :: list) -> Some (None, MayRecTree (LocAct (action, list)))
and delete_son :
type s p tr trn trs trb a r. (s, tr, p) ty_symbols -> (s, trn, trs, trb, a, r) ty_node -> (s ex_symbols option * (s, r) ty_mayrec_tree) option =
fun sl n ->
match delete_in_tree sl n.son with
Some (Some (ExS dsl), MayRecTree DeadEnd) -> Some (Some (ExS (TCns (MayRec2, n.node, dsl))), MayRecTree n.brother)
| Some (Some (ExS dsl), MayRecTree t) ->
let t = Node (MayRec3, {node = n.node; son = t; brother = n.brother}) in
Some (Some (ExS (TCns (MayRec2, n.node, dsl))), MayRecTree t)
| Some (None, MayRecTree t) ->
let t = Node (MayRec3, {node = n.node; son = t; brother = n.brother}) in
Some (None, MayRecTree t)
| None -> None
in
delete_in_tree
let rec delete_rule_in_suffix entry symbols =
function
Level lev :: levs ->
begin match delete_rule_in_tree entry symbols lev.lsuffix with
Some (dsl, MayRecTree t) ->
begin match t, lev.lprefix with
DeadEnd, DeadEnd -> levs
| _ ->
let lev =
{assoc = lev.assoc; lname = lev.lname; lsuffix = t;
lprefix = lev.lprefix}
in
Level lev :: levs
end
| None ->
let levs = delete_rule_in_suffix entry symbols levs in
Level lev :: levs
end
| [] -> raise Not_found
let rec delete_rule_in_prefix entry symbols =
function
Level lev :: levs ->
begin match delete_rule_in_tree entry symbols lev.lprefix with
Some (dsl, MayRecTree t) ->
begin match t, lev.lsuffix with
DeadEnd, DeadEnd -> levs
| _ ->
let lev =
{assoc = lev.assoc; lname = lev.lname; lsuffix = lev.lsuffix;
lprefix = t}
in
Level lev :: levs
end
| None ->
let levs = delete_rule_in_prefix entry symbols levs in
Level lev :: levs
end
| [] -> raise Not_found
let delete_rule_in_level_list (type s tr p) (entry : s ty_entry) (symbols : (s, tr, p) ty_symbols) levs =
match symbols with
TCns (_, Sself, symbols) -> delete_rule_in_suffix entry symbols levs
| TCns (_, Snterm e, symbols') ->
begin match eq_entry e entry with
| None -> delete_rule_in_prefix entry symbols levs
| Some Refl ->
delete_rule_in_suffix entry symbols' levs
end
| _ -> delete_rule_in_prefix entry symbols levs
let rec flatten_tree : type s tr a. (s, tr, a) ty_tree -> s ex_symbols list =
function
DeadEnd -> []
| LocAct (_, _) -> [ExS TNil]
| Node (_, {node = n; brother = b; son = s}) ->
List.map (fun (ExS l) -> ExS (TCns (MayRec2, n, l))) (flatten_tree s) @ flatten_tree b
let utf8_string_escaped s =
let b = Buffer.create (String.length s) in
let rec loop i =
if i = String.length s then Buffer.contents b
else
begin
begin match s.[i] with
'"' -> Buffer.add_string b "\\\""
| '\\' -> Buffer.add_string b "\\\\"
| '\n' -> Buffer.add_string b "\\n"
| '\t' -> Buffer.add_string b "\\t"
| '\r' -> Buffer.add_string b "\\r"
| '\b' -> Buffer.add_string b "\\b"
| c -> Buffer.add_char b c
end;
loop (i + 1)
end
in
loop 0
let string_escaped s = utf8_string_escaped s
let print_str ppf s = fprintf ppf "\"%s\"" (string_escaped s)
let print_token b ppf p =
match L.tok_pattern_strings p with
| "", Some s -> print_str ppf s
| con, Some prm -> if b then fprintf ppf "%s@ %a" con print_str prm else fprintf ppf "(%s@ %a)" con print_str prm
| con, None -> fprintf ppf "%s" con
let print_tokens ppf = function
| [] -> assert false
| TPattern p :: pl ->
fprintf ppf "[%a%a]"
(print_token true) p
(fun ppf -> List.iter (function TPattern p -> fprintf ppf ";@ "; print_token true ppf p))
pl
let rec print_symbol : type s tr r. formatter -> (s, tr, r) ty_symbol -> unit =
fun ppf ->
function
| Slist0 s -> fprintf ppf "LIST0 %a" print_symbol1 s
| Slist0sep (s, t, osep) ->
fprintf ppf "LIST0 %a SEP %a%s" print_symbol1 s print_symbol1 t
(if osep then " OPT_SEP" else "")
| Slist1 s -> fprintf ppf "LIST1 %a" print_symbol1 s
| Slist1sep (s, t, osep) ->
fprintf ppf "LIST1 %a SEP %a%s" print_symbol1 s print_symbol1 t
(if osep then " OPT_SEP" else "")
| Sopt s -> fprintf ppf "OPT %a" print_symbol1 s
| Stoken p -> print_token true ppf p
| Stokens [TPattern p] -> print_token true ppf p
| Stokens pl -> print_tokens ppf pl
| Snterml (e, l) ->
fprintf ppf "%s%s@ LEVEL@ %a" e.ename ""
print_str l
| s -> print_symbol1 ppf s
and print_symbol1 : type s tr r. formatter -> (s, tr, r) ty_symbol -> unit =
fun ppf ->
function
| Snterm e -> fprintf ppf "%s%s" e.ename ""
| Sself -> pp_print_string ppf "SELF"
| Snext -> pp_print_string ppf "NEXT"
| Stoken p -> print_token false ppf p
| Stokens [TPattern p] -> print_token false ppf p
| Stokens pl -> print_tokens ppf pl
| Stree t -> print_level ppf pp_print_space (flatten_tree t)
| s ->
fprintf ppf "(%a)" print_symbol s
and print_rule : type s tr p. formatter -> (s, tr, p) ty_symbols -> unit =
fun ppf symbols ->
fprintf ppf "@[<hov 0>";
let rec fold : type s tr p. _ -> (s, tr, p) ty_symbols -> unit =
fun sep symbols ->
match symbols with
| TNil -> ()
| TCns (_, symbol, symbols) ->
fprintf ppf "%t%a" sep print_symbol symbol;
fold (fun ppf -> fprintf ppf ";@ ") symbols
in
let () = fold (fun ppf -> ()) symbols in
fprintf ppf "@]"
and print_level : type s. _ -> _ -> s ex_symbols list -> _ =
fun ppf pp_print_space rules ->
fprintf ppf "@[<hov 0>[ ";
let () =
Format.pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "%a| " pp_print_space ())
(fun ppf (ExS rule) -> print_rule ppf rule)
ppf rules
in
fprintf ppf " ]@]"
let print_levels ppf elev =
Format.pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "@,| ")
(fun ppf (Level lev) ->
let rules =
List.map (fun (ExS t) -> ExS (TCns (MayRec2, Sself, t))) (flatten_tree lev.lsuffix) @
flatten_tree lev.lprefix
in
fprintf ppf "@[<hov 2>";
begin match lev.lname with
Some n -> fprintf ppf "%a@;<1 2>" print_str n
| None -> ()
end;
begin match lev.assoc with
LeftA -> fprintf ppf "LEFTA"
| RightA -> fprintf ppf "RIGHTA"
| NonA -> fprintf ppf "NONA"
end;
fprintf ppf "@]@;<1 2>";
print_level ppf pp_force_newline rules)
ppf elev
let print_entry estate ppf e =
fprintf ppf "@[<v 0>[ ";
begin match (get_entry estate e).edesc with
Dlevels elev -> print_levels ppf elev
| Dparser _ -> fprintf ppf "<parser>"
end;
fprintf ppf " ]@]"
let name_of_symbol : type s tr a. s ty_entry -> (s, tr, a) ty_symbol -> string =
fun entry ->
function
Snterm e -> "[" ^ e.ename ^ "]"