Skip to content

Commit

Permalink
Support for DFA minimization
Browse files Browse the repository at this point in the history
  • Loading branch information
yallop committed Dec 6, 2018
1 parent 786fd88 commit e7b9ca9
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 6 deletions.
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
This repository provides a library and executable for converting
regular expressions into nondeterministic finite automata (NFAs) using
[Glushkov's construction][glushkov], for converting NFAs into DFAs
using the [powerset construction][powerset], and for formatting the
NFAs using [DOT][DOT] so that they can be displayed using [graphviz][graphviz].
using the [powerset construction][powerset], for minimizing DFAs using
[Brzozowski's algorithm][brzozowski], and for formatting the NFAs using
[DOT][DOT] so that they can be displayed using [graphviz][graphviz].

### Online demo

Expand Down Expand Up @@ -38,12 +39,16 @@ digraph {
}
```

To display the corresponding DFA, pass the `-type` argument:
To display the corresponding DFA or minimized DFA, pass the `-type` argument:

```
re-nfa -type dfa "a*b"
```

```
re-nfa -type dfa-minimized "a*b"
```

On a Unix system you might pipe the output directly to `dot`, and then
on to [`display`][display], like this:

Expand All @@ -55,6 +60,10 @@ to display the following graph:

![a*b](/images/astarb.png)

Here is the minimized version:

![a*b](/images/astarb-minimized.png)

Here is a more complex graph constructed from the regex `a?a?a?aaa` that causes pathological backtracking behaviour in some engines, as described in Russ Cox's article [Regular Expression Matching Can Be Simple And Fast][simple-and-fast]:

![a?a?a?aaa](/images/aqaqaqaaa.png)
Expand Down Expand Up @@ -99,11 +108,12 @@ val format_digraph : Format.formatter -> digraph -> unit
```

The [`Dfa`][dfa] module provides functions for converting between NFAs and DFAs,
and an `accept` function for DFAs
a DFA minimization function, and an `accept` function for DFAs

```ocaml
val determinize : Nfa.nfa -> dfa
val inject : dfa -> Nfa.nfa
val minimize : dfa -> dfa
val accept : dfa -> char list -> bool
```

Expand Down Expand Up @@ -164,3 +174,4 @@ A [ReasonML port of this project][reason-port] is available.
[reason-port]: https://github.com/joelonsql/reason-re-nfa
[joelonsql]: https://github.com/joelonsql
[powerset]: https://en.wikipedia.org/wiki/Powerset_construction
[brzozowski]: https://dl.acm.org/citation.cfm?id=2526104
Binary file added images/astarb-minimized.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
43 changes: 43 additions & 0 deletions lib/dfa.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,51 @@ type dfa = {
next state *)
}

(** [fold_states f dfa init] folds [f] over every state reachable from
    [dfa.start], calling [f] once per state in depth-first order. *)
let fold_states : 'a. (state -> 'a -> 'a) -> dfa -> 'a -> 'a =
  fun f dfa init ->
    let visited = Hashtbl.create 10 in
    (* The accumulator is threaded through the traversal; [visited]
       prevents re-entering a state when the automaton has cycles. *)
    let rec walk state acc =
      if Hashtbl.mem visited state then acc
      else begin
        let acc = f state acc in
        Hashtbl.add visited state ();
        CharMap.fold (fun _ dst acc -> walk dst acc) (dfa.next state) acc
      end
    in
    walk dfa.start init

(** [fold_transitions f dfa init] folds [f] over every transition
    [(src, c, dst)] whose source state is visited by [fold_states]. *)
let fold_transitions: 'a. (state * char * state -> 'a -> 'a) -> dfa -> 'a -> 'a =
  fun f dfa init ->
    (* Fold [f] over all the edges leaving a single state. *)
    let outgoing src acc =
      CharMap.fold (fun c dst acc -> f (src, c, dst) acc) (dfa.next src) acc
    in
    fold_states outgoing dfa init

(** [add_transition (src, c, dst) trans] records the edge src--c-->dst in
    [trans]; an edge already stored for [src] and [c] is overwritten. *)
let add_transition (src, c, dst) trans =
  (* A source with no recorded edges starts from the empty character map. *)
  let outgoing = try StateMap.find src trans with Not_found -> CharMap.empty in
  StateMap.add src (CharMap.add c dst outgoing) trans

(** [add_transition' (src, c, dst) trans] records the edge src--c-->dst in
    [trans], keeping any targets already stored for [src] and [c]: the
    destinations for each (state, character) pair form a set. *)
let add_transition' (src, c, dst) trans =
  (* Missing entries default to empty, so each lookup has a single path. *)
  let outgoing = try StateMap.find src trans with Not_found -> CharMap.empty in
  let targets = try CharMap.find c outgoing with Not_found -> StateSet.empty in
  StateMap.add src (CharMap.add c (StateSet.add dst targets) outgoing) trans

(** Build an NFA by reversing a DFA: every transition arrow is inverted,
    the final states become the start states, and the start state
    becomes the only final state *)
let reverse dfa =
  (* Insert each edge with its endpoints swapped. *)
  let flip (src, c, dst) = add_transition' (dst, c, src) in
  let reversed = fold_transitions flip dfa StateMap.empty in
  (* States with no incoming edges in the DFA have no outgoing edges here. *)
  let next s =
    match StateMap.find s reversed with
    | outgoing -> outgoing
    | exception Not_found -> CharMap.empty
  in
  { Nfa.start = dfa.finals;
    Nfa.finals = StateSet.singleton dfa.start;
    next }

(** Available transitions from a set of states *)
let transitions states nfa =
StateSet.fold (fun s m ->
Expand Down Expand Up @@ -63,6 +102,10 @@ let determinize : Nfa.nfa -> dfa =
let next s = try StateMap.find s trans with Not_found -> CharMap.empty in
{ start; finals; next }

(** Brzozowski's DFA minimization algorithm: reverse the DFA into an NFA
    and determinize it, then reverse and determinize the result again *)
let minimize g =
  g |> reverse |> determinize |> reverse |> determinize

let inject { start; finals; next } =
{ Nfa.start = Nfa.StateSet.singleton start;
finals;
Expand Down
5 changes: 4 additions & 1 deletion lib/dfa.mli
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ type dfa = {
next state *)
}

val minimize : dfa -> dfa
(** [minimize dfa] is a minimized dfa equivalent to the dfa [dfa],
    obtained via Brzozowski's algorithm: the automaton is reversed and
    determinized, and the result is reversed and determinized again *)

val accept : dfa -> char list -> bool
(** [accept dfa l] is [true] iff the dfa [dfa] accepts the
character sequence [l] *)
Expand All @@ -26,4 +30,3 @@ val determinize : Nfa.nfa -> dfa

val inject : dfa -> Nfa.nfa
(** [inject dfa] is the deterministic NFA corresponding to [dfa] *)

13 changes: 13 additions & 0 deletions lib_test/tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ let nfa_accept r s = Nfa.accept r (explode s)
(* Run the DFA [r] on the characters of the string [s]. *)
let dfa_accept r s = s |> explode |> Dfa.accept r

(* Compile a regex to an NFA and determinize it. *)
let dfa_compile r = r |> Regex.compile |> Dfa.determinize
(* Compile a regex to a DFA and then minimize it. *)
let dfa_minimize_compile r = r |> dfa_compile |> Dfa.minimize

let () =
begin
Expand All @@ -161,6 +162,12 @@ let () =
~accept:dfa_accept);
print_endline "OK!";

(* Run the string-matcher tests against minimized DFAs. *)
print_string "testing DFA-minimized acceptance..."; flush stdout;
test (string_matcher
        ~compile:dfa_minimize_compile
        ~accept:dfa_accept);
print_endline "OK!";

print_string "regenerate tests (NFA)..."; flush stdout;
regenerate_tests (combinator_matcher
~compile:Regex.compile
Expand All @@ -172,4 +179,10 @@ let () =
~compile:dfa_compile
~accept:dfa_accept);
print_endline "OK!";

print_string "regenerate tests (DFA-minimized)..."; flush stdout;
regenerate_tests (combinator_matcher
~compile:dfa_minimize_compile
~accept:dfa_accept);
print_endline "OK!";
end
7 changes: 6 additions & 1 deletion src/re-nfa-command.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ let typ = ref "nfa"

let spec =
[("-type",
Arg.Symbol (["nfa"; "dfa"],
Arg.Symbol (["nfa"; "dfa"; "dfa-minimized"],
(:=) typ),
"Output type")]

Expand Down Expand Up @@ -34,4 +34,9 @@ let () =
let dfa = Dfa.determinize nfa in
let digraph = Nfa_dot.digraph_of_nfa (Dfa.inject dfa) in
Format.printf "%a@." Nfa_dot.format_digraph digraph;
| Some r, "dfa-minimized" ->
let nfa = Regex.compile (parse_re r) in
let dfa = Dfa.minimize (Dfa.determinize nfa) in
let digraph = Nfa_dot.digraph_of_nfa (Dfa.inject dfa) in
Format.printf "%a@." Nfa_dot.format_digraph digraph;
| _ -> Arg.usage spec usage

0 comments on commit e7b9ca9

Please sign in to comment.