Wiki Agenda Contact Version française

Regular expression matching using residuals

This example implements and prove correct a function that checks if a string matches a regular expression. The algorithm is a simple one based on computation of residuals.


Authors: Claude Marché

Topics: Semantics of languages / Inductive predicates

Tools: Why3

see also the index (by topic, by tool, by reference, by year)


Decision of regular expression membership

Decision algorithm based on residuals

module Residuals

  type char

  val predicate eq (x y : char)
    ensures { result <-> x = y }

  clone export regexp.Regexp with type char = char

  let rec accepts_epsilon (r:regexp) : bool
    variant { r }
    ensures { result <-> mem Nil r }
  = match r with
    | Empty -> false
    | Epsilon -> true
    | Char _ -> false
    | Alt r1 r2 -> accepts_epsilon r1 || accepts_epsilon r2
    | Concat r1 r2 -> accepts_epsilon r1 && accepts_epsilon r2
    | Star _ -> true
    end

  lemma inversion_mem_star_gen :
    forall c w r w' r'.
      w' = Cons c w /\ r' = Star r ->
      mem w' r' ->
      exists w1 w2. w = w1 ++ w2 /\ mem (Cons c w1) r /\ mem w2 r'

  lemma inversion_mem_star :
    forall c w r. mem (Cons c w) (Star r) ->
       exists w1 w2. w = w1 ++ w2 /\ mem (Cons c w1) r /\ mem w2 (Star r)

  let rec residual (r:regexp) (c:char) : regexp
    variant { r }
    ensures { forall w. mem w result <-> mem (Cons c w) r }
  = match r with
    | Empty -> Empty
    | Epsilon -> Empty
    | Char c' -> if eq c c' then Epsilon else Empty
    | Alt r1 r2 -> Alt (residual r1 c) (residual r2 c)
    | Concat r1 r2 ->
       if accepts_epsilon r1 then
          Alt (Concat (residual r1 c) r2) (residual r2 c)
       else Concat (residual r1 c) r2
    | Star r1 ->
      Concat (residual r1 c) r
    end

residual r c denotes the set of words w such that mem c.w r

  let rec decide_mem (w:word) (r:regexp) : bool
    variant { w }
    ensures { result <-> mem w r }
  = match w with
    | Nil -> accepts_epsilon r
    | Cons c w' -> decide_mem w' (residual r c)
    end

end

module Test

  type char = int

  clone Residuals with type char = char

  let constant a : char = 65

  let constant aa : word = Cons a (Cons a Nil)

  let constant astar : regexp = Star (Char a)

  let test_astar () = decide_mem aa astar

end

module DFA

  type char
  use list.List
  type word = list char

  use map.Map

automaton where states = 0..size-1, init state = 0

  type state = int

  type t =
    { size : int;
      is_final_state : map state bool;
      transition : map (state,char) int;
    }

  function exec (a:t) (q:state) (w:word) : state =
    match w with
    | Nil -> q
    | Cons c r -> let q' = a.transition[q,c] in exec a q' r
    end

  function accepts (a:t) (w:word) : bool =
    a.is_final_state[exec a 0 w]

end

download ZIP archive