Wiki Agenda Contact Version française

String search

A naive algorithm and one using a bad shift table


Authors: Jean-Christophe Filliâtre / Claire Dross

Topics: Strings / Algorithms

Tools: Why3

see also the index (by topic, by tool, by reference, by year)


String search

Author: Jean-Christophe FilliĆ¢tre (CNRS) from a SPARK demo by Claire Dross (Adacore)

module Spec

  use int.Int
  use string.Char
  use string.String

  (* ***** all this to be moved to String? ***** *)
  predicate (==) (x y: string) = eq_string x y

  predicate matches (pat text: string) (p: int) =
    0 <= p <= length text - length pat /\
    substring text p (length pat) == pat
  (* ***** *)

end

module Occurs

  use int.Int
  use mach.int.Int63
  use string.String
  use string.Char
  use string.OCaml
  use Spec

  let occurs (pat text: string) (p: int63) : bool
    requires { 0 <= p <= length text - length pat }
    ensures  { result <-> matches pat text p }
  = let ghost n = length text in
    let m = length pat in
    for i = 0 to m - 1 do
      invariant { substring text p i == substring pat 0 i }
      assert { p + i <= n };
      if not (eq_char text[p + i] pat[i]) then return false
    done;
    true

end

module Naive

  use int.Int
  use mach.int.Int63
  use string.String
  use string.Char
  use string.OCaml
  use Spec
  use Occurs

  let search1 (pat text: string) : int63
    requires { length pat <= length text }
    ensures  { -1 <= result <= length text - length pat }
    ensures  { if result = -1 then
                 forall j. not (matches pat text j)
               else
                 matches pat text result }
  =
    let m = length pat in
    let n = length text in
    for i = 0 to n - m do
      invariant { forall j. 0 <= j < i -> substring text j m <> pat }
      if occurs pat text i then return i;
    done;
    -1

  let search2 (pat text: string) : int63
    requires { length pat <= length text }
    ensures  { -1 <= result <= length text - length pat }
    ensures  { if result = -1 then
                 forall j. not (matches pat text j)
               else
                 matches pat text result }
  =
    let m = length pat in
    let n = length text in
    for i = 0 to n - m do
      invariant { forall j. 0 <= j < i -> substring text j m <> pat }
      if sub text i m = pat then return i;
    done;
   -1

end

module BadShiftTable

  use int.Int
  use mach.int.Int63
  use string.String
  use string.Char
  use string.OCaml
  use Spec
  use Occurs

  clone fmap.MapImp as M with type key = char

  (*
   ------------------------+---+----------------
                           | C |
   -----+-----+---+--------+---+----------------
        | ... | C | ..!C.. |
        +-----+---+--------+
          0                  m
                <----sht c--->
  *)
  type bad_shift_table = {
    pat: string;
    sht: M.t int63;
  }
  invariant { forall j c. 0 <= j < length pat -> c = pat[j] ->
              M.mem c sht }
  invariant { forall c. M.mem c sht -> 1 <= sht c <= length pat + 1 }
  invariant { forall c. M.mem c sht -> forall j. 1 <= j < sht c ->
              pat[length pat - j] <> c }
  by { pat = ""; sht = M.create () }

  let make_table (pat: string) : bad_shift_table
  = let m = length pat in
    let sht = M.create () in
    for i = 0 to m - 1 do
      invariant { forall j c. 0 <= j < i -> c = pat[j] -> M.mem c sht }
      invariant { forall c. M.mem c sht -> 1 <= sht c <= m + 1 }
      invariant { forall c. M.mem c sht -> forall j. m - sht c < j < i ->
                  pat[j] <> c }
      M.add pat[i] (m - i) sht;
    done;
    { pat = pat; sht = sht }

  let lemma shift (bst: bad_shift_table) (text: string) (i: int63)
    requires { 0 <= i <= length text - length bst.pat }
    requires { M.mem text[i + length bst.pat] bst.sht }
    ensures  { forall j. i < j < i + M.find text[i + length bst.pat] bst.sht ->
               j <= length text - length bst.pat ->
               substring text j (length bst.pat) <> bst.pat }
  = let m = String.length bst.pat in
    let c = Char.get text (to_int i + m) in
    let lemma aux (j: int)
      requires { i < j < i + M.find c bst.sht }
      requires { j <= length text - m }
      ensures  { substring text j m <> bst.pat }
    = assert { (substring text j m)[i + m - j] = c };
      assert { bst.pat[m - (j - i)] <> c } in
    ()

  let lemma no_shift (bst: bad_shift_table) (text: string) (i: int63)
    requires { 0 <= i < length text - length bst.pat }
    requires { not (M.mem text[i + length bst.pat] bst.sht) }
    ensures  { forall j. i < j <= i + length bst.pat ->
               j <= length text - length bst.pat ->
               substring text j (length bst.pat) <> bst.pat }
  = let m = String.length bst.pat in
    let c = Char.get text (to_int i + m) in
    assert { forall j. 0 <= j < m -> bst.pat[j] <> c };
    let lemma aux (j: int)
      requires { i < j <= i + m }
      requires { j <= length text - m }
      ensures  { substring text j m <> bst.pat }
    = assert { (substring text j m)[i + m - j] = c } in
    ()

  let search (bst: bad_shift_table) (text: string) : int63
    requires { length bst.pat <= length text }
    ensures  { -1 <= result <= length text - length bst.pat }
    ensures  { if result = -1 then
                 forall j. not (matches bst.pat text j)
               else
                 matches bst.pat text result }
  = let pat = bst.pat in
    let m = length pat in
    let n = length text in
    let ref i = 0 in
    while i <= n - m do
      invariant { 0 <= i <= n }
      invariant { forall j. 0 <= j < i -> j <= n - m ->
                  substring text j m <> pat }
      variant   { n - m - i }
      if occurs pat text i then return i;
      if i = n - m then break;
      let c = text[i + m] in
      i <- i + if M.mem c bst.sht then (shift    bst text i; M.find c bst.sht)
                                  else (no_shift bst text i; m + 1)
    done;
   -1

end


download ZIP archive

Why3 Proof Results for Project "string_search"

Theory "string_search.Occurs": fully verified

ObligationsAlt-Ergo 2.3.0Z3 4.8.6
VC for occurs------
split_vc
integer overflow0.01---
loop invariant init0.03---
assertion---0.02
precondition0.01---
integer overflow0.03---
precondition0.01---
postcondition0.05---
loop invariant preservation------
rewrite <- concat_substring
loop invariant preservation3.49---
rewrite premises0.01---
rewrite premises0.02---
rewrite premises0.01---
rewrite premises0.01---
rewrite premises0.02---
postcondition0.06---
VC for occurs---0.02

Theory "string_search.Naive": fully verified

ObligationsAlt-Ergo 2.3.0Z3 4.8.4Z3 4.8.6
VC for search1---------
split_vc
integer overflow---0.02---
loop invariant init0.01------
precondition------0.02
postcondition0.01------
postcondition0.01------
loop invariant preservation0.01------
postcondition0.02------
postcondition0.01------
out of loop bounds0.01------
VC for search2---------
split_vc
integer overflow---0.03---
loop invariant init0.02------
precondition---------
split_vc
precondition0.01------
precondition0.01------
precondition------0.02
postcondition---------
split_vc
postcondition0.01------
postcondition---------
unfold matches
postcondition---------
split_vc
postcondition------0.02
postcondition0.01------
loop invariant preservation0.02------
postcondition0.01------
postcondition0.02------
out of loop bounds------0.03

Theory "string_search.BadShiftTable": fully verified

ObligationsAlt-Ergo 2.3.0Z3 4.8.6
VC for bad_shift_table0.03---
VC for make_table------
split_vc
integer overflow0.06---
loop invariant init0.02---
loop invariant init0.03---
loop invariant init0.03---
integer overflow0.02---
precondition0.01---
loop invariant preservation0.01---
loop invariant preservation0.11---
loop invariant preservation0.02---
precondition0.01---
precondition0.02---
precondition0.08---
out of loop bounds0.02---
VC for shift------
split_vc
assertion0.04---
assertion0.07---
postcondition0.01---
postcondition------
split_vc
postcondition0.01---
VC for no_shift0.44---
VC for search------
split_vc
loop invariant init0.02---
loop invariant init---0.06
integer overflow---0.03
precondition0.02---
postcondition0.02---
postcondition0.01---
integer overflow---0.04
postcondition0.02---
postcondition0.02---
integer overflow---0.03
precondition0.02---
precondition---0.03
precondition0.02---
precondition0.01---
precondition------
split_vc
precondition0.01---
precondition0.02---
precondition0.02---
integer overflow---0.03
integer overflow0.02---
loop variant decrease0.02---
loop invariant preservation0.03---
loop invariant preservation0.37---
postcondition0.01---
postcondition------
split_vc
postcondition0.01---
postcondition0.01---