Wiki Agenda Contact English version

Maximum subarray problem


Auteurs: Jean-Christophe Filliâtre / Andrei Paskevich / Guillaume Melquiond

Catégories: Ghost code / Array Data Structure

Outils: Why3

see also the index (by topic, by tool, by reference, by year)


Maximum subarray problem

Given an array of integers, find the contiguous subarray with the largest sum. Subarrays of length 0 are allowed (which means that an array with negative values only has a maximal sum of 0).

Authors: Jean-Christophe Filliâtre (CNRS), Guillaume Melquiond (Inria), Andrei Paskevich (U-PSUD)

module Spec
  (* Shared specification vocabulary: arrays, array sums, and the two
     upper-bound predicates used in the contracts and loop invariants of
     the modules that [use Spec]. *)
  use int.Int
  use export array.Array

  use export array.ArraySum
  (* provides [sum a l h] = the sum of a[l..h[ and suitable lemmas *)

  (* [maxsublo a maxlo s]: s is no smaller than the sum of any subarray
     a[l..h[ whose left bound satisfies 0 <= l < maxlo (with
     l <= h <= length a). Subarrays starting at maxlo or later are not
     constrained. *)
  predicate maxsublo (a: array int) (maxlo: int) (s: int) =
    forall l h. 0 <= l < maxlo -> l <= h <= length a -> sum a l h <= s

  (* [maxsub a s]: s is no smaller than the sum of any subarray a[l..h[
     of a, the empty subarrays (l = h) included. *)
  predicate maxsub (a: array int) (s: int) =
    forall l h. 0 <= l <= h <= length a -> sum a l h <= s

end

In all the code below, the reference ms holds the maximal sum found so far, and the ghost references lo and hi hold the bounds of the subarray achieving this sum.

module Algo1

Naive solution, in O(N^3)

  use int.Int
  use ref.Refint
  use Spec

  (* [maximum_subarray a lo hi] returns an upper bound on all subarray sums
     of [a] that is itself the sum of the subarray a[lo..hi[, i.e. the
     maximal subarray sum. The ghost references [lo] and [hi] are outputs:
     on exit they delimit a subarray whose sum is the result.
     Brute force: every pair of bounds (l, h) is enumerated and the sum of
     a[l..h[ is recomputed from scratch by a third loop. *)
  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = lo <- 0;
    hi <- 0;
    let n = length a in
    (* start from the empty subarray a[0..0[, whose sum is 0 *)
    let ref ms = 0 in
    (* l ranges over the candidate left bounds *)
    for l = 0 to n-1 do
      (* ms is the sum of the witness a[lo..hi[ *)
      invariant { 0 <= lo <= l && lo <= hi <= n && ms = sum a lo hi }
      (* ms bounds every subarray starting strictly before l *)
      invariant { maxsublo a l ms }
      (* h ranges over the candidate right bounds, starting with the empty
         subarray a[l..l[ *)
      for h = l to n do
        invariant { 0 <= lo <= l && lo <= hi <= n && ms = sum a lo hi }
        invariant { maxsublo a l ms }
        (* ms also bounds the subarrays a[l..h'[ already tried for this l *)
        invariant { forall h'. l <= h' < h -> sum a l h' <= ms }
        (* compute the sum of a[l..h[ *)
        let ref s = 0 in
        for i = l to h-1 do
          invariant { s = sum a l i }
          (* restated so that the facts about ms, lo, hi survive this loop *)
          invariant { 0 <= lo <= l && lo <= hi <= n && ms = sum a lo hi }
          s += a[i]
        done;
        assert { s = sum a l h };
        (* strictly better candidate: record the sum and its bounds *)
        if s > ms then begin ms <- s; lo <- l; hi <- h end
      done
    done;
    ms

end

Slightly less naive solution, in O(N^2). Do not recompute the sum; simply update it.

module Algo2

  use int.Int
  use ref.Refint
  use Spec

  (* [maximum_subarray a lo hi] returns the maximal subarray sum of [a]:
     a value that bounds every subarray sum and is the sum of a[lo..hi[.
     The ghost references [lo] and [hi] are outputs delimiting that
     subarray.
     Two nested loops: for each left bound l, the running sum s of a[l..h[
     is extended by one element as h grows, instead of being recomputed. *)
  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = lo <- 0;
    hi <- 0;
    let n = length a in
    (* start from the empty subarray a[0..0[, whose sum is 0 *)
    let ref ms = 0 in
    (* l ranges over the candidate left bounds *)
    for l = 0 to n-1 do
      (* ms is non-negative and is the sum of the witness a[lo..hi[ *)
      invariant { 0 <= lo <= l && lo <= hi <= n && 0 <= ms = sum a lo hi }
      (* ms bounds every subarray starting strictly before l *)
      invariant { maxsublo a l ms }
      (* running sum for the current left bound; a[l..l[ is empty *)
      let ref s = 0 in
      (* h ranges over the right bounds of the non-empty subarrays a[l..h[ *)
      for h = l+1 to n do
        invariant
                { 0 <= lo <= l && lo <= hi <= n && 0 <= ms = sum a lo hi }
        invariant { maxsublo a l ms }
        (* ms bounds the subarrays a[l..h'[ already seen for this l; the
           case h' = l is the empty subarray, covered by 0 <= ms *)
        invariant { forall h'. l <= h' < h -> sum a l h' <= ms }
        (* on entry s is the sum up to, but excluding, a[h-1] *)
        invariant { s = sum a l (h-1) }
        s += a[h-1]; (* update the sum *)
        assert { s = sum a l h };
        (* strictly better candidate: record the sum and its bounds *)
        if s > ms then begin ms <- s; lo <- l; hi <- h end
      done
    done;
    ms

end

Divide-and-conquer solution, in O(N log N)

module Algo3

  use int.Int
  use ref.Refint
  use int.ComputerDivision
  use Spec

  (* [maximum_subarray_rec a l h lo hi] returns the maximal sum of a
     subarray of the slice a[l..h[: the result bounds the sum of every
     a[l'..h'[ with l <= l' <= h' <= h and equals sum a lo hi, where the
     ghost outputs [lo] and [hi] satisfy l <= lo <= hi <= h.
     The slice is split at mid; candidates are the subarrays that span
     position mid, plus the best subarrays of a[l..mid[ and a[mid+1..h[
     obtained recursively. Termination: h - l decreases at each call. *)
  let rec maximum_subarray_rec (a: array int) (l h: int) (ghost ref lo hi: int)
    : int
    requires { 0 <= l <= h <= length a }
    ensures  { l <= lo <= hi <= h && result = sum a lo hi }
    ensures  { forall l' h'. l <= l' <= h' <= h -> sum a l' h' <= result }
    variant  { h - l }
  = if h = l then begin
      (* base case: no element at all *)
      lo <- l; hi <- h; 0
    end else begin
      (* at least one element *)
      let mid = l + div (h - l) 2 in
      (* first consider all sums of subarrays a[l'..h'[ that span position
         mid, i.e. with l' <= mid <= h'; start from the empty a[mid..mid[ *)
      lo <- mid; hi <- mid;
      let ref ms = 0 in
      let ref s  = ms in
      (* phase 1: extend leftwards from mid to find the best left bound lo
         for subarrays ending at mid *)
      for i = mid-1 downto l do
        invariant { l <= lo <= mid = hi && ms = sum a lo hi }
        (* ms bounds the sums a[l'..mid[ for the left bounds seen so far *)
        invariant { forall l'. i < l' <= mid -> sum a l' mid <= ms }
        (* s is the sum of everything between the cursor and mid *)
        invariant { s = sum a (i+1) mid }
        s += a[i];
        assert { s = sum a i mid };
        if s > ms then begin ms <- s; lo <- i end
      done;
      (* lo is now the best left bound among all l' in l..mid *)
      assert { forall l'. l <= l' <= mid ->
               sum a l' mid <= sum a lo mid };
      (* phase 2: keep lo fixed and extend rightwards from mid *)
      s <- ms;
      for i = mid to h-1 do
        invariant { l <= lo <= mid <= hi <= h && ms = sum a lo hi }
        (* ms bounds every spanning subarray whose right bound is <= i *)
        invariant { forall l' h'. l <= l' <= mid <= h' <= i ->
                    sum a l' h' <= ms }
        invariant { s = sum a lo i }
        s += a[i];
        assert { s = sum a lo (i+1) };
        (* split at mid: the left part is maximal by phase 1, so comparing
           s for each right bound is enough *)
        assert { s = sum a lo mid + sum a mid (i+1) };
        if s > ms then begin ms <- s; hi <- (i+1) end
      done;
      (* then consider sums in a[l..mid[ and a[mid+1..h[, recursively *)
      begin
         (* fresh ghost bounds so that lo, hi are overwritten only when the
            recursive result is strictly better *)
         let ghost ref lo' = 0 in
         let ghost ref hi' = 0 in
         let left = maximum_subarray_rec a l mid lo' hi' in
         if left > ms then begin ms <- left; lo <- lo'; hi <- hi' end
      end;
      begin
         let ghost ref lo' = 0 in
         let ghost ref hi' = 0 in
         (* subarrays starting at mid or before are already covered above,
            so the right half starts at mid+1 *)
         let right = maximum_subarray_rec a (mid+1) h lo' hi' in
         if right > ms then begin ms <- right; lo <- lo'; hi <- hi' end
      end;
      ms
    end

  (* [maximum_subarray a lo hi] returns the maximal subarray sum of the
     whole array [a], with ghost outputs [lo] and [hi] delimiting a
     subarray that achieves it. Runs the recursive procedure on a[0..length a[. *)
 let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = maximum_subarray_rec a 0 (length a) lo hi

end

Optimal solution, in O(N), known as Kadane's algorithm.

The key idea is to maintain, in addition to the best sum found so far, the best sum that ends at the current point.

                                i
  [ 1 | 7 | -3 | 4 | -7 | 1 | 2 | ...
   <-------------->
   max sum so far is 9
                         <----->
                         max sum ending at i is 3

Then, for each new value a[i], we
  1. update the sum ending at i (in particular, restarting it from a[i] alone if the sum accumulated so far is negative);
  2. update the maximal sum.

module Algo4

  use int.Int
  use ref.Refint
  use Spec

  (* [maximum_subarray a lo hi] returns the maximal subarray sum of [a]:
     a value that bounds every subarray sum and is the sum of a[lo..hi[.
     The ghost references [lo] and [hi] are outputs delimiting that
     subarray.
     Single pass: besides the best sum ms found so far, the loop maintains
     s, the sum of a[l..i[, which dominates the sum of every subarray
     ending at i and starting strictly before i. *)
  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = lo <- 0;
    hi <- 0;
    let n = length a in
    (* start from the empty subarray a[0..0[, whose sum is 0 *)
    let ref ms = 0 in
    (* ghost left bound of the subarray whose sum is s *)
    let ghost ref l = 0 in
    let ref s = 0 in
    for i = 0 to n-1 do
      (* ms is non-negative and is the sum of the witness a[lo..hi[ *)
      invariant { 0 <= lo <= hi <= i && 0 <= ms = sum a lo hi }
      (* ms bounds every subarray of the prefix a[0..i[ *)
      invariant { forall l' h'. 0 <= l' <= h' <= i -> sum a l' h' <= ms }
      (* s is the sum of a[l..i[ *)
      invariant { 0 <= l <= i && s = sum a l i }
      (* s bounds every sum a[l'..i[ with l' < i *)
      invariant { forall l'. 0 <= l' < i -> sum a l' i <= s }
      (* a negative running sum cannot help: restart from a[i] alone,
         otherwise extend the current subarray with a[i] *)
      if s < 0 then begin s <- a[i]; l <- i end else s += a[i];
      (* strictly better candidate: record the sum and its bounds *)
      if s > ms then begin ms <- s; lo <- l; hi <- (i+1) end
    done;
    ms

end

A slightly different implementation of Kadane's algorithm

module Algo5

  use int.Int
  use ref.Refint
  use export array.Array
  use export array.ArraySum

(*
    [| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |]
     ......|###### maxsum #######|..............
     ............................. |## curmax ##
*)

  (* [maximum_subarray a] returns the maximal subarray sum of [a]: the
     result bounds the sum of every a[l..h[ and is the sum of some a[l..h[.
     Unlike a contract exposing ghost bounds, the witness is only stated
     existentially; the bounds are kept in local ghost references.
     Here curmax is the best sum of a subarray ending at i, the empty one
     included, so it never stays negative. *)
  let maximum_subarray (a: array int): int
    ensures { forall l h. 0 <= l <= h <= length a -> sum a l h <= result }
    ensures { exists l h. 0 <= l <= h <= length a /\ sum a l h  = result }
  =
    let ref maxsum = 0 in
    let ref curmax = 0 in
    (* ghost bounds of the subarray whose sum is maxsum *)
    let ghost ref lo = 0 in
    let ghost ref hi = 0 in
    (* ghost left bound of the subarray whose sum is curmax *)
    let ghost ref cl = 0 in
    for i = 0 to a.length - 1 do
      (* curmax bounds every sum a[l..i[, the empty one (l = i) included *)
      invariant { forall l. 0 <= l <= i -> sum a l i <= curmax }
      invariant { 0 <= cl <= i /\ sum a cl i  = curmax }
      (* maxsum bounds every subarray of the prefix a[0..i[ *)
      invariant { forall l h. 0 <= l <= h <= i -> sum a l h <= maxsum }
      invariant { 0 <= lo <= hi <= i /\ sum a lo hi = maxsum }
      curmax += a[i];
      (* a negative sum is worse than the empty subarray a[i+1..i+1[ *)
      if curmax < 0 then begin curmax <- 0; cl <- i+1 end;
      (* strictly better candidate: record the sum and its bounds *)
      if curmax > maxsum then begin maxsum <- curmax; lo <- cl; hi <- i+1 end
    done;
    maxsum

end

Kadane's algorithm with 63-bit integers

Interestingly, we only have to require all sums to be no greater than max_int. There is no need to require the sums to be no smaller than min_int, since whenever the sum becomes negative it is replaced by the next element.

module BoundedIntegers
  (* Kadane's algorithm on machine integers: array elements, running sums
     and the loop index have type int63, so every arithmetic operation has
     to be shown free of overflow. *)

  use int.Int
  use mach.int.Int63
  use mach.int.Refint63
  use seq.Seq
  use mach.array.Array63
  use int.Sum

  (* [sum a lo hi]: logic function giving the sum of the elements a[lo..hi[,
     each one converted to an unbounded mathematical integer. *)
  function sum (a: array int63) (lo hi: int) : int =
    Sum.sum (fun i -> (a[i] : int)) lo hi

  (* [maximum_subarray a lo hi] returns the maximal subarray sum of [a] as
     an int63. The ghost references [lo] and [hi] are outputs delimiting a
     subarray whose sum is the result.
     Precondition: no subarray sum exceeds max_int. No lower bound is
     required, since a negative running sum is never added to; it is
     replaced by the next element. *)
  let maximum_subarray (a: array int63) (ghost ref lo hi: int): int63
    requires { [@no overflow] forall l h. 0 <= l <= h <= length a ->
               sum a l h <= max_int }
    ensures { 0 <= lo <= hi <= length a && result  = sum a lo hi }
    (* maximality: the result bounds the sum of every subarray a[l..h[,
       so the bound is stated on the quantified l and h *)
    ensures { forall l h. 0 <= l <= h <= length a -> result >= sum a l h }
  = lo <- 0;
    hi <- 0;
    let n = length a in
    (* start from the empty subarray a[0..0[, whose sum is 0 *)
    let ref ms = zero in
    (* ghost left bound of the subarray whose sum is s *)
    let ghost ref l = 0 in
    let ref s = zero in
    (* machine-integer index, hence a while loop with an explicit variant *)
    let ref i = zero in
    while i < n do
      (* ms is non-negative and is the sum of the witness a[lo..hi[ *)
      invariant { 0 <= lo <= hi <= i <= n && 0 <= ms = sum a lo hi }
      (* ms bounds every subarray of the prefix a[0..i[; at exit i = n,
         which yields the maximality postcondition *)
      invariant { forall l' h'. 0 <= l' <= h' <= i -> sum a l' h' <= ms }
      (* s is the sum of a[l..i[ *)
      invariant { 0 <= l <= i && s = sum a l i }
      (* s bounds every sum a[l'..i[ with l' < i *)
      invariant { forall l'. 0 <= l' < i -> sum a l' i <= s }
      variant   { n - i }
      (* a negative running sum is dropped: restart from a[i] alone *)
      if s < zero then begin s <- a[i]; l <- to_int i end
      (* otherwise s + a[i] is the subarray sum a[l..i+1[, which the
         precondition bounds by max_int; s >= 0 rules out underflow *)
      else begin assert { sum a l (i + 1) <= max_int }; s += a[i] end;
      (* strictly better candidate: record the sum and its bounds *)
      if s > ms then begin
        ms <- s; lo <- l; hi <- to_int i + 1 end;
      incr i
    done;
    ms

end

Variant where we seek for the maximal product instead of the maximal sum.

This is an exercise in Jeff Erickson's book "Algorithms", and the author reports that most solutions he could find online were incorrect. Indeed, this happens to be subtle to get right.

The idea is to maintain *two* maximal products ending at position i, one positive and one negative.

       maximum so far is 10
       <---->               i
  3  0  5  2  -1  2  3  1  | ...
                 <------->
                 maximum positive product ending at i is 6
       <----------------->
       maximum negative product ending at i is -60

module MaxProd

  use int.Int
  use ref.Refint
  use export array.Array

  (* [prod a lo hi] is the product of the elements a[lo..hi[; the empty
     range (lo = hi) has product 1. Defined by peeling off the last element
     a[hi-1], with hi - lo as the termination measure. Declared as
     [let rec function], so it can be used in specifications as well. *)
  let rec function prod (a: array int) (lo hi: int) : int
    requires { 0 <= lo <= hi <= length a }
    variant  { hi-lo }
  = if lo = hi then 1 else prod a lo (hi-1) * a[hi-1]

the product of a[lo..hi[

  (* [maximum_subarray a] returns the maximal product of a contiguous,
     possibly empty, subarray of [a]: the result bounds prod a l h for all
     bounds l <= h and equals prod a l h for some of them. Since the empty
     product is 1, the result is at least 1.
     The loop tracks two extreme products ending at position i:
     - curmaxp, the largest non-negative one (at least 1), starting at clp;
     - curmaxn, the smallest negative one, starting at cln, or 0 when no
       product ending at i is negative.
     A negative element swaps the roles of the two. *)
  let maximum_subarray (a: array int): int
    ensures { forall l h. 0 <= l <= h <= length a -> prod a l h <= result }
    ensures { exists l h. 0 <= l <= h <= length a /\ prod a l h  = result }
  =
    let ref maxprd = 1 in
    let ref curmaxp = 1 in
    let ref curmaxn = 0 in
    (* ghost bounds of the subarray whose product is maxprd *)
    let ghost ref lo = 0 in
    let ghost ref hi = 0 in
    (* ghost left bounds of the subarrays whose products are curmaxp and
       curmaxn respectively *)
    let ghost ref clp = 0 in
    let ghost ref cln = 0 in
    for i = 0 to a.length - 1 do
      (* curmaxp is the product of a[clp..i[ and is at least 1 *)
      invariant { 0 <= clp <= i /\ prod a clp i = curmaxp >= 1 }
      (* curmaxp bounds every non-negative product ending at i *)
      invariant { forall l. 0 <= l <= i -> 0 <= prod a l i ->
                    prod a l i <= curmaxp }
      invariant { curmaxn <= 0 }
      (* when negative, curmaxn is the product of a[cln..i[ ... *)
      invariant { curmaxn < 0 -> 0 <= cln <= i /\ prod a cln i = curmaxn < 0 }
      (* ... and is below every negative product ending at i *)
      invariant { curmaxn < 0 -> forall l. 0 <= l <= i -> prod a l i < 0 ->
                    curmaxn <= prod a l i }
      (* curmaxn = 0 encodes that no product ending at i is negative *)
      invariant { curmaxn = 0 -> forall l. 0 <= l <= i -> prod a l i >= 0 }
      (* maxprd bounds every product within the prefix a[0..i[ *)
      invariant { forall l h. 0 <= l <= h <= i -> prod a l h <= maxprd }
      invariant { 0 <= lo <= hi <= i /\ prod a lo hi = maxprd >= 1 }
      (* a zero element resets both products: restart from the empty
         subarray a[i+1..i+1[ *)
      if a[i] = 0 then (
        curmaxp <- 1; clp <- i+1; curmaxn <- 0; cln <- i+1 )
      (* a positive element scales both products and keeps their signs *)
      else if a[i] > 0 then (
        curmaxp <- curmaxp * a[i];
        curmaxn <- curmaxn * a[i]; )
      else (* a[i] < 0 *)
        (* signs flip: the old negative product becomes the new positive
           one and conversely, together with their left bounds *)
        if curmaxn < 0 then (
          curmaxp, curmaxn <- curmaxn * a[i], curmaxp * a[i];
          clp, cln <- cln, clp )
        else ( (* curmaxn = 0 i.e. no negative product *)
          (* only the empty product 1 remains non-negative; the old
             positive product turns into the new negative one *)
          curmaxp, curmaxn <- 1, curmaxp * a[i];
          clp, cln <- i+1, clp; )
      ;
      (* strictly better candidate: record the product and its bounds *)
      if curmaxp > maxprd then (
        maxprd <- curmaxp; lo <- clp; hi <- i+1
      )
    done;
    maxprd

end

download ZIP archive