Maximum subarray problem
Auteurs: Jean-Christophe Filliâtre / Andrei Paskevich / Guillaume Melquiond
Catégories: Ghost code / Array Data Structure
Outils: Why3
see also the index (by topic, by tool, by reference, by year)
Maximum subarray problem
Given an array of integers, find the contiguous subarray with the largest sum. Subarrays of length 0 are allowed (which means that an array with negative values only has a maximal sum of 0).
Authors: Jean-Christophe Filliâtre (CNRS) Guillaume Melquiond (Inria) Andrei Paskevich (U-PSUD)
module Spec

  (* Shared specification vocabulary for the maximum subarray problem.
     The array and array-sum theories are re-exported, so a client that
     does [use Spec] also gets arrays and [sum]. *)

  use int.Int
  use export array.Array
  use export array.ArraySum
  (* provides [sum a l h] = the sum of a[l..h[ and suitable lemmas *)

  (* [maxsublo a maxlo s]: s is no smaller than sums of subarrays a[l..h[
     with 0 <= l < maxlo. The upper bound h ranges over l..length a, so
     this covers every segment whose left end is strictly below maxlo. *)
  predicate maxsublo (a: array int) (maxlo: int) (s: int) =
    forall l h. 0 <= l < maxlo -> l <= h <= length a -> sum a l h <= s

  (* [maxsub a s]: s is no smaller than sums of subarrays of a.
     Empty segments (l = h) are included in the quantification. *)
  predicate maxsub (a: array int) (s: int) =
    forall l h. 0 <= l <= h <= length a -> sum a l h <= s

end
In all the programs below, the reference ms stands for the maximal sum found so far, and the ghost references lo and hi hold the bounds of a subarray reaching this sum.
module Algo1

  (* Naive solution, in O(N^3) *)

  use int.Int
  use ref.Refint
  use Spec

  (* Returns the largest sum of a contiguous segment of [a] (the empty
     segment, of sum 0, is allowed). On return the ghost references
     [lo] and [hi] delimit a segment a[lo..hi[ whose sum is the result.
     Every candidate segment is enumerated and its sum is recomputed
     from scratch by the innermost loop. *)
  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = let len = length a in
    let ref ms = 0 in
    lo <- 0;
    hi <- 0;
    for start = 0 to len - 1 do
      invariant { 0 <= lo <= start && lo <= hi <= len && ms = sum a lo hi }
      invariant { maxsublo a start ms }
      for stop = start to len do
        invariant { 0 <= lo <= start && lo <= hi <= len && ms = sum a lo hi }
        invariant { maxsublo a start ms }
        invariant { forall h'. start <= h' < stop -> sum a start h' <= ms }
        (* compute the sum of a[start..stop[ *)
        let ref acc = 0 in
        for k = start to stop - 1 do
          invariant { acc = sum a start k }
          invariant { 0 <= lo <= start && lo <= hi <= len && ms = sum a lo hi }
          acc += a[k]
        done;
        assert { acc = sum a start stop };
        (* keep the witness bounds in step with the best sum *)
        if acc > ms then (ms <- acc; lo <- start; hi <- stop)
      done
    done;
    ms

end
Slightly less naive solution, in O(N^2) Do not recompute the sum, simply update it
module Algo2

  use int.Int
  use ref.Refint
  use Spec

  (* Returns the largest sum of a contiguous segment of [a] (the empty
     segment, of sum 0, is allowed). On return the ghost references
     [lo] and [hi] delimit a segment a[lo..hi[ whose sum is the result.
     For each left bound, the running sum is extended one element at a
     time instead of being recomputed. *)
  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = let len = length a in
    let ref ms = 0 in
    lo <- 0;
    hi <- 0;
    for start = 0 to len - 1 do
      invariant { 0 <= lo <= start && lo <= hi <= len && 0 <= ms = sum a lo hi }
      invariant { maxsublo a start ms }
      (* acc is the sum of a[start..stop-1[ at the top of each inner step *)
      let ref acc = 0 in
      for stop = start + 1 to len do
        invariant { 0 <= lo <= start && lo <= hi <= len && 0 <= ms = sum a lo hi }
        invariant { maxsublo a start ms }
        invariant { forall h'. start <= h' < stop -> sum a start h' <= ms }
        invariant { acc = sum a start (stop - 1) }
        acc += a[stop - 1]; (* update the sum *)
        assert { acc = sum a start stop };
        if acc > ms then (ms <- acc; lo <- start; hi <- stop)
      done
    done;
    ms

end
Divide-and-conquer solution, in O(N log N)
module Algo3

  use int.Int
  use ref.Refint
  use int.ComputerDivision
  use Spec

  (* [maximum_subarray_rec a l h lo hi] returns the largest sum of a
     contiguous segment lying inside a[l..h[ (the empty segment, of sum
     0, is allowed). On return the ghost references [lo] and [hi]
     delimit, within l..h, a segment whose sum is the result.
     Termination: each recursive call is made on a strictly shorter
     range, as stated by the variant h - l. *)
  let rec maximum_subarray_rec (a: array int) (l h: int) (ghost ref lo hi: int)
    : int
    requires { 0 <= l <= h <= length a }
    ensures  { l <= lo <= hi <= h && result = sum a lo hi }
    ensures  { forall l' h'. l <= l' <= h' <= h -> sum a l' h' <= result }
    variant  { h - l }
  = if h = l then begin
      (* base case: no element at all *)
      lo <- l; hi <- h; 0
    end else begin
      (* at least one element *)
      let mid = l + div (h - l) 2 in
      (* first consider all segments that straddle position mid, i.e.
         those whose left bound is at most mid and whose right bound is
         at least mid; note that such a segment may stop exactly at mid
         and thus need not contain a[mid] itself *)
      lo <- mid; hi <- mid;
      let ref ms = 0 in
      let ref s  = ms in
      (* phase 1: best sum of a segment ending at mid (exclusive),
         scanning leftwards; s is the sum of a[i+1..mid[ *)
      for i = mid-1 downto l do
        invariant { l <= lo <= mid = hi && ms = sum a lo hi }
        invariant { forall l'. i < l' <= mid -> sum a l' mid <= ms }
        invariant { s = sum a (i+1) mid }
        s += a[i];
        assert { s = sum a i mid };
        if s > ms then begin ms <- s; lo <- i end
      done;
      assert { forall l'. l <= l' <= mid ->
               sum a l' mid <= sum a lo mid };
      (* phase 2: extend the best left part rightwards from mid;
         s is the sum of a[lo..i[ *)
      s <- ms;
      for i = mid to h-1 do
        invariant { l <= lo <= mid <= hi <= h && ms = sum a lo hi }
        invariant { forall l' h'. l <= l' <= mid <= h' <= i ->
                    sum a l' h' <= ms }
        invariant { s = sum a lo i }
        s += a[i];
        assert { s = sum a lo (i+1) };
        (* splitting the sum at mid lets the bound on left parts apply *)
        assert { s = sum a lo mid + sum a mid (i+1) };
        if s > ms then begin ms <- s; hi <- (i+1) end
      done;
      (* then consider sums in a[l..mid[ and a[mid+1..h[, recursively *)
      begin
        (* fresh ghost bounds, copied into lo/hi only if the left half wins *)
        let ghost ref lo' = 0 in
        let ghost ref hi' = 0 in
        let left = maximum_subarray_rec a l mid lo' hi' in
        if left > ms then begin ms <- left; lo <- lo'; hi <- hi' end
      end;
      begin
        (* same for the right half *)
        let ghost ref lo' = 0 in
        let ghost ref hi' = 0 in
        let right = maximum_subarray_rec a (mid+1) h lo' hi' in
        if right > ms then begin ms <- right; lo <- lo'; hi <- hi' end
      end;
      ms
    end

  (* Entry point: runs the recursive search on the whole array. Same
     contract as the other solutions: the result is the sum of
     a[lo..hi[ and it bounds the sum of every segment of [a]. *)
  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = maximum_subarray_rec a 0 (length a) lo hi

end
Optimal solution, in O(N) Known as Kadane's algorithm
The key idea is to maintain, in addition to the best sum found so far, the best sum that ends at the current point.
i [ 1 | 7 | -3 | 4 | -7 | 1 | 2 | ... <--------------> | max sum so far is 9 <-----> max sum ending at i is 3
Then, for each new value a[i], we 1. update the sum ending at i (in particular, restarting it from a[i] alone when the sum accumulated so far is negative); 2. update the maximal sum.
module Algo4

  use int.Int
  use ref.Refint
  use Spec

  (* Single left-to-right pass. [cur] is the best sum of a segment
     ending at the current index and the ghost [cur_lo] is the left
     bound of such a segment; [ms] is the best sum seen so far, reached
     by a[lo..hi[. Returns [ms]. *)
  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
    ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures { maxsub a result }
  = let len = length a in
    let ref ms = 0 in
    let ref cur = 0 in
    let ghost ref cur_lo = 0 in
    lo <- 0;
    hi <- 0;
    for k = 0 to len - 1 do
      invariant { 0 <= lo <= hi <= k && 0 <= ms = sum a lo hi }
      invariant { forall l' h'. 0 <= l' <= h' <= k -> sum a l' h' <= ms }
      invariant { 0 <= cur_lo <= k && cur = sum a cur_lo k }
      invariant { forall l'. 0 <= l' < k -> sum a l' k <= cur }
      (* a negative running sum can only hurt: restart from a[k] *)
      if cur >= 0 then cur += a[k] else (cur <- a[k]; cur_lo <- k);
      if cur > ms then (ms <- cur; lo <- cur_lo; hi <- k + 1)
    done;
    ms

end
A slightly different implementation of Kadane's algorithm
module Algo5

  use int.Int
  use ref.Refint
  use export array.Array
  use export array.ArraySum

  (*
      [| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |]
       ......|###### maxsum #######|..............
       ............................. |## curmax ##
  *)

  (* Returns the largest sum of a contiguous segment of [a] (the empty
     segment, of sum 0, is allowed). The witness bounds are kept in
     local ghost references and are only exposed through the
     existential postcondition.
     [curmax] is the best sum of a segment ending at the current index
     and is clamped to 0 (empty segment) as soon as it goes negative. *)
  let maximum_subarray (a: array int): int
    ensures { forall l h. 0 <= l <= h <= length a -> sum a l h <= result }
    ensures { exists l h. 0 <= l <= h <= length a /\ sum a l h = result }
  = let ref maxsum = 0 in
    let ref curmax = 0 in
    let ghost ref lo = 0 in
    let ghost ref hi = 0 in
    let ghost ref cur_lo = 0 in
    for k = 0 to a.length - 1 do
      invariant { forall l. 0 <= l <= k -> sum a l k <= curmax }
      invariant { 0 <= cur_lo <= k /\ sum a cur_lo k = curmax }
      invariant { forall l h. 0 <= l <= h <= k -> sum a l h <= maxsum }
      invariant { 0 <= lo <= hi <= k /\ sum a lo hi = maxsum }
      curmax += a[k];
      (* restart with the empty segment located just after index k *)
      if curmax < 0 then (curmax <- 0; cur_lo <- k + 1);
      if curmax > maxsum then (maxsum <- curmax; lo <- cur_lo; hi <- k + 1)
    done;
    maxsum

end
Kadane's algorithm with 63-bit integers
Interestingly, we only have to require all sums to be no greater than max_int. There is no need to require the sums to be no smaller than min_int, since whenever the sum becomes negative it is replaced by the next element.
module BoundedIntegers

  use int.Int
  use mach.int.Int63
  use mach.int.Refint63
  use seq.Seq
  use mach.array.Array63
  use int.Sum

  (* [sum a lo hi] is the sum of a[lo..hi[, computed over mathematical
     (unbounded) integers: each 63-bit element is read as an int. *)
  function sum (a: array int63) (lo hi: int) : int
  = Sum.sum (fun i -> (a[i] : int)) lo hi

  (* Same single-pass algorithm as the unbounded version, over 63-bit
     machine integers.
     Precondition: the sum of every segment of [a] is at most max_int.
     Postconditions: the result is the sum of a[lo..hi[, and it is no
     smaller than the sum of ANY segment a[l..h[ of [a].
     The second postcondition previously compared the result with
     [sum a lo hi] under the quantifier, leaving l and h unused; it then
     merely restated the first postcondition and said nothing about
     maximality. It now ranges over l and h. *)
  let maximum_subarray (a: array int63) (ghost ref lo hi: int): int63
    requires { [@no overflow] forall l h. 0 <= l <= h <= length a ->
               sum a l h <= max_int }
    ensures  { 0 <= lo <= hi <= length a && result = sum a lo hi }
    ensures  { forall l h. 0 <= l <= h <= length a -> result >= sum a l h }
  = lo <- 0;
    hi <- 0;
    let n = length a in
    let ref ms = zero in
    let ghost ref l = 0 in
    let ref s = zero in
    let ref i = zero in
    while i < n do
      invariant { 0 <= lo <= hi <= i <= n && 0 <= ms = sum a lo hi }
      invariant { forall l' h'. 0 <= l' <= h' <= i -> sum a l' h' <= ms }
      invariant { 0 <= l <= i && s = sum a l i }
      invariant { forall l'. 0 <= l' < i -> sum a l' i <= s }
      variant   { n - i }
      if s < zero then begin
        (* a negative running sum is dropped: restart from a[i] alone *)
        s <- a[i]; l <- to_int i
      end else begin
        (* upper bound: instance of the precondition for the segment
           about to be formed; no lower bound is needed because s is
           non-negative in this branch *)
        assert { sum a l (i + 1) <= max_int };
        s += a[i]
      end;
      if s > ms then begin
        ms <- s; lo <- l; hi <- to_int i + 1
      end;
      incr i
    done;
    ms

end
Variant where we seek the maximal product instead of the maximal sum.
This is an exercise in Jeff Erickson's book "Algorithms", and the author reports that most solutions he could find online were incorrect. Indeed, this happens to be subtle to get right.
The idea is to maintain *two* maximal products ending at position i, one positive and one negative.
maximum so far is 10 <----> i 3 0 5 2 -1 2 4 1 | ... <----------> maximum positive product so far is 8 <--------------------------> maximum negative product so far is -80
module MaxProd

  use int.Int
  use ref.Refint
  use export array.Array

  (* the product of a[lo..hi[
     Defined by recursion on the right bound; the empty product
     (lo = hi) is 1. Declared with [let rec function], so it is usable
     both in programs and in specifications. *)
  let rec function prod (a: array int) (lo hi: int) : int
    requires { 0 <= lo <= hi <= length a }
    variant  { hi-lo }
  = if lo = hi then 1 else prod a lo (hi-1) * a[hi-1]

  (* Returns the largest product of a contiguous segment of [a]; the
     empty segment, of product 1, is allowed, so the result is at
     least 1.
     Loop state, as stated by the invariants:
     - curmaxp is the largest non-negative product of a segment ending
       at i (at least 1), reached by a[clp..i[;
     - curmaxn is the smallest negative product of a segment ending at
       i, reached by a[cln..i[, or 0 when no segment ending at i has a
       negative product;
     - maxprd is the largest product seen so far, reached by a[lo..hi[.
     All bounds (lo, hi, clp, cln) are ghost. *)
  let maximum_subarray (a: array int): int
    ensures { forall l h. 0 <= l <= h <= length a -> prod a l h <= result }
    ensures { exists l h. 0 <= l <= h <= length a /\ prod a l h  = result }
  =
    let ref maxprd = 1 in
    let ref curmaxp = 1 in
    let ref curmaxn = 0 in
    let ghost ref lo = 0 in
    let ghost ref hi = 0 in
    let ghost ref clp = 0 in
    let ghost ref cln = 0 in
    for i = 0 to a.length - 1 do
      invariant { 0 <= clp <= i /\ prod a clp i = curmaxp >= 1 }
      invariant { forall l. 0 <= l <= i -> 0 <= prod a l i ->
                  prod a l i <= curmaxp }
      invariant { curmaxn <= 0 }
      invariant { curmaxn < 0 ->
                  0 <= cln <= i /\ prod a cln i = curmaxn < 0 }
      invariant { curmaxn < 0 -> forall l. 0 <= l <= i ->
                  prod a l i < 0 -> curmaxn <= prod a l i }
      invariant { curmaxn = 0 -> forall l. 0 <= l <= i -> prod a l i >= 0 }
      invariant { forall l h. 0 <= l <= h <= i -> prod a l h <= maxprd }
      invariant { 0 <= lo <= hi <= i /\ prod a lo hi = maxprd >= 1 }
      if a[i] = 0 then (
        (* a zero resets both products to the empty segment after i *)
        curmaxp <- 1; clp <- i+1;
        curmaxn <- 0; cln <- i+1
      ) else if a[i] > 0 then (
        (* a positive factor scales both products and keeps their signs *)
        curmaxp <- curmaxp * a[i];
        curmaxn <- curmaxn * a[i];
      ) else (* a[i] < 0 *) if curmaxn < 0 then (
        (* a negative factor flips signs: the two products swap roles *)
        curmaxp, curmaxn <- curmaxn * a[i], curmaxp * a[i];
        clp, cln <- cln, clp
      ) else ( (* curmaxn = 0 i.e. no negative product *)
        (* the positive product turns negative; the positive side
           restarts from the empty segment after i *)
        curmaxp, curmaxn <- 1, curmaxp * a[i];
        clp, cln <- i+1, clp;
      ) ;
      if curmaxp > maxprd then (
        maxprd <- curmaxp; lo <- clp; hi <- i+1 )
    done;
    maxprd

end
download ZIP archive