Knuth-Morris-Pratt algorithm
From CodeCodex
The Knuth-Morris-Pratt algorithm for string matching runs in O(m + n) time, where m is the length of the pattern and n is the length of the text to be searched. Compare this with the brute-force search time of O(mn) or the Boyer-Moore algorithm worst-case time of O(m + n).
Contents |
[edit] Implementations
[edit] C
#include <stdio.h>
#include <stdlib.h>
const char *kmp_search(const char *text, const char *pattern)
{
int *T;
int i, j;
const char *result = NULL;
if (pattern[0] == '\0')
return text;
/* Construct the lookup table */
T = malloc((strlen(pattern)+1) * sizeof *T);
T[0] = -1;
for (i=0; pattern[i] != '\0'; i++) {
T[i+1] = T[i] + 1;
while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])
T[i+1] = T[T[i+1]-1] + 1;
}
/* Perform the search */
for (i=j=0; text[i] != '\0'; ) {
if (j < 0 || text[i] == pattern[j]) {
++i, ++j;
if (pattern[j] == '\0') {
result = text+i-j;
break;
}
}
else j = T[j];
}
free(T);
return result;
}
[edit] Java
int searchKMP(char[] w, char[] s, int[] t)
{
int m=0;
int i=0;
while( ((m + i) < s.length) && (i<w.length) )
{
if(s[m+i] == w[i]) i++;
else
{
m += i - t[i];
if (i > 0) i = t[i];
i++;
}
}
if(i == w.length) return m;
else return -1;
}
[edit] Javascript
function kmp_search(s, w) {
var m = 0, i = 0,
pos, cnd, t,
slen = s.length,
wlen = w.length;
/* String to array conversion */
s = s.split("");
w = w.split("");
/* Construct the lookup table */
t = [-1, 0];
for ( pos = 2, cnd = 0; pos < wlen; ) {
if ( w[pos-1] === w[cnd] ) {
t[pos] = cnd + 1;
pos++; cnd++;
}
else if ( cnd > 0 )
cnd = t[cnd];
else
t[pos++] = 0;
}
/* Perform the search */
while ( m + i < slen ) {
if ( s[m+i] === w[i] ) {
i++;
if ( i === wlen )
return m;
}
else {
m += i - t[i];
if ( t[i] > -1 )
i = t[i];
else
i = 0;
}
}
return -1;
}
[edit] Ada (83 and 95)
-- pattern_match_knuth_morris_pratt_test.adb an implementation for fixed strings
-- Written by Wikibob, 2004, from notes on the Knuth_Morris_Pratt pattern match algorithm
-- adapted to fixed strings of characters.
-- It is in the public domain.
-- If you are using GNAT, use gnatmake to compile and link this program.
-- To use the pattern match functions in your own software extract
-- the inner package's specification and body into separate files.
-- This program is self-contained and demonstrates a particular
-- implementation of the Knuth_Morris_Pratt algorithm applied to
-- fixed strings, with the following restrictions:
-- * the search pattern is limited to a maximum of 256 characters
-- * the caller must first call the function Pre_Compute on a pattern
-- to obtain a context variable containing the pre-computed pattern.
-- There is no limit to the number of contexts.
-- * the caller must handle the exception Pattern_Error that will
-- be raised if function Find_Location was unable to find the
-- pattern in the given string.
-- Suggested improvements to the inner package are:
-- * add type Result_T is record Location : Index; Found : Boolean; end record;
-- and use it instead of raising Pattern_Error.
-- * produce a version that dispenses with the Context and has Find_Location
-- perform the Pre_process internally.
-- References: http://ww0.java3.datastructures.net/handouts/PatternMatching.pdf
procedure Pattern_Match_Knuth_Morris_Pratt_Fixed_Test is
-- You may extract this spec into file pattern_match.ads
package Pattern_Match is
Max_Pattern_Length : constant Positive := 256;
type Context is private;
function Pre_Compute (Pattern : in String) return Context;
-- precomputes the table of skips for the Pattern.
function Find_Location (Of_Context : in Context;
In_Text : in String) return Positive;
Pattern_Error : exception;
-- alternative is return Natural and use 0 to mean not found.
private
subtype Pattern_Length_T is Positive range 1..Max_Pattern_Length;
type Failure_Function_T is array (Pattern_Length_T) of Positive;
subtype Slided_Pattern_T is String (1 .. Max_Pattern_Length);
type Context is record
Failure_Function : Failure_Function_T;
M_Pattern : Slided_Pattern_T;
Pattern_Length : Positive;
end record;
end Pattern_Match;
-- Variables and data for testing.
IFPLID_Context : Pattern_Match.Context;
SRC_Context : Pattern_Match.Context;
Text_Test1 : constant String := "IMCHG DLH5877 -BEGIN ADDR -IFPLID AT05428113 -SRC FPL -RFL F330";
Text_Test2 : constant String := "IMCHG DLH5877 EDDKCLHD -BEGIN ADDR -FAC CFMUTACT AA05428113 FPL -STAR WLD5M -SRC ";
IFPLID_Pos : Positive;
IFPLID_Pos_2 : Positive := 1;
SRC_Pos : Positive;
SRC_Pos_2 : Positive;
-- You may extract this spec into file pattern_match.adb
package body Pattern_Match is
function Pre_Compute (Pattern : in String) return Context is
I, J : Positive;
Pattern_Context : Context;
begin
if Pattern = "" then
raise Pattern_Error;
end if;
Pattern_Context.M_Pattern (1..Pattern'Length) := Pattern;
Pattern_Context.Pattern_Length := Pattern'Length;
Pattern_Context.Failure_Function (1) := 1;
I := 2;
J := 1;
while I <= Pattern_Context.Pattern_Length loop
if Pattern (I) = Pattern (J) then
-- we have matched J + 1 chars.
Pattern_Context.Failure_Function (I) := J + 1;
I := I + 1;
J := J + 1;
elsif J > 1 then
-- use failure function to shift Pattern
J := Pattern_Context.Failure_Function (J - 1);
else
Pattern_Context.Failure_Function (I) := 1;
I := I + 1;
end if;
end loop;
return Pattern_Context;
end Pre_Compute;
function Find_Location (Of_Context : in Context;
In_Text : in String) return Positive is
subtype Slided_Text_T is String (1 .. In_Text'Length);
Slided_Text : constant Slided_Text_T := Slided_Text_T (In_Text);
I, J : Positive;
begin
I := 1;
J := 1;
while I <= Slided_Text'Last loop
if Slided_Text (I) = Of_Context.M_Pattern (J) then
if J = Of_Context.Pattern_Length then
return I - J + 1;
else
I := I + 1;
J := J + 1;
end if;
elsif J > 1 then
J := Of_Context.Failure_Function (J - 1);
else
I := I + 1;
end if;
end loop;
raise Pattern_Error;
-- Or change function to return Natural and return 0.
end Find_Location;
end Pattern_Match;
-- You may extract the rest of this file into file pattern_match_test.adb
-- and modify accordingly.
procedure Check_Pattern_Found (Pattern : in String;
At_Location : in Positive;
In_Text : in String) is
subtype Slided_Text_T is String (1 .. Pattern'Length);
Slided_Pattern : constant Slided_Text_T := Slided_Text_T (Pattern);
begin
if At_Location > In_Text'Last or else
At_Location + Pattern'Length - 1 > In_Text'Last or else
Slided_Text_T (In_Text (At_Location .. At_Location + Pattern'Length - 1)) /= Slided_Pattern
then
-- We expected Find_Location to return the location of the pattern, as it did not there is a program error.
raise Program_Error;
end if;
end Check_Pattern_Found;
begin
IFPLID_Context := Pattern_Match.Pre_Compute ("-IFPLID ");
SRC_Context := Pattern_Match.Pre_Compute ("-SRC ");
Expect_Pattern_Found:
begin
IFPLID_Pos := Pattern_Match.Find_Location (Of_Context => IFPLID_Context,
In_Text => Text_Test1);
exception
when Pattern_Match.Pattern_Error =>
-- We expected Find_Location to find the pattern, but it did not so there is a program error.
raise Program_Error;
end Expect_Pattern_Found;
Check_Pattern_Found (Pattern => "-IFPLID ",
At_Location => IFPLID_Pos,
In_Text => Text_Test1);
Expect_Pattern_Not_Found:
begin
IFPLID_Pos_2 := Pattern_Match.Find_Location (Of_Context => IFPLID_Context,
In_Text => Text_Test2);
-- We expected Find_Location to NOT find the pattern, but it did so there is a program error.
raise Program_Error;
exception
when Pattern_Match.Pattern_Error =>
-- We expected Find_Location to NOT find the pattern, and it did not so there is no error.
null;
end Expect_Pattern_Not_Found;
if IFPLID_Pos_2 /= 1 then
-- We expected Find_Location to NOT return a result, so there is a program error.
raise Program_Error;
end if;
Expect_Second_Pattern_Found:
begin
SRC_Pos := Pattern_Match.Find_Location (Of_Context => SRC_Context,
In_Text => Text_Test1);
exception
when Pattern_Match.Pattern_Error =>
-- We expected Find_Location to find the pattern, but it did not so there is a program error.
raise Program_Error;
end Expect_Second_Pattern_Found;
Check_Pattern_Found (Pattern => "-SRC ",
At_Location => SRC_Pos,
In_Text => Text_Test1);
Expect_Second_Pattern_Found_At_End:
begin
SRC_Pos_2 := Pattern_Match.Find_Location (Of_Context => SRC_Context,
In_Text => Text_Test2);
exception
when Pattern_Match.Pattern_Error =>
-- We expected Find_Location to find the pattern, but it did not so there is a program error.
raise Program_Error;
end Expect_Second_Pattern_Found_At_End;
Check_Pattern_Found (Pattern => "-SRC ",
At_Location => SRC_Pos_2,
In_Text => Text_Test2);
end Pattern_Match_Knuth_Morris_Pratt_Fixed_Test;
[edit] OCaml
# let init_next p =
let m = String.length p in
let next = Array.make m 0 in
let i = ref 1 and j = ref 0 in
while !i < m - 1 do
if p.[!i] = p.[!j] then begin incr i; incr j; next.(!i) <- !j end else
if !j = 0 then begin incr i; next.(!i) <- 0 end else j := next.(!j)
done;
next;;
val init_next : string -> int array = <fun>
# let kmp p =
let next = init_next p and m = String.length p in
fun s ->
let n = String.length s and i = ref 0 and j = ref 0 in
while !j < m && !i < n do
if s.[!i] = p.[!j] then begin incr i; incr j end else
if !j = 0 then incr i else j := next.(!j)
done;
if !j >= m then !i - m else raise Not_found;;
val kmp : string -> string -> int = <fun>
[edit] Perl
Klortho says: #11904 Who the fu​ck cares which one is faster?
index $string, $pattern;
[edit] Scheme
(define (init-next p)
(let* ((m (string-length p))
(next (make-vector m 0)))
(let loop ((i 1) (j 0))
(cond ((>= i (- m 1))
next)
((char=? (string-ref p i) (string-ref p j))
(let ((i (+ i 1))
(j (+ j 1)))
(vector-set! next i j)
(loop i j)))
((= j 0)
(let ((i (+ i 1)))
(vector-set! next i 0)
(loop i j)))
(else
(loop i (vector-ref next j)))))))
(define (kmp p s)
(let ((next (init-next p))
(m (string-length p))
(n (string-length s)))
(let loop ((i 0) (j 0))
(cond ((or (>= j m) (>= i n))
(- i m))
((char=? (string-ref s i) (string-ref p j))
(loop (+ i 1) (+ j 1)))
((= j 0)
(loop (+ i 1) j))
(else
(loop i (vector-ref next j)))))))

