KMP本身不是特别难理解,但计算next数组的函数实现起来可能不太好理解。
借助DFA,或许会让一些还没弄明白KMP的同学加深一下理解,说不定瞬间就通透了。
另外,DFA实现起来也更容易,但是相比传统的KMP,自动机实现的KMP效率要低许多。
先占个坑,代码先放一下,有空再来更新,实在是没时间写
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<ctime>
using namespace std;
int dfa[3][256];
void BuildDFA(char* p) {
dfa[0][p[0]] = 1;
int x = 0;
for(int i=1; i < 3; i++) {
for(int j = 0; j < 256; j++) {
if(p[i] == j) {
dfa[i][j] = i + 1;
} else {
dfa[i][j] = dfa[x][j];
}
}
x = dfa[x][p[i]];
}
}
int DFA_KMP(char* s, char* p) {
int s_length = strlen(s);
int p_length = strlen(p);
BuildDFA(p);
int j = 0;
for(int i=0; i < s_length; i++) {
j = dfa[j][s[i]];
if(j == p_length) return i - p_length + 1;
}
return -1;
}
void get_next(const char *p, int *next) {
next[0] = -1;
int i = 0;
int j = -1;
while (i < (strlen(p) - 1)) {
if ((j == -1) || (p[i] == p[j])) {
++ i;
++ j;
next[i] = j;
}
else {
j = next[j];
}
}
}
int kmp(char *t, char *p) {
if ((t == NULL) || (p == NULL)) {
return -1;
}
int i = 0;
int j = 0;
int t_len = strlen(t);
int p_len = strlen(p);
int *next = (int *)malloc(sizeof(int)*(p_len));
get_next(p, next);
while ((i < t_len) && (j < p_len)) {
if ((j == -1) || (t[i] == p[j])) {
++ i;
++ j;
}
else {
j = next[j];
}
}
if (next != NULL) {
free(next);
next = NULL;
}
if (j == p_len) {
return (i - p_len);
}
else {
return -1;
}
}
int main() {
char* s = "abcdbcdda";
char* p = "cdd";
clock_t start1, end1;
clock_t start2, end2;
int n = 10000;
start1 = clock();
for(int i = 0; i < n; i++)
DFA_KMP(s, p);
end1 = clock();
start2 = clock();
for(int i = 0; i < n; i++)
kmp(s, p);
end2 = clock();
cout << "DFA:" << (double)(end1 - start1) << endl
<< "Common:" << (double)(end2 - start2);
return 0;
}
Comments | NOTHING