added a C implementation of KMP search supporting NULL bytes
This commit is contained in:
parent
ed8c2986e5
commit
f58f9aab73
|
@ -2,4 +2,5 @@ include AUTHORS.rst
|
||||||
include CONTRIBUTING.rst
|
include CONTRIBUTING.rst
|
||||||
include HISTORY.rst
|
include HISTORY.rst
|
||||||
include LICENSE
|
include LICENSE
|
||||||
include README.rst
|
include README.rst
|
||||||
|
include fuzzysearch/kmp.h
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
#include <Python.h>
|
||||||
|
#include "fuzzysearch/kmp.h"
|
||||||
|
|
||||||
|
void preKMP(const char *subseq, int subseq_len, int *kmpNext) {
|
||||||
|
int i, j;
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
j = kmpNext[0] = -1;
|
||||||
|
while (i != subseq_len) {
|
||||||
|
while (j != -1 && subseq[i] != subseq[j])
|
||||||
|
j = kmpNext[j];
|
||||||
|
i++;
|
||||||
|
j++;
|
||||||
|
kmpNext[i] = (subseq[i] == subseq[j]) ? kmpNext[j] : j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct KMPstate KMP_init(const char *subseq, int subseq_len,
|
||||||
|
const char *seq, int seq_len,
|
||||||
|
int *kmpNext) {
|
||||||
|
struct KMPstate retval = {
|
||||||
|
.sequence_ptr = seq,
|
||||||
|
.sequence_end = seq + seq_len,
|
||||||
|
.subsequence = subseq,
|
||||||
|
.kmpNext = kmpNext,
|
||||||
|
.subseq_index = 0,
|
||||||
|
.subseq_len = subseq_len,
|
||||||
|
};
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* KMP_find_next(struct KMPstate *kmp_state) {
|
||||||
|
const char *sequence_ptr = kmp_state->sequence_ptr;
|
||||||
|
const char *sequence_end = kmp_state->sequence_end;
|
||||||
|
const char *subsequence = kmp_state->subsequence;
|
||||||
|
int *kmpNext = kmp_state->kmpNext;
|
||||||
|
int subseq_index = kmp_state->subseq_index;
|
||||||
|
int subseq_len = kmp_state->subseq_len;
|
||||||
|
|
||||||
|
while (sequence_ptr != sequence_end) {
|
||||||
|
while (subseq_index != -1 && subsequence[subseq_index] != (*sequence_ptr))
|
||||||
|
subseq_index = kmpNext[subseq_index];
|
||||||
|
subseq_index++;
|
||||||
|
sequence_ptr++;
|
||||||
|
if (subseq_index == subseq_len) {
|
||||||
|
kmp_state->subseq_index = kmpNext[subseq_index];
|
||||||
|
kmp_state->sequence_ptr = sequence_ptr;
|
||||||
|
return sequence_ptr - subseq_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
struct KMPstate {
|
||||||
|
const char *sequence_ptr;
|
||||||
|
const char *sequence_end;
|
||||||
|
const char *subsequence;
|
||||||
|
int *kmpNext;
|
||||||
|
int subseq_index;
|
||||||
|
int subseq_len;
|
||||||
|
};
|
||||||
|
|
||||||
|
void preKMP(const char *subsequence, int subsequence_len, int *kmpNext);
|
||||||
|
|
||||||
|
struct KMPstate KMP_init(const char *subseq, int subseq_len,
|
||||||
|
const char *seq, int seq_len,
|
||||||
|
int *kmpNext);
|
||||||
|
|
||||||
|
const char* KMP_find_next(struct KMPstate *kmp_state);
|
Loading…
Reference in New Issue