diff options
| author | Soikk | 2022-05-28 03:45:54 +0200 |
|---|---|---|
| committer | Soikk | 2022-05-28 03:45:54 +0200 |
| commit | 377dc104be127291ede5b32640c23eea0ba6791a (patch) | |
| tree | 0556ec062d321198580d31b6ce61b6d79fdeabb1 /src/bm.c | |
| parent | f59d888123dd15bf0e70b1966c2c4245bee36b3a (diff) | |
| download | soikk-DB-377dc104be127291ede5b32640c23eea0ba6791a.tar.xz soikk-DB-377dc104be127291ede5b32640c23eea0ba6791a.tar.zst | |
trying boyer-moore
Diffstat (limited to 'src/bm.c')
| -rw-r--r-- | src/bm.c | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/src/bm.c b/src/bm.c new file mode 100644 index 0000000..2962878 --- /dev/null +++ b/src/bm.c @@ -0,0 +1,44 @@ +#include "db.h" + +/* + Implementation of the tuned Boyer-Moore string search algorithm, + as defined here: + http://www-igm.univ-mlv.fr/~lecroq/string/tunedbm.html#SECTION00195 + and here: + http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140 +*/ + +#define ASIZE 128 // alphabet size, need to include most ascii characters + +static void preBmBc(char *x, int m, int bmBc[]){ + for(int i = 0; i < ASIZE; ++i) + bmBc[i] = m; + for(int i = 0; i < m - 1; ++i) + bmBc[x[i]] = m - i - 1; +} + +// x is the needle, y is the haystack +// Should be called TUNEDBM, called BM for simplicity +ssize_t BM(char *x, int m, char *y, int n){ + int j, k, shift, bmBc[ASIZE]; + + /* Preprocessing */ + preBmBc(x, m, bmBc); + shift = bmBc[x[m-1]]; + bmBc[x[m-1]] = 0; + + /* Searching */ + j = 0; + while(j < n){ + k = bmBc[y[j + m -1]]; + while(k != 0){ + j += k; k = bmBc[y[j + m -1]]; + //j += k; k = bmBc[y[j + m -1]]; + //j += k; k = bmBc[y[j + m -1]]; + } + if(memcmp(x, y + j, m) == 0 && j < n) + return j; + j += shift; /* shift */ + } + return -1; +}
\ No newline at end of file |
