From: Soikk <76824648+Soikk@users.noreply.github.com> Date: Sat, 28 May 2022 01:45:54 +0000 (+0200) Subject: trying boyer-moore X-Git-Url: https://git.xolatile.top/?a=commitdiff_plain;h=377dc104be127291ede5b32640c23eea0ba6791a;p=soikk-DB.git trying boyer-moore --- diff --git a/include/bm.h b/include/bm.h new file mode 100644 index 0000000..b59e390 --- /dev/null +++ b/include/bm.h @@ -0,0 +1,10 @@ +#ifndef BM_H +#define BM_H + +#include "db.h" + + +ssize_t BM(char *x, int m, char *y, int n); + + +#endif \ No newline at end of file diff --git a/include/db.h b/include/db.h index a5fca10..3ffcafb 100644 --- a/include/db.h +++ b/include/db.h @@ -15,6 +15,7 @@ #include "parser.h" #include "storage.h" #include "str.h" +#include "bm.h" #endif \ No newline at end of file diff --git a/include/tags.h b/include/tags.h new file mode 100644 index 0000000..b8a62b5 --- /dev/null +++ b/include/tags.h @@ -0,0 +1,23 @@ +#ifndef TAGS_H +#define TAGS_H + +/* + tags are stored in a big string, separated by semicolons (;) + tags can have namespaces, which should come before the tag + and be followed by a colon (:) + should a namespace store more than one tag, the following + tags will be separated by a comma (,) + spaces are only allowed inside tags or namespaces, as part + of themselves + semicolons, colons, and commas are not allowed inside tags + or namespaces + example: "person:ted kaczynsky;mood:serious;meta:jpg" +*/ + +#define MAXTAGS 4094 + +void insertTag(char **tags, char *tag){ + if() +} + +#endif \ No newline at end of file diff --git a/src/bm.c b/src/bm.c new file mode 100644 index 0000000..2962878 --- /dev/null +++ b/src/bm.c @@ -0,0 +1,44 @@ +#include "db.h" + +/* + Implementation of the tuned Boyer-Moore string search algorithm, + as defined here: + http://www-igm.univ-mlv.fr/~lecroq/string/tunedbm.html#SECTION00195 + and here: + http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140 +*/ + +#define ASIZE 128 // alphabet size, need to include most ascii 
/*
	Size of the bad-character table: one slot for every value an
	unsigned char can hold, so any byte of the needle or the haystack
	(including bytes >= 0x80) is a valid index.
*/
enum { BM_TABLE_SIZE = (unsigned char)-1 + 1 };

/*
	Builds the bad-character shift table for the needle x of length m.

	After the call, bmBc[c] is the distance from the rightmost occurrence
	of byte c among x[0..m-2] to the end of the needle, or m when c does
	not occur there. bmBc must have room for BM_TABLE_SIZE entries.
*/
static void preBmBc(char *x, int m, int bmBc[]){
	for(int c = 0; c < BM_TABLE_SIZE; ++c)
		bmBc[c] = m;
	for(int i = 0; i < m - 1; ++i)
		bmBc[(unsigned char)x[i]] = m - i - 1;	// cast: plain char may be signed
}

/*
	Tuned Boyer-Moore substring search (named BM for simplicity).

	x is the needle (m bytes), y is the haystack (n bytes). Neither has
	to be NUL-terminated; only y[0..n-1] and x[0..m-1] are ever read and
	nothing is written to either buffer.

	Returns the index of the first occurrence of x in y, or -1 when there
	is none. An empty needle (m == 0) matches at index 0, like strstr().
	Returns -1 for a NULL pointer, a negative length, or a needle longer
	than the haystack.

	The textbook version of this algorithm appends m copies of x[m-1]
	after the haystack as a sentinel so that the skip loop can run
	unchecked. That requires writable slack behind y, which callers of
	this function do not provide, so the skip loop checks the bound
	explicitly instead.
*/
ssize_t BM(char *x, int m, char *y, int n){
	int bmBc[BM_TABLE_SIZE];

	if(!x || !y || m < 0 || n < 0)
		return -1;
	if(m == 0)
		return 0;
	if(m > n)
		return -1;

	/* Preprocessing */
	preBmBc(x, m, bmBc);
	unsigned char last = (unsigned char)x[m - 1];
	int shift = bmBc[last];	// shift applied after a failed full comparison, always >= 1
	bmBc[last] = 0;			// a zero skip now means "window ends with the needle's last byte"

	/* Searching */
	int lastStart = n - m;	// highest index at which a match can still begin
	int j = 0;
	while(j <= lastStart){
		// Skip ahead until the window's last byte equals the needle's last byte
		int k = bmBc[(unsigned char)y[j + m - 1]];
		while(k != 0){
			j += k;
			if(j > lastStart)
				return -1;
			k = bmBc[(unsigned char)y[j + m - 1]];
		}
		// The last byte is already known to match, compare the remaining m-1
		if(memcmp(x, y + j, (size_t)(m - 1)) == 0)
			return j;
		j += shift;
	}
	return -1;
}
y;affect;but;night;close;job;front;fight;beside;ocean;herd;pass;hardly;widely;prepare;nails;paid;lucky;design;grandfather;aid;heavy;truck;sleep;difficult;log;keep;government;headed;mother;sad;bread;voyage;when;happy;making;whistle;plural;guard;therefore;continent;roof;money;pan;unusual;region;special;generally;plate;visit;look;lost;sick;wonderful;farther;put;characteristic;gravity;trap;system;twice;taste;knew;mad;smallest;automobile;return;huge;underline;danger;news;electric;information;breeze;thread;equally;five;new;average;former;wild;spend;cabin;recognize;nearest;circle;such;found;pass;whistle;slave;event;knowledge;fear;friend;am;browserling;cry;length;thy;create;busy;office;earth;blind;smallest;birthday;putting;classroom;pen;southern;summer;put;open;solution;spread;equator;else;kitchen;determine;strong;change;world;pocket;claws;earn;excellent;drove;donkey;rush;band;energy;fighting;hurt;ordinary;native;visitor;give;storm;pressure;imagine;street;engine;worth;hospital;attached;subject;perhaps;hospital;living;waste;dark;natural;change;enter;girl;motor;element;experiment;physical;value;excited;fort;layers;buy;minerals;satisfied;next;spirit;unhappy;storm;angry;science;desk;develop;behind;afraid;act;else;prepare;given;raw;affect;husband;ring;older;brought;book;cow;lake;sides;ago;fill;successful;real;aside;taught;mind;straight;date;very;chart;slabs;thin;saddle;full;sort;heard;surprise;fox;cool;dish;alphabet;early;spring;nest;sometime;date;light;break;lion;difference;rhyme;might;step;teach;potatoes;young;nine;liquid;how;lunch;heavy;mass;being;save;cutting;negative;swimming;cutting;journey;army;none;worry;leave;explore;baseball;fight;road;exact;hay;voyage;sheet;test;right;examine;agree;heart;pig;cannot;tool;hill;changing;bee;find;together;lay;tie;lost;continued;then;came;rhyme;mirror;town;substance;both;up;quite;push;shake;solid;result;you;ought;chicken;waste;freedom;why;somehow;not;complete;sick;struggle;military;pure;top;south;step;education;could;between;familiar;recog
nize;rich;tool;material;were;chicken;stopped;stay;policeman;round;firm"; + int l = len(str); + printf("len: %d\n", l); + + + //measure these two + int a = strInTags(str, l, "percent", 7, ';'); + //int a = BM("percent", 7, str, l); + + + + while(0){ prompt(); getInput(in); diff --git a/src/storage.c b/src/storage.c index 9b66132..868e0b2 100644 --- a/src/storage.c +++ b/src/storage.c @@ -166,4 +166,4 @@ void removeTag(row *r, char *tag){ r->tags[tagnum] = '\0'; r->numTags = l; r->lenTags = tagnum; -} +} \ No newline at end of file