]> git.xolatile.top Git - soikk-DB.git/commitdiff
Added ref count for files & tags. Refactored database.c. Removed tags.h but kept...
authorSoikk <76824648+Soikk@users.noreply.github.com>
Sun, 24 Jul 2022 20:18:31 +0000 (22:18 +0200)
committerSoikk <76824648+Soikk@users.noreply.github.com>
Sun, 24 Jul 2022 20:18:31 +0000 (22:18 +0200)
DOC
TODO
include/database.h
include/storage.h
include/str.h
include/tags.h [deleted file]
src/bm.c
src/database.c
src/main.c

diff --git a/DOC b/DOC
index d5ef900d837afc29864fa6c1a79e7a96bb752b80..6f1f195fa719ae36534e08ed0b74af343bfa0ac4 100644 (file)
--- a/DOC
+++ b/DOC
@@ -73,18 +73,22 @@ DATABASE
        A database consists of a 32 character (including trailing 0) name, and:
                - 2 lookup tables (ltable) for storing the unique file and tag names.
                - 2 hash tables (htable) for storing the hashes of the names in the ltables.
+               - 2 hash tables (htable) for storing the count of the files & tags in the mapping table (how many files each tag has and vice versa).
                - 1 mapping table (mtable) for storing the mappings of the tags to the files.
        The lookup tables serve the purpose of looking up the names of the files and tags when needed.
-       The hash tables serve the purpose of providing faster search times when searching for a file or tag.
-       Each respective lookup and hash tables (lfiles and hfiles, ltags and htags) share indexes.
+       The first 2 hash tables serve the purpose of providing faster search times when searching for a file or tag.
+       The remaining 2 hash tables (fcount and tcount) serve the purpose of storing how many times each file and tag appears in the mapping table.
+       Each respective group of lookup and hash tables (lfiles, hfiles and fcount; ltags, htags and tcount) shares indexes.
        The mapping table serves the purpose of storing the relation between different files and tags as the pairing of their indexes.
-       A databbase is written to disk in the following format:
+       A database is written to disk in the following format:
                - 2 bytes as a header that store the 'DB' ASCII characters.
                - 32 bytes that store the name of the database.
                - The lfiles ltable.
                - The ltags ltable.
                - The hfiles htable.
                - The htags htable.
+               - The fcount htable.
+               - The tcount htable.
                - The map mtable.
                - 3 bytes as "end" that store the 'END' ASCII characters.
        When loading a database, if the header doesn't match, it will print "Header is '(header)' not 'DB'" to standard error.
diff --git a/TODO b/TODO
index 309e76d85372955a28451358e28e0a8061aa3090..e9347763977cd19af81700829fad5562f9720f19 100644 (file)
--- a/TODO
+++ b/TODO
@@ -1,3 +1,5 @@
+TODO   Get rid of old functionalities (strnatcmp, BM)
+
 ----------------------------------------------------------------
 DONE   Change DB model from struct row
                        typedef struct{
index 2b1c91a6f825e9c45b0008822e4622236809c610..339b48bea7e4eb230e627571269b30ce14021f74 100644 (file)
@@ -8,6 +8,7 @@ typedef struct database{
        char name[32];
        ltable *lfiles, *ltags;
        htable *hfiles, *htags;
+       htable *fcount, *tcount;
        mtable *map;
 } database;
 
@@ -18,11 +19,17 @@ database *loadDatabase(const char* path);
 
 int storeDatabase(database *db, const char *path);
 
+uint64_t addFile(database *db, char *file);
+
+uint64_t addTag(database *db, char *tag);
+
 int addFileTag(database *db, char *file, char *tag);
 
 int addFileTags(database *db, char *file, int ntags, ...);
 
-int searchTag(database *db, char *tag, uint64_t *rl);
+int searchTag(database *db, char *tag, uint64_t n, uint64_t **r, uint64_t *rl);
+
+int searchFile(database *db, char *file, uint64_t n, uint64_t **r, uint64_t *rl);
 
 void printDatabase(database *db);
 
index 82ea55ad19e794085ba2e3bde46271c9d7fb452f..bef740a9982a8b0182d0b27a8aeea78a8bfc5533 100644 (file)
@@ -3,6 +3,17 @@
 
 #include "db.h"
 
+/*     (From tags.h)
+       tags are stored in a big table (or their hashes are) ordered
+       by alphabetical order
+       tags can have namespaces which are a special tag that starts
+       with a ':'
+       all tags in a namespace are located between two occurrences
+       of the namespace within the list,
+               e.g [":people", "sam hyde", "hitler", ":people"]
+       maybe namespaces use another hashing function to prevent
+       collisions because of the lack of space because of the ':'
+*/
 
 /*
        tags are stored in a big table (or their hashes are) ordered
index de119f87d09e9ebf6f366a6dba1ed33744c3d164..999f1e37e4ca381bdc1894ccb94f6d6eb78eeee4 100644 (file)
@@ -14,5 +14,4 @@ char *normalizeStrLimit(const char *str, uint32_t *l, uint32_t limit);
 
 ssize_t strInTags(const char *tags, int n, const char *ndl, int m, char sep); 
 
-
 #endif
diff --git a/include/tags.h b/include/tags.h
deleted file mode 100644 (file)
index 0b7f5a9..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef TAGS_H
-#define TAGS_H
-
-/*
-       tags are stored in a big table (or their hashes are) ordered
-       by alphabetical order
-       tags can have namespaces which are a special tag that starts
-       with a ':'
-       all tags in a namespace are located between two occurrences
-       of the namespace within the list,
-               e.g [":people", "sam hyde", "hitler", ":people"]
-       maybe namespaces use another hashing function to prevent
-       collisions because of the lack of space because of the ':'
-*/
-
-#define MAXTAGS 4094
-
-void insertTag(char **tags, char *tag){
-       if()
-}
-
-#endif
\ No newline at end of file
index d94765e90d20f6d1f3ae4c93841760809bfca92a..d2afb0a87be16662f4bb1f9a1c593bdf3fce6b7d 100644 (file)
--- a/src/bm.c
+++ b/src/bm.c
@@ -41,4 +41,4 @@ ssize_t BM(char *x, int m, char *y, int n){
                j += shift;                          /* shift */
        }
        return -1;
-}
\ No newline at end of file
+}
index db7759e184c45701f5890859930331fdf0c5194e..f03446460f2ff63bc630b0d3e7b4e70ac77ed8db 100644 (file)
@@ -8,6 +8,8 @@ database *newDatabase(char *name){
        db->ltags = newLtable(0);
        db->hfiles = newHtable(0);
        db->htags = newHtable(0);
+       db->fcount = newHtable(0);
+       db->tcount = newHtable(0);
        db->map = newMtable(0);
        return db;
 }
@@ -19,6 +21,7 @@ database *loadDatabase(const char* path){
        if(!sameStr(header, "DB")){
                fprintf(stderr, "Header is '%s' not 'DB'\n", header);
        }
+       
        char name[32];
        fread(&name, sizeof(char), 32, fp);
        database *db = newDatabase(name);
@@ -26,9 +29,12 @@ database *loadDatabase(const char* path){
        db->ltags = loadLtable(fp);
        db->hfiles = loadHtable(fp);
        db->htags = loadHtable(fp);
+       db->fcount = loadHtable(fp);
+       db->tcount = loadHtable(fp);
        db->map = loadMtable(fp);
-       char end[4];
-       fread(&end, sizeof(char), 3, fp);
+       
+       char *end = calloc(3, sizeof(char));
+       fread(end, sizeof(char), 3, fp);
        if(!sameStr(end, "END")){
                fprintf(stderr, "End is '%s' not 'END'\n", end);
        }
@@ -47,6 +53,8 @@ int storeDatabase(database *db, const char *path){
        storeLtable(db->ltags, fp);
        storeHtable(db->hfiles, fp);
        storeHtable(db->htags, fp);
+       storeHtable(db->fcount, fp);
+       storeHtable(db->tcount, fp);
        storeMtable(db->map, fp);
        
        char end[3] = "END";
@@ -56,6 +64,44 @@ int storeDatabase(database *db, const char *path){
        return 0;
 }
 
+// Adds one to the counter stored at position i of ht.
+// No bounds check is performed here; the caller must pass a valid index.
+static void increaseCount(htable *ht, uint64_t i){
+       ht->table[i] += 1;
+}
+
+// Registers 'file' in the database and increases its reference count.
+// The name is passed through normalizeStrLimit (limit MAXPATH-1) and hashed
+// with crc64. If the hash is not found in hfiles, the name, its hash and a
+// counter starting at 0 are added to lfiles, hfiles and fcount respectively.
+// The counter is then increased on every call, whether or not the file was new.
+// Returns the index of the file in those tables.
+// NOTE(review): ownership of the string returned by normalizeStrLimit is not
+// visible from here - confirm whether it must be freed when the file is known.
+uint64_t addFile(database *db, char *file){
+       uint32_t len;
+       char *norm = normalizeStrLimit(file, &len, MAXPATH-1);
+       const uint64_t hash = crc64(0, norm, len);
+       uint64_t idx = htableSearch(db->hfiles, hash);
+
+       // A result of -1 (as uint64_t) is treated as "hash not present"
+       if(idx == (uint64_t)-1){
+               ltableAdd(db->lfiles, norm);
+               htableAdd(db->hfiles, hash);
+               htableAdd(db->fcount, 0);
+               idx = db->hfiles->size-1;
+       }
+       increaseCount(db->fcount, idx);
+
+       return idx;
+}
+
+// Registers 'tag' in the database and increases its reference count.
+// The name is passed through normalizeStrLimit (limit MAXPATH-1) and hashed
+// with crc64. If the hash is not found in htags, the name, its hash and a
+// counter starting at 0 are added to ltags, htags and tcount respectively.
+// The counter is then increased on every call, whether or not the tag was new.
+// Returns the index of the tag in those tables.
+// NOTE(review): ownership of the string returned by normalizeStrLimit is not
+// visible from here - confirm whether it must be freed when the tag is known.
+uint64_t addTag(database *db, char *tag){
+       uint32_t len;
+       char *norm = normalizeStrLimit(tag, &len, MAXPATH-1);
+       const uint64_t hash = crc64(0, norm, len);
+       uint64_t idx = htableSearch(db->htags, hash);
+
+       // A result of -1 (as uint64_t) is treated as "hash not present"
+       if(idx == (uint64_t)-1){
+               ltableAdd(db->ltags, norm);
+               htableAdd(db->htags, hash);
+               htableAdd(db->tcount, 0);
+               idx = db->htags->size-1;
+       }
+       increaseCount(db->tcount, idx);
+
+       return idx;
+}
+
 static int addRelation(database *db, relation r){
        if(mtableSearch(db->map, r) != -1){
                return -1;
@@ -66,83 +112,81 @@ static int addRelation(database *db, relation r){
 }
 
 int addFileTag(database *db, char *file, char *tag){
-       uint32_t lf, lt;
-       file = normalizeStrLimit(file, &lf, MAXPATH-1);
-       tag = normalizeStrLimit(tag, &lt, MAXPATH-1);
-       uint64_t hf = crc64(0, file, lf), ht = crc64(0, tag, lt);
-       uint64_t fi = htableSearch(db->hfiles, hf), ti = htableSearch(db->htags, ht);
-       
-       if(fi == -1){
-               ltableAdd(db->lfiles, file);
-               htableAdd(db->hfiles, hf);
-               fi = db->hfiles->size-1;
-       }
-       if(ti == -1){
-               ltableAdd(db->ltags, tag);
-               htableAdd(db->htags, ht);
-               ti = db->htags->size-1;
-       }
-       
+       uint64_t fi = addFile(db, file), ti = addTag(db, tag);
        addRelation(db, (relation){.file = fi, .tag = ti});
+       
        return 0;
 }
 
 int addFileTags(database *db, char *file, int ntags, ...){
-       uint32_t lf;
-       file = normalizeStrLimit(file, &lf, MAXPATH-1);
-       uint64_t hf = crc64(0, file, lf);
-       uint64_t fi = htableSearch(db->hfiles, hf);
-       
-       if(fi == -1){
-               ltableAdd(db->lfiles, file);
-               htableAdd(db->hfiles, hf);
-               fi = db->hfiles->size-1;
-       }
-       
        va_list tags;
        va_start(tags, ntags);
        for(uint64_t i = 0; i < ntags; ++i){
                char *tag = va_arg(tags, char*);
-               uint32_t lt;
-               tag = normalizeStrLimit(tag, &lt, MAXPATH-1);
-               uint64_t ht = crc64(0, tag, lt);
-               uint64_t ti = htableSearch(db->htags, ht);
-               
-               if(ti == -1){
-                       ltableAdd(db->ltags, tag);
-                       htableAdd(db->htags, ht);
-                       ti = db->htags->size-1;
-               }
-               
-               addRelation(db, (relation){.file = fi, .tag = ti});
+               addFileTag(db, file, tag);
        }
        va_end(tags);
 
        return 0;
 }
 
-// Should return a list with the indexes of the files that have this tag
-int searchTag(database *db, char *tag, uint64_t *rl){
+// Stores in r a list with the indexes of the first n files that have this tag.
+// If n is 0, all of them are stored. Stores in rl the length of r.
+int searchTag(database *db, char *tag, uint64_t n, uint64_t **r, uint64_t *rl){
        uint32_t l;
        tag = normalizeStrLimit(tag, &l, MAXPATH-1);
        uint64_t h = crc64(0, tag, l);
        uint64_t ti = htableSearch(db->htags, h);
-       // TODO: error checking
+       if(ti == -1){
+               return -1;
+       }
        
-       uint64_t c = 0;
+       *rl = 0;
        for(uint64_t i = 0; i < db->map->size; ++i){
+               if(n < 1 || *rl < n){
+                       if(db->map->table[i].tag == ti){
+                               ++(*rl);
+                       }
+               }
+       }
+       *r = malloc((*rl)*sizeof(uint64_t));
+       uint64_t c = 0;
+       for(uint64_t i = 0; i < db->map->size && c < *rl; ++i){
                if(db->map->table[i].tag == ti){
-                       ++c;
+                       (*r)[c++] = db->map->table[i].file;
                }
        }
-       uint64_t *r = malloc(c*sizeof(uint64_t));
-       c = 0;
+       
+       return 0;
+}
+
+// Stores in r a list with the indexes of the first n tags that this file has.
+// If n is 0, all of them are stored. Stores in rl the length of r.
+int searchFile(database *db, char *file, uint64_t n, uint64_t **r, uint64_t *rl){
+       uint32_t l;
+       file = normalizeStrLimit(file, &l, MAXPATH-1);
+       uint64_t h = crc64(0, file, l);
+       uint64_t fi = htableSearch(db->hfiles, h);
+       if(fi == -1){
+               return -1;
+       }
+       
+       *rl = 0;
        for(uint64_t i = 0; i < db->map->size; ++i){
-               if(db->map->table[i].tag == ti){
-                       r[c++] = db->map->table[i].file;
+               if(n < 1 || *rl < n){
+                       if(db->map->table[i].file == fi){
+                               ++(*rl);
+                       }
                }
        }
-       rl = r;
+       *r = malloc((*rl)*sizeof(uint64_t));
+       uint64_t c = 0;
+       for(uint64_t i = 0; i < db->map->size && c < *rl; ++i){
+               if(db->map->table[i].file == fi){
+                       (*r)[c++] = db->map->table[i].tag;
+               }
+       }
+       
        return 0;
 }
 
@@ -158,11 +202,11 @@ void debugDatabase(database *db){
        printf("Name: %s\n", db->name);
        printf("\t-lfiles: %d\n", db->lfiles->size);
        for(uint64_t i = 0; i < db->lfiles->size; ++i){
-               printf("\t\t+%s\n", db->lfiles->table[i]);
+               printf("\t\t+%s (%" PRIu64 ")\n", db->lfiles->table[i], db->fcount->table[i]);
        }
        printf("\t-ltags: %d\n", db->ltags->size);
        for(uint64_t i = 0; i < db->ltags->size; ++i){
-               printf("\t\t+%s\n", db->ltags->table[i]);
+               printf("\t\t+%s (%" PRIu64 ")\n", db->ltags->table[i], db->tcount->table[i]);
        }
        printf("\t-hfiles: %d\n", db->hfiles->size);
        for(uint64_t i = 0; i < db->hfiles->size; ++i){
index 01fdad2dde17394359e4e8ea7872ef68da7d22a9..bd5c6b93effbc31171af982a679038e75f72cf4d 100644 (file)
@@ -4,26 +4,30 @@
 
 int main(){
 
-       printf("%016llu\n", (uint64_t) crc64(0, (unsigned char*)"cacadevaca", 10));
-
+       database *db = newDatabase("miDB");
        inputBuffer *in = newInputBuffer();
        
-       char *str = "grandmother;football;capital;concerned;entire;realize;garden;refused;proud;tune;rhyme;other;writer;command;fresh;fence;rapidly;active;cover;repeat;determine;yard;cannot;animal;pure;rich;mirror;frozen;vast;coach;brass;activity;bottom;airplane;local;tone;attack;though;between;value;collect;mission;tower;brought;original;history;reason;minute;would;hung;strange;children;offer;blue;wrapped;magnet;color;cage;easily;percent;lower;verb;hundred;larger;away;was;certain;western;yes;lack;wish;same;spend;arrive;fog;heard;bill;effort;steam;wolf;indicate;suppose;because;life;down;seat;age;earn;under;cell;floating;although;spent;folks;swing;hello;cent;swung;pen;happened;slip;pupil;smell;fix;piano;closer;idea;trunk;model;school;particularly;he;coast;describe;such;join;been;hard;three;around;tube;soldier;baby;mouse;note;sort;house;gasoline;organized;eat;sat;crowd;alive;spoken;wide;square;luck;tales;angry;having;wear;frog;outer;nice;regular;year;clothing;check;throughout;farmer;dug;dark;exercise;table;your;form;should;personal;use;road;bright;walk;fairly;affect;but;night;close;job;front;fight;beside;ocean;herd;pass;hardly;widely;prepare;nails;paid;lucky;design;grandfather;aid;heavy;truck;sleep;difficult;log;keep;government;headed;mother;sad;bread;voyage;when;happy;making;whistle;plural;guard;therefore;continent;roof;money;pan;unusual;region;special;generally;plate;visit;look;lost;sick;wonderful;farther;put;characteristic;gravity;trap;system;twice;taste;knew;mad;smallest;automobile;return;huge;underline;danger;news;electric;information;breeze;thread;equally;five;new;average;former;wild;spend;cabin;recognize;nearest;circle;such;found;pass;whistle;slave;event;knowledge;fear;friend;am;browserling;cry;length;thy;create;busy;office;earth;blind;smallest;birthday;putting;classroom;pen;southern;summer;put;open;solution;spread;equator;else;kitchen;determine;strong;change;world;pocket;claws;earn;excellent;drove;donkey;rush;band;energy;fighting;hurt;ordinary;native;visitor;g
ive;storm;pressure;imagine;street;engine;worth;hospital;attached;subject;perhaps;hospital;living;waste;dark;natural;change;enter;girl;motor;element;experiment;physical;value;excited;fort;layers;buy;minerals;satisfied;next;spirit;unhappy;storm;angry;science;desk;develop;behind;afraid;act;else;prepare;given;raw;affect;husband;ring;older;brought;book;cow;lake;sides;ago;fill;successful;real;aside;taught;mind;straight;date;very;chart;slabs;thin;saddle;full;sort;heard;surprise;fox;cool;dish;alphabet;early;spring;nest;sometime;date;light;break;lion;difference;rhyme;might;step;teach;potatoes;young;nine;liquid;how;lunch;heavy;mass;being;save;cutting;negative;swimming;cutting;journey;army;none;worry;leave;explore;baseball;fight;road;exact;hay;voyage;sheet;test;right;examine;agree;heart;pig;cannot;tool;hill;changing;bee;find;together;lay;tie;lost;continued;then;came;rhyme;mirror;town;substance;both;up;quite;push;shake;solid;result;you;ought;chicken;waste;freedom;why;somehow;not;complete;sick;struggle;military;pure;top;south;step;education;could;between;familiar;recognize;rich;tool;material;were;chicken;stopped;stay;policeman;round;firm";
-
-
-       database *db = newDatabase("miDB");
        
        addFileTag(db, "vaca.png", "naturaleza");       
        addFileTags(db, "donald-tromp.jpg", 3, "based", "hitler", "very cool");
+       addFileTag(db, "vaca.png", "lovely");
+       addFileTags(db, "vaca.png", 3, "nature", "kami", "very cool");
        
        storeDatabase(db, "db.db");
-       
-       db = loadDatabase("db.db");
 
        printDatabase(db);
        
        debugDatabase(db);
-
+       
+       uint64_t *l, i;
+       searchFile(db, "donald-tromp.jpg", 0, &l, &i);
+       
+       printf("Tags with tag 'donald-tromp.jpg':\n");
+       for(uint64_t j = 0; j < i; ++j){
+               printf("\t%s\n", db->ltags->table[l[j]]);
+       
+       }
+       
 
        while(0){