diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/bm.h | 1 |
| -rw-r--r-- | include/crc64.h | 9 |
| -rw-r--r-- | include/database.h | 31 |
| -rw-r--r-- | include/db.h | 8 |
| -rw-r--r-- | include/repl.h | 3 |
| -rw-r--r-- | include/storage.h | 105 |
| -rw-r--r-- | include/str.h | 8 |
| -rw-r--r-- | include/tags.h | 19 |
8 files changed, 154 insertions, 30 deletions
diff --git a/include/bm.h b/include/bm.h index b59e390..80ddee8 100644 --- a/include/bm.h +++ b/include/bm.h @@ -6,5 +6,4 @@ ssize_t BM(char *x, int m, char *y, int n); - #endif
\ No newline at end of file diff --git a/include/crc64.h b/include/crc64.h new file mode 100644 index 0000000..7657b22 --- /dev/null +++ b/include/crc64.h @@ -0,0 +1,9 @@ +#ifndef CRC64_H +#define CRC64_H + +// Header file created because of project necessity +// Source for c file: https://github.com/srned/baselib/blob/master/crc64.c + +uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l); + +#endif
\ No newline at end of file diff --git a/include/database.h b/include/database.h new file mode 100644 index 0000000..2b1c91a --- /dev/null +++ b/include/database.h @@ -0,0 +1,31 @@ +#ifndef DATABASE_H +#define DATABASE_H + +#include "db.h" + + +typedef struct database{ + char name[32]; + ltable *lfiles, *ltags; + htable *hfiles, *htags; + mtable *map; +} database; + + +database *newDatabase(char *name); + +database *loadDatabase(const char* path); + +int storeDatabase(database *db, const char *path); + +int addFileTag(database *db, char *file, char *tag); + +int addFileTags(database *db, char *file, int ntags, ...); + +int searchTag(database *db, char *tag, uint64_t *rl); + +void printDatabase(database *db); + +void debugDatabase(database *db); + +#endif diff --git a/include/db.h b/include/db.h index 3ffcafb..ccdfc11 100644 --- a/include/db.h +++ b/include/db.h @@ -5,17 +5,19 @@ #include <ctype.h> #include <stdlib.h> #include <string.h> -#include <stdint.h> +#include <inttypes.h> #include <stdbool.h> +#include <stdarg.h> #include "strnatcmp.h" +#include "crc64.h" #include "repl.h" #include "parser.h" #include "storage.h" +#include "database.h" #include "str.h" #include "bm.h" - -#endif
\ No newline at end of file +#endif diff --git a/include/repl.h b/include/repl.h index 9c9fb8e..26abe9b 100644 --- a/include/repl.h +++ b/include/repl.h @@ -3,6 +3,7 @@ #include "db.h" + typedef struct { char *buffer; ssize_t inputSize; @@ -17,4 +18,4 @@ void getInput(inputBuffer *in); void prompt(void); -#endif
\ No newline at end of file +#endif diff --git a/include/storage.h b/include/storage.h index a30278f..82ea55a 100644 --- a/include/storage.h +++ b/include/storage.h @@ -3,24 +3,103 @@ #include "db.h" -#define MAXPATH 4094 -#define MAXTAGS 4094 +/* + tags are stored in a big table (or their hashes are) ordered + by alphabetical order + tags can have namespaces which are a special tag that starts + with a ':' + all tags in a namespace are located between two occurrences + of the namespace within the list, + e.g [":people", "sam hyde", "hitler", ":people"] + maybe namespaces use another hashing function to prevent + collisions because of the lack of space because of the ':' -// When intializing the struct, it is recommended -// to also initialize numTags and lenTags -typedef struct{ - char path[MAXPATH]; - char tags[MAXTAGS]; - uint16_t lenTags; - uint16_t numTags; -} row; + files (their paths) are stored in a big table (or their hashes + are) by alphabetical/numerical order + tags and files' indexes on their respective tables correspond + to the index of the actual tag or file on a lookup table. 
+ e.g + iT[23] = 816820769819429900 = hash(lT[23]) = hash("cacadevaca") -row *newRow(const char *path); + there is another table that ties the two together by their + indexes + if a file has more than one tag, it is stored as multiple + relations inside the table, e.g ["1:2", "1:3", "1:4"] -void insertTag(row *r, char *tag); + searching for a file that has a tag is as simple as + - finding the tag on its list + - if the tag is on the list, getting its index number + if not, return 'tag not found' + - search for all the relations that have the index as + the second argument + - if no files are found, return 'files not found' + - store the indexes of all the files (the first argument) + - return the list of all the files + - (OPTIONAL) show all the other tags the files shown have +*/ -void removeTag(row *r, char *tag); +#define MAXPATH 4096 + + +// Stores the actual filepaths/tags +typedef struct lookupTable{ + uint64_t size; + char **table; // They can't be longer than MAXPATH +} ltable; + +// Stores the hashes of the filepaths/tags (for easier lookup) +typedef struct hashTable{ + uint64_t size; + uint64_t *table; +} htable; + +typedef struct relation{ + uint64_t file; + uint64_t tag; +} relation; + +// Maps the relations between the filepaths and the tags +typedef struct mappingTable{ + uint64_t size; + relation *table; +} mtable; + + +// LTABLE +ltable *newLtable(uint64_t size); + +ltable *loadLtable(FILE *fp); + +int storeLtable(const ltable *lt, FILE *fp); + +int ltableAdd(ltable *lt, char *str); + +uint64_t ltableSearch(ltable *lt, char *str); + +// HTABLE + +htable *newHtable(uint64_t size); + +htable *loadHtable(FILE *fp); + +int storeHtable(const htable *ht, FILE *fp); + +int htableAdd(htable *ht, uint64_t h); + +uint64_t htableSearch(htable *ht, uint64_t h); + +// MTABLE + +mtable *newMtable(uint64_t size); + +mtable *loadMtable(FILE *fp); + +int storeMtable(const mtable *mt, FILE *fp); + +int mtableAdd(mtable *mt, relation r); + +uint64_t 
mtableSearch(mtable *mt, relation r); #endif diff --git a/include/str.h b/include/str.h index ffda145..de119f8 100644 --- a/include/str.h +++ b/include/str.h @@ -4,11 +4,15 @@ #include "db.h" -uint16_t len(const char *s); +uint32_t len(const char *s); bool sameStr(const char *s1, const char *s2); +char *normalizeStr(const char *str, uint32_t *ln); + +char *normalizeStrLimit(const char *str, uint32_t *l, uint32_t limit); + ssize_t strInTags(const char *tags, int n, const char *ndl, int m, char sep); -#endif
\ No newline at end of file +#endif diff --git a/include/tags.h b/include/tags.h index b8a62b5..0b7f5a9 100644 --- a/include/tags.h +++ b/include/tags.h @@ -2,16 +2,15 @@ #define TAGS_H /* - tags are stored in a big string, separated by semicolons (;) - tags can have namespaces, which should come before the tag - and be followed by a colon (:) - should a namespace store more than one tag, the following - tags will be separated by a comma (,) - spaces are only allowed inside tags or namespaces, as part - of themselves - semicolons, colons, and commas are not allowed inside tags - or namespaces - example: "person:ted kaczynsky;mood:serious;meta:jpg" + tags are stored in a big table (or their hashes are) ordered + by alphabetical order + tags can have namespaces which are a special tag that starts + with a ':' + all tags in a namespace are located between two occurrences + of the namespace within the list, + e.g [":people", "sam hyde", "hitler", ":people"] + maybe namespaces use another hashing function to prevent + collisions because of the lack of space because of the ':' */ #define MAXTAGS 4094 |
