aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorSoikk2022-07-23 01:46:24 +0200
committerSoikk2022-07-23 01:46:24 +0200
commit28578b192d0828a9820983b5624b9bcc3577cd18 (patch)
tree2f30b1730f30a7eeee80995ee3984c10f5bdc2ff /include
parent377dc104be127291ede5b32640c23eea0ba6791a (diff)
downloadsoikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.xz
soikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.zst
Improved the database storage system. Added persistency.
Diffstat (limited to 'include')
-rw-r--r--include/bm.h1
-rw-r--r--include/crc64.h9
-rw-r--r--include/database.h31
-rw-r--r--include/db.h8
-rw-r--r--include/repl.h3
-rw-r--r--include/storage.h105
-rw-r--r--include/str.h8
-rw-r--r--include/tags.h19
8 files changed, 154 insertions, 30 deletions
diff --git a/include/bm.h b/include/bm.h
index b59e390..80ddee8 100644
--- a/include/bm.h
+++ b/include/bm.h
@@ -6,5 +6,4 @@
ssize_t BM(char *x, int m, char *y, int n);
-
#endif \ No newline at end of file
diff --git a/include/crc64.h b/include/crc64.h
new file mode 100644
index 0000000..7657b22
--- /dev/null
+++ b/include/crc64.h
@@ -0,0 +1,9 @@
+#ifndef CRC64_H
+#define CRC64_H
+
+// Header file created because of project necessity
+// Source for c file: https://github.com/srned/baselib/blob/master/crc64.c
+
+uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+
+#endif \ No newline at end of file
diff --git a/include/database.h b/include/database.h
new file mode 100644
index 0000000..2b1c91a
--- /dev/null
+++ b/include/database.h
@@ -0,0 +1,31 @@
+#ifndef DATABASE_H
+#define DATABASE_H
+
+#include "db.h"
+
+
+typedef struct database{
+ char name[32];
+ ltable *lfiles, *ltags;
+ htable *hfiles, *htags;
+ mtable *map;
+} database;
+
+
+database *newDatabase(char *name);
+
+database *loadDatabase(const char* path);
+
+int storeDatabase(database *db, const char *path);
+
+int addFileTag(database *db, char *file, char *tag);
+
+int addFileTags(database *db, char *file, int ntags, ...);
+
+int searchTag(database *db, char *tag, uint64_t *rl);
+
+void printDatabase(database *db);
+
+void debugDatabase(database *db);
+
+#endif
diff --git a/include/db.h b/include/db.h
index 3ffcafb..ccdfc11 100644
--- a/include/db.h
+++ b/include/db.h
@@ -5,17 +5,19 @@
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
-#include <stdint.h>
+#include <inttypes.h>
#include <stdbool.h>
+#include <stdarg.h>
#include "strnatcmp.h"
+#include "crc64.h"
#include "repl.h"
#include "parser.h"
#include "storage.h"
+#include "database.h"
#include "str.h"
#include "bm.h"
-
-#endif \ No newline at end of file
+#endif
diff --git a/include/repl.h b/include/repl.h
index 9c9fb8e..26abe9b 100644
--- a/include/repl.h
+++ b/include/repl.h
@@ -3,6 +3,7 @@
#include "db.h"
+
typedef struct {
char *buffer;
ssize_t inputSize;
@@ -17,4 +18,4 @@ void getInput(inputBuffer *in);
void prompt(void);
-#endif \ No newline at end of file
+#endif
diff --git a/include/storage.h b/include/storage.h
index a30278f..82ea55a 100644
--- a/include/storage.h
+++ b/include/storage.h
@@ -3,24 +3,103 @@
#include "db.h"
-#define MAXPATH 4094
-#define MAXTAGS 4094
+/*
+ tags are stored in a big table (or their hashes are) ordered
+ by alphabetical order
+ tags can have namespaces which are a special tag that starts
+ with a ':'
+ all tags in a namespace are located between two occurrences
+ of the namespace within the list,
+ e.g [":people", "sam hyde", "hitler", ":people"]
+ maybe namespaces use another hashing function to prevent
+ collisions because of the lack of space because of the ':'
-// When intializing the struct, it is recommended
-// to also initialize numTags and lenTags
-typedef struct{
- char path[MAXPATH];
- char tags[MAXTAGS];
- uint16_t lenTags;
- uint16_t numTags;
-} row;
+ files (their paths) are stored in a big table (or their hashes
+ are) by alphabetical/numerical order
+ tags and files' indexes on their respective tables correspond
+ to the index of the actual tag or file on a lookup table.
+ e.g
+ iT[23] = 816820769819429900 = hash(lT[23]) = hash("cacadevaca")
-row *newRow(const char *path);
+ there is another table that ties the two together by their
+ indexes
+ if a file has more than one tag, it is stored as multiple
+ relations inside the table, e.g ["1:2", "1:3", "1:4"]
-void insertTag(row *r, char *tag);
+ searching for a file that has a tag is as simple as
+ - finding the tag on its list
+ - if the tag is on the list, getting its index number
+ if not, return 'tag not found'
+ - search for all the relations that have the index as
+ the second argument
+ - if no files are found, return 'files not found'
+ - store the indexes of all the files (the first argument)
+ - return the list of all the files
+ - (OPTIONAL) show all the other tags the files shown have
+*/
-void removeTag(row *r, char *tag);
+#define MAXPATH 4096
+
+
+// Stores the actual filepaths/tags
+typedef struct lookupTable{
+ uint64_t size;
+ char **table; // They can't be longer than MAXPATH
+} ltable;
+
+// Stores the hashes of the filepaths/tags (for easier lookup)
+typedef struct hashTable{
+ uint64_t size;
+ uint64_t *table;
+} htable;
+
+typedef struct relation{
+ uint64_t file;
+ uint64_t tag;
+} relation;
+
+// Maps the relations between the filepaths and the tags
+typedef struct mappingTable{
+ uint64_t size;
+ relation *table;
+} mtable;
+
+
+// LTABLE
+ltable *newLtable(uint64_t size);
+
+ltable *loadLtable(FILE *fp);
+
+int storeLtable(const ltable *lt, FILE *fp);
+
+int ltableAdd(ltable *lt, char *str);
+
+uint64_t ltableSearch(ltable *lt, char *str);
+
+// HTABLE
+
+htable *newHtable(uint64_t size);
+
+htable *loadHtable(FILE *fp);
+
+int storeHtable(const htable *ht, FILE *fp);
+
+int htableAdd(htable *ht, uint64_t h);
+
+uint64_t htableSearch(htable *ht, uint64_t h);
+
+// MTABLE
+
+mtable *newMtable(uint64_t size);
+
+mtable *loadMtable(FILE *fp);
+
+int storeMtable(const mtable *mt, FILE *fp);
+
+int mtableAdd(mtable *mt, relation r);
+
+uint64_t mtableSearch(mtable *mt, relation r);
#endif
diff --git a/include/str.h b/include/str.h
index ffda145..de119f8 100644
--- a/include/str.h
+++ b/include/str.h
@@ -4,11 +4,15 @@
#include "db.h"
-uint16_t len(const char *s);
+uint32_t len(const char *s);
bool sameStr(const char *s1, const char *s2);
+char *normalizeStr(const char *str, uint32_t *ln);
+
+char *normalizeStrLimit(const char *str, uint32_t *l, uint32_t limit);
+
ssize_t strInTags(const char *tags, int n, const char *ndl, int m, char sep);
-#endif \ No newline at end of file
+#endif
diff --git a/include/tags.h b/include/tags.h
index b8a62b5..0b7f5a9 100644
--- a/include/tags.h
+++ b/include/tags.h
@@ -2,16 +2,15 @@
#define TAGS_H
/*
- tags are stored in a big string, separated by semicolons (;)
- tags can have namespaces, which should come before the tag
- and be followed by a colon (:)
- should a namespace store more than one tag, the following
- tags will be separated by a comma (,)
- spaces are only allowed inside tags or namespaces, as part
- of themselves
- semicolons, colons, and commas are not allowed inside tags
- or namespaces
- example: "person:ted kaczynsky;mood:serious;meta:jpg"
+ tags are stored in a big table (or their hashes are) ordered
+ by alphabetical order
+ tags can have namespaces which are a special tag that starts
+ with a ':'
+ all tags in a namespace are located between two occurrences
+ of the namespace within the list,
+ e.g [":people", "sam hyde", "hitler", ":people"]
+ maybe namespaces use another hashing function to prevent
+ collisions because of the lack of space because of the ':'
*/
#define MAXTAGS 4094