From f8ecaf91d686d2247ab7088eddf4de417985ca5b Mon Sep 17 00:00:00 2001 From: Soikk <76824648+Soikk@users.noreply.github.com> Date: Sat, 23 Jul 2022 18:09:02 +0200 Subject: [PATCH] Minor modifications. Updated TODO and added documentation (DOC) --- DOC | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ TODO | 22 ++++++------ src/database.c | 5 +-- src/storage.c | 12 +++---- 4 files changed, 113 insertions(+), 18 deletions(-) create mode 100644 DOC diff --git a/DOC b/DOC new file mode 100644 index 0000000..b95c69c --- /dev/null +++ b/DOC @@ -0,0 +1,92 @@ +OBSERVATIONS + +STORAGE + An ltable consists of its size and a "string" table (char**). + An ltable is initialized with a size, that can be 0. The table inside the ltable is NOT initialized. + An ltable is written to disk in the following format: + - 1 byte as a header that stores the 'L' ASCII character. + - 8 bytes (64 bits) that store the size of the table. + - For each "string" in its table, it has: + · 4 bytes (32 bits) that store the string size (counting the trailing 0). + · As many bytes as that size, for storing each of the characters in the string, including the trailing 0. + - 1 byte as an "end" that stores the 'E' ASCII character. + When loading an ltable, if the header doesn't match, it will print "Header is '(header)' not 'L'" to standard error. + Likewise, if the "end" doesn't match, it will print "End is '(end)' not 'E'" to standard error. + Storing an ltable returns 0 upon completion. + When working with "strings", all "strings" are normalized with normalizeStrLimit, lowercasing them, removing trailing spaces and adding a limit of MAXPATH-1 characters plus the trailing 0. + Adding a "string" to an ltable consists of: + - Creating a new table of size size+1. + - Storing all existing "strings" in said table. + - Appending the new "string" at the end of the table. + - Reallocating space in the ltable's table. + - Storing all "strings" in the new table in the ltable's table. 
+ - Increasing the ltable's size by 1. + If the "string" is already on the table, it returns -1. Otherwise, it returns 0 upon completion. + Searching for a "string" returns the index [0 ... size-1] of said "string" in the table, or -1 if it's not found. + + An htable consists of its size and an unsigned 64 bit integer table. + An htable is initialized with a size, that can be 0. The table inside the htable IS initialized. + An htable is written to disk in the following format: + - 1 byte as a header that stores the 'H' ASCII character. + - 8 bytes (64 bits) that store the size of the table. + - 8 bytes (64 bits) for each element in its table. + - 1 byte as an "end" that stores the 'E' ASCII character. + When loading an htable, if the header doesn't match, it will print "Header is '(header)' not 'H'" to standard error. + Likewise, if the "end" doesn't match, it will print "End is '(end)' not 'E'" to standard error. + Storing an htable returns 0 upon completion. + Adding a 64 bit unsigned integer to an htable consists of: + - Creating a new table of size size+1. + - Storing all existing 64 bit unsigned integers in said table. + - Appending the new 64 bit unsigned integer at the end of the table. + - Reallocating space in the htable's table. + - Storing all 64 bit unsigned integers in the new table in the htable's table. + - Increasing the htable's size by 1. + If the 64 bit unsigned integer is already on the table, it returns -1. Otherwise, it returns 0 upon completion. + Searching for a 64 bit unsigned integer returns the index [0 ... size-1] of said 64 bit unsigned integer in the table, or -1 if it's not found. + + An mtable consists of its size and a struct relation table. + A struct relation consists of two 64 bit numbers. The first one is the file identifier and the second one the tag identifier. + An mtable is initialized with a size, that can be 0. The table inside the mtable is NOT initialized. 
+ An mtable is written to disk in the following format: + - 1 byte as a header that stores the 'M' ASCII character. + - 8 bytes (64 bits) that store the size of the table. + - For each struct relation in its table, it has: + · 8 bytes (64 bits) that store the file identifier of the relation. + · 8 bytes (64 bits) that store the tag identifier of the relation. + - 1 byte as an "end" that stores the 'E' ASCII character. + When loading an mtable, if the header doesn't match, it will print "Header is '(header)' not 'M'" to standard error. + Likewise, if the "end" doesn't match, it will print "End is '(end)' not 'E'" to standard error. + Storing an mtable returns 0 upon completion. + Adding a relation to an mtable consists of: + - Creating a new table of size size+1. + - Storing all existing relations in said table. + - Appending the new relation at the end of the table. + - Reallocating space in the mtable's table. + - Storing all relations in the new table in the mtable's table. + - Increasing the mtable's size by 1. + If the relation is already on the table, it returns -1. Otherwise, it returns 0 upon completion. + Searching for a relation returns the index [0 ... size-1] of said relation in the table, or -1 if it's not found. + + +DATABASE + + A database consists of a 32 character (including trailing 0) name, and: + - 2 lookup tables (ltable) for storing the unique file and tag names. + - 2 hash tables (htable) for storing the hashes of the names in the ltables. + - 1 mapping table (mtable) for storing the mappings of the tags to the files. + The lookup tables serve the purpose of looking up the names of the files and tags when needed. + The hash tables serve the purpose of providing faster search times when searching for a file or tag. + Each pair of corresponding lookup and hash tables (lfiles and hfiles, ltags and htags) shares indexes. + The mapping table serves the purpose of storing the relation between different files and tags as the pairing of their indexes. 
+ A database is written to disk in the following format: + - 2 bytes as a header that store the 'DB' ASCII characters. + - 32 bytes that store the name of the database. + - The lfiles ltable. + - The ltags ltable. + - The hfiles htable. + - The htags htable. + - The map mtable. + - 3 bytes as an "end" that store the 'END' ASCII characters. + When loading a database, if the header doesn't match, it will print "Header is '(header)' not 'DB'" to standard error. + Likewise, if the "end" doesn't match, it will print "End is '(end)' not 'END'" to standard error. + Storing a database returns 0 upon completion. diff --git a/TODO b/TODO index 60562e5..309e76d 100644 --- a/TODO +++ b/TODO @@ -1,12 +1,14 @@ -Try different hashing functions (current: crc64), e.g murmur -Change DB model from struct row - typedef struct{ - char path[MAXPATH]; - char tags[MAXTAGS]; - uint16_t lenTags; - uint16_t numTags; - } row; -to tables of ids - images[] tags[] images:tags[] +---------------------------------------------------------------- +DONE Change DB model from struct row + typedef struct{ + char path[MAXPATH]; + char tags[MAXTAGS]; + uint16_t lenTags; + uint16_t numTags; + } row; + to tables of ids + images[] tags[] images:tags[] + +DONE Try different hashing functions (current: crc64), e.g murmur DONE Try to improve 'sameStr' by maybe not having to check s2[i2] diff --git a/src/database.c b/src/database.c index 521b0a1..db7759e 100644 --- a/src/database.c +++ b/src/database.c @@ -17,7 +17,7 @@ database *loadDatabase(const char* path){ char *header = calloc(2, sizeof(char)); fread(header, sizeof(char), 2, fp); if(!sameStr(header, "DB")){ - printf("header is %c%c and not DB\n", header[0], header[1]); + fprintf(stderr, "Header is '%s' not 'DB'\n", header); } char name[32]; fread(&name, sizeof(char), 32, fp); @@ -30,7 +30,7 @@ database *loadDatabase(const char* path){ char end[4]; fread(&end, sizeof(char), 3, fp); if(!sameStr(end, "END")){ - printf("end is %s and not END\n", end); + 
fprintf(stderr, "End is '%s' not 'END'\n", end); } fclose(fp); return db; @@ -53,6 +53,7 @@ int storeDatabase(database *db, const char *path){ fwrite(end, sizeof(char), 3, fp); fclose(fp); + return 0; } static int addRelation(database *db, relation r){ diff --git a/src/storage.c b/src/storage.c index 930f636..4ebbfdc 100644 --- a/src/storage.c +++ b/src/storage.c @@ -13,7 +13,7 @@ ltable *loadLtable(FILE *fp){ char header; fread(&header, sizeof(char), 1, fp); if(header != 'L'){ - printf("header is %c not L\n", header); + fprintf(stderr, "Header is '%c' not 'L'\n", header); } uint64_t size; fread(&size, sizeof(uint64_t), 1, fp); @@ -27,7 +27,7 @@ ltable *loadLtable(FILE *fp){ char end; fread(&end, sizeof(char), 1, fp); if(end != 'E'){ - printf("end is %c not E\n", end); + fprintf(stderr, "End is '%c' not 'E'\n", end); } return lt; } @@ -96,7 +96,7 @@ htable *loadHtable(FILE *fp){ char header; fread(&header, sizeof(char), 1, fp); if(header != 'H'){ - printf("header is %c not H\n", header); + fprintf(stderr, "Header is '%c' not 'H'\n", header); } uint64_t size; fread(&size, sizeof(uint64_t), 1, fp); @@ -107,7 +107,7 @@ htable *loadHtable(FILE *fp){ char end; fread(&end, sizeof(char), 1, fp); if(end != 'E'){ - printf("end is %c not E\n", end); + fprintf(stderr, "End is '%c' not 'E'\n", end); } return ht; } @@ -163,7 +163,7 @@ mtable *loadMtable(FILE *fp){ char header; fread(&header, sizeof(char), 1, fp); if(header != 'M'){ - printf("header is %c not M\n", header); + fprintf(stderr, "Header is '%c' not 'M'\n", header); } uint64_t size; fread(&size, sizeof(uint64_t), 1, fp); @@ -175,7 +175,7 @@ mtable *loadMtable(FILE *fp){ char end; fread(&end, sizeof(char), 1, fp); if(end != 'E'){ - printf("end is %c not E\n", end); + fprintf(stderr, "End is '%c' not 'E'\n", end); } return mt; } -- 2.39.5