memcpy(db->name, name, len(name)+1);
db->lfiles = newLtable(0);
db->ltags = newLtable(0);
- db->hfiles = newHtable(0);
- db->htags = newHtable(0);
- db->fcount = newHtable(0);
- db->tcount = newHtable(0);
+ db->cfiles = newCtable(0);
+ db->ctags = newCtable(0);
+ db->hfiles = NULL;
+ db->htags = NULL;
db->map = newMtable(0);
return db;
}
database *db = newDatabase(name);
db->lfiles = loadLtable(fp);
db->ltags = loadLtable(fp);
- db->hfiles = loadHtable(fp);
- db->htags = loadHtable(fp);
- db->fcount = loadHtable(fp);
- db->tcount = loadHtable(fp);
+ db->cfiles = loadCtable(fp);
+ db->ctags = loadCtable(fp);
+ db->hfiles = loadAVLTree(fp);
+ db->htags = loadAVLTree(fp);
db->map = loadMtable(fp);
char *end = calloc(3, sizeof(char));
storeLtable(db->lfiles, fp);
storeLtable(db->ltags, fp);
- storeHtable(db->hfiles, fp);
- storeHtable(db->htags, fp);
- storeHtable(db->fcount, fp);
- storeHtable(db->tcount, fp);
+ storeCtable(db->cfiles, fp);
+ storeCtable(db->ctags, fp);
+ storeAVLTree(db->hfiles, fp);
+ storeAVLTree(db->htags, fp);
storeMtable(db->map, fp);
char end[3] = "END";
return 0;
}
-static void increaseCount(htable *ht, uint64_t i){
- ht->table[i]++;
+static void increaseCount(ctable *ct, uint64_t i){
+ ct->table[i]++;
}
uint64_t addFile(database *db, char *file){
uint32_t l;
file = normalizeStrLimit(file, &l, MAXPATH-1);
uint64_t h = crc64(0, file, l);
- uint64_t i = htableSearch(db->hfiles, h);
+ uint64_t i = nodeSearch(db->hfiles, h);
if(i == -1){
ltableAdd(db->lfiles, file);
- htableAdd(db->hfiles, h);
- htableAdd(db->fcount, 0);
- i = db->hfiles->size-1;
+ ctableAdd(db->cfiles, 0);
+ i = db->lfiles->size-1;
+ db->hfiles = insertNode(db->hfiles, h, i);
}
- increaseCount(db->fcount, i);
-
+ increaseCount(db->cfiles, i);
+
return i;
}
uint32_t l;
tag = normalizeStrLimit(tag, &l, MAXPATH-1);
uint64_t h = crc64(0, tag, l);
- uint64_t i = htableSearch(db->htags, h);
+ uint64_t i = nodeSearch(db->htags, h);
if(i == -1){
ltableAdd(db->ltags, tag);
- htableAdd(db->htags, h);
- htableAdd(db->tcount, 0);
- i = db->htags->size-1;
+ ctableAdd(db->ctags, 0);
+ i = db->ltags->size-1;
+ db->htags = insertNode(db->htags, h, i);
+
}
- increaseCount(db->tcount, i);
-
+ increaseCount(db->ctags, i);
+
return i;
}
uint32_t l;
file = normalizeStrLimit(file, &l, MAXPATH-1);
uint64_t h = crc64(0, file, l);
- uint64_t fi = htableSearch(db->hfiles, h);
+ uint64_t fi = nodeSearch(db->hfiles, h);
if(fi == -1){
return -1;
}
uint32_t l;
tag = normalizeStrLimit(tag, &l, MAXPATH-1);
uint64_t h = crc64(0, tag, l);
- uint64_t ti = htableSearch(db->htags, h);
+ uint64_t ti = nodeSearch(db->htags, h);
if(ti == -1){
return -1;
}
printf("\n");
}
+// Prints the subtree rooted at n in pre-order (node first, then left, then
+// right), one "hash -> index" line per node. A NULL subtree prints nothing.
+void debugAVLtree(node *n){
+	if(n == NULL){
+		return;
+	}
+	printf("\t\t+%" PRIu64 " -> %" PRIu64 "\n", n->h, n->i);
+	debugAVLtree(n->left);
+	debugAVLtree(n->right);
+}
+
+// Dumps the whole database to stdout: its name, every file and tag with its
+// usage count, both hash trees (height followed by a pre-order node listing)
+// and every file/tag relation in the map.
void debugDatabase(database *db){
printf("\n");
printf("Name: %s\n", db->name);
printf("\t-lfiles: %d\n", db->lfiles->size);
for(uint64_t i = 0; i < db->lfiles->size; ++i){
- printf("\t\t+%s (%" PRIu64 ")\n", db->lfiles->table[i], db->fcount->table[i]);
+ printf("\t\t+%s (%" PRIu64 ")\n", db->lfiles->table[i], db->cfiles->table[i]);
}
printf("\t-ltags: %d\n", db->ltags->size);
for(uint64_t i = 0; i < db->ltags->size; ++i){
- printf("\t\t+%s (%" PRIu64 ")\n", db->ltags->table[i], db->tcount->table[i]);
- }
- printf("\t-hfiles: %d\n", db->hfiles->size);
- for(uint64_t i = 0; i < db->hfiles->size; ++i){
- printf("\t\t+%" PRIu64 "\n", db->hfiles->table[i]);
- }
- printf("\t-htags: %d\n", db->htags->size);
- for(uint64_t i = 0; i < db->htags->size; ++i){
- printf("\t\t+%" PRIu64 "\n", db->htags->table[i]);
+ printf("\t\t+%s (%" PRIu64 ")\n", db->ltags->table[i], db->ctags->table[i]);
}
+ // height() returns uint64_t, so it must be printed with PRIu64; passing it
+ // to "%d" is a format/argument mismatch.
+ printf("\t-hfiles: %" PRIu64 "\n", height(db->hfiles));
+ debugAVLtree(db->hfiles);
+ printf("\t-htags: %" PRIu64 "\n", height(db->htags));
+ debugAVLtree(db->htags);
printf("\t-map: %d\n", db->map->size);
for(uint64_t i = 0; i < db->map->size; ++i){
printf("\t\t+%" PRIu64 ":%" PRIu64 "\n", db->map->table[i].file, db->map->table[i].tag);
}
printf("\n");
}
-
-void reOrderDatabase(database *db){
-
-
-
-}
#include "db.h"
+// LTABLE
+
ltable *newLtable(uint64_t size){
ltable *lt = malloc(sizeof(ltable));
size = (((uint64_t)size) < 0) ? 0 : size;
return lt;
}
-ltable *loadLtable(FILE *fp){
- char header;
- fread(&header, sizeof(char), 1, fp);
- if(header != 'L'){
- fprintf(stderr, "Header is '%c' not 'L'\n", header);
- }
- uint64_t size;
- fread(&size, sizeof(uint64_t), 1, fp);
- ltable *lt = newLtable(size);
- for(uint64_t i = 0; i < lt->size; ++i){
- uint32_t sl;
- fread(&sl, sizeof(uint32_t), 1, fp);
- lt->table[i] = malloc(sl*sizeof(char));
- fread(lt->table[i], sizeof(char), sl, fp);
- }
- char end;
- fread(&end, sizeof(char), 1, fp);
- if(end != 'E'){
- fprintf(stderr, "End is '%c' not 'E'\n", end);
- }
- return lt;
-}
-
-int storeLtable(const ltable *lt, FILE *fp){
- char header = 'L';
- fwrite(&header, sizeof(char), 1, fp);
- fwrite(<->size, sizeof(uint64_t), 1, fp);
- for(uint64_t i = 0; i < lt->size; ++i){
- uint32_t l = len(lt->table[i]) + 1;
- fwrite(&l, sizeof(uint32_t), 1, fp);
- fwrite(lt->table[i], sizeof(char), l, fp);
- }
- char end = 'E';
- fwrite(&end, sizeof(char), 1, fp);
- return 0;
-}
-
int ltableAdd(ltable *lt, char *str){
uint32_t ls;
str = normalizeStrLimit(str, &ls, MAXPATH-1);
return -1;
}
-htable *newHtable(uint64_t size){
- htable *ht = malloc(sizeof(htable));
- size = (((uint64_t)size) < 0) ? 0 : size;
- ht->size = size;
- ht->table = malloc(size*sizeof(uint64_t));
- return ht;
+int storeLtable(const ltable *lt, FILE *fp){
+ char header = 'L';
+ fwrite(&header, sizeof(char), 1, fp);
+ fwrite(<->size, sizeof(uint64_t), 1, fp);
+ for(uint64_t i = 0; i < lt->size; ++i){
+ uint32_t l = len(lt->table[i]) + 1;
+ fwrite(&l, sizeof(uint32_t), 1, fp);
+ fwrite(lt->table[i], sizeof(char), l, fp);
+ }
+ char end = 'E';
+ fwrite(&end, sizeof(char), 1, fp);
+ return 0;
}
-htable *loadHtable(FILE *fp){
+ltable *loadLtable(FILE *fp){
char header;
fread(&header, sizeof(char), 1, fp);
- if(header != 'H'){
- fprintf(stderr, "Header is '%c' not 'H'\n", header);
+ if(header != 'L'){
+ fprintf(stderr, "Header is '%c' not 'L'\n", header);
}
uint64_t size;
fread(&size, sizeof(uint64_t), 1, fp);
- htable *ht = newHtable(size);
- for(uint64_t i = 0; i < ht->size; ++i){
- fread(&ht->table[i], sizeof(uint64_t), 1, fp);
+ ltable *lt = newLtable(size);
+ for(uint64_t i = 0; i < lt->size; ++i){
+ uint32_t sl;
+ fread(&sl, sizeof(uint32_t), 1, fp);
+ lt->table[i] = malloc(sl*sizeof(char));
+ fread(lt->table[i], sizeof(char), sl, fp);
}
char end;
fread(&end, sizeof(char), 1, fp);
if(end != 'E'){
fprintf(stderr, "End is '%c' not 'E'\n", end);
}
- return ht;
+ return lt;
}
-int storeHtable(const htable *ht, FILE *fp){
- char header = 'H';
- fwrite(&header, sizeof(char), 1, fp);
- fwrite(&ht->size, sizeof(uint64_t), 1, fp);
- for(uint64_t i = 0; i < ht->size; ++i){
- fwrite(&ht->table[i], sizeof(uint64_t), 1, fp);
- }
- char end = 'E';
- fwrite(&end, sizeof(char), 1, fp);
- return 0;
+// CTABLE
+
+ctable *newCtable(uint64_t size){
+ ctable *ct = malloc(sizeof(ctable));
+ size = (((uint64_t)size) < 0) ? 0 : size;
+ ct->size = size;
+ ct->table = malloc(size*sizeof(uint64_t));
+ return ct;
}
-int htableAdd(htable *ht, uint64_t h){
- uint64_t *nht = malloc((ht->size+1)*sizeof(uint64_t));
- for(uint64_t i = 0; i < ht->size; ++i){
- if(h == ht->table[i]){
+// Appends n as a new last entry of ct, growing the table by one slot.
+// Returns 0 on success, or -1 (leaving ct untouched) when an entry equal to n
+// is already stored.
+int ctableAdd(ctable *ct, uint64_t n){
+ uint64_t *nct = malloc((ct->size+1)*sizeof(uint64_t));
+ for(uint64_t i = 0; i < ct->size; ++i){
+ if(n == ct->table[i]){
+ free(nct); // the scratch copy is not needed on the reject path
return -1;
}
- nht[i] = ht->table[i];
+ nct[i] = ct->table[i];
}
- nht[ht->size] = h;
+ nct[ct->size] = n;
- ht->size++;
- ht->table = malloc(ht->size*sizeof(uint64_t));
- for(uint64_t i = 0; i < ht->size; ++i){
- ht->table[i] = nht[i];
+ ct->size++;
+ free(ct->table); // release the old storage before replacing it
+ ct->table = malloc(ct->size*sizeof(uint64_t));
+ for(uint64_t i = 0; i < ct->size; ++i){
+ ct->table[i] = nct[i];
}
+ free(nct);
return 0;
}
-// We assume the table isnt ordered as of right now
-uint64_t htableSearch(htable *ht, uint64_t h){
- for(uint64_t i = 0; i < ht->size; ++i){
- if(h == ht->table[i]){
+// Removes the first entry equal to n from ct and shifts every later entry
+// down by one slot. Returns 0 on success, -1 when n is not stored in ct.
+int ctableDelete(ctable *ct, uint64_t n){
+	uint64_t i = ctableSearch(ct, n);
+	if(i == (uint64_t)-1){
+		return -1;
+	}
+	// Shrink first and loop up to the NEW size: table[j+1] is then at most the
+	// last slot of the old table, and the final element is shifted too. The
+	// previous bound (size-1 after the decrement) skipped that last shift and
+	// wrapped around to a huge value when the table became empty.
+	ct->size--;
+	for(uint64_t j = i; j < ct->size; ++j){
+		ct->table[j] = ct->table[j+1];
+	}
+	return 0;
+}
+
+uint64_t ctableSearch(ctable *ct, uint64_t n){
+ for(uint64_t i = 0; i < ct->size; ++i){
+ if(n == ct->table[i]){
return i;
}
}
return -1;
}
-int htableDelete(htable *ht, uint64_t h){
- uint64_t i = htableSearch(ht, h);
- if(i == -1){
- return -1;
- }
- ht->size--;
- for(uint64_t j = i; j < ht->size-1; ++j){
- ht->table[j] = ht->table[j+1];
+int storeCtable(const ctable *ct, FILE *fp){
+ char header = 'C';
+ fwrite(&header, sizeof(char), 1, fp);
+ fwrite(&ct->size, sizeof(uint64_t), 1, fp);
+ for(uint64_t i = 0; i < ct->size; ++i){
+ fwrite(&ct->table[i], sizeof(uint64_t), 1, fp);
}
+ char end = 'E';
+ fwrite(&end, sizeof(char), 1, fp);
return 0;
}
+// Reads a count table from fp: a 'C' byte, the entry count, that many 64-bit
+// values and a closing 'E' byte. A wrong header or end byte is only reported
+// on stderr; the table is returned regardless.
+ctable *loadCtable(FILE *fp){
+	char marker;
+	fread(&marker, sizeof(char), 1, fp);
+	if(marker != 'C'){
+		fprintf(stderr, "Header is '%c' not 'C'\n", marker);
+	}
+
+	uint64_t entries;
+	fread(&entries, sizeof(uint64_t), 1, fp);
+	ctable *loaded = newCtable(entries);
+
+	uint64_t idx = 0;
+	while(idx < loaded->size){
+		fread(&loaded->table[idx], sizeof(uint64_t), 1, fp);
+		++idx;
+	}
+
+	char trailer;
+	fread(&trailer, sizeof(char), 1, fp);
+	if(trailer != 'E'){
+		fprintf(stderr, "End is '%c' not 'E'\n", trailer);
+	}
+	return loaded;
+}
+
+// MTABLE
+
mtable *newMtable(uint64_t size){
mtable *mt = malloc(sizeof(mtable));
size = (((uint64_t)size) < 0) ? 0 : size;
return mt;
}
+// Appends relation r to mt unless an identical (file, tag) pair is already
+// stored. Returns 0 when r was added, -1 when it is a duplicate or when the
+// table could not be grown (mt is left unchanged in both cases).
+int mtableAdd(mtable *mt, relation r){
+	// Reject duplicates before allocating anything, so nothing leaks on this path.
+	for(uint64_t i = 0; i < mt->size; ++i){
+		if(r.file == mt->table[i].file && r.tag == mt->table[i].tag){
+			return -1;
+		}
+	}
+	// Grow in place instead of allocating two fresh arrays and leaking both the
+	// scratch copy and the old table.
+	// NOTE(review): assumes mt->table always comes from malloc (or is NULL) - confirm in newMtable/loadMtable.
+	relation *grown = realloc(mt->table, (mt->size+1)*sizeof(relation));
+	if(grown == NULL){
+		return -1;
+	}
+	grown[mt->size] = r;
+	mt->table = grown;
+	mt->size++;
+	return 0;
+}
+
+// Returns the position of the first relation in mt whose file and tag both
+// match r, or (uint64_t)-1 when there is none.
+uint64_t mtableSearch(mtable *mt, relation r){
+	uint64_t pos = 0;
+	while(pos < mt->size){
+		if(mt->table[pos].file == r.file && mt->table[pos].tag == r.tag){
+			return pos;
+		}
+		++pos;
+	}
+	return -1;
+}
+
+// Returns the position of the first relation in mt that refers to the given
+// file index, or (uint64_t)-1 when no relation does.
+uint64_t mtableSearchFile(mtable *mt, uint64_t file){
+	uint64_t pos = 0;
+	while(pos < mt->size){
+		if(mt->table[pos].file == file){
+			return pos;
+		}
+		++pos;
+	}
+	return -1;
+}
+
+// Returns the position of the first relation in mt that refers to the given
+// tag index, or (uint64_t)-1 when no relation does.
+uint64_t mtableSearchTag(mtable *mt, uint64_t tag){
+	uint64_t pos = 0;
+	while(pos < mt->size){
+		if(mt->table[pos].tag == tag){
+			return pos;
+		}
+		++pos;
+	}
+	return -1;
+}
+
+// Writes mt to fp: an 'M' byte, the entry count, then for every relation its
+// file and tag as two separate 64-bit words, and finally an 'E' byte.
+// Always returns 0; the fwrite results are not inspected.
+int storeMtable(const mtable *mt, FILE *fp){
+	const char opening = 'M';
+	const char closing = 'E';
+
+	fwrite(&opening, sizeof(char), 1, fp);
+	fwrite(&mt->size, sizeof(uint64_t), 1, fp);
+	uint64_t k = 0;
+	while(k < mt->size){
+		fwrite(&mt->table[k].file, sizeof(uint64_t), 1, fp);
+		fwrite(&mt->table[k].tag, sizeof(uint64_t), 1, fp);
+		++k;
+	}
+	fwrite(&closing, sizeof(char), 1, fp);
+	return 0;
+}
+
mtable *loadMtable(FILE *fp){
char header;
fread(&header, sizeof(char), 1, fp);
return mt;
}
-int storeMtable(const mtable *mt, FILE *fp){
- char header = 'M';
- fwrite(&header, sizeof(char), 1, fp);
- fwrite(&mt->size, sizeof(uint64_t), 1, fp);
- for(uint64_t i = 0; i < mt->size; ++i){
- fwrite(&mt->table[i].file, sizeof(uint64_t), 1, fp);
- fwrite(&mt->table[i].tag, sizeof(uint64_t), 1, fp);
+// AVL TREE
+
+// Returns the larger of a and b (b when they are equal).
+static inline uint64_t max(uint64_t a, uint64_t b){
+	if(a > b){
+		return a;
+	}
+	return b;
+}
+
+uint64_t height(node *n){
+ if(n != NULL){
+ return 1 + max(height(n->left), height(n->right));
}
- char end = 'E';
- fwrite(&end, sizeof(char), 1, fp);
return 0;
}
-int mtableAdd(mtable *mt, relation r){
- relation *nmt = malloc((mt->size+1)*sizeof(relation));
- for(uint64_t i = 0; i < mt->size; ++i){
- if(r.file == mt->table[i].file && r.tag == mt->table[i].tag){
- return -1;
+// Balance factor of n: height of the left subtree minus height of the right
+// subtree (positive means left-heavy). A NULL node has balance 0.
+static int64_t balance(node *n){
+	if(n == NULL){
+		return 0;
+	}
+	// height() is unsigned; convert each side before subtracting so a
+	// right-heavy node yields a genuinely negative value instead of relying on
+	// the conversion of a wrapped unsigned difference.
+	return (int64_t)height(n->left) - (int64_t)height(n->right);
+}
+
+// Follows left links from n and returns the leftmost node of that subtree,
+// i.e. the one with the smallest hash. n must not be NULL.
+static node *lowestNode(node *n){
+	node *cur;
+	for(cur = n; cur->left != NULL; cur = cur->left){
+		// keep descending to the left
+	}
+	return cur;
+}
+
+// Allocates a childless node holding hash h and index i.
+// Returns NULL when the allocation fails; the caller owns the returned node.
+node *newNode(uint64_t h, uint64_t i){
+	node *n = malloc(sizeof *n);
+	if(n == NULL){
+		return NULL;
+	}
+	n->h = h;
+	n->i = i;
+	n->left = NULL;
+	n->right = NULL;
+	return n;
+}
+
+// Right rotation around r: r's left child becomes the new subtree root, r
+// becomes its right child and adopts the new root's former right subtree as
+// its left subtree. r->left must not be NULL. Returns the new subtree root.
+static node *rotateNodeRight(node *r){
+	node *pivot = r->left;
+	r->left = pivot->right;
+	pivot->right = r;
+	return pivot;
+}
+
+// Left rotation around r: r's right child becomes the new subtree root, r
+// becomes its left child and adopts the new root's former left subtree as
+// its right subtree. r->right must not be NULL. Returns the new subtree root.
+static node *rotateNodeLeft(node *r){
+	node *pivot = r->right;
+	r->right = pivot->left;
+	pivot->left = r;
+	return pivot;
+}
+
+// Inserts the pair (h, i) into the AVL tree rooted at r, keyed by h, and
+// returns the root of the rebalanced subtree. When h is already present the
+// tree is returned unchanged and i is ignored.
+node *insertNode(node *r, uint64_t h, uint64_t i){
+	if(r == NULL){
+		return newNode(h, i);
+	}
+	if(h == r->h){
+		return r;
+	}
+	if(h < r->h){
+		r->left = insertNode(r->left, h, i);
+	}else{
+		r->right = insertNode(r->right, h, i);
+	}
+
+	// Rebalance on the way back up; the rotation depends on which grandchild
+	// side the new hash went to.
+	const int64_t skew = balance(r);
+	if(skew > 1){
+		if(h < r->left->h){ // Left left
+			return rotateNodeRight(r);
+		}
+		if(h > r->left->h){ // Left right
+			r->left = rotateNodeLeft(r->left);
+			return rotateNodeRight(r);
+		}
+	}else if(skew < -1){
+		if(h > r->right->h){ // Right right
+			return rotateNodeLeft(r);
+		}
+		if(h < r->right->h){ // Right left
+			r->right = rotateNodeRight(r->right);
+			return rotateNodeLeft(r);
+		}
+	}
+	return r;
+}
+
+// Removes the node whose hash equals h from the AVL tree rooted at r and
+// returns the root of the rebalanced subtree (NULL when the subtree becomes
+// empty). A hash that is not present leaves the node set unchanged.
+node *deleteNode(node *r, uint64_t h){
+ if(r == NULL){
+ return r;
+ }else if(r->h > h){
+ r->left = deleteNode(r->left, h);
+ }else if(r->h < h){
+ r->right = deleteNode(r->right, h);
+ }else{
+ if(r->left == NULL || r->right == NULL){
+ node *t = (r->left) ? r->left : r->right;
+ if(t == NULL){
+ t = r;
+ r = NULL;
+ }else{
+ *r = *t;
+ }
+ free(t);
+ }else{
+ node *t = lowestNode(r->right);
+ r->h = t->h;
+ r->i = t->i;
+ r->right = deleteNode(r->right, t->h);
}
- nmt[i] = mt->table[i];
}
- nmt[mt->size] = r;
+ if(r == NULL){
+ return r;
+ }
- mt->size++;
- mt->table = malloc(mt->size*sizeof(relation));
- for(uint64_t i = 0; i < mt->size; ++i){
- mt->table[i] = nmt[i];
+ // Balance factors are signed. Held in uint64_t, a negative factor turns
+ // into a huge positive number: "b > 1" then fires for right-heavy nodes,
+ // "bl >= 0" is always true, and the wrong rotation runs (dereferencing a
+ // NULL child inside rotateNodeRight).
+ int64_t b = balance(r), bl = balance(r->left), br = balance(r->right);
+ if(b > 1 && bl >= 0){ // Left left
+ return rotateNodeRight(r);
}
- return 0;
+ if(b < -1 && br <= 0){ // Right right
+ return rotateNodeLeft(r);
+ }
+ if(b > 1 && bl < 0){ // Left right
+ r->left = rotateNodeLeft(r->left);
+ return rotateNodeRight(r);
+ }
+ if(b < -1 && br > 0){ // Right left
+ r->right = rotateNodeRight(r->right);
+ return rotateNodeLeft(r);
+ }
+ return r;
}
-uint64_t mtableSearch(mtable *mt, relation r){
- for(uint64_t i = 0; i < mt->size; ++i){
- if(r.file == mt->table[i].file && r.tag == mt->table[i].tag){
- return i;
- }
+// Looks up hash h in the tree rooted at n and returns the index stored with
+// it, or (uint64_t)-1 when h is not in the tree.
+uint64_t nodeSearch(node *n, uint64_t h){
+ if(n == NULL){
+ return -1;
+ }else if(h == n->h){
+ return n->i;
+ }else if(h < n->h){
+ return nodeSearch(n->left, h);
+ }else{ // h > n->h; a plain else so no path falls off the end without a value
+ return nodeSearch(n->right, h);
}
- return -1;
}
+// Value written to BOTH words of an array slot that holds no node.
+// A real entry whose hash and index are both 18446744073709551615 cannot be
+// told apart from an empty slot.
+#define AVL_EMPTY_SLOT ((uint64_t)UINTMAX_MAX)
+
+// Copies the subtree rooted at n into array using implicit binary-heap
+// numbering: node number i occupies array[2*i] (hash) and array[2*i+1]
+// (index), and its children are nodes 2*i+1 and 2*i+2. The old formula
+// doubled an offset that was already doubled and wrote past the end of the
+// array as soon as the tree was three levels deep.
+static void nodesToArray(node *n, uint64_t *array, uint64_t i){
+	if(n == NULL){
+		return;
+	}
+	array[2*i + 0] = n->h;
+	array[2*i + 1] = n->i;
+	nodesToArray(n->left, array, 2*i + 1);
+	nodesToArray(n->right, array, 2*i + 2);
+}
+
+// Flattens the tree into a freshly allocated array with room for a complete
+// tree of the same height (2 words per slot, unused slots set to
+// AVL_EMPTY_SLOT) and stores the slot count in *maxNodes.
+// Returns NULL with *maxNodes == 0 for an empty tree, and NULL with
+// *maxNodes != 0 when the array cannot be allocated. The caller frees the result.
+static uint64_t *treeToArray(tree root, uint64_t *maxNodes){
+	uint64_t treeHeight = height(root);
+	if(treeHeight == 0){
+		*maxNodes = 0;
+		return NULL;
+	}
+	if(treeHeight > 59){ // 2 * 2^height * 8 bytes would overflow 64 bits
+		*maxNodes = UINT64_MAX;
+		return NULL;
+	}
+	*maxNodes = ((uint64_t)1 << treeHeight) - 1; // 64-bit shift, not int
+	if(*maxNodes > SIZE_MAX/(2*sizeof(uint64_t))){
+		return NULL;
+	}
+	uint64_t *treeArray = malloc((2*(*maxNodes))*sizeof(uint64_t)); // One space for h, another for i
+	if(treeArray == NULL){
+		return NULL;
+	}
+	for(uint64_t i = 0; i < 2*(*maxNodes); ++i){
+		treeArray[i] = AVL_EMPTY_SLOT;
+	}
+	nodesToArray(root, treeArray, 0);
+	return treeArray;
+}
+
+// Writes the tree to fp: a 'T' byte, the slot count, then one (hash, index)
+// pair of 64-bit words per slot in heap order, and a closing 'E' byte.
+// Returns 0 on success, -1 when the tree could not be flattened (nothing is
+// written in that case).
+int storeAVLTree(tree root, FILE *fp){
+	uint64_t maxNodes;
+	uint64_t *array = treeToArray(root, &maxNodes);
+	if(array == NULL && maxNodes != 0){
+		return -1;
+	}
+	char header = 'T';
+	fwrite(&header, sizeof(char), 1, fp);
+	fwrite(&maxNodes, sizeof(uint64_t), 1, fp);
+	for(uint64_t i = 0; i < maxNodes; ++i){
+		uint64_t ai = (i<<1);
+		fwrite(&array[ai+0], sizeof(uint64_t), 1, fp); // Writing h
+		fwrite(&array[ai+1], sizeof(uint64_t), 1, fp); // Writing i
+	}
+	char end = 'E';
+	fwrite(&end, sizeof(char), 1, fp);
+	free(array);
+	return 0;
+}
+
+// Rebuilds the subtree whose root is node number i of the heap-ordered array
+// (same numbering as nodesToArray). Slots outside the array and slots marked
+// empty produce NULL instead of a phantom node.
+static node *arrayToNodes(uint64_t *array, uint64_t i, uint64_t maxNodes){
+	if(i >= maxNodes){
+		return NULL;
+	}
+	if(array[2*i + 0] == AVL_EMPTY_SLOT && array[2*i + 1] == AVL_EMPTY_SLOT){
+		return NULL;
+	}
+	node *n = newNode(array[2*i + 0], array[2*i + 1]);
+	if(n == NULL){
+		return NULL;
+	}
+	n->left = arrayToNodes(array, 2*i + 1, maxNodes);
+	n->right = arrayToNodes(array, 2*i + 2, maxNodes);
+	return n;
+}
+
+// Reads a tree written by storeAVLTree from fp and returns its root (NULL for
+// an empty tree). A wrong header or end byte is only reported on stderr. When
+// the stored slot count cannot be allocated the payload is not read and NULL
+// is returned.
+tree loadAVLTree(FILE *fp){
+	char header;
+	fread(&header, sizeof(char), 1, fp);
+	if(header != 'T'){
+		fprintf(stderr, "Header is '%c' not 'T'\n", header);
+	}
+	uint64_t maxNodes = 0;
+	fread(&maxNodes, sizeof(uint64_t), 1, fp);
+	tree root = NULL;
+	if(maxNodes != 0){
+		uint64_t *array = NULL;
+		// The count comes from the file: refuse sizes whose byte length overflows.
+		if(maxNodes <= SIZE_MAX/(2*sizeof(uint64_t))){
+			array = malloc((2*maxNodes)*sizeof(uint64_t));
+		}
+		if(array == NULL){
+			fprintf(stderr, "Cannot load tree of %" PRIu64 " slots\n", maxNodes);
+		}else{
+			for(uint64_t i = 0; i < maxNodes; ++i){
+				uint64_t ai = (i<<1);
+				// A short read leaves the slot empty rather than uninitialized.
+				if(fread(&array[ai+0], sizeof(uint64_t), 1, fp) != 1){ // Reading h
+					array[ai+0] = AVL_EMPTY_SLOT;
+				}
+				if(fread(&array[ai+1], sizeof(uint64_t), 1, fp) != 1){ // Reading i
+					array[ai+1] = AVL_EMPTY_SLOT;
+				}
+			}
+			root = arrayToNodes(array, 0, maxNodes);
+			free(array);
+		}
+	}
+	char end;
+	fread(&end, sizeof(char), 1, fp);
+	if(end != 'E'){
+		fprintf(stderr, "End is '%c' not 'E'\n", end);
+	}
+	return root;
+}
/*
// TODO: remove old impl