aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSoikk2022-07-23 01:46:24 +0200
committerSoikk2022-07-23 01:46:24 +0200
commit28578b192d0828a9820983b5624b9bcc3577cd18 (patch)
tree2f30b1730f30a7eeee80995ee3984c10f5bdc2ff /src
parent377dc104be127291ede5b32640c23eea0ba6791a (diff)
downloadsoikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.xz
soikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.zst
Improved the database storage system. Added persistency.
Diffstat (limited to 'src')
-rw-r--r--src/bm.c6
-rw-r--r--src/crc64.c191
-rw-r--r--src/database.c179
-rw-r--r--src/main.c40
-rw-r--r--src/storage.c255
-rw-r--r--src/str.c40
6 files changed, 670 insertions, 41 deletions
diff --git a/src/bm.c b/src/bm.c
index 2962878..d94765e 100644
--- a/src/bm.c
+++ b/src/bm.c
@@ -32,9 +32,9 @@ ssize_t BM(char *x, int m, char *y, int n){
while(j < n){
k = bmBc[y[j + m -1]];
while(k != 0){
- j += k; k = bmBc[y[j + m -1]];
- //j += k; k = bmBc[y[j + m -1]];
- //j += k; k = bmBc[y[j + m -1]];
+ j += k; k = bmBc[y[j + m - 1]];
+ j += k; k = bmBc[y[j + m - 1]];
+ j += k; k = bmBc[y[j + m - 1]];
}
if(memcmp(x, y + j, m) == 0 && j < n)
return j;
diff --git a/src/crc64.c b/src/crc64.c
new file mode 100644
index 0000000..63d9035
--- /dev/null
+++ b/src/crc64.c
@@ -0,0 +1,191 @@
+/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0.
+ *
+ * Specification of this CRC64 variant follows:
+ * Name: crc-64-jones
+ * Width: 64 bits
+ * Poly: 0xad93d23594c935a9
+ * Reflected In: True
+ * Xor_In: 0xffffffffffffffff
+ * Reflected_Out: True
+ * Xor_Out: 0x0
+ * Check("123456789"): 0xe9c6d914c4b8d9ca
+ *
+ * Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE. */
+
+#include <stdint.h>
+
+static const uint64_t crc64_tab[256] = { /* indexed in crc64() by (low byte of the running crc) XOR (next input byte) */
+ UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979),
+ UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b),
+ UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6),
+ UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04),
+ UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c),
+ UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe),
+ UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183),
+ UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371),
+ UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8),
+ UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a),
+ UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077),
+ UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285),
+ UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d),
+ UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f),
+ UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02),
+ UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0),
+ UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b),
+ UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489),
+ UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4),
+ UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206),
+ UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e),
+ UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc),
+ UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81),
+ UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73),
+ UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa),
+ UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08),
+ UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75),
+ UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87),
+ UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f),
+ UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d),
+ UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100),
+ UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2),
+ UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416),
+ UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4),
+ UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299),
+ UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b),
+ UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63),
+ UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891),
+ UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec),
+ UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e),
+ UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97),
+ UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965),
+ UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18),
+ UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea),
+ UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2),
+ UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710),
+ UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d),
+ UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f),
+ UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14),
+ UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6),
+ UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b),
+ UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69),
+ UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561),
+ UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793),
+ UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee),
+ UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c),
+ UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495),
+ UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667),
+ UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a),
+ UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8),
+ UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0),
+ UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812),
+ UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f),
+ UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d),
+ UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc),
+ UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e),
+ UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643),
+ UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1),
+ UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9),
+ UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b),
+ UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836),
+ UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4),
+ UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d),
+ UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf),
+ UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2),
+ UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30),
+ UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138),
+ UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca),
+ UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7),
+ UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545),
+ UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce),
+ UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c),
+ UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941),
+ UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3),
+ UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb),
+ UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349),
+ UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734),
+ UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6),
+ UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f),
+ UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd),
+ UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0),
+ UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432),
+ UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a),
+ UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8),
+ UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5),
+ UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47),
+ UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3),
+ UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51),
+ UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c),
+ UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de),
+ UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6),
+ UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124),
+ UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559),
+ UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab),
+ UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222),
+ UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0),
+ UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad),
+ UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f),
+ UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57),
+ UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5),
+ UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8),
+ UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a),
+ UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1),
+ UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053),
+ UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e),
+ UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc),
+ UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4),
+ UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26),
+ UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b),
+ UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9),
+ UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20),
+ UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2),
+ UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf),
+ UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d),
+ UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355),
+ UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7),
+ UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da),
+ UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728),
+};
+
+/* Folds the `l` bytes at `s` into the running checksum `crc`, one table
+ * lookup per byte, and returns the updated checksum. */
+uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
+    for (uint64_t pos = 0; pos != l; ++pos) {
+        const uint8_t slot = (uint8_t)crc ^ s[pos];
+        const uint64_t shifted = crc >> 8;
+        crc = shifted ^ crc64_tab[slot];
+    }
+    return crc;
+}
+
+/* Test main */
+#ifdef TEST_MAIN
+#include <stdio.h>
+/* Prints the expected check value next to the checksum of "123456789". */
+int main(void) {
+    const unsigned char *sample = (unsigned char*)"123456789";
+    unsigned long long digest = (unsigned long long) crc64(0, sample, 9);
+    printf("e9c6d914c4b8d9ca == %016llx\n", digest);
+    return 0;
+}
+#endif \ No newline at end of file
diff --git a/src/database.c b/src/database.c
new file mode 100644
index 0000000..521b0a1
--- /dev/null
+++ b/src/database.c
@@ -0,0 +1,179 @@
+#include "db.h"
+
+
+// Creates an empty database called `name` with five empty tables.
+// The name is truncated to fit the name field and is always NUL-terminated;
+// the rest of the field is zeroed so storeDatabase() never writes
+// uninitialised bytes. Returns NULL if any allocation fails.
+database *newDatabase(char *name){
+    database *db = calloc(1, sizeof(database));
+    if(db == NULL){
+        return NULL;
+    }
+    // Bounded copy: stop at the terminator or one byte before the field ends
+    for(size_t n = 0; n + 1 < sizeof(db->name) && name[n] != '\0'; ++n){
+        db->name[n] = name[n];
+    }
+    db->lfiles = newLtable(0);
+    db->ltags = newLtable(0);
+    db->hfiles = newHtable(0);
+    db->htags = newHtable(0);
+    db->map = newMtable(0);
+    if(!db->lfiles || !db->ltags || !db->hfiles || !db->htags || !db->map){
+        // Release whichever (still empty) tables were created
+        if(db->lfiles){ free(db->lfiles->table); free(db->lfiles); }
+        if(db->ltags){ free(db->ltags->table); free(db->ltags); }
+        if(db->hfiles){ free(db->hfiles->table); free(db->hfiles); }
+        if(db->htags){ free(db->htags->table); free(db->htags); }
+        if(db->map){ free(db->map->table); free(db->map); }
+        free(db);
+        return NULL;
+    }
+    return db;
+}
+
+// Reads a database previously written by storeDatabase() from `path`.
+// Layout: "DB" magic, 32-byte name field, two ltables, two htables, one
+// mtable, "END" trailer.
+// Returns NULL if the file cannot be opened, the magic is wrong, or memory
+// runs out. A bad trailer is only reported, as before.
+database *loadDatabase(const char* path){
+    FILE *fp = fopen(path, "rb");
+    if(fp == NULL){
+        return NULL;
+    }
+    // One spare zero byte so sameStr() always finds a terminator
+    char header[3] = {0};
+    if(fread(header, sizeof(char), 2, fp) != 2 || !sameStr(header, "DB")){
+        printf("header is %c%c and not DB\n", header[0], header[1]);
+        fclose(fp);
+        return NULL;
+    }
+    char name[32] = {0};
+    if(fread(name, sizeof(char), 32, fp) != 32){
+        fclose(fp);
+        return NULL;
+    }
+    name[31] = '\0';  // never trust the file to terminate the name
+
+    // Allocate the shell directly instead of calling newDatabase(), whose
+    // placeholder tables would be overwritten (and leaked) just below.
+    database *db = calloc(1, sizeof(database));
+    if(db == NULL){
+        fclose(fp);
+        return NULL;
+    }
+    memcpy(db->name, name, sizeof(name));
+    db->lfiles = loadLtable(fp);
+    db->ltags = loadLtable(fp);
+    db->hfiles = loadHtable(fp);
+    db->htags = loadHtable(fp);
+    db->map = loadMtable(fp);
+    if(!db->lfiles || !db->ltags || !db->hfiles || !db->htags || !db->map){
+        // NOTE(review): tables that did load are not released here; no
+        // destructor for them is visible in this file.
+        free(db);
+        fclose(fp);
+        return NULL;
+    }
+    char end[4] = {0};
+    if(fread(end, sizeof(char), 3, fp) != 3 || !sameStr(end, "END")){
+        printf("end is %s and not END\n", end);
+    }
+    fclose(fp);
+    return db;
+}
+
+// Writes `db` to `path`: "DB" magic, the 32-byte name field, the five tables
+// in the order loadDatabase() reads them, then the "END" trailer.
+// Returns 0 on success, -1 if the file cannot be opened or a write fails.
+int storeDatabase(database *db, const char *path){
+    FILE *fp = fopen(path, "wb");
+    if(fp == NULL){
+        return -1;
+    }
+    const char header[2] = {'D', 'B'};
+    const char end[3] = {'E', 'N', 'D'};
+    int rc = 0;
+
+    if(fwrite(header, sizeof(char), 2, fp) != 2 ||
+       fwrite(db->name, sizeof(char), 32, fp) != 32){
+        rc = -1;
+    }
+    if(rc == 0 && (storeLtable(db->lfiles, fp) != 0 ||
+                   storeLtable(db->ltags, fp) != 0 ||
+                   storeHtable(db->hfiles, fp) != 0 ||
+                   storeHtable(db->htags, fp) != 0 ||
+                   storeMtable(db->map, fp) != 0)){
+        rc = -1;
+    }
+    if(rc == 0 && fwrite(end, sizeof(char), 3, fp) != 3){
+        rc = -1;
+    }
+    // fclose flushes buffered data, so its failure is a write failure too
+    if(fclose(fp) != 0){
+        rc = -1;
+    }
+    return rc;
+}
+
+// Records the file/tag pair `r` in the map unless it is already there.
+// Returns -1 when the pair was already present, 0 otherwise.
+static int addRelation(database *db, relation r){
+    const int known = mtableSearch(db->map, r) != -1;
+    if(!known){
+        mtableAdd(db->map, r);
+    }
+    return known ? -1 : 0;
+}
+
+// Associates `tag` with `file`, registering either one first if its hash is
+// not yet known. Both strings are normalised (lowercased, trailing
+// whitespace trimmed, capped at MAXPATH-1 characters) before hashing.
+// Returns 0 on success, -1 if a string could not be normalised.
+int addFileTag(database *db, char *file, char *tag){
+    uint32_t lf, lt;
+    char *nfile = normalizeStrLimit(file, &lf, MAXPATH-1);
+    char *ntag = normalizeStrLimit(tag, &lt, MAXPATH-1);
+    if(nfile == NULL || ntag == NULL){
+        free(nfile);
+        free(ntag);
+        return -1;
+    }
+    uint64_t hf = crc64(0, (const unsigned char*)nfile, lf);
+    uint64_t ht = crc64(0, (const unsigned char*)ntag, lt);
+    uint64_t fi = htableSearch(db->hfiles, hf), ti = htableSearch(db->htags, ht);
+
+    if(fi == (uint64_t)-1){
+        ltableAdd(db->lfiles, nfile);
+        htableAdd(db->hfiles, hf);
+        fi = db->hfiles->size-1;
+    }
+    if(ti == (uint64_t)-1){
+        ltableAdd(db->ltags, ntag);
+        htableAdd(db->htags, ht);
+        ti = db->htags->size-1;
+    }
+    // ltableAdd() stores its own copy, so the normalised strings are ours to free
+    free(nfile);
+    free(ntag);
+
+    addRelation(db, (relation){.file = fi, .tag = ti});
+    return 0;
+}
+
+// Associates `file` with each of the `ntags` tags passed as variadic
+// `char*` arguments, registering the file and any tag whose hash is not yet
+// known. A non-positive `ntags` adds no relations.
+// Returns 0 on success, -1 if the file name could not be normalised.
+int addFileTags(database *db, char *file, int ntags, ...){
+    uint32_t lf;
+    char *nfile = normalizeStrLimit(file, &lf, MAXPATH-1);
+    if(nfile == NULL){
+        return -1;
+    }
+    uint64_t hf = crc64(0, (const unsigned char*)nfile, lf);
+    uint64_t fi = htableSearch(db->hfiles, hf);
+
+    if(fi == (uint64_t)-1){
+        ltableAdd(db->lfiles, nfile);
+        htableAdd(db->hfiles, hf);
+        fi = db->hfiles->size-1;
+    }
+    free(nfile);  // ltableAdd() keeps its own copy
+
+    va_list tags;
+    va_start(tags, ntags);
+    // Signed counter: a negative ntags must not become a huge unsigned bound
+    for(int i = 0; i < ntags; ++i){
+        char *tag = va_arg(tags, char*);
+        uint32_t lt;
+        char *ntag = normalizeStrLimit(tag, &lt, MAXPATH-1);
+        if(ntag == NULL){
+            continue;
+        }
+        uint64_t ht = crc64(0, (const unsigned char*)ntag, lt);
+        uint64_t ti = htableSearch(db->htags, ht);
+
+        if(ti == (uint64_t)-1){
+            ltableAdd(db->ltags, ntag);
+            htableAdd(db->htags, ht);
+            ti = db->htags->size-1;
+        }
+        free(ntag);
+
+        addRelation(db, (relation){.file = fi, .tag = ti});
+    }
+    va_end(tags);
+
+    return 0;
+}
+
+// Should return a list with the indexes of the files that have this tag
+// Resolves `tag` to its index in the tag hash table.
+// NOTE(review): `rl` is passed by value, so the old `rl = r;` only changed
+// the local copy: the caller never saw the list and the array was leaked on
+// every call. Returning the list needs an interface change (for example
+// `uint64_t **rl` plus a count), which is outside this block. Until then the
+// unreachable result array is not built and `rl` is left untouched, exactly
+// as callers observed before.
+// Always returns 0.
+int searchTag(database *db, char *tag, uint64_t *rl){
+    (void)rl;
+    uint32_t l;
+    char *ntag = normalizeStrLimit(tag, &l, MAXPATH-1);
+    if(ntag == NULL){
+        return 0;
+    }
+    uint64_t h = crc64(0, (const unsigned char*)ntag, l);
+    free(ntag);
+    uint64_t ti = htableSearch(db->htags, h);
+    // TODO: error checking (ti is (uint64_t)-1 when the tag is unknown)
+    (void)ti;
+    return 0;
+}
+
+// Prints every relation in the map as "<file> -> <tag>", one per line,
+// followed by a blank line.
+void printDatabase(database *db){
+    uint64_t row = 0;
+    while(row < db->map->size){
+        const uint64_t fileIdx = db->map->table[row].file;
+        const uint64_t tagIdx = db->map->table[row].tag;
+        printf("%s -> %s\n", db->lfiles->table[fileIdx], db->ltags->table[tagIdx]);
+        ++row;
+    }
+    printf("\n");
+}
+
+// Dumps the database name and the full contents of all five tables.
+// Table sizes are 64-bit, so they are printed with PRIu64; `%d` has
+// undefined behaviour for them.
+void debugDatabase(database *db){
+    printf("\n");
+    printf("Name: %s\n", db->name);
+    printf("\t-lfiles: %" PRIu64 "\n", (uint64_t)db->lfiles->size);
+    for(uint64_t i = 0; i < db->lfiles->size; ++i){
+        printf("\t\t+%s\n", db->lfiles->table[i]);
+    }
+    printf("\t-ltags: %" PRIu64 "\n", (uint64_t)db->ltags->size);
+    for(uint64_t i = 0; i < db->ltags->size; ++i){
+        printf("\t\t+%s\n", db->ltags->table[i]);
+    }
+    printf("\t-hfiles: %" PRIu64 "\n", (uint64_t)db->hfiles->size);
+    for(uint64_t i = 0; i < db->hfiles->size; ++i){
+        printf("\t\t+%" PRIu64 "\n", db->hfiles->table[i]);
+    }
+    printf("\t-htags: %" PRIu64 "\n", (uint64_t)db->htags->size);
+    for(uint64_t i = 0; i < db->htags->size; ++i){
+        printf("\t\t+%" PRIu64 "\n", db->htags->table[i]);
+    }
+    printf("\t-map: %" PRIu64 "\n", (uint64_t)db->map->size);
+    for(uint64_t i = 0; i < db->map->size; ++i){
+        printf("\t\t+%" PRIu64 ":%" PRIu64 "\n", db->map->table[i].file, db->map->table[i].tag);
+    }
+    printf("\n");
+}
diff --git a/src/main.c b/src/main.c
index 9ca0eb2..da6e007 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,31 +1,57 @@
#include "db.h"
+#include <time.h>
+// Writes the 64-bit value 0xDEADBEEF to `fp` in host byte order.
+void nothing(FILE *fp){
+    const uint64_t marker = 0xDEADBEEF;
+    fwrite(&marker, sizeof(uint64_t), 1, fp);
+}
+
+
+// Hex-dumps the rest of `fp`, one byte at a time, separated by spaces.
+// The loop is driven by fread's return value: feof() only turns true after
+// a read has already failed, so testing it first printed one unread byte
+// at end of file.
+void print(FILE *fp){
+    uint8_t a;
+    while(fread(&a, sizeof(uint8_t), 1, fp) == 1){
+        printf("%x ", a);
+    }
+}
+
int main(){
- inputBuffer *in = newInputBuffer();
+
+
- row *r = newRow("~/test/img.png");
+ printf("%016llu\n", (uint64_t) crc64(0, (unsigned char*)"cacadevaca", 10));
+
+ inputBuffer *in = newInputBuffer();
+
char *str = "grandmother;football;capital;concerned;entire;realize;garden;refused;proud;tune;rhyme;other;writer;command;fresh;fence;rapidly;active;cover;repeat;determine;yard;cannot;animal;pure;rich;mirror;frozen;vast;coach;brass;activity;bottom;airplane;local;tone;attack;though;between;value;collect;mission;tower;brought;original;history;reason;minute;would;hung;strange;children;offer;blue;wrapped;magnet;color;cage;easily;percent;lower;verb;hundred;larger;away;was;certain;western;yes;lack;wish;same;spend;arrive;fog;heard;bill;effort;steam;wolf;indicate;suppose;because;life;down;seat;age;earn;under;cell;floating;although;spent;folks;swing;hello;cent;swung;pen;happened;slip;pupil;smell;fix;piano;closer;idea;trunk;model;school;particularly;he;coast;describe;such;join;been;hard;three;around;tube;soldier;baby;mouse;note;sort;house;gasoline;organized;eat;sat;crowd;alive;spoken;wide;square;luck;tales;angry;having;wear;frog;outer;nice;regular;year;clothing;check;throughout;farmer;dug;dark;exercise;table;your;form;should;personal;use;road;bright;walk;fairly;affect;but;night;close;job;front;fight;beside;ocean;herd;pass;hardly;widely;prepare;nails;paid;lucky;design;grandfather;aid;heavy;truck;sleep;difficult;log;keep;government;headed;mother;sad;bread;voyage;when;happy;making;whistle;plural;guard;therefore;continent;roof;money;pan;unusual;region;special;generally;plate;visit;look;lost;sick;wonderful;farther;put;characteristic;gravity;trap;system;twice;taste;knew;mad;smallest;automobile;return;huge;underline;danger;news;electric;information;breeze;thread;equally;five;new;average;former;wild;spend;cabin;recognize;nearest;circle;such;found;pass;whistle;slave;event;knowledge;fear;friend;am;browserling;cry;length;thy;create;busy;office;earth;blind;smallest;birthday;putting;classroom;pen;southern;summer;put;open;solution;spread;equator;else;kitchen;determine;strong;change;world;pocket;claws;earn;excellent;drove;donkey;rush;band;energy;fighting;hurt;ordinary;native;visitor;give;stor
m;pressure;imagine;street;engine;worth;hospital;attached;subject;perhaps;hospital;living;waste;dark;natural;change;enter;girl;motor;element;experiment;physical;value;excited;fort;layers;buy;minerals;satisfied;next;spirit;unhappy;storm;angry;science;desk;develop;behind;afraid;act;else;prepare;given;raw;affect;husband;ring;older;brought;book;cow;lake;sides;ago;fill;successful;real;aside;taught;mind;straight;date;very;chart;slabs;thin;saddle;full;sort;heard;surprise;fox;cool;dish;alphabet;early;spring;nest;sometime;date;light;break;lion;difference;rhyme;might;step;teach;potatoes;young;nine;liquid;how;lunch;heavy;mass;being;save;cutting;negative;swimming;cutting;journey;army;none;worry;leave;explore;baseball;fight;road;exact;hay;voyage;sheet;test;right;examine;agree;heart;pig;cannot;tool;hill;changing;bee;find;together;lay;tie;lost;continued;then;came;rhyme;mirror;town;substance;both;up;quite;push;shake;solid;result;you;ought;chicken;waste;freedom;why;somehow;not;complete;sick;struggle;military;pure;top;south;step;education;could;between;familiar;recognize;rich;tool;material;were;chicken;stopped;stay;policeman;round;firm";
- int l = len(str);
- printf("len: %d\n", l);
- //measure these two
- int a = strInTags(str, l, "percent", 7, ';');
- //int a = BM("percent", 7, str, l);
+ /*database *db = newDatabase("miDB");
+
+ addFileTag(db, "vaca.png", "naturaleza");
+ addFileTags(db, "donald-tromp.jpg", 3, "based", "hitler", "very cool");
+
+ storeDatabase(db, "db.db");
+ */
+ database *db = loadDatabase("db.db");
+ printDatabase(db);
+ debugDatabase(db);
while(0){
prompt();
getInput(in);
+ /*
insertTag(r, in->buffer);
printf("Tags of row '%s': %s\n", r->path, r->tags);
printf("Number of tags: %u. Length of tags: %u\n", r->numTags, r->lenTags);
+ */
/*switch(handleInput(in)){
case META_COMMAND_SUCCESS:
diff --git a/src/storage.c b/src/storage.c
index 868e0b2..930f636 100644
--- a/src/storage.c
+++ b/src/storage.c
@@ -1,18 +1,234 @@
#include "db.h"
-row *newRow(const char path[MAXPATH]){
- row *nr = malloc(sizeof(row));
- memcpy(nr->path, path, len(path));
- nr->numTags = 0;
- nr->lenTags = 0;
+// Allocates an ltable with `size` string slots, all initialised to NULL.
+// Returns NULL if the request is too large or memory cannot be obtained.
+// (The old `size < 0` clamp compared an unsigned value and never fired.)
+ltable *newLtable(uint64_t size){
+    if(size > SIZE_MAX / sizeof(char*)){
+        return NULL;
+    }
+    ltable *lt = malloc(sizeof(ltable));
+    if(lt == NULL){
+        return NULL;
+    }
+    lt->size = size;
+    lt->table = calloc((size_t)size, sizeof(char*));
+    // calloc(0, ...) may legitimately return NULL, so only fail when size > 0
+    if(lt->table == NULL && size != 0){
+        free(lt);
+        return NULL;
+    }
+    return lt;
+}
+
+// Reads an ltable written by storeLtable(): 'L', a 64-bit entry count, then
+// per entry a 32-bit byte length followed by that many bytes, then 'E'.
+// Every loaded string gets an extra terminating NUL. If the stream ends
+// early or an entry is implausible, the table is truncated to the entries
+// read so far. Marker mismatches are only reported, as before.
+// Returns NULL only if the table itself cannot be allocated.
+ltable *loadLtable(FILE *fp){
+    char header = 0;
+    if(fread(&header, sizeof(char), 1, fp) != 1 || header != 'L'){
+        printf("header is %c not L\n", header);
+    }
+    uint64_t size = 0;
+    if(fread(&size, sizeof(uint64_t), 1, fp) != 1){
+        size = 0;
+    }
+    ltable *lt = newLtable(size);
+    if(lt == NULL){
+        return NULL;
+    }
+    if(lt->table == NULL){
+        lt->size = 0;
+    }
+    for(uint64_t i = 0; i < lt->size; ++i){
+        uint32_t sl = 0;
+        char *s = NULL;
+        // ltableAdd() caps strings at MAXPATH-1 characters plus the NUL, so
+        // a longer length means the file is corrupt
+        if(fread(&sl, sizeof(uint32_t), 1, fp) == 1 && sl <= (uint32_t)MAXPATH){
+            s = calloc((size_t)sl + 1, sizeof(char));
+        }
+        if(s == NULL || fread(s, sizeof(char), sl, fp) != sl){
+            free(s);
+            lt->size = i;  // keep only the entries that loaded completely
+            break;
+        }
+        lt->table[i] = s;
+    }
+    char end = 0;
+    if(fread(&end, sizeof(char), 1, fp) != 1 || end != 'E'){
+        printf("end is %c not E\n", end);
+    }
+    return lt;
+}
+
+// Writes `lt` to `fp`: 'L', the 64-bit entry count, then per entry a 32-bit
+// byte length (including the NUL) followed by the string bytes, then 'E'.
+// Returns 0 on success, -1 as soon as a write comes up short.
+int storeLtable(const ltable *lt, FILE *fp){
+    const char header = 'L';
+    const char end = 'E';
+    if(fwrite(&header, sizeof(char), 1, fp) != 1 ||
+       fwrite(&lt->size, sizeof(uint64_t), 1, fp) != 1){
+        return -1;
+    }
+    for(uint64_t i = 0; i < lt->size; ++i){
+        uint32_t l = len(lt->table[i]) + 1;
+        if(fwrite(&l, sizeof(uint32_t), 1, fp) != 1 ||
+           fwrite(lt->table[i], sizeof(char), l, fp) != l){
+            return -1;
+        }
+    }
+    return (fwrite(&end, sizeof(char), 1, fp) == 1) ? 0 : -1;
+}
+
+// Appends a normalised copy of `str` to `lt` unless an equal string is
+// already stored. The table is grown in place; the previous version
+// deep-copied every string twice and leaked both the old table and the
+// scratch copy on every call, including the duplicate path.
+// Returns 0 when added, -1 on duplicate or allocation failure (table
+// unchanged).
+int ltableAdd(ltable *lt, char *str){
+    uint32_t ls;
+    char *entry = normalizeStrLimit(str, &ls, MAXPATH-1);
+    if(entry == NULL){
+        return -1;
+    }
+    for(uint64_t i = 0; i < lt->size; ++i){
+        if(sameStr(entry, lt->table[i])){
+            free(entry);
+            return -1;
+        }
+    }
+    // Assign realloc's result to a temporary so the table survives a failure
+    char **grown = realloc(lt->table, (lt->size+1)*sizeof(char*));
+    if(grown == NULL){
+        free(entry);
+        return -1;
+    }
+    grown[lt->size] = entry;  // the table takes ownership of the copy
+    lt->table = grown;
+    lt->size++;
+    return 0;
+}
+
+// Returns the index of the entry equal to the normalised form of `str`, or
+// (uint64_t)-1 when there is none. The normalised copy is freed before
+// returning; it used to leak on every call.
+uint64_t ltableSearch(ltable *lt, char *str){
+    uint32_t l;
+    char *needle = normalizeStrLimit(str, &l, MAXPATH-1);
+    if(needle == NULL){
+        return (uint64_t)-1;
+    }
+    uint64_t found = (uint64_t)-1;
+    for(uint64_t i = 0; i < lt->size; ++i){
+        if(sameStr(needle, lt->table[i])){
+            found = i;
+            break;
+        }
+    }
+    free(needle);
+    return found;
+}
+
+// Allocates an htable with `size` hash slots, all initialised to zero.
+// Returns NULL if the request is too large or memory cannot be obtained.
+// (The old `size < 0` clamp compared an unsigned value and never fired.)
+htable *newHtable(uint64_t size){
+    if(size > SIZE_MAX / sizeof(uint64_t)){
+        return NULL;
+    }
+    htable *ht = malloc(sizeof(htable));
+    if(ht == NULL){
+        return NULL;
+    }
+    ht->size = size;
+    ht->table = calloc((size_t)size, sizeof(uint64_t));
+    // calloc(0, ...) may legitimately return NULL, so only fail when size > 0
+    if(ht->table == NULL && size != 0){
+        free(ht);
+        return NULL;
+    }
+    return ht;
+}
+
+// Reads an htable written by storeHtable(): 'H', a 64-bit entry count, that
+// many 64-bit hashes, then 'E'. If the stream ends early the table is
+// truncated to the hashes read so far. Marker mismatches are only
+// reported, as before.
+// Returns NULL only if the table itself cannot be allocated.
+htable *loadHtable(FILE *fp){
+    char header = 0;
+    if(fread(&header, sizeof(char), 1, fp) != 1 || header != 'H'){
+        printf("header is %c not H\n", header);
+    }
+    uint64_t size = 0;
+    if(fread(&size, sizeof(uint64_t), 1, fp) != 1){
+        size = 0;
+    }
+    htable *ht = newHtable(size);
+    if(ht == NULL){
+        return NULL;
+    }
+    if(ht->table == NULL){
+        ht->size = 0;
+    }
+    for(uint64_t i = 0; i < ht->size; ++i){
+        if(fread(&ht->table[i], sizeof(uint64_t), 1, fp) != 1){
+            ht->size = i;  // keep only the hashes that were actually read
+            break;
+        }
+    }
+    char end = 0;
+    if(fread(&end, sizeof(char), 1, fp) != 1 || end != 'E'){
+        printf("end is %c not E\n", end);
+    }
+    return ht;
+}
+
+// Writes `ht` to `fp`: 'H', the 64-bit entry count, each 64-bit hash, 'E'.
+// Returns 0 on success, -1 as soon as a write comes up short.
+int storeHtable(const htable *ht, FILE *fp){
+    const char header = 'H';
+    const char end = 'E';
+    if(fwrite(&header, sizeof(char), 1, fp) != 1 ||
+       fwrite(&ht->size, sizeof(uint64_t), 1, fp) != 1){
+        return -1;
+    }
+    for(uint64_t i = 0; i < ht->size; ++i){
+        if(fwrite(&ht->table[i], sizeof(uint64_t), 1, fp) != 1){
+            return -1;
+        }
+    }
+    return (fwrite(&end, sizeof(char), 1, fp) == 1) ? 0 : -1;
+}
+
+// Appends hash `h` to `ht` unless it is already present. The table is grown
+// in place; the previous version leaked the old table and a scratch copy on
+// every call, including the duplicate path.
+// Returns 0 when added, -1 on duplicate or allocation failure (table
+// unchanged).
+int htableAdd(htable *ht, uint64_t h){
+    for(uint64_t i = 0; i < ht->size; ++i){
+        if(h == ht->table[i]){
+            return -1;
+        }
+    }
+    // Assign realloc's result to a temporary so the table survives a failure
+    uint64_t *grown = realloc(ht->table, (ht->size+1)*sizeof(uint64_t));
+    if(grown == NULL){
+        return -1;
+    }
+    grown[ht->size] = h;
+    ht->table = grown;
+    ht->size++;
+    return 0;
+}
+
+// Linear scan for hash `h`; yields its index, or -1 converted to uint64_t
+// when the hash is not stored.
+uint64_t htableSearch(htable *ht, uint64_t h){
+    uint64_t idx = 0;
+    while(idx < ht->size && ht->table[idx] != h){
+        ++idx;
+    }
+    if(idx < ht->size){
+        return idx;
+    }
+    return -1;
+}
+
+// Allocates an mtable with `size` relation slots, all zero-initialised.
+// Returns NULL if the request is too large or memory cannot be obtained.
+// (The old `size < 0` clamp compared an unsigned value and never fired.)
+mtable *newMtable(uint64_t size){
+    if(size > SIZE_MAX / sizeof(relation)){
+        return NULL;
+    }
+    mtable *mt = malloc(sizeof(mtable));
+    if(mt == NULL){
+        return NULL;
+    }
+    mt->size = size;
+    mt->table = calloc((size_t)size, sizeof(relation));
+    // calloc(0, ...) may legitimately return NULL, so only fail when size > 0
+    if(mt->table == NULL && size != 0){
+        free(mt);
+        return NULL;
+    }
+    return mt;
+}
+
+// Reads an mtable written by storeMtable(): 'M', a 64-bit entry count, then
+// per entry the 64-bit file index and 64-bit tag index, then 'E'. If the
+// stream ends early the table is truncated to the relations read so far.
+// Marker mismatches are only reported, as before.
+// Returns NULL only if the table itself cannot be allocated.
+mtable *loadMtable(FILE *fp){
+    char header = 0;
+    if(fread(&header, sizeof(char), 1, fp) != 1 || header != 'M'){
+        printf("header is %c not M\n", header);
+    }
+    uint64_t size = 0;
+    if(fread(&size, sizeof(uint64_t), 1, fp) != 1){
+        size = 0;
+    }
+    mtable *mt = newMtable(size);
+    if(mt == NULL){
+        return NULL;
+    }
+    if(mt->table == NULL){
+        mt->size = 0;
+    }
+    for(uint64_t i = 0; i < mt->size; ++i){
+        if(fread(&mt->table[i].file, sizeof(uint64_t), 1, fp) != 1 ||
+           fread(&mt->table[i].tag, sizeof(uint64_t), 1, fp) != 1){
+            mt->size = i;  // keep only the relations that loaded completely
+            break;
+        }
+    }
+    char end = 0;
+    if(fread(&end, sizeof(char), 1, fp) != 1 || end != 'E'){
+        printf("end is %c not E\n", end);
+    }
+    return mt;
+}
- return nr;
+// Writes `mt` to `fp`: 'M', the 64-bit entry count, then per relation the
+// 64-bit file index and 64-bit tag index, then 'E'.
+// Returns 0 on success, -1 as soon as a write comes up short.
+int storeMtable(const mtable *mt, FILE *fp){
+    const char header = 'M';
+    const char end = 'E';
+    if(fwrite(&header, sizeof(char), 1, fp) != 1 ||
+       fwrite(&mt->size, sizeof(uint64_t), 1, fp) != 1){
+        return -1;
+    }
+    for(uint64_t i = 0; i < mt->size; ++i){
+        if(fwrite(&mt->table[i].file, sizeof(uint64_t), 1, fp) != 1 ||
+           fwrite(&mt->table[i].tag, sizeof(uint64_t), 1, fp) != 1){
+            return -1;
+        }
+    }
+    return (fwrite(&end, sizeof(char), 1, fp) == 1) ? 0 : -1;
}
+// Appends relation `r` to `mt` unless the same file/tag pair is already
+// present. The table is grown in place; the previous version leaked the old
+// table and a scratch copy on every call, including the duplicate path.
+// Returns 0 when added, -1 on duplicate or allocation failure (table
+// unchanged).
+int mtableAdd(mtable *mt, relation r){
+    for(uint64_t i = 0; i < mt->size; ++i){
+        if(r.file == mt->table[i].file && r.tag == mt->table[i].tag){
+            return -1;
+        }
+    }
+    // Assign realloc's result to a temporary so the table survives a failure
+    relation *grown = realloc(mt->table, (mt->size+1)*sizeof(relation));
+    if(grown == NULL){
+        return -1;
+    }
+    grown[mt->size] = r;
+    mt->table = grown;
+    mt->size++;
+    return 0;
+}
+
+// Linear scan for the file/tag pair `r`; yields its index, or -1 converted
+// to uint64_t when the pair is not stored.
+uint64_t mtableSearch(mtable *mt, relation r){
+    uint64_t idx = 0;
+    while(idx < mt->size){
+        const int sameFile = (mt->table[idx].file == r.file);
+        const int sameTag = (mt->table[idx].tag == r.tag);
+        if(sameFile && sameTag){
+            return idx;
+        }
+        ++idx;
+    }
+    return -1;
+}
+
+
+
+
+/*
+// TODO: remove old impl
+
// Splits src into words based on a separator character (sep) and stores them in arr,
// and the length in len. Inspired by https://github.com/joshdk/tag/blob/master/src/dsv.c's split
-static void split(const char *src, char sep, char ***arr, uint16_t *len){
+static void split(const char *src, char sep, char ***arr, uint32_t *len){
int slen = 0, ai = 0, wnum = 0, wlen = 0;
while(src[slen] != '\0'){
@@ -48,26 +264,12 @@ static void swapWords(char ***arr, int a, int b){
(*arr)[b] = tmp;
}
-static char *normalizeTag(char *tag, uint16_t *ln){
- uint16_t l = len(tag);
- char *ntag = calloc(l+1, sizeof(char));
- for(int i = 0; i < l; ++i){
- ntag[i] = tolower(tag[i]);
- if(i == l-1 && tag[i] == ' '){
- ntag[i] = '\0';
- --l;
- }
- }
- *ln = l;
- return ntag;
-}
-
// Adds a tag in the tags array in the row r, sorted by natural string
// comparison with strnatcmp. We assume that when adding a tag all other
// tags are already sorted. Nothing is done if the tag is already in the tags
void insertTag(row *r, char *tag){
- uint16_t l, ltag;
- tag = normalizeTag(tag, &ltag);
+ uint32_t l, ltag;
+ tag = normalizeStr(tag, &ltag);
if(ltag == 0){
return;
@@ -130,8 +332,8 @@ void insertTag(row *r, char *tag){
// Remove a tag from the tags array in the row r
// Nothing is done if the tag isnt in the tags
void removeTag(row *r, char *tag){
- uint16_t l, ltag;
- tag = normalizeTag(tag, &ltag);
+ uint32_t l, ltag;
+ tag = normalizeStr(tag, &ltag);
if(ltag == 0){
return;
@@ -166,4 +368,5 @@ void removeTag(row *r, char *tag){
r->tags[tagnum] = '\0';
r->numTags = l;
r->lenTags = tagnum;
-} \ No newline at end of file
+}
+*/
diff --git a/src/str.c b/src/str.c
index ef2edad..d896868 100644
--- a/src/str.c
+++ b/src/str.c
@@ -1,24 +1,54 @@
#include "db.h"
-uint16_t len(const char *s){
- uint16_t l = -1;
+uint32_t len(const char *s){
+ uint32_t l = -1;
while(s[++l]);
return l;
}
bool sameStr(const char *s1, const char *s2){
- uint16_t i1 = 0, i2 = 0;
+ uint32_t i1 = 0, i2 = 0;
while(s1[i1] && s1[i1] == s2[i2])
++i1, ++i2;
return !s1[i1] && !s2[i2];
}
+// Returns a newly allocated, lowercased copy of `str` with trailing
+// whitespace removed, and stores the copy's length in `*l`.
+// An empty or all-whitespace input yields "" with `*l == 0`; the old loop
+// decremented the index past zero in that case and read far out of bounds.
+// Returns NULL if the allocation fails. The caller frees the result.
+char *normalizeStr(const char *str, uint32_t *l){
+    uint32_t n = len(str);
+    // <ctype.h> functions need an unsigned char value; plain char may be negative
+    while(n > 0 && isspace((unsigned char)str[n-1])){
+        --n;
+    }
+    *l = n;
+    char *nstr = calloc((size_t)n + 1, sizeof(char));
+    if(nstr == NULL){
+        return NULL;
+    }
+    for(uint32_t i = 0; i < n; ++i){
+        nstr[i] = (char)tolower((unsigned char)str[i]);
+    }
+    return nstr;
+}
+
+// Same as normalizeStr, but only the first `limit` characters of `str` are
+// considered: the input is cut to `limit` characters first, then trailing
+// whitespace is trimmed, so the result holds at most `limit` characters
+// plus the terminating NUL. `*l` receives the result's length.
+// An empty or all-whitespace (after the cut) input yields "" with `*l == 0`;
+// the old loop decremented the index past zero there and read out of bounds.
+// Returns NULL if `limit` is 0 or the allocation fails. The caller frees
+// the result.
+char *normalizeStrLimit(const char *str, uint32_t *l, uint32_t limit){
+    if(limit == 0){
+        return NULL;
+    }
+    uint32_t n = len(str);
+    if(n > limit){
+        n = limit;
+    }
+    // <ctype.h> functions need an unsigned char value; plain char may be negative
+    while(n > 0 && isspace((unsigned char)str[n-1])){
+        --n;
+    }
+    *l = n;
+    char *nstr = calloc((size_t)n + 1, sizeof(char));
+    if(nstr == NULL){
+        return NULL;
+    }
+    for(uint32_t i = 0; i < n; ++i){
+        nstr[i] = (char)tolower((unsigned char)str[i]);
+    }
+    return nstr;
+}
+
// Auxiliary function for creating a lookup table of the haystack
// table[i] will be the number of shifts right until the next
// separator when checking position i
// Only really useful for this implementation of tags
-static int *table(const char *y, int n, char sep){
+static int *toTable(const char *y, int n, char sep){
int *tb = calloc(n, sizeof(int));
if(tb == NULL){
fprintf(stderr, "Error callocating array (table)");
@@ -41,7 +71,7 @@ static int *table(const char *y, int n, char sep){
// A return of 0 means ndl occurs in tags starting in position 0
// Use 'if(strInTags(...) != -1)' when using this function
ssize_t strInTags(const char *tags, int n, const char *ndl, int m, char sep){
- int *tb = table(tags, n, sep);
+ int *tb = toTable(tags, n, sep);
for(int i = 0; i < n; ){
int j = 0;