diff options
| author | Soikk | 2022-07-23 01:46:24 +0200 |
|---|---|---|
| committer | Soikk | 2022-07-23 01:46:24 +0200 |
| commit | 28578b192d0828a9820983b5624b9bcc3577cd18 (patch) | |
| tree | 2f30b1730f30a7eeee80995ee3984c10f5bdc2ff /src | |
| parent | 377dc104be127291ede5b32640c23eea0ba6791a (diff) | |
| download | soikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.xz soikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.zst | |
Improved the database storage system. Added persistency.
Diffstat (limited to 'src')
| -rw-r--r-- | src/bm.c | 6 | ||||
| -rw-r--r-- | src/crc64.c | 191 | ||||
| -rw-r--r-- | src/database.c | 179 | ||||
| -rw-r--r-- | src/main.c | 40 | ||||
| -rw-r--r-- | src/storage.c | 255 | ||||
| -rw-r--r-- | src/str.c | 40 |
6 files changed, 670 insertions, 41 deletions
@@ -32,9 +32,9 @@ ssize_t BM(char *x, int m, char *y, int n){ while(j < n){ k = bmBc[y[j + m -1]]; while(k != 0){ - j += k; k = bmBc[y[j + m -1]]; - //j += k; k = bmBc[y[j + m -1]]; - //j += k; k = bmBc[y[j + m -1]]; + j += k; k = bmBc[y[j + m - 1]]; + j += k; k = bmBc[y[j + m - 1]]; + j += k; k = bmBc[y[j + m - 1]]; } if(memcmp(x, y + j, m) == 0 && j < n) return j; diff --git a/src/crc64.c b/src/crc64.c new file mode 100644 index 0000000..63d9035 --- /dev/null +++ b/src/crc64.c @@ -0,0 +1,191 @@ +/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0. + * + * Specification of this CRC64 variant follows: + * Name: crc-64-jones + * Width: 64 bites + * Poly: 0xad93d23594c935a9 + * Reflected In: True + * Xor_In: 0xffffffffffffffff + * Reflected_Out: True + * Xor_Out: 0x0 + * Check("123456789"): 0xe9c6d914c4b8d9ca + * + * Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ + +#include <stdint.h> + +static const uint64_t crc64_tab[256] = { + UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979), + UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b), + UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6), + UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04), + UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c), + UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe), + UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183), + UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371), + UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8), + UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a), + UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077), + UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285), + UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d), + UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f), + UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02), + UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0), + UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b), + UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489), + UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4), + UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206), + UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e), + UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc), + 
UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81), + UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73), + UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa), + UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08), + UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75), + UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87), + UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f), + UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d), + UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100), + UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2), + UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416), + UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4), + UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299), + UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b), + UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63), + UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891), + UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec), + UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e), + UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97), + UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965), + UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18), + UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea), + UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2), + UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710), + UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d), + UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f), + UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14), + UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6), + UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b), + UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69), + UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561), + UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793), + 
UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee), + UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c), + UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495), + UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667), + UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a), + UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8), + UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0), + UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812), + UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f), + UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d), + UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc), + UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e), + UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643), + UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1), + UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9), + UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b), + UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836), + UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4), + UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d), + UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf), + UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2), + UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30), + UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138), + UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca), + UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7), + UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545), + UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce), + UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c), + UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941), + UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3), + UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb), + UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349), + 
UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734), + UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6), + UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f), + UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd), + UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0), + UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432), + UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a), + UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8), + UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5), + UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47), + UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3), + UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51), + UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c), + UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de), + UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6), + UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124), + UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559), + UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab), + UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222), + UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0), + UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad), + UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f), + UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57), + UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5), + UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8), + UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a), + UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1), + UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053), + UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e), + UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc), + UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4), + UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26), + 
UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b), + UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9), + UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20), + UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2), + UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf), + UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d), + UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355), + UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7), + UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da), + UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728), +}; + +uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) { + uint64_t j; + + for (j = 0; j < l; j++) { + uint8_t byte = s[j]; + crc = crc64_tab[(uint8_t)crc ^ byte] ^ (crc >> 8); + } + return crc; +} + +/* Test main */ +#ifdef TEST_MAIN +#include <stdio.h> +int main(void) { + printf("e9c6d914c4b8d9ca == %016llx\n", + (unsigned long long) crc64(0,(unsigned char*)"123456789",9)); + return 0; +} +#endif
\ No newline at end of file diff --git a/src/database.c b/src/database.c new file mode 100644 index 0000000..521b0a1 --- /dev/null +++ b/src/database.c @@ -0,0 +1,179 @@ +#include "db.h" + + +database *newDatabase(char *name){ + database *db = malloc(sizeof(database)); + memcpy(db->name, name, len(name)+1); + db->lfiles = newLtable(0); + db->ltags = newLtable(0); + db->hfiles = newHtable(0); + db->htags = newHtable(0); + db->map = newMtable(0); + return db; +} + +database *loadDatabase(const char* path){ + FILE *fp = fopen(path, "rb"); + char *header = calloc(2, sizeof(char)); + fread(header, sizeof(char), 2, fp); + if(!sameStr(header, "DB")){ + printf("header is %c%c and not DB\n", header[0], header[1]); + } + char name[32]; + fread(&name, sizeof(char), 32, fp); + database *db = newDatabase(name); + db->lfiles = loadLtable(fp); + db->ltags = loadLtable(fp); + db->hfiles = loadHtable(fp); + db->htags = loadHtable(fp); + db->map = loadMtable(fp); + char end[4]; + fread(&end, sizeof(char), 3, fp); + if(!sameStr(end, "END")){ + printf("end is %s and not END\n", end); + } + fclose(fp); + return db; +} + +int storeDatabase(database *db, const char *path){ + FILE *fp = fopen(path, "wb"); + + char header[2] = "DB"; + fwrite(header, sizeof(char), 2, fp); + fwrite(db->name, sizeof(char), 32, fp); + + storeLtable(db->lfiles, fp); + storeLtable(db->ltags, fp); + storeHtable(db->hfiles, fp); + storeHtable(db->htags, fp); + storeMtable(db->map, fp); + + char end[3] = "END"; + fwrite(end, sizeof(char), 3, fp); + + fclose(fp); +} + +static int addRelation(database *db, relation r){ + if(mtableSearch(db->map, r) != -1){ + return -1; + } + mtableAdd(db->map, r); + + return 0; +} + +int addFileTag(database *db, char *file, char *tag){ + uint32_t lf, lt; + file = normalizeStrLimit(file, &lf, MAXPATH-1); + tag = normalizeStrLimit(tag, &lt, MAXPATH-1); + uint64_t hf = crc64(0, file, lf), ht = crc64(0, tag, lt); + uint64_t fi = htableSearch(db->hfiles, hf), ti = htableSearch(db->htags, 
ht); + + if(fi == -1){ + ltableAdd(db->lfiles, file); + htableAdd(db->hfiles, hf); + fi = db->hfiles->size-1; + } + if(ti == -1){ + ltableAdd(db->ltags, tag); + htableAdd(db->htags, ht); + ti = db->htags->size-1; + } + + addRelation(db, (relation){.file = fi, .tag = ti}); + return 0; +} + +int addFileTags(database *db, char *file, int ntags, ...){ + uint32_t lf; + file = normalizeStrLimit(file, &lf, MAXPATH-1); + uint64_t hf = crc64(0, file, lf); + uint64_t fi = htableSearch(db->hfiles, hf); + + if(fi == -1){ + ltableAdd(db->lfiles, file); + htableAdd(db->hfiles, hf); + fi = db->hfiles->size-1; + } + + va_list tags; + va_start(tags, ntags); + for(uint64_t i = 0; i < ntags; ++i){ + char *tag = va_arg(tags, char*); + uint32_t lt; + tag = normalizeStrLimit(tag, &lt, MAXPATH-1); + uint64_t ht = crc64(0, tag, lt); + uint64_t ti = htableSearch(db->htags, ht); + + if(ti == -1){ + ltableAdd(db->ltags, tag); + htableAdd(db->htags, ht); + ti = db->htags->size-1; + } + + addRelation(db, (relation){.file = fi, .tag = ti}); + } + va_end(tags); + + return 0; +} + +// Should return a list with the indexes of the files that have this tag +int searchTag(database *db, char *tag, uint64_t *rl){ + uint32_t l; + tag = normalizeStrLimit(tag, &l, MAXPATH-1); + uint64_t h = crc64(0, tag, l); + uint64_t ti = htableSearch(db->htags, h); + // TODO: error checking + + uint64_t c = 0; + for(uint64_t i = 0; i < db->map->size; ++i){ + if(db->map->table[i].tag == ti){ + ++c; + } + } + uint64_t *r = malloc(c*sizeof(uint64_t)); + c = 0; + for(uint64_t i = 0; i < db->map->size; ++i){ + if(db->map->table[i].tag == ti){ + r[c++] = db->map->table[i].file; + } + } + rl = r; + return 0; +} + +void printDatabase(database *db){ + for(uint64_t i = 0; i < db->map->size; ++i){ + printf("%s -> %s\n", db->lfiles->table[db->map->table[i].file], db->ltags->table[db->map->table[i].tag]); + } + printf("\n"); +} + +void debugDatabase(database *db){ + printf("\n"); + printf("Name: %s\n", db->name); + printf("\t-lfiles: 
%d\n", db->lfiles->size); + for(uint64_t i = 0; i < db->lfiles->size; ++i){ + printf("\t\t+%s\n", db->lfiles->table[i]); + } + printf("\t-ltags: %d\n", db->ltags->size); + for(uint64_t i = 0; i < db->ltags->size; ++i){ + printf("\t\t+%s\n", db->ltags->table[i]); + } + printf("\t-hfiles: %d\n", db->hfiles->size); + for(uint64_t i = 0; i < db->hfiles->size; ++i){ + printf("\t\t+%" PRIu64 "\n", db->hfiles->table[i]); + } + printf("\t-htags: %d\n", db->htags->size); + for(uint64_t i = 0; i < db->htags->size; ++i){ + printf("\t\t+%" PRIu64 "\n", db->htags->table[i]); + } + printf("\t-map: %d\n", db->map->size); + for(uint64_t i = 0; i < db->map->size; ++i){ + printf("\t\t+%" PRIu64 ":%" PRIu64 "\n", db->map->table[i].file, db->map->table[i].tag); + } + printf("\n"); +} @@ -1,31 +1,57 @@ #include "db.h" +#include <time.h> +void nothing(FILE *fp){ + uint64_t a = 0xDEADBEEF; + fwrite(&a, sizeof(uint64_t), 1, fp); +} + + +void print(FILE *fp){ + while(!feof(fp)){ + uint8_t a; + fread(&a, sizeof(uint8_t), 1, fp); + printf("%x ", a); + } +} + int main(){ - inputBuffer *in = newInputBuffer(); + + - row *r = newRow("~/test/img.png"); + printf("%016llu\n", (uint64_t) crc64(0, (unsigned char*)"cacadevaca", 10)); + + inputBuffer *in = newInputBuffer(); + char *str = 
"grandmother;football;capital;concerned;entire;realize;garden;refused;proud;tune;rhyme;other;writer;command;fresh;fence;rapidly;active;cover;repeat;determine;yard;cannot;animal;pure;rich;mirror;frozen;vast;coach;brass;activity;bottom;airplane;local;tone;attack;though;between;value;collect;mission;tower;brought;original;history;reason;minute;would;hung;strange;children;offer;blue;wrapped;magnet;color;cage;easily;percent;lower;verb;hundred;larger;away;was;certain;western;yes;lack;wish;same;spend;arrive;fog;heard;bill;effort;steam;wolf;indicate;suppose;because;life;down;seat;age;earn;under;cell;floating;although;spent;folks;swing;hello;cent;swung;pen;happened;slip;pupil;smell;fix;piano;closer;idea;trunk;model;school;particularly;he;coast;describe;such;join;been;hard;three;around;tube;soldier;baby;mouse;note;sort;house;gasoline;organized;eat;sat;crowd;alive;spoken;wide;square;luck;tales;angry;having;wear;frog;outer;nice;regular;year;clothing;check;throughout;farmer;dug;dark;exercise;table;your;form;should;personal;use;road;bright;walk;fairly;affect;but;night;close;job;front;fight;beside;ocean;herd;pass;hardly;widely;prepare;nails;paid;lucky;design;grandfather;aid;heavy;truck;sleep;difficult;log;keep;government;headed;mother;sad;bread;voyage;when;happy;making;whistle;plural;guard;therefore;continent;roof;money;pan;unusual;region;special;generally;plate;visit;look;lost;sick;wonderful;farther;put;characteristic;gravity;trap;system;twice;taste;knew;mad;smallest;automobile;return;huge;underline;danger;news;electric;information;breeze;thread;equally;five;new;average;former;wild;spend;cabin;recognize;nearest;circle;such;found;pass;whistle;slave;event;knowledge;fear;friend;am;browserling;cry;length;thy;create;busy;office;earth;blind;smallest;birthday;putting;classroom;pen;southern;summer;put;open;solution;spread;equator;else;kitchen;determine;strong;change;world;pocket;claws;earn;excellent;drove;donkey;rush;band;energy;fighting;hurt;ordinary;native;visitor;give;storm;pressure;i
magine;street;engine;worth;hospital;attached;subject;perhaps;hospital;living;waste;dark;natural;change;enter;girl;motor;element;experiment;physical;value;excited;fort;layers;buy;minerals;satisfied;next;spirit;unhappy;storm;angry;science;desk;develop;behind;afraid;act;else;prepare;given;raw;affect;husband;ring;older;brought;book;cow;lake;sides;ago;fill;successful;real;aside;taught;mind;straight;date;very;chart;slabs;thin;saddle;full;sort;heard;surprise;fox;cool;dish;alphabet;early;spring;nest;sometime;date;light;break;lion;difference;rhyme;might;step;teach;potatoes;young;nine;liquid;how;lunch;heavy;mass;being;save;cutting;negative;swimming;cutting;journey;army;none;worry;leave;explore;baseball;fight;road;exact;hay;voyage;sheet;test;right;examine;agree;heart;pig;cannot;tool;hill;changing;bee;find;together;lay;tie;lost;continued;then;came;rhyme;mirror;town;substance;both;up;quite;push;shake;solid;result;you;ought;chicken;waste;freedom;why;somehow;not;complete;sick;struggle;military;pure;top;south;step;education;could;between;familiar;recognize;rich;tool;material;were;chicken;stopped;stay;policeman;round;firm"; - int l = len(str); - printf("len: %d\n", l); - //measure these two - int a = strInTags(str, l, "percent", 7, ';'); - //int a = BM("percent", 7, str, l); + /*database *db = newDatabase("miDB"); + + addFileTag(db, "vaca.png", "naturaleza"); + addFileTags(db, "donald-tromp.jpg", 3, "based", "hitler", "very cool"); + + storeDatabase(db, "db.db"); + */ + database *db = loadDatabase("db.db"); + printDatabase(db); + debugDatabase(db); while(0){ prompt(); getInput(in); + /* insertTag(r, in->buffer); printf("Tags of row '%s': %s\n", r->path, r->tags); printf("Number of tags: %u. 
Length of tags: %u\n", r->numTags, r->lenTags); + */ /*switch(handleInput(in)){ case META_COMMAND_SUCCESS: diff --git a/src/storage.c b/src/storage.c index 868e0b2..930f636 100644 --- a/src/storage.c +++ b/src/storage.c @@ -1,18 +1,234 @@ #include "db.h" -row *newRow(const char path[MAXPATH]){ - row *nr = malloc(sizeof(row)); - memcpy(nr->path, path, len(path)); - nr->numTags = 0; - nr->lenTags = 0; +ltable *newLtable(uint64_t size){ + ltable *lt = malloc(sizeof(ltable)); + size = (((uint64_t)size) < 0) ? 0 : size; + lt->size = size; + lt->table = malloc(size*sizeof(char*)); + return lt; +} + +ltable *loadLtable(FILE *fp){ + char header; + fread(&header, sizeof(char), 1, fp); + if(header != 'L'){ + printf("header is %c not L\n", header); + } + uint64_t size; + fread(&size, sizeof(uint64_t), 1, fp); + ltable *lt = newLtable(size); + for(uint64_t i = 0; i < lt->size; ++i){ + uint32_t sl; + fread(&sl, sizeof(uint32_t), 1, fp); + lt->table[i] = malloc(sl*sizeof(char)); + fread(lt->table[i], sizeof(char), sl, fp); + } + char end; + fread(&end, sizeof(char), 1, fp); + if(end != 'E'){ + printf("end is %c not E\n", end); + } + return lt; +} + +int storeLtable(const ltable *lt, FILE *fp){ + char header = 'L'; + fwrite(&header, sizeof(char), 1, fp); + fwrite(&lt->size, sizeof(uint64_t), 1, fp); + for(uint64_t i = 0; i < lt->size; ++i){ + uint32_t l = len(lt->table[i]) + 1; + fwrite(&l, sizeof(uint32_t), 1, fp); + fwrite(lt->table[i], sizeof(char), l, fp); + } + char end = 'E'; + fwrite(&end, sizeof(char), 1, fp); + return 0; +} + +int ltableAdd(ltable *lt, char *str){ + uint32_t ls; + str = normalizeStrLimit(str, &ls, MAXPATH-1); + + char **nlt = malloc((lt->size+1)*sizeof(char*)); + for(uint64_t i = 0; i < lt->size; ++i){ + if(sameStr(str, lt->table[i])){ + return -1; + } + uint32_t l = len(lt->table[i]); + nlt[i] = malloc((l+1)*sizeof(char)); + memcpy(nlt[i], lt->table[i], l+1); + } + nlt[lt->size] = malloc((ls+1)*sizeof(char)); + memcpy(nlt[lt->size], str, ls+1); + + 
lt->size++; + lt->table = malloc(lt->size*sizeof(char*)); + for(uint64_t i = 0; i < lt->size; ++i){ + uint32_t l = len(nlt[i]); + lt->table[i] = malloc((l+1)*sizeof(char)); + memcpy(lt->table[i], nlt[i], l+1); + } + return 0; +} + +uint64_t ltableSearch(ltable *lt, char *str){ + uint32_t l; + str = normalizeStrLimit(str, &l, MAXPATH-1); + + for(uint64_t i = 0; i < lt->size; ++i){ + if(sameStr(str, lt->table[i])){ + return i; + } + } + return -1; +} + +htable *newHtable(uint64_t size){ + htable *ht = malloc(sizeof(htable)); + size = (((uint64_t)size) < 0) ? 0 : size; + ht->size = size; + ht->table = malloc(size*sizeof(uint64_t)); + return ht; +} + +htable *loadHtable(FILE *fp){ + char header; + fread(&header, sizeof(char), 1, fp); + if(header != 'H'){ + printf("header is %c not H\n", header); + } + uint64_t size; + fread(&size, sizeof(uint64_t), 1, fp); + htable *ht = newHtable(size); + for(uint64_t i = 0; i < ht->size; ++i){ + fread(&ht->table[i], sizeof(uint64_t), 1, fp); + } + char end; + fread(&end, sizeof(char), 1, fp); + if(end != 'E'){ + printf("end is %c not E\n", end); + } + return ht; +} + +int storeHtable(const htable *ht, FILE *fp){ + char header = 'H'; + fwrite(&header, sizeof(char), 1, fp); + fwrite(&ht->size, sizeof(uint64_t), 1, fp); + for(uint64_t i = 0; i < ht->size; ++i){ + fwrite(&ht->table[i], sizeof(uint64_t), 1, fp); + } + char end = 'E'; + fwrite(&end, sizeof(char), 1, fp); + return 0; +} + +int htableAdd(htable *ht, uint64_t h){ + uint64_t *nht = malloc((ht->size+1)*sizeof(uint64_t)); + for(uint64_t i = 0; i < ht->size; ++i){ + if(h == ht->table[i]){ + return -1; + } + nht[i] = ht->table[i]; + } + nht[ht->size] = h; + + ht->size++; + ht->table = malloc(ht->size*sizeof(uint64_t)); + for(uint64_t i = 0; i < ht->size; ++i){ + ht->table[i] = nht[i]; + } + return 0; +} + +uint64_t htableSearch(htable *ht, uint64_t h){ + for(uint64_t i = 0; i < ht->size; ++i){ + if(h == ht->table[i]){ + return i; + } + } + return -1; +} + +mtable 
*newMtable(uint64_t size){ + mtable *mt = malloc(sizeof(mtable)); + size = (((uint64_t)size) < 0) ? 0 : size; + mt->size = size; + mt->table = malloc(size*sizeof(relation)); + return mt; +} + +mtable *loadMtable(FILE *fp){ + char header; + fread(&header, sizeof(char), 1, fp); + if(header != 'M'){ + printf("header is %c not M\n", header); + } + uint64_t size; + fread(&size, sizeof(uint64_t), 1, fp); + mtable *mt = newMtable(size); + for(uint64_t i = 0; i < mt->size; ++i){ + fread(&mt->table[i].file, sizeof(uint64_t), 1, fp); + fread(&mt->table[i].tag, sizeof(uint64_t), 1, fp); + } + char end; + fread(&end, sizeof(char), 1, fp); + if(end != 'E'){ + printf("end is %c not E\n", end); + } + return mt; +} - return nr; +int storeMtable(const mtable *mt, FILE *fp){ + char header = 'M'; + fwrite(&header, sizeof(char), 1, fp); + fwrite(&mt->size, sizeof(uint64_t), 1, fp); + for(uint64_t i = 0; i < mt->size; ++i){ + fwrite(&mt->table[i].file, sizeof(uint64_t), 1, fp); + fwrite(&mt->table[i].tag, sizeof(uint64_t), 1, fp); + } + char end = 'E'; + fwrite(&end, sizeof(char), 1, fp); + return 0; } +int mtableAdd(mtable *mt, relation r){ + relation *nmt = malloc((mt->size+1)*sizeof(relation)); + for(uint64_t i = 0; i < mt->size; ++i){ + if(r.file == mt->table[i].file && r.tag == mt->table[i].tag){ + return -1; + } + nmt[i] = mt->table[i]; + } + nmt[mt->size] = r; + + mt->size++; + mt->table = malloc(mt->size*sizeof(relation)); + for(uint64_t i = 0; i < mt->size; ++i){ + mt->table[i] = nmt[i]; + } + return 0; +} + +uint64_t mtableSearch(mtable *mt, relation r){ + for(uint64_t i = 0; i < mt->size; ++i){ + if(r.file == mt->table[i].file && r.tag == mt->table[i].tag){ + return i; + } + } + return -1; +} + + + + +/* +// TODO: remove old impl + // Splits src into words based on a separator character (sep) and stores them in arr, // and the length in len. 
Inspired by https://github.com/joshdk/tag/blob/master/src/dsv.c's split -static void split(const char *src, char sep, char ***arr, uint16_t *len){ +static void split(const char *src, char sep, char ***arr, uint32_t *len){ int slen = 0, ai = 0, wnum = 0, wlen = 0; while(src[slen] != '\0'){ @@ -48,26 +264,12 @@ static void swapWords(char ***arr, int a, int b){ (*arr)[b] = tmp; } -static char *normalizeTag(char *tag, uint16_t *ln){ - uint16_t l = len(tag); - char *ntag = calloc(l+1, sizeof(char)); - for(int i = 0; i < l; ++i){ - ntag[i] = tolower(tag[i]); - if(i == l-1 && tag[i] == ' '){ - ntag[i] = '\0'; - --l; - } - } - *ln = l; - return ntag; -} - // Adds a tag in the tags array in the row r, sorted by natural string // comparison with strnatcmp. We assume that when adding a tag all other // tags are already sorted. Nothing is done if the tag is already in the tags void insertTag(row *r, char *tag){ - uint16_t l, ltag; - tag = normalizeTag(tag, &ltag); + uint32_t l, ltag; + tag = normalizeStr(tag, &ltag); if(ltag == 0){ return; @@ -130,8 +332,8 @@ void insertTag(row *r, char *tag){ // Remove a tag from the tags array in the row r // Nothing is done if the tag isnt in the tags void removeTag(row *r, char *tag){ - uint16_t l, ltag; - tag = normalizeTag(tag, &ltag); + uint32_t l, ltag; + tag = normalizeStr(tag, &ltag); if(ltag == 0){ return; @@ -166,4 +368,5 @@ void removeTag(row *r, char *tag){ r->tags[tagnum] = '\0'; r->numTags = l; r->lenTags = tagnum; -}
\ No newline at end of file +} +*/ @@ -1,24 +1,54 @@ #include "db.h" -uint16_t len(const char *s){ - uint16_t l = -1; +uint32_t len(const char *s){ + uint32_t l = -1; while(s[++l]); return l; } bool sameStr(const char *s1, const char *s2){ - uint16_t i1 = 0, i2 = 0; + uint32_t i1 = 0, i2 = 0; while(s1[i1] && s1[i1] == s2[i2]) ++i1, ++i2; return !s1[i1] && !s2[i2]; } +// Lowercases the whole string and removes trailing spaces +char *normalizeStr(const char *str, uint32_t *l){ + *l = len(str); + uint32_t trw = 0; + while(isspace(str[--(*l)])) + ++trw; + char *nstr = calloc(++(*l)+1, sizeof(char)); + for(int i = 0; i < *l; ++i) + nstr[i] = tolower(str[i]); + return nstr; +} + +// Same as normalizeStr but with a limit (str[limit] will be equal to '\0') +// If limit is 0, it will return NULL +// WARNING: It allocates limit+1 characters +char *normalizeStrLimit(const char *str, uint32_t *l, uint32_t limit){ + if(limit == 0){ + return NULL; + } + *l = len(str); + *l = (*l > limit) ? limit : *l; + uint32_t trw = 0; + while(isspace(str[--(*l)])) + ++trw; + char *nstr = calloc(++(*l)+1, sizeof(char)); + for(int i = 0; i < *l; ++i) + nstr[i] = tolower(str[i]); + return nstr; +} + // Auxiliary function for creating a lookup table of the haystack // table[i] will be the number of shifts right until the next // separator when checking position i // Only really useful for this implementation of tags -static int *table(const char *y, int n, char sep){ +static int *toTable(const char *y, int n, char sep){ int *tb = calloc(n, sizeof(int)); if(tb == NULL){ fprintf(stderr, "Error callocating array (table)"); @@ -41,7 +71,7 @@ static int *table(const char *y, int n, char sep){ // A return of 0 means ndl occurs in tags starting in position 0 // Use 'if(strInTags(...) != -1)' when using this function ssize_t strInTags(const char *tags, int n, const char *ndl, int m, char sep){ - int *tb = table(tags, n, sep); + int *tb = toTable(tags, n, sep); for(int i = 0; i < n; ){ int j = 0; |
