diff options
| author | Soikk | 2022-07-23 01:46:24 +0200 |
|---|---|---|
| committer | Soikk | 2022-07-23 01:46:24 +0200 |
| commit | 28578b192d0828a9820983b5624b9bcc3577cd18 (patch) | |
| tree | 2f30b1730f30a7eeee80995ee3984c10f5bdc2ff /hash-testing/main.c | |
| parent | 377dc104be127291ede5b32640c23eea0ba6791a (diff) | |
| download | soikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.xz soikk-DB-28578b192d0828a9820983b5624b9bcc3577cd18.tar.zst | |
Improved the database storage system. Added persistency.
Diffstat (limited to 'hash-testing/main.c')
| -rw-r--r-- | hash-testing/main.c | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/hash-testing/main.c b/hash-testing/main.c new file mode 100644 index 0000000..f95b391 --- /dev/null +++ b/hash-testing/main.c @@ -0,0 +1,362 @@ +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <ctype.h> +#include <time.h> + +uint32_t +murmurhash (const char *key, uint32_t len, uint32_t seed) { + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + uint32_t r1 = 15; + uint32_t r2 = 13; + uint32_t m = 5; + uint32_t n = 0xe6546b64; + uint32_t h = 0; + uint32_t k = 0; + uint8_t *d = (uint8_t *) key; // 32 bit extract from `key' + const uint32_t *chunks = NULL; + const uint8_t *tail = NULL; // tail - last 8 bytes + int i = 0; + int l = len / 4; // chunk length + + h = seed; + + chunks = (const uint32_t *) (d + l * 4); // body + tail = (const uint8_t *) (d + l * 4); // last 8 byte chunk of `key' + + // for each 4 byte chunk of `key' + for (i = -l; i != 0; ++i) { + // next 4 byte chunk of `key' + k = chunks[i]; + + // encode next 4 byte chunk of `key' + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + + // append to hash + h ^= k; + h = (h << r2) | (h >> (32 - r2)); + h = h * m + n; + } + + k = 0; + + // remainder + switch (len & 3) { // `len % 4' + case 3: k ^= (tail[2] << 16); + case 2: k ^= (tail[1] << 8); + + case 1: + k ^= tail[0]; + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + h ^= k; + } + + h ^= len; + + h ^= (h >> 16); + h *= 0x85ebca6b; + h ^= (h >> 13); + h *= 0xc2b2ae35; + h ^= (h >> 16); + + return h; +} +static const uint32_t crc32_table[] = +{ + 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, + 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, + 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, + 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, + 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, + 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, + 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, + 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, + 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, + 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, + 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, + 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, + 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, + 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, + 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, + 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, + 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, + 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, + 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, + 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, + 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, + 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, + 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, + 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, + 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, + 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, + 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, + 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, + 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, + 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, + 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, + 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, + 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, + 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, + 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, + 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, + 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, + 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, + 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 +}; + + +uint32_t +xcrc32 (const unsigned char *buf, int len, uint32_t init) +{ + uint32_t crc = init; + while (len--) + { + crc = (crc << 8) ^ crc32_table[((crc >> 24) ^ *buf) & 255]; + buf++; + } + return crc; +} + +static const uint64_t crc64_tab[256] = { + UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979), + UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b), + UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6), + UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04), + UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c), + UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe), + UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183), + UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371), + UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8), + UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a), + UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077), + UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285), + UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d), + UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f), + UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02), + UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0), + UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b), + UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489), + UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4), + UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206), + UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e), + UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc), + UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81), + UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73), + UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa), + UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08), + UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75), + UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87), + UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f), + UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d), + UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100), + UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2), + UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416), + UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4), + UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299), + UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b), + UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63), + UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891), + UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec), + UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e), + UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97), + UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965), + UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18), + UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea), + UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2), + UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710), + UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d), + UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f), + UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14), + UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6), + UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b), + UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69), + UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561), + UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793), + UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee), + UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c), + UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495), + UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667), + UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a), + UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8), + UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0), + UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812), + UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f), + UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d), + UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc), + UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e), + UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643), + UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1), + UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9), + UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b), + UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836), + UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4), + UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d), + UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf), + UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2), + UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30), + UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138), + UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca), + UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7), + UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545), + UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce), + UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c), + UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941), + UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3), + UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb), + UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349), + UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734), + UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6), + UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f), + UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd), + UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0), + UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432), + UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a), + UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8), + UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5), + UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47), + UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3), + UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51), + UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c), + UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de), + UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6), + UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124), + UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559), + UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab), + UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222), + UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0), + UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad), + UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f), + UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57), + UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5), + UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8), + UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a), + UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1), + UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053), + UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e), + UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc), + UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4), + UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26), + UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b), + UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9), + UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20), + UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2), + UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf), + UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d), + UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355), + UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7), + UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da), + UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728), +}; + +uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) { + uint64_t j; + + for (j = 0; j < l; j++) { + uint8_t byte = s[j]; + crc = crc64_tab[(uint8_t)crc ^ byte] ^ (crc >> 8); + } + return crc; +} + +uint16_t len(const char *s){ + uint16_t l = -1; + while(s[++l] != '\0'); + return l; +} + +char *normalizeStr(char *str, uint16_t *ln){ + uint16_t l = len(str); + char *nstr = malloc((l+1)*sizeof(char)); + for(int i = 0; i < l; ++i){ + if(isalpha(str[i])) + nstr[i] = tolower(str[i]); + else + nstr[i] = str[i]; + if(i == l-1 && isspace(str[i])){ + nstr[i] = '\0'; + --l; + } + } + *ln = l; + return nstr; +} + +int isInArr(uint64_t *arr, uint64_t hash){ + uint64_t i = 0; + while(arr[i] != 0){ + if(arr[i] == hash) + return 1; + ++i; + } + return 0; +} + + +#define MAXLINE 2048 +#define SEED 0x12345678 + +int main(){ + + // Read file + FILE* f = fopen("words.txt", "r"); + + uint64_t i = 0, collisions = 0; + uint64_t *arr = calloc(466551, sizeof(uint64_t)); + + + // Read file line by line, calculate hash + char line[MAXLINE]; + clock_t start, end; + double cpu_time_used; + start = clock(); + + while (fgets(line, sizeof(line), f)) { + uint16_t l; + char *nline = normalizeStr(line, &l); + uint64_t hash = crc64(0, nline, l); + if(isInArr(arr, hash)){ + ++collisions; + } + arr[i] = hash; + //printf("%s\t%08x\n", nline, hash); + ++i; + } + end = clock(); + cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; + printf("time: %f\n", cpu_time_used); + fclose(f); + printf("Collisions: %" PRIu64 "\n", collisions); + + return 0; +}
\ No newline at end of file |
