diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..cf208f4a228ca9062db1e1f1aa6dee227ed39e00 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "src/hashmap"] + path = src/hashmap + url = git@github.com:tidwall/hashmap.c.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8634ffc63d5f3ae63fe83265acf800dd750021e9..73dbda5c19bcf5ca45f42976791e6c6688cc90a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,12 +4,15 @@ cmake_minimum_required(VERSION 3.20) # Project name project(protocol-parsers C) -# Set project directories +## Set project directories link_directories($ENV{LD_LIBRARY_PATH}) set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include) +set(LIB_DIR ${PROJECT_SOURCE_DIR}/lib) set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) set(EXECUTABLE_OUTPUT_PATH ${BIN_DIR}) +# Hashmap directory +set(HASHMAP_DIR ${PROJECT_SOURCE_DIR}/src/hashmap) # Set compiler flags #add_compile_options(-Wall -Werror -Wno-unused-variable -DDEBUG) # Debug diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f066ccc22fc86928610a2182545687091a52af7c..b097323301d1b1d1b0d1768b4efd22053695b804 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.20) # hashmap -add_library(hashmap STATIC ${INCLUDE_DIR}/hashmap.h hashmap.c) -target_include_directories(hashmap PRIVATE ${INCLUDE_DIR}) +add_library(hashmap STATIC ${HASHMAP_DIR}/hashmap.h ${HASHMAP_DIR}/hashmap.c) +target_include_directories(hashmap PRIVATE ${HASHMAP_DIR}) install(TARGETS hashmap DESTINATION ${LIB_DIR}) # SHA256 diff --git a/src/hashmap b/src/hashmap new file mode 160000 index 0000000000000000000000000000000000000000..1c139923fe08f36143ecc0ba37cd674684f87f9c --- /dev/null +++ b/src/hashmap @@ -0,0 +1 @@ +Subproject commit 1c139923fe08f36143ecc0ba37cd674684f87f9c diff --git a/src/hashmap.c b/src/hashmap.c deleted file mode 100644 index 6633e35317a920c8cb468192584704889ebc6990..0000000000000000000000000000000000000000 --- a/src/hashmap.c +++ /dev/null @@ -1,980 +0,0 @@ -// Copyright 2020 Joshua J Baker. All rights reserved. -// Use of this source code is governed by an MIT-style -// license that can be found in the LICENSE file. - -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <stdint.h> -#include <stddef.h> -#include "hashmap.h" - -static void *(*_malloc)(size_t) = NULL; -static void *(*_realloc)(void *, size_t) = NULL; -static void (*_free)(void *) = NULL; - -// hashmap_set_allocator allows for configuring a custom allocator for -// all hashmap library operations. This function, if needed, should be called -// only once at startup and a prior to calling hashmap_new(). -void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*)) -{ - _malloc = malloc; - _free = free; -} - -#define panic(_msg_) { \ - fprintf(stderr, "panic: %s (%s:%d)\n", (_msg_), __FILE__, __LINE__); \ - exit(1); \ -} - -struct bucket { - uint64_t hash:48; - uint64_t dib:16; -}; - -// hashmap is an open addressed hash map using robinhood hashing. -struct hashmap { - void *(*malloc)(size_t); - void *(*realloc)(void *, size_t); - void (*free)(void *); - bool oom; - size_t elsize; - size_t cap; - uint64_t seed0; - uint64_t seed1; - uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1); - int (*compare)(const void *a, const void *b, void *udata); - void (*elfree)(void *item); - void *udata; - size_t bucketsz; - size_t nbuckets; - size_t count; - size_t mask; - size_t growat; - size_t shrinkat; - void *buckets; - void *spare; - void *edata; -}; - -static struct bucket *bucket_at(struct hashmap *map, size_t index) { - return (struct bucket*)(((char*)map->buckets)+(map->bucketsz*index)); -} - -static void *bucket_item(struct bucket *entry) { - return ((char*)entry)+sizeof(struct bucket); -} - -static uint64_t get_hash(struct hashmap *map, const void *key) { - return map->hash(key, map->seed0, map->seed1) << 16 >> 16; -} - -// hashmap_new_with_allocator returns a new hash map using a custom allocator. -// See hashmap_new for more information information -struct hashmap *hashmap_new_with_allocator( - void *(*_malloc)(size_t), - void *(*_realloc)(void*, size_t), - void (*_free)(void*), - size_t elsize, size_t cap, - uint64_t seed0, uint64_t seed1, - uint64_t (*hash)(const void *item, - uint64_t seed0, uint64_t seed1), - int (*compare)(const void *a, const void *b, - void *udata), - void (*elfree)(void *item), - void *udata) -{ - _malloc = _malloc ? _malloc : malloc; - _realloc = _realloc ? _realloc : realloc; - _free = _free ? _free : free; - int ncap = 16; - if (cap < ncap) { - cap = ncap; - } else { - while (ncap < cap) { - ncap *= 2; - } - cap = ncap; - } - size_t bucketsz = sizeof(struct bucket) + elsize; - while (bucketsz & (sizeof(uintptr_t)-1)) { - bucketsz++; - } - // hashmap + spare + edata - size_t size = sizeof(struct hashmap)+bucketsz*2; - struct hashmap *map = _malloc(size); - if (!map) { - return NULL; - } - memset(map, 0, sizeof(struct hashmap)); - map->elsize = elsize; - map->bucketsz = bucketsz; - map->seed0 = seed0; - map->seed1 = seed1; - map->hash = hash; - map->compare = compare; - map->elfree = elfree; - map->udata = udata; - map->spare = ((char*)map)+sizeof(struct hashmap); - map->edata = (char*)map->spare+bucketsz; - map->cap = cap; - map->nbuckets = cap; - map->mask = map->nbuckets-1; - map->buckets = _malloc(map->bucketsz*map->nbuckets); - if (!map->buckets) { - _free(map); - return NULL; - } - memset(map->buckets, 0, map->bucketsz*map->nbuckets); - map->growat = map->nbuckets*0.75; - map->shrinkat = map->nbuckets*0.10; - map->malloc = _malloc; - map->realloc = _realloc; - map->free = _free; - return map; -} - - -// hashmap_new returns a new hash map. -// Param `elsize` is the size of each element in the tree. Every element that -// is inserted, deleted, or retrieved will be this size. -// Param `cap` is the default lower capacity of the hashmap. Setting this to -// zero will default to 16. -// Params `seed0` and `seed1` are optional seed values that are passed to the -// following `hash` function. These can be any value you wish but it's often -// best to use randomly generated values. -// Param `hash` is a function that generates a hash value for an item. It's -// important that you provide a good hash function, otherwise it will perform -// poorly or be vulnerable to Denial-of-service attacks. This implementation -// comes with two helper functions `hashmap_sip()` and `hashmap_murmur()`. -// Param `compare` is a function that compares items in the tree. See the -// qsort stdlib function for an example of how this function works. -// The hashmap must be freed with hashmap_free(). -// Param `elfree` is a function that frees a specific item. This should be NULL -// unless you're storing some kind of reference data in the hash. -struct hashmap *hashmap_new(size_t elsize, size_t cap, - uint64_t seed0, uint64_t seed1, - uint64_t (*hash)(const void *item, - uint64_t seed0, uint64_t seed1), - int (*compare)(const void *a, const void *b, - void *udata), - void (*elfree)(void *item), - void *udata) -{ - return hashmap_new_with_allocator( - (_malloc?_malloc:malloc), - (_realloc?_realloc:realloc), - (_free?_free:free), - elsize, cap, seed0, seed1, hash, compare, elfree, udata - ); -} - -static void free_elements(struct hashmap *map) { - if (map->elfree) { - for (size_t i = 0; i < map->nbuckets; i++) { - struct bucket *bucket = bucket_at(map, i); - if (bucket->dib) map->elfree(bucket_item(bucket)); - } - } -} - - -// hashmap_clear quickly clears the map. -// Every item is called with the element-freeing function given in hashmap_new, -// if present, to free any data referenced in the elements of the hashmap. -// When the update_cap is provided, the map's capacity will be updated to match -// the currently number of allocated buckets. This is an optimization to ensure -// that this operation does not perform any allocations. -void hashmap_clear(struct hashmap *map, bool update_cap) { - map->count = 0; - free_elements(map); - if (update_cap) { - map->cap = map->nbuckets; - } else if (map->nbuckets != map->cap) { - void *new_buckets = map->malloc(map->bucketsz*map->cap); - if (new_buckets) { - map->free(map->buckets); - map->buckets = new_buckets; - } - map->nbuckets = map->cap; - } - memset(map->buckets, 0, map->bucketsz*map->nbuckets); - map->mask = map->nbuckets-1; - map->growat = map->nbuckets*0.75; - map->shrinkat = map->nbuckets*0.10; -} - - -static bool resize(struct hashmap *map, size_t new_cap) { - struct hashmap *map2 = hashmap_new_with_allocator(map->malloc, map->realloc, map->free, - map->elsize, new_cap, map->seed0, - map->seed1, map->hash, map->compare, - map->elfree, map->udata); - if (!map2) { - return false; - } - for (size_t i = 0; i < map->nbuckets; i++) { - struct bucket *entry = bucket_at(map, i); - if (!entry->dib) { - continue; - } - entry->dib = 1; - size_t j = entry->hash & map2->mask; - for (;;) { - struct bucket *bucket = bucket_at(map2, j); - if (bucket->dib == 0) { - memcpy(bucket, entry, map->bucketsz); - break; - } - if (bucket->dib < entry->dib) { - memcpy(map2->spare, bucket, map->bucketsz); - memcpy(bucket, entry, map->bucketsz); - memcpy(entry, map2->spare, map->bucketsz); - } - j = (j + 1) & map2->mask; - entry->dib += 1; - } - } - map->free(map->buckets); - map->buckets = map2->buckets; - map->nbuckets = map2->nbuckets; - map->mask = map2->mask; - map->growat = map2->growat; - map->shrinkat = map2->shrinkat; - map->free(map2); - return true; -} - -// hashmap_set inserts or replaces an item in the hash map. If an item is -// replaced then it is returned otherwise NULL is returned. This operation -// may allocate memory. If the system is unable to allocate additional -// memory then NULL is returned and hashmap_oom() returns true. -void *hashmap_set(struct hashmap *map, const void *item) { - if (!item) { - panic("item is null"); - } - map->oom = false; - if (map->count == map->growat) { - if (!resize(map, map->nbuckets*2)) { - map->oom = true; - return NULL; - } - } - - - struct bucket *entry = map->edata; - entry->hash = get_hash(map, item); - entry->dib = 1; - memcpy(bucket_item(entry), item, map->elsize); - - size_t i = entry->hash & map->mask; - for (;;) { - struct bucket *bucket = bucket_at(map, i); - if (bucket->dib == 0) { - memcpy(bucket, entry, map->bucketsz); - map->count++; - return NULL; - } - if (entry->hash == bucket->hash && - map->compare(bucket_item(entry), bucket_item(bucket), - map->udata) == 0) - { - memcpy(map->spare, bucket_item(bucket), map->elsize); - memcpy(bucket_item(bucket), bucket_item(entry), map->elsize); - return map->spare; - } - if (bucket->dib < entry->dib) { - memcpy(map->spare, bucket, map->bucketsz); - memcpy(bucket, entry, map->bucketsz); - memcpy(entry, map->spare, map->bucketsz); - } - i = (i + 1) & map->mask; - entry->dib += 1; - } -} - -// hashmap_get returns the item based on the provided key. If the item is not -// found then NULL is returned. -void *hashmap_get(struct hashmap *map, const void *key) { - if (!key) { - panic("key is null"); - } - uint64_t hash = get_hash(map, key); - size_t i = hash & map->mask; - for (;;) { - struct bucket *bucket = bucket_at(map, i); - if (!bucket->dib) { - return NULL; - } - if (bucket->hash == hash && - map->compare(key, bucket_item(bucket), map->udata) == 0) - { - return bucket_item(bucket); - } - i = (i + 1) & map->mask; - } -} - -// hashmap_probe returns the item in the bucket at position or NULL if an item -// is not set for that bucket. The position is 'moduloed' by the number of -// buckets in the hashmap. -void *hashmap_probe(struct hashmap *map, uint64_t position) { - size_t i = position & map->mask; - struct bucket *bucket = bucket_at(map, i); - if (!bucket->dib) { - return NULL; - } - return bucket_item(bucket); -} - - -// hashmap_delete removes an item from the hash map and returns it. If the -// item is not found then NULL is returned. -void *hashmap_delete(struct hashmap *map, void *key) { - if (!key) { - panic("key is null"); - } - map->oom = false; - uint64_t hash = get_hash(map, key); - size_t i = hash & map->mask; - for (;;) { - struct bucket *bucket = bucket_at(map, i); - if (!bucket->dib) { - return NULL; - } - if (bucket->hash == hash && - map->compare(key, bucket_item(bucket), map->udata) == 0) - { - memcpy(map->spare, bucket_item(bucket), map->elsize); - bucket->dib = 0; - for (;;) { - struct bucket *prev = bucket; - i = (i + 1) & map->mask; - bucket = bucket_at(map, i); - if (bucket->dib <= 1) { - prev->dib = 0; - break; - } - memcpy(prev, bucket, map->bucketsz); - prev->dib--; - } - map->count--; - if (map->nbuckets > map->cap && map->count <= map->shrinkat) { - // Ignore the return value. It's ok for the resize operation to - // fail to allocate enough memory because a shrink operation - // does not change the integrity of the data. - resize(map, map->nbuckets/2); - } - return map->spare; - } - i = (i + 1) & map->mask; - } -} - -// hashmap_count returns the number of items in the hash map. -size_t hashmap_count(struct hashmap *map) { - return map->count; -} - -// hashmap_free frees the hash map -// Every item is called with the element-freeing function given in hashmap_new, -// if present, to free any data referenced in the elements of the hashmap. -void hashmap_free(struct hashmap *map) { - if (!map) return; - free_elements(map); - map->free(map->buckets); - map->free(map); -} - -// hashmap_oom returns true if the last hashmap_set() call failed due to the -// system being out of memory. -bool hashmap_oom(struct hashmap *map) { - return map->oom; -} - -// hashmap_scan iterates over all items in the hash map -// Param `iter` can return false to stop iteration early. -// Returns false if the iteration has been stopped early. -bool hashmap_scan(struct hashmap *map, - bool (*iter)(const void *item, void *udata), void *udata) -{ - for (size_t i = 0; i < map->nbuckets; i++) { - struct bucket *bucket = bucket_at(map, i); - if (bucket->dib) { - if (!iter(bucket_item(bucket), udata)) { - return false; - } - } - } - return true; -} - - -// hashmap_iter iterates one key at a time yielding a reference to an -// entry at each iteration. Useful to write simple loops and avoid writing -// dedicated callbacks and udata structures, as in hashmap_scan. -// -// map is a hash map handle. i is a pointer to a size_t cursor that -// should be initialized to 0 at the beginning of the loop. item is a void -// pointer pointer that is populated with the retrieved item. Note that this -// is NOT a copy of the item stored in the hash map and can be directly -// modified. -// -// Note that if hashmap_delete() is called on the hashmap being iterated, -// the buckets are rearranged and the iterator must be reset to 0, otherwise -// unexpected results may be returned after deletion. -// -// This function has not been tested for thread safety. -// -// The function returns true if an item was retrieved; false if the end of the -// iteration has been reached. -bool hashmap_iter(struct hashmap *map, size_t *i, void **item) -{ - struct bucket *bucket; - - do { - if (*i >= map->nbuckets) return false; - - bucket = bucket_at(map, *i); - (*i)++; - } while (!bucket->dib); - - *item = bucket_item(bucket); - - return true; -} - - -//----------------------------------------------------------------------------- -// SipHash reference C implementation -// -// Copyright (c) 2012-2016 Jean-Philippe Aumasson -// <jeanphilippe.aumasson@gmail.com> -// Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to> -// -// To the extent possible under law, the author(s) have dedicated all copyright -// and related and neighboring rights to this software to the public domain -// worldwide. This software is distributed without any warranty. -// -// You should have received a copy of the CC0 Public Domain Dedication along -// with this software. If not, see -// <http://creativecommons.org/publicdomain/zero/1.0/>. -// -// default: SipHash-2-4 -//----------------------------------------------------------------------------- -static uint64_t SIP64(const uint8_t *in, const size_t inlen, - uint64_t seed0, uint64_t seed1) -{ -#define U8TO64_LE(p) \ - { (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ - ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ - ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ - ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) } -#define U64TO8_LE(p, v) \ - { U32TO8_LE((p), (uint32_t)((v))); \ - U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); } -#define U32TO8_LE(p, v) \ - { (p)[0] = (uint8_t)((v)); \ - (p)[1] = (uint8_t)((v) >> 8); \ - (p)[2] = (uint8_t)((v) >> 16); \ - (p)[3] = (uint8_t)((v) >> 24); } -#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) -#define SIPROUND \ - { v0 += v1; v1 = ROTL(v1, 13); \ - v1 ^= v0; v0 = ROTL(v0, 32); \ - v2 += v3; v3 = ROTL(v3, 16); \ - v3 ^= v2; \ - v0 += v3; v3 = ROTL(v3, 21); \ - v3 ^= v0; \ - v2 += v1; v1 = ROTL(v1, 17); \ - v1 ^= v2; v2 = ROTL(v2, 32); } - uint64_t k0 = U8TO64_LE((uint8_t*)&seed0); - uint64_t k1 = U8TO64_LE((uint8_t*)&seed1); - uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1; - uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0; - uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1; - uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0; - const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); - for (; in != end; in += 8) { - uint64_t m = U8TO64_LE(in); - v3 ^= m; - SIPROUND; SIPROUND; - v0 ^= m; - } - const int left = inlen & 7; - uint64_t b = ((uint64_t)inlen) << 56; - switch (left) { - case 7: b |= ((uint64_t)in[6]) << 48; - case 6: b |= ((uint64_t)in[5]) << 40; - case 5: b |= ((uint64_t)in[4]) << 32; - case 4: b |= ((uint64_t)in[3]) << 24; - case 3: b |= ((uint64_t)in[2]) << 16; - case 2: b |= ((uint64_t)in[1]) << 8; - case 1: b |= ((uint64_t)in[0]); break; - case 0: break; - } - v3 ^= b; - SIPROUND; SIPROUND; - v0 ^= b; - v2 ^= 0xff; - SIPROUND; SIPROUND; SIPROUND; SIPROUND; - b = v0 ^ v1 ^ v2 ^ v3; - uint64_t out = 0; - U64TO8_LE((uint8_t*)&out, b); - return out; -} - -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. -// -// Murmur3_86_128 -//----------------------------------------------------------------------------- -static void MM86128(const void *key, const int len, uint32_t seed, void *out) { -#define ROTL32(x, r) ((x << r) | (x >> (32 - r))) -#define FMIX32(h) h^=h>>16; h*=0x85ebca6b; h^=h>>13; h*=0xc2b2ae35; h^=h>>16; - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; - uint32_t h1 = seed; - uint32_t h2 = seed; - uint32_t h3 = seed; - uint32_t h4 = seed; - uint32_t c1 = 0x239b961b; - uint32_t c2 = 0xab0e9789; - uint32_t c3 = 0x38b34ae5; - uint32_t c4 = 0xa1e38b93; - const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); - for (int i = -nblocks; i; i++) { - uint32_t k1 = blocks[i*4+0]; - uint32_t k2 = blocks[i*4+1]; - uint32_t k3 = blocks[i*4+2]; - uint32_t k4 = blocks[i*4+3]; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; - } - const uint8_t * tail = (const uint8_t*)(data + nblocks*16); - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; - switch(len & 15) { - case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - FMIX32(h1); FMIX32(h2); FMIX32(h3); FMIX32(h4); - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - ((uint32_t*)out)[0] = h1; - ((uint32_t*)out)[1] = h2; - ((uint32_t*)out)[2] = h3; - ((uint32_t*)out)[3] = h4; -} - -// hashmap_sip returns a hash value for `data` using SipHash-2-4. -uint64_t hashmap_sip(const void *data, size_t len, - uint64_t seed0, uint64_t seed1) -{ - return SIP64((uint8_t*)data, len, seed0, seed1); -} - -// hashmap_murmur returns a hash value for `data` using Murmur3_86_128. -uint64_t hashmap_murmur(const void *data, size_t len, - uint64_t seed0, uint64_t seed1) -{ - char out[16]; - MM86128(data, len, seed0, &out); - return *(uint64_t*)out; -} - -//============================================================================== -// TESTS AND BENCHMARKS -// $ cc -DHASHMAP_TEST hashmap.c && ./a.out # run tests -// $ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out # run benchmarks -//============================================================================== -#ifdef HASHMAP_TEST - -static size_t deepcount(struct hashmap *map) { - size_t count = 0; - for (size_t i = 0; i < map->nbuckets; i++) { - if (bucket_at(map, i)->dib) { - count++; - } - } - return count; -} - - -#pragma GCC diagnostic ignored "-Wextra" - - -#include <stdlib.h> -#include <string.h> -#include <time.h> -#include <assert.h> -#include <stdio.h> -#include "hashmap.h" - -static bool rand_alloc_fail = false; -static int rand_alloc_fail_odds = 3; // 1 in 3 chance malloc will fail. -static uintptr_t total_allocs = 0; -static uintptr_t total_mem = 0; - -static void *xmalloc(size_t size) { - if (rand_alloc_fail && rand()%rand_alloc_fail_odds == 0) { - return NULL; - } - void *mem = malloc(sizeof(uintptr_t)+size); - assert(mem); - *(uintptr_t*)mem = size; - total_allocs++; - total_mem += size; - return (char*)mem+sizeof(uintptr_t); -} - -static void xfree(void *ptr) { - if (ptr) { - total_mem -= *(uintptr_t*)((char*)ptr-sizeof(uintptr_t)); - free((char*)ptr-sizeof(uintptr_t)); - total_allocs--; - } -} - -static void shuffle(void *array, size_t numels, size_t elsize) { - char tmp[elsize]; - char *arr = array; - for (size_t i = 0; i < numels - 1; i++) { - int j = i + rand() / (RAND_MAX / (numels - i) + 1); - memcpy(tmp, arr + j * elsize, elsize); - memcpy(arr + j * elsize, arr + i * elsize, elsize); - memcpy(arr + i * elsize, tmp, elsize); - } -} - -static bool iter_ints(const void *item, void *udata) { - int *vals = *(int**)udata; - vals[*(int*)item] = 1; - return true; -} - -static int compare_ints(const void *a, const void *b) { - return *(int*)a - *(int*)b; -} - -static int compare_ints_udata(const void *a, const void *b, void *udata) { - return *(int*)a - *(int*)b; -} - -static int compare_strs(const void *a, const void *b, void *udata) { - return strcmp(*(char**)a, *(char**)b); -} - -static uint64_t hash_int(const void *item, uint64_t seed0, uint64_t seed1) { - return hashmap_murmur(item, sizeof(int), seed0, seed1); -} - -static uint64_t hash_str(const void *item, uint64_t seed0, uint64_t seed1) { - return hashmap_murmur(*(char**)item, strlen(*(char**)item), seed0, seed1); -} - -static void free_str(void *item) { - xfree(*(char**)item); -} - -static void all() { - int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL); - int N = getenv("N")?atoi(getenv("N")):2000; - printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int)); - srand(seed); - - rand_alloc_fail = true; - - // test sip and murmur hashes - assert(hashmap_sip("hello", 5, 1, 2) == 2957200328589801622); - assert(hashmap_murmur("hello", 5, 1, 2) == 1682575153221130884); - - int *vals; - while (!(vals = xmalloc(N * sizeof(int)))) {} - for (int i = 0; i < N; i++) { - vals[i] = i; - } - - struct hashmap *map; - - while (!(map = hashmap_new(sizeof(int), 0, seed, seed, - hash_int, compare_ints_udata, NULL, NULL))) {} - shuffle(vals, N, sizeof(int)); - for (int i = 0; i < N; i++) { - // // printf("== %d ==\n", vals[i]); - assert(map->count == i); - assert(map->count == hashmap_count(map)); - assert(map->count == deepcount(map)); - int *v; - assert(!hashmap_get(map, &vals[i])); - assert(!hashmap_delete(map, &vals[i])); - while (true) { - assert(!hashmap_set(map, &vals[i])); - if (!hashmap_oom(map)) { - break; - } - } - - for (int j = 0; j < i; j++) { - v = hashmap_get(map, &vals[j]); - assert(v && *v == vals[j]); - } - while (true) { - v = hashmap_set(map, &vals[i]); - if (!v) { - assert(hashmap_oom(map)); - continue; - } else { - assert(!hashmap_oom(map)); - assert(v && *v == vals[i]); - break; - } - } - v = hashmap_get(map, &vals[i]); - assert(v && *v == vals[i]); - v = hashmap_delete(map, &vals[i]); - assert(v && *v == vals[i]); - assert(!hashmap_get(map, &vals[i])); - assert(!hashmap_delete(map, &vals[i])); - assert(!hashmap_set(map, &vals[i])); - assert(map->count == i+1); - assert(map->count == hashmap_count(map)); - assert(map->count == deepcount(map)); - } - - int *vals2; - while (!(vals2 = xmalloc(N * sizeof(int)))) {} - memset(vals2, 0, N * sizeof(int)); - assert(hashmap_scan(map, iter_ints, &vals2)); - - // Test hashmap_iter. This does the same as hashmap_scan above. - size_t iter = 0; - void *iter_val; - while (hashmap_iter (map, &iter, &iter_val)) { - assert (iter_ints(iter_val, &vals2)); - } - for (int i = 0; i < N; i++) { - assert(vals2[i] == 1); - } - xfree(vals2); - - shuffle(vals, N, sizeof(int)); - for (int i = 0; i < N; i++) { - int *v; - v = hashmap_delete(map, &vals[i]); - assert(v && *v == vals[i]); - assert(!hashmap_get(map, &vals[i])); - assert(map->count == N-i-1); - assert(map->count == hashmap_count(map)); - assert(map->count == deepcount(map)); - for (int j = N-1; j > i; j--) { - v = hashmap_get(map, &vals[j]); - assert(v && *v == vals[j]); - } - } - - for (int i = 0; i < N; i++) { - while (true) { - assert(!hashmap_set(map, &vals[i])); - if (!hashmap_oom(map)) { - break; - } - } - } - - assert(map->count != 0); - size_t prev_cap = map->cap; - hashmap_clear(map, true); - assert(prev_cap < map->cap); - assert(map->count == 0); - - - for (int i = 0; i < N; i++) { - while (true) { - assert(!hashmap_set(map, &vals[i])); - if (!hashmap_oom(map)) { - break; - } - } - } - - prev_cap = map->cap; - hashmap_clear(map, false); - assert(prev_cap == map->cap); - - hashmap_free(map); - - xfree(vals); - - - while (!(map = hashmap_new(sizeof(char*), 0, seed, seed, - hash_str, compare_strs, free_str, NULL))); - - for (int i = 0; i < N; i++) { - char *str; - while (!(str = xmalloc(16))); - sprintf(str, "s%i", i); - while(!hashmap_set(map, &str)); - } - - hashmap_clear(map, false); - assert(hashmap_count(map) == 0); - - for (int i = 0; i < N; i++) { - char *str; - while (!(str = xmalloc(16))); - sprintf(str, "s%i", i); - while(!hashmap_set(map, &str)); - } - - hashmap_free(map); - - if (total_allocs != 0) { - fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs); - exit(1); - } -} - -#define bench(name, N, code) {{ \ - if (strlen(name) > 0) { \ - printf("%-14s ", name); \ - } \ - size_t tmem = total_mem; \ - size_t tallocs = total_allocs; \ - uint64_t bytes = 0; \ - clock_t begin = clock(); \ - for (int i = 0; i < N; i++) { \ - (code); \ - } \ - clock_t end = clock(); \ - double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \ - double bytes_sec = (double)bytes/elapsed_secs; \ - printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", \ - N, elapsed_secs, \ - elapsed_secs/(double)N*1e9, \ - (double)N/elapsed_secs \ - ); \ - if (bytes > 0) { \ - printf(", %.1f GB/sec", bytes_sec/1024/1024/1024); \ - } \ - if (total_mem > tmem) { \ - size_t used_mem = total_mem-tmem; \ - printf(", %.2f bytes/op", (double)used_mem/N); \ - } \ - if (total_allocs > tallocs) { \ - size_t used_allocs = total_allocs-tallocs; \ - printf(", %.2f allocs/op", (double)used_allocs/N); \ - } \ - printf("\n"); \ -}} - -static void benchmarks() { - int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL); - int N = getenv("N")?atoi(getenv("N")):5000000; - printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int)); - srand(seed); - - - int *vals = xmalloc(N * sizeof(int)); - for (int i = 0; i < N; i++) { - vals[i] = i; - } - - shuffle(vals, N, sizeof(int)); - - struct hashmap *map; - shuffle(vals, N, sizeof(int)); - - map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints_udata, - NULL, NULL); - bench("set", N, { - int *v = hashmap_set(map, &vals[i]); - assert(!v); - }) - shuffle(vals, N, sizeof(int)); - bench("get", N, { - int *v = hashmap_get(map, &vals[i]); - assert(v && *v == vals[i]); - }) - shuffle(vals, N, sizeof(int)); - bench("delete", N, { - int *v = hashmap_delete(map, &vals[i]); - assert(v && *v == vals[i]); - }) - hashmap_free(map); - - map = hashmap_new(sizeof(int), N, seed, seed, hash_int, compare_ints_udata, - NULL, NULL); - bench("set (cap)", N, { - int *v = hashmap_set(map, &vals[i]); - assert(!v); - }) - shuffle(vals, N, sizeof(int)); - bench("get (cap)", N, { - int *v = hashmap_get(map, &vals[i]); - assert(v && *v == vals[i]); - }) - shuffle(vals, N, sizeof(int)); - bench("delete (cap)" , N, { - int *v = hashmap_delete(map, &vals[i]); - assert(v && *v == vals[i]); - }) - - hashmap_free(map); - - - xfree(vals); - - if (total_allocs != 0) { - fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs); - exit(1); - } -} - -int main() { - hashmap_set_allocator(xmalloc, xfree); - - if (getenv("BENCH")) { - printf("Running hashmap.c benchmarks...\n"); - benchmarks(); - } else { - printf("Running hashmap.c tests...\n"); - all(); - printf("PASSED\n"); - } -} - - -#endif - - -