aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHristo Venev <hristo@venev.name>2020-06-22 12:32:11 +0300
committerHristo Venev <hristo@venev.name>2020-06-22 12:54:22 +0300
commitcb2a50691fb0cddb64e1b5a9ed242a6a0b42d503 (patch)
tree57228e4e9efacb0d2a88d7d8b825a44278075875
Initial commit.HEADmaster
-rw-r--r--.gitignore1
-rw-r--r--README.md7
-rw-r--r--examples/print.c61
-rw-r--r--examples/write.c84
-rw-r--r--fuzz/simple_ops.c162
-rw-r--r--include/hkvs.h318
-rw-r--r--lib/db.c180
-rw-r--r--lib/die.c21
-rw-r--r--lib/extfile.c150
-rw-r--r--lib/hash.c78
-rw-r--r--lib/impl.h209
-rw-r--r--lib/internal.h106
-rw-r--r--lib/io_dirfd.c276
-rw-r--r--lib/io_mem.c181
-rw-r--r--lib/iter.c43
-rw-r--r--lib/ix.c215
-rw-r--r--lib/record.c352
-rw-r--r--lib/slab.c199
-rw-r--r--lib/table.c212
-rw-r--r--lib/util.c30
-rw-r--r--meson.build138
-rw-r--r--meson_options.txt1
22 files changed, 3024 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..796b96d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/build
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a2bbb69
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+Hash-based Key-Value Store
+===
+
+`hkvs` is a simple and efficient single-threaded key-value store. Transaction
+support is to be provided by the I/O layer.
+
+For API documentation, see `include/hkvs.h`
diff --git a/examples/print.c b/examples/print.c
new file mode 100644
index 0000000..066d33c
--- /dev/null
+++ b/examples/print.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include <hkvs.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+int main(int argc, char **argv) {
+ argc--;
+ argv++;
+ if(argc % 2 != 0) {
+ return 1;
+ }
+
+ (void)argc;
+ (void)argv;
+
+ int dfd = open("dbdir", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ if(dfd < 0) {
+ fprintf(stderr, "Failed to open dbdir: %s\n", strerror(errno));
+ return 1;
+ }
+
+ hkvs_io *io = hkvs_io_new_unsafe_dirfd(dfd);
+ if(!io) {
+ fprintf(stderr, "Failed to create hkvs_io\n");
+ return 1;
+ }
+
+ hkvs *c;
+ int r = hkvs_new(&c, io);
+ if(r < 0) {
+ fprintf(stderr, "Failed to open database: %s\n", strerror(-r));
+ return 1;
+ }
+
+ for(size_t i = 0;; i++) {
+ hkvs_table *t = hkvs_table_at(c, i);
+ if(!t) break;
+ size_t key_size = hkvs_table_key_size(c, t);
+
+ printf("Table %zu: %u [key size = %zu]\n", i, hkvs_table_id(c, t), key_size);
+
+ hkvs_iter it;
+ hkvs_iter_open(c, t, &it);
+ const char *key;
+ for(hkvs_rid rid; hkvs_rid_ok((rid = hkvs_iter_next(c, t, &it, &key)));) {
+ char *val;
+ size_t val_size;
+ r = hkvs_record_value(c, t, rid, &val, &val_size);
+ if(r < 0) {
+ fprintf(stderr, "Error: %s\n", strerror(-r));
+ return 1;
+ }
+
+ printf("[%llu] %.*s: [%zu] %.*s\n", (unsigned long long)rid.id, (int)key_size, key, val_size, (int)val_size, val);
+ }
+ }
+
+ hkvs_free(c);
+}
diff --git a/examples/write.c b/examples/write.c
new file mode 100644
index 0000000..456fb8e
--- /dev/null
+++ b/examples/write.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include <hkvs.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+int main(int argc, char **argv) {
+ argc--;
+ argv++;
+ if(argc % 2 != 0) {
+ return 1;
+ }
+
+ (void)argc;
+ (void)argv;
+
+ int dfd = open("dbdir", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ if(dfd < 0) {
+ fprintf(stderr, "Failed to open dbdir: %s\n", strerror(errno));
+ return 1;
+ }
+
+ hkvs_io *io = hkvs_io_new_unsafe_dirfd(dfd);
+ if(!io) {
+ fprintf(stderr, "Failed to create hkvs_io\n");
+ return 1;
+ }
+
+ hkvs *c;
+ int r = hkvs_new(&c, io);
+ if(r < 0) {
+ fprintf(stderr, "Failed to open database: %s\n", strerror(-r));
+ return 1;
+ }
+
+ hkvs_table *t;
+ r = hkvs_table_create(c, &t, 42, 4, 1);
+ if(r < 0) {
+ fprintf(stderr, "Failed to open table: %s\n", strerror(-r));
+ return 1;
+ }
+ if(r) {
+ printf("Created table\n");
+ }
+
+ while(argc) {
+ const char *key = *argv++;
+ const char *val = *argv++;
+ argc -= 2;
+ if(strlen(key) != 4) {
+ fprintf(stderr, "Bad arg length\n");
+ return 1;
+ }
+
+ hkvs_rid id = hkvs_record_find(c, t, key);
+ if(hkvs_rid_ok(id)) {
+ char *cur_val;
+ size_t cur_len;
+ r = hkvs_record_value(c, t, id, &cur_val, &cur_len);
+ if(r < 0) {
+ fprintf(stderr, "Failed to get record value: %s\n", strerror(-r));
+ return 1;
+ }
+ printf("%s: id=%llu [%zu] %.*s\n", key, (unsigned long long)id.id, cur_len, (int)cur_len, (const char*)cur_val);
+ } else {
+ printf("%s: not found\n", key);
+ }
+
+ if(!!strcmp(val, "-")) {
+ r = hkvs_record_update(c, t, &id, key, val, strlen(val));
+ if(r < 0) {
+ fprintf(stderr, "Failed to update record: %s\n", strerror(-r));
+ return 1;
+ }
+ printf("%s: wrote id=%llu\n", key, (unsigned long long)id.id);
+ } else if(hkvs_rid_ok(id)) {
+ hkvs_record_delete(c, t, id);
+ printf("%s: deleted\n", key);
+ }
+ }
+
+ hkvs_free(c);
+}
diff --git a/fuzz/simple_ops.c b/fuzz/simple_ops.c
new file mode 100644
index 0000000..a7f501e
--- /dev/null
+++ b/fuzz/simple_ops.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include <hkvs.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+inline static int rdlen(size_t *pr, const uint8_t **pd, size_t *pn) {
+ const uint8_t *d = *pd;
+ size_t n = *pn;
+ if(n == 0) return -1;
+ *pd = d + 1;
+ *pn = n - 1;
+ *pr = d[0];
+ return 0;
+}
+
+static const char zeroes[256];
+
+static void no_io_destroy(hkvs_io *io) {
+ (void)io;
+}
+
+inline static void check_error(int r) {
+ if(r < 0) {
+ fprintf(stderr, "Error: %s\n", strerror(-r));
+ abort();
+ }
+}
+
+extern int LLVMFuzzerTestOneInput(const uint8_t *fz_data, size_t fz_n);
+int LLVMFuzzerTestOneInput(const uint8_t *fz_data, size_t fz_n) {
+ bool log = false;
+
+ if(fz_n < 8) return 0;
+ hkvs_io *io;
+ {
+ uint64_t seed;
+ memcpy(&seed, fz_data, 8);
+ fz_data += 8;
+ fz_n -= 8;
+ io = hkvs_io_new_mem(seed);
+ }
+
+ size_t sizes[256];
+ for(size_t i = 0; i < 256; i++) {
+ sizes[i] = (size_t)-1;
+ }
+
+ hkvs *db;
+ int r = hkvs_new(&db, io);
+ if(r == -E2BIG) return 0;
+ check_error(r);
+
+ hkvs_table *t;
+ r = hkvs_table_create(db, &t, 42, 1, 16);
+ check_error(r);
+ assert(r == 1);
+
+ hkvs_rid id = HKVS_INVALID_RID;
+ uint8_t id_k = 0;
+
+ if(log) puts("begin");
+
+ while(fz_n) {
+ uint8_t op = *fz_data++;
+ fz_n--;
+
+ switch(op) {
+ case 0:
+ {
+ if(log) puts("reopen");
+
+ void (*old_destroy)(hkvs_io*) = io->destroy;
+ io->destroy = no_io_destroy;
+ hkvs_free(db);
+ io->destroy = old_destroy;
+
+ r = hkvs_new(&db, io);
+ check_error(r);
+
+ r = hkvs_table_create(db, &t, 42, 1, 16);
+ check_error(r);
+ assert(r == 0);
+ id = HKVS_INVALID_RID;
+ break;
+ }
+ case 3:
+ {
+ if(fz_n < 1) goto end;
+ uint8_t k = *fz_data;
+ fz_data++;
+ fz_n--;
+ if(log) printf("find %02x ", k);
+ id = hkvs_record_find(db, t, (const char *)&k);
+ if(hkvs_rid_ok(id)) {
+ id_k = k;
+ assert(!memcmp(hkvs_record_key(db, t, id), &k, 1));
+ size_t len;
+ char *val;
+ r = hkvs_record_value(db, t, id, &val, &len);
+ check_error(r);
+ if(log) printf("[%zu]\n", len);
+ assert(len == sizes[k]);
+ } else {
+ if(log) puts("not found");
+ assert(sizes[k] == (size_t)-1);
+ }
+ break;
+ }
+ case 4:
+ {
+ if(!hkvs_rid_ok(id)) break;
+ if(log) printf("delete %02x\n", id_k);
+ hkvs_record_delete(db, t, id);
+ sizes[id_k] = (size_t)-1;
+ id = HKVS_INVALID_RID;
+ break;
+ }
+ case 5:
+ {
+ if(!hkvs_rid_ok(id)) break;
+ size_t n;
+ r = rdlen(&n, &fz_data, &fz_n);
+ if(r < 0) goto end;
+ if(log) printf("set %02x [%zu]\n", id_k, n);
+ r = hkvs_record_set(db, t, id, zeroes, n);
+ check_error(r);
+ sizes[id_k] = n;
+ break;
+ }
+ case 6:
+ {
+ if(fz_n < 1) goto end;
+ uint8_t k = *fz_data;
+ fz_data++;
+ fz_n--;
+ size_t n;
+ r = rdlen(&n, &fz_data, &fz_n);
+ if(r < 0) goto end;
+ if(log) printf("update %02x [%zu]\n", k, n);
+ r = hkvs_record_update(db, t, &id, (const char*)&k, zeroes, n);
+ check_error(r);
+ if(r) {
+ assert(sizes[k] == (size_t)-1);
+ } else {
+ assert(sizes[k] != (size_t)-1);
+ }
+ id_k = k;
+ sizes[k] = n;
+ break;
+ }
+ }
+ }
+
+end:
+ hkvs_free(db);
+
+ return 0;
+}
diff --git a/include/hkvs.h b/include/hkvs.h
new file mode 100644
index 0000000..7ab706a
--- /dev/null
+++ b/include/hkvs.h
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#pragma once
+#include <errno.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#ifndef HKVS_API
+#define HKVS_API extern __attribute__((__nothrow__))
+#endif
+
+#ifndef HKVS_INLINE
+#define HKVS_INLINE inline static __attribute__((__always_inline__, __unused__, __nothrow__))
+#endif
+
+#define HKVS_MUSTCHECK __attribute__((__warn_unused_result__))
+
+#define HKVS_MAGIC ((uint64_t)0xfffec19153564b48ULL)
+
+/* Error codes:
+ * - `-ENOMEM`: Memory allocation failed.
+ * - `-EFBIG`: An implementation limit on the database size has been reached.
+ * Note that this may happen to writes that don't change the number of
+ * records.
+ * - `-EBADMSG`: Database corruption has been detected.
+ */
+
+/* ================================================================ */
+
+
+/*
+ * ---------------------- Core Database API -----------------------
+ *
+ * The database contains a set of tables with 32-bit integer identifiers. Each
+ * table contains records with fixed-size keys and variable-sized values.
+ *
+ * The database storage is provided by an implementation of `hkvs_io`. The core
+ * of the database does not provide transaction support. That has to be
+ * implemented at the I/O layer.
+ */
+
+/* `hkvs*` is the database handle type. */
+typedef struct hkvs hkvs;
+
+/* `hkvs_table*` represents a database table. */
+typedef struct hkvs_table hkvs_table;
+
+/* `hkvs_io` is an interface for the I/O methods used by the database. */
+typedef struct hkvs_io hkvs_io;
+
+/* Open a new database. The database pointer is stored in `*pc`.
+ *
+ * On failure, a negative error code is returned and `*pc` is left unchanged.
+ * The `hkvs_io` is destroyed.
+ */
+HKVS_API HKVS_MUSTCHECK int hkvs_new(hkvs **pc, hkvs_io *io);
+
+/* Close the database and destroy its `hkvs_io`.
+ *
+ * This method does not fail. */
+HKVS_API void hkvs_free(hkvs *c);
+
+/* Find a table by index. If fewer tables exist, NULL is returned.
+ *
+ * This method does not fail. */
+HKVS_API hkvs_table *hkvs_table_at(hkvs *c, size_t i);
+
+/* Find a table by ID. If no table with the given ID exists or its key size does
+ * not match, NULL is returned.
+ *
+ * This method does not fail. */
+HKVS_API hkvs_table *hkvs_table_open(hkvs *c, uint32_t id, size_t key_size);
+
+/* Create a table and store its pointer in `*ptable`.
+ *
+ * Record values with size <= `val_size_hint` may be stored together with the
+ * key.
+ *
+ * A positive return value indicates that a table was created, and 0 means that
+ * the table already exists.
+ *
+ * On failure, a negative error code is returned and the database is left
+ * unchanged. If a table with the given ID exists but has a different key size,
+ * `-EEXIST` is returned. */
+HKVS_API int hkvs_table_create(hkvs *c, hkvs_table **ptable, uint32_t id, size_t key_size, size_t val_size_hint);
+
+/* Delete the table with the given ID. A positive return value indicates that a
+ * table was found and deleted, and 0 means that no table with the given ID
+ * exists.
+ *
+ * On failure, a negative error code is returned and the database is left
+ * unchanged. */
+HKVS_API int hkvs_table_delete(hkvs *c, uint32_t id);
+
+/* Get the ID of the given table.
+ *
+ * This method does not fail. */
+HKVS_API uint32_t hkvs_table_id(hkvs *c, hkvs_table *t);
+
+/* Get the key size of the given table.
+ *
+ * This method does not fail. */
+HKVS_API size_t hkvs_table_key_size(hkvs *c, hkvs_table *t);
+
+/* Invalidate all record key and value pointers. This may release some
+ * resources.
+ *
+ * This method does not fail. */
+HKVS_API void hkvs_put_records(hkvs *c);
+
+/* ================================================================ */
+
+
+/*
+ * -------------------------- Record API --------------------------
+ *
+ * The record API provides access to the contents of tables.
+ *
+ * Record IDs are 64-bit integers. New record IDs are assigned either
+ * sequentially or by reusing IDs of deleted records.
+ *
+ * Record IDs are persistent, meaning that a record will keep its ID until
+ * deleted.
+ */
+
+/* `hkvs_rid` is a wrapper for a record ID. */
+typedef struct hkvs_rid hkvs_rid;
+struct hkvs_rid {
+ uint64_t id;
+};
+
+#define HKVS_INVALID_RID ((struct hkvs_rid){(size_t)-1})
+
+/* Check if a record ID is valid. */
+HKVS_INLINE bool hkvs_rid_ok(hkvs_rid rid) {
+ return rid.id <= UINT64_MAX / 2;
+}
+
+/* Return a pointer to the key of the given record, or NULL if no record with
+ * that ID exists. The pointer is invalidated by methods that add, remove, or
+ * resize records of any table.
+ *
+ * This method does not fail. */
+HKVS_API HKVS_MUSTCHECK const char *hkvs_record_key(hkvs *c, hkvs_table *t, hkvs_rid rid);
+
+/* Get the data pointer and value of the given record. If the record exists,
+ * a positive value is returned. If not, 0 is returned.
+ *
+ * The pointer is invalidated by methods that add, remove, or resize records
+ * of any table. Do not forget to call `hkvs_put_records` when done with the
+ * value.
+ *
+ * On failure, a negative error code is returned. */
+HKVS_API HKVS_MUSTCHECK int hkvs_record_value(hkvs *c, hkvs_table *t, hkvs_rid rid, char **pvalue, size_t *psize);
+
+/* Resize a given record. Only the first `min(keep_size, new_size, old_size)`
+ * bytes of the value are retained, the rest of the value is uninitialized.
+ *
+ * If `pvalue` is provided, a pointer to the record value is stored there. Do
+ * not forget to call `hkvs_put_records` when done with the value.
+ *
+ * It is a programmer error to call this method with an ID that does not
+ * correspond to an existing record.
+ *
+ * On failure, a negative error code is returned and the database is left
+ * unchanged. */
+HKVS_API HKVS_MUSTCHECK int hkvs_record_resize(hkvs *c, hkvs_table *t, hkvs_rid rid, char **pvalue, size_t new_size, size_t keep_size);
+
+/* Find the record with the given key. If no record was found,
+ * `HKVS_INVALID_RID` is returned.
+ *
+ * This method does not fail. */
+HKVS_API HKVS_MUSTCHECK hkvs_rid hkvs_record_find(hkvs *c, hkvs_table *t, const char *key);
+
+/* Delete the given record.
+ *
+ * It is a programmer error to call this method with an ID that does not
+ * correspond to an existing record.
+ *
+ * This method does not fail. */
+HKVS_API void hkvs_record_delete(hkvs *c, hkvs_table *t, hkvs_rid rid);
+
+/* Append a new record to the table. The ID of the new record is stored in
+ * `*prid`.
+ *
+ * It is a programmer error to call this method with a key that is already in
+ * the table.
+ *
+ * On failure, a negative error code is returned and the database is left
+ * unchanged. */
+HKVS_API HKVS_MUSTCHECK int hkvs_record_append(hkvs *c, hkvs_table *t, hkvs_rid *prid, const char *key, char **pvalue, size_t size);
+
+/* ================================================================ */
+
+
+/*
+ * ---------------------- Utility Functions -----------------------
+ *
+ * These are some utility functions that can be correctly implemented using
+ * the rest of the public API. However, the implementations provided here may be
+ * slightly more efficient.
+ */
+
+/* Set the value of a record.
+ *
+ * It is a programmer error to call this method with a record ID that does not
+ * correspond to an existing record.
+ *
+ * On failure, a negative error code is returned and the database is left
+ * unchanged. */
+HKVS_API HKVS_MUSTCHECK int hkvs_record_set(hkvs *c, hkvs_table *t, hkvs_rid rid, const char *value, size_t size);
+
+/* Append or set the record with the given key.
+ *
+ * The ID of the record is stored in `*prid`. If a new record was created, 1
+ * is returned. If an existing record was changed, 0 is returned.
+ *
+ * On failure, a negative error code is returned and the database is left
+ * unchanged. */
+HKVS_API HKVS_MUSTCHECK int hkvs_record_update(hkvs *c, hkvs_table *t, hkvs_rid *prid, const char *key, const char *value, size_t size);
+
+/* ================================================================ */
+
+
+/*
+ * ------------------------- Iterator API -------------------------
+ *
+ * An iterator provides a way to enumerate all records in a table. Iterators
+ * themselves do not consume any resources.
+ *
+ * The fields of `hkvs_iter` are an implementation detail.
+ */
+typedef struct hkvs_iter hkvs_iter;
+struct hkvs_iter {
+ size_t priv1, priv2;
+};
+
+/* Initialize an iterator. The first call to `hkvs_iter_next` on the iterator
+ * will return the first record. Any change of the database invalidates the
+ * iterator, and it must be reinitialized.
+ *
+ * This method does not fail.
+ */
+HKVS_API void hkvs_iter_open(hkvs *c, hkvs_table *t, hkvs_iter *it);
+
+/* Initialize an iterator positioned at a given record. The next call to
+ * `hkvs_iter_next()` will return the first record with an ID greater than
+ * or equal to the one given here.
+ *
+ * This method does not fail. */
+HKVS_API void hkvs_iter_seek(hkvs *c, hkvs_table *t, hkvs_iter *it, hkvs_rid rid);
+
+/* Retrieve the ID of the next record. If `pkey` is provided, a pointer to the
+ * record's key is stored there.
+ *
+ * If the end of the database has been reached, `HKVS_INVALID_RID` is returned.
+ *
+ * This method does not fail. */
+HKVS_API HKVS_MUSTCHECK hkvs_rid hkvs_iter_next(hkvs *c, hkvs_table *t, hkvs_iter *it, const char **pkey);
+
+/* Delete the record last returned by `hkvs_iter_next()` without invalidating
+ * the iterator. Key and value pointers are still invalidated.
+ *
+ * It is a programmer error to call this method without a preceding call to
+ * `hkvs_iter_next()`.
+ *
+ * This method does not fail. */
+HKVS_API void hkvs_iter_delete(hkvs *c, hkvs_table *t, hkvs_iter *it);
+
+/* ================================================================ */
+
+
+/*
+ * ------------------------- hkvs_io API --------------------------
+ *
+ * The I/O interface consists of several methods for operating on memory-mapped
+ * files.
+ *
+ * In order to initialize the database, an empty file 0 must be created. It will
+ * never be deleted.
+ */
+
+struct hkvs_io {
+ /* Destroy the IO interface. */
+ void (*destroy)(hkvs_io *io);
+
+ /* Write random bytes to the given buffer. */
+ int (*random)(hkvs_io *io, char *buf, size_t size);
+
+ /* Open the file and memory-map it. The size is stored in `*psz` and the
+ * address is stored in `*pmem`. The file is not open when this method is
+ * called. */
+ int (*open)(hkvs_io *io, uint64_t fi, char **pmem, size_t *psz);
+
+ /* Create a zero-filled file with the given size and open it. */
+ int (*create)(hkvs_io *io, uint64_t *pfi, char **pmem, size_t sz);
+
+ /* Resize the file and update its memory-mapped address. The file is open
+ * and mapped at `*pmem` with size `old_sz` when this method is called. */
+ int (*resize)(hkvs_io *io, uint64_t fi, char **pmem, size_t old_sz, size_t new_sz);
+
+ /* Release the file's memory mapping and close it. The file is open and
+ * mapped at `mem` with size `sz` when this method is called. */
+ void (*close)(hkvs_io *io, uint64_t fi, char *mem, size_t sz);
+
+ /* Delete the file. The file is not open when this method is called. */
+ void (*unlink)(hkvs_io *io, uint64_t fi);
+};
+
+/* Create a new `hkvs_io` that entirely works in memory. */
+HKVS_API hkvs_io *hkvs_io_new_mem(uint64_t random_seed);
+
+/* Create a new `hkvs_io` given a directory file descriptor. This implementation
+ * is simple but it does no locking and is not crash-safe. */
+HKVS_API hkvs_io *hkvs_io_new_unsafe_dirfd(int fd);
+
+/* ================================================================ */
diff --git a/lib/db.c b/lib/db.c
new file mode 100644
index 0000000..81cce47
--- /dev/null
+++ b/lib/db.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+#ifdef FUZZING
+static const size_t hkvs_slab_sizes[HKVS_N_SLABS] = {
+ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24,
+ 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48,
+ 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72,
+ 74, 76, 78, 80,
+};
+#else
+static const size_t hkvs_slab_sizes[HKVS_N_SLABS] = {
+ (2 << 6), (3 << 6), (4 << 6), (5 << 6), (6 << 6), (7 << 6),
+ (8 << 6), (9 << 6), (10 << 6), (11 << 6), (12 << 6), (14 << 6),
+ (8 << 7), (9 << 7), (10 << 7), (11 << 7), (12 << 7), (14 << 7),
+ (8 << 8), (9 << 8), (10 << 8), (11 << 8), (12 << 8), (14 << 8),
+ (8 << 9), (9 << 9), (10 << 9), (11 << 9), (12 << 9), (14 << 9),
+ (8 << 10), (9 << 10), (10 << 10), (11 << 10), (12 << 10), (14 << 10),
+ (8 << 11), (9 << 11), (10 << 11), (11 << 11),
+};
+#endif
+
+int hkvs_new(hkvs **pc, hkvs_io *io) {
+ int r;
+ bool created = false;
+
+ hkvs *c = malloc(sizeof(*c));
+ if(!c) return -ENOMEM;
+
+ c->io = io;
+
+ r = io->open(io, 0, &c->root_param, &c->root_param_size);
+ if(r < 0) {
+ goto fail_free;
+ }
+
+ char *at = c->root_param;
+ size_t param_size = c->root_param_size;
+ if(param_size == 0) {
+ r = io->resize(io, 0, &c->root_param, param_size, HKVS_POFF_TABLE);
+ if(r < 0) goto fail_close;
+ at = c->root_param;
+ param_size = HKVS_POFF_TABLE;
+ c->root_param_size = param_size;
+ memset(at, 0, HKVS_POFF_TABLE);
+ hkvs_write_u64(at + HKVS_PARAM_HEADER_MAGIC, HKVS_MAGIC);
+ created = true;
+ } else if(param_size < HKVS_PSIZE_HEADER || hkvs_read_u64(at + HKVS_PARAM_HEADER_MAGIC) != HKVS_MAGIC) {
+ r = -EBADMSG;
+ goto fail_close;
+ }
+
+ uint64_t version = hkvs_read_u64(at + HKVS_PARAM_HEADER_VERSION);
+ if(version != 0) {
+ r = -ENOTSUP;
+ goto fail_close;
+ }
+
+ uint64_t n_tables_u64 = hkvs_read_u64(at + 16);
+ at += HKVS_PSIZE_HEADER;
+
+ if(param_size < HKVS_POFF_TABLE || n_tables_u64 > (param_size - HKVS_POFF_TABLE) / HKVS_PSIZE_TABLE) {
+ r = -EBADMSG;
+ goto fail_close;
+ }
+ size_t n_tables = (size_t)n_tables_u64;
+
+ for(size_t i = 0; i < HKVS_N_SLABS; i++) {
+ r = hkvs_slab_init(&c->slabs[i], io, at, hkvs_slab_sizes[i]);
+ at += HKVS_PSIZE_SLAB;
+ if(r < 0) goto fail_slabs;
+ }
+
+ hkvs_table **tables = calloc(n_tables, sizeof(hkvs_table*));
+ c->tables = tables;
+ c->n_tables = n_tables;
+ c->alloc_tables = n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ r = hkvs_table_new(&tables[i], io, at);
+ at += HKVS_PSIZE_TABLE;
+ if(r < 0) goto fail_tables;
+ }
+
+ c->extfiles = NULL;
+ c->n_extfiles = 0;
+ c->alloc_extfiles = 0;
+ c->extfile_gen = 0;
+
+ *pc = c;
+ return created;
+
+fail_tables:
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(t) hkvs_table_free(t, io);
+ }
+ free(tables);
+
+fail_slabs:;
+ size_t n_slabs = (size_t)(at - (char*)c->root_param) / HKVS_PSIZE_SLAB;
+ if(n_slabs > HKVS_N_SLABS) n_slabs = HKVS_N_SLABS;
+ while(n_slabs--) {
+ hkvs_slab_destroy(&c->slabs[n_slabs], io);
+ }
+
+fail_close:
+ io->close(io, 0, c->root_param, c->root_param_size);
+fail_free:
+ free(c);
+ io->destroy(io);
+ return r;
+}
+
+int hkvs_resize_param(hkvs *c) {
+ if(c->n_tables > (SIZE_MAX - HKVS_POFF_TABLE) / HKVS_PSIZE_TABLE) return -EFBIG;
+
+ size_t param_size = HKVS_POFF_TABLE + c->n_tables * HKVS_PSIZE_TABLE;
+ int r = c->io->resize(c->io, 0, &c->root_param, c->root_param_size, param_size);
+ if(r < 0) return r;
+ c->root_param_size = param_size;
+
+ char *at = c->root_param;
+ if(hkvs_read_u64(at + HKVS_PARAM_HEADER_MAGIC) != HKVS_MAGIC || hkvs_read_u64(at + HKVS_PARAM_HEADER_VERSION) != 0) {
+ hkvs_die("magic corrupted");
+ }
+ hkvs_write_u64(at + 16, c->n_tables);
+ at += HKVS_PSIZE_HEADER;
+
+ for(size_t i = 0; i < HKVS_N_SLABS; i++) {
+ c->slabs[i].param = at;
+ at += HKVS_PSIZE_SLAB;
+ }
+
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(t) t->slab.param = at;
+ at += HKVS_PSIZE_TABLE;
+ }
+
+ return 0;
+}
+
+void hkvs_free(hkvs *c) {
+ hkvs_io *io = c->io;
+
+ hkvs_extfile *ext = c->extfiles;
+ size_t n_ext = c->n_extfiles;
+ for(size_t i = 0; i < n_ext; i++) {
+ io->close(io, ext[i].fi, ext[i].addr, ext[i].size);
+ }
+ free(ext);
+
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(t) hkvs_table_free(t, io);
+ }
+ free(tables);
+
+ for(size_t i = 0; i < HKVS_N_SLABS; i++) {
+ hkvs_slab_destroy(&c->slabs[i], io);
+ }
+
+ io->close(io, 0, c->root_param, c->root_param_size);
+ free(c);
+ io->destroy(io);
+ return;
+}
+
+uint8_t hkvs_slab_for_size(size_t data_size) {
+ // TODO: faster
+ for(uint8_t i = 0; i < HKVS_N_SLABS; i++) {
+ if(data_size <= hkvs_slab_sizes[i]) return i;
+ }
+ return 255;
+}
diff --git a/lib/die.c b/lib/die.c
new file mode 100644
index 0000000..f581951
--- /dev/null
+++ b/lib/die.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#undef hkvs_die
+void hkvs_die(const char *msg) {
+ hkvs_die_ext(NULL, NULL, 0, msg);
+}
+
+void hkvs_die_ext(const char *func, const char *file, int line, const char *msg) {
+ (void)file;
+ if(func) {
+ fprintf(stderr, "Internal error: [%s:%d] %s\n", func, line, msg);
+ } else {
+ fprintf(stderr, "Internal error: %s\n", msg);
+ }
+ abort();
+}
diff --git a/lib/extfile.c b/lib/extfile.c
new file mode 100644
index 0000000..fdd94eb
--- /dev/null
+++ b/lib/extfile.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+#define MAX_GEN ((uint32_t)2147483648u)
+
+static bool hkvs_extfile_gc(hkvs *c) {
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+
+ uint32_t gen = c->extfile_gen;
+ uint32_t max = 0;
+ for(size_t i = 0; i < n; i++) {
+ uint32_t diff = gen - ext[i].gen;
+ if(diff > max) max = diff;
+ }
+ if(max == 0) return false;
+
+ if(max >= MAX_GEN) max = MAX_GEN;
+
+ hkvs_io *io = c->io;
+ size_t i = 0;
+ for(size_t j = 0; j < n; j++) {
+ uint32_t diff = gen - ext[j].gen;
+ if(diff >= max) {
+ io->close(io, ext[j].fi, ext[j].addr, ext[j].size);
+ } else {
+ ext[i++] = ext[j];
+ }
+ }
+
+ if(n > 64 && i < n / 2) {
+ ext = realloc(ext, i * sizeof(hkvs_extfile));
+ if(ext) {
+ c->extfiles = ext;
+ c->alloc_extfiles = i;
+ }
+ }
+
+ c->n_extfiles = i;
+ return true;
+}
+
+static int hkvs_extfile_reserve_slow(hkvs *c) {
+ if(c->n_extfiles > 16) {
+ if(hkvs_extfile_gc(c)) return 0;
+ }
+
+ size_t n = c->n_extfiles;
+ hkvs_extfile *ext = c->extfiles;
+ n += (n >> 2) + 1;
+ ext = realloc(ext, n * sizeof(hkvs_extfile));
+ if(!ext) return -ENOMEM;
+ c->extfiles = ext;
+ c->alloc_extfiles = n;
+ return 0;
+}
+
+HKVS_INLINE int hkvs_extfile_reserve(hkvs *c) {
+ if(c->n_extfiles != c->alloc_extfiles) return 0;
+ return hkvs_extfile_reserve_slow(c);
+}
+
+int hkvs_extfile_create(hkvs *c, uint64_t *pfi, char **paddr, size_t size, bool pin) {
+ int r = hkvs_extfile_reserve(c);
+ if(r < 0) return r;
+
+ uint64_t fi;
+ char *addr;
+ r = c->io->create(c->io, &fi, &addr, size);
+ if(r < 0) return r;
+ if(fi == 0) hkvs_die("created file 0");
+
+ c->extfiles[c->n_extfiles++] = (hkvs_extfile) {
+ .fi = fi,
+ .addr = addr,
+ .size = size,
+ .gen = pin ? c->extfile_gen : c->extfile_gen - 1,
+ };
+
+ *pfi = fi;
+ *paddr = addr;
+
+ return 0;
+}
+
+HKVS_MUSTCHECK bool hkvs_extfile_find(hkvs *c, uint64_t fi, size_t *pslot) {
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+ for(size_t i = 0; i < n; i++) {
+ if(ext[i].fi == fi) {
+ *pslot = i;
+ return true;
+ }
+ }
+ return false;
+}
+
+HKVS_MUSTCHECK int hkvs_extfile_open(hkvs *c, uint64_t fi, size_t *pslot, size_t want_size) {
+ if(fi == 0) return -EBADMSG;
+
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+ for(size_t i = 0; i < n; i++) {
+ if(ext[i].fi == fi) {
+ if(ext[i].size < want_size) return -EBADMSG;
+ *pslot = i;
+ return 0;
+ }
+ }
+
+ int r = hkvs_extfile_reserve(c);
+ if(r < 0) return 0;
+
+ char *addr;
+ size_t size;
+ r = c->io->open(c->io, fi, &addr, &size);
+ if(r < 0) return r;
+
+ size_t slot = c->n_extfiles++;
+ c->extfiles[slot] = (hkvs_extfile) {
+ .fi = fi,
+ .addr = addr,
+ .size = size,
+ .gen = c->extfile_gen - 1,
+ };
+
+ if(size < want_size) return -EBADMSG;
+
+ *pslot = slot;
+ return 0;
+}
+
+void hkvs_extfile_close(hkvs *c, size_t slot) {
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+ hkvs_assume(slot < n);
+ n--;
+ c->n_extfiles = n;
+
+ c->io->close(c->io, ext[slot].fi, ext[slot].addr, ext[slot].size);
+ if(slot != n) ext[slot] = ext[n];
+}
+
+void hkvs_put_records(hkvs *c) {
+ c->extfile_gen++;
+ if(c->extfile_gen % MAX_GEN == 0) {
+ hkvs_extfile_gc(c);
+ }
+}
diff --git a/lib/hash.c b/lib/hash.c
new file mode 100644
index 0000000..6a969a8
--- /dev/null
+++ b/lib/hash.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+#define rotl(v, x) (((v) << (x)) | ((v) >> (64 - (x))))
+
+#define ROUND do { \
+ v0 += v1; \
+ v2 += v3; \
+ v1 = rotl(v1, 13); \
+ v3 = rotl(v3, 16); \
+ v1 ^= v0; \
+ v3 ^= v2; \
+ v0 = rotl(v0, 32); \
+ v2 += v1; \
+ v0 += v3; \
+ v1 = rotl(v1, 17); \
+ v3 = rotl(v3, 21); \
+ v1 ^= v2; \
+ v3 ^= v0; \
+ v2 = rotl(v2, 32); \
+} while(0)
+
+uint64_t hkvs_hash(const char *hk, const char *data, size_t data_size) {
+ uint64_t hk0 = hkvs_read_u64(hk + 0);
+ uint64_t hk1 = hkvs_read_u64(hk + 8);
+ uint64_t v0 = (uint64_t)0x736f6d6570736575 ^ hk0;
+ uint64_t v1 = (uint64_t)0x646f72616e646f6d ^ hk1;
+ uint64_t v2 = (uint64_t)0x6c7967656e657261 ^ hk0;
+ uint64_t v3 = (uint64_t)0x7465646279746573 ^ hk1;
+ uint64_t last = (uint64_t)data_size << 56;
+ while(data_size >= 8) {
+ uint64_t m = hkvs_read_u64(data);
+ v3 ^= m;
+ ROUND;
+ v0 ^= m;
+ data += 8;
+ data_size -= 8;
+ }
+
+ switch(data_size) {
+ case 7:
+ last |= (uint64_t)(uint8_t)data[6] << 48;
+ HKVS_FALLTHROUGH;
+ case 6:
+ last |= (uint64_t)(uint8_t)data[5] << 40;
+ HKVS_FALLTHROUGH;
+ case 5:
+ last |= (uint64_t)(uint8_t)data[4] << 32;
+ HKVS_FALLTHROUGH;
+ case 4:
+ last |= (uint64_t)(uint8_t)data[3] << 24;
+ HKVS_FALLTHROUGH;
+ case 3:
+ last |= (uint64_t)(uint8_t)data[2] << 16;
+ HKVS_FALLTHROUGH;
+ case 2:
+ last |= (uint64_t)(uint8_t)data[1] << 8;
+ HKVS_FALLTHROUGH;
+ case 1:
+ last |= (uint64_t)(uint8_t)data[0];
+ HKVS_FALLTHROUGH;
+ case 0:
+ break;
+ default:
+ __builtin_unreachable();
+ }
+
+ v3 ^= last;
+ ROUND;
+ v0 ^= last;
+
+ v2 ^= 0xff;
+ ROUND;
+ ROUND;
+ ROUND;
+ return v0 ^ v1 ^ v2 ^ v3;
+}
diff --git a/lib/impl.h b/lib/impl.h
new file mode 100644
index 0000000..47a05ca
--- /dev/null
+++ b/lib/impl.h
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#pragma once
+#include "internal.h"
+
+#ifdef FUZZING
+#define HKVS_SLAB_BLOCK ((size_t)16)
+#else
+#define HKVS_SLAB_BLOCK ((size_t)4096)
+#endif
+
+#define HKVS_REWRITE_THRESHOLD ((size_t)8192)
+
+#define HKVS_SLAB_FREEPTRS ((size_t)8)
+
+#define HKVS_N_SLABS ((uint8_t)40)
+
+#define HKVS_MARK_MAX_SMALL ((uint8_t)249)
+#define HKVS_MARK_EXT ((uint8_t)255)
+#define HKVS_MARK_SLAB ((uint8_t)254)
+
+#define HKVS_IX_ENTRY ((size_t)16)
+#define HKVS_IX_KEY_SIZE ((size_t)16)
+#define HKVS_IX_MAX_DISP ((size_t)6)
+#define HKVS_IX_SIZE(bits) ((uint64_t)HKVS_IX_ENTRY << (bits))
+
+#define HKVS_PSIZE_HEADER ((size_t)64)
+#define HKVS_PARAM_HEADER_MAGIC ((size_t)0)
+#define HKVS_PARAM_HEADER_VERSION ((size_t)8)
+/*
+ * header {
+ * magic: u64,
+ * version: u64,
+ * app_id: u64,
+ * app_ver: u64,
+ * pad: [u64; 4],
+ * }
+ */
+
+#define HKVS_POFF_SLAB HKVS_PSIZE_HEADER
+#define HKVS_PSIZE_SLAB ((size_t)32)
+#define HKVS_PARAM_SLAB_FI ((size_t)0)
+#define HKVS_PARAM_SLAB_ALLOC ((size_t)8)
+#define HKVS_PARAM_SLAB_FREE ((size_t)16)
+/*
+ * slab {
+ * fi: u64,
+ * alloc_count: u64,
+ * free: u64,
+ * pad: u64,
+ * }
+ */
+
+#define HKVS_POFF_TABLE ((size_t)(HKVS_POFF_SLAB + HKVS_N_SLABS * HKVS_PSIZE_SLAB))
+#define HKVS_PSIZE_TABLE ((size_t)64)
+#define HKVS_PARAM_TABLE_ELTSZ ((size_t)32)
+#define HKVS_PARAM_TABLE_KEYSZ ((size_t)34)
+#define HKVS_PARAM_TABLE_IXBIT ((size_t)35)
+#define HKVS_PARAM_TABLE_ID ((size_t)36)
+#define HKVS_PARAM_TABLE_IXFI ((size_t)40)
+#define HKVS_PARAM_TABLE_IXKEY ((size_t)48)
+/* table {
+ * slab;
+ * elt_size: u16,
+ * key_size: u8,
+ * ix_bits: u8,
+ * id: u32,
+ * ix_fi: u64,
+ * ix_key: [u64; 2],
+ * }
+ */
+
+typedef struct hkvs_slab hkvs_slab;
+struct hkvs_slab {
+ char *param;
+ char *addr;
+ size_t map_size;
+ size_t elt_size;
+};
+
+typedef struct hkvs_table hkvs_table;
+struct hkvs_table {
+ hkvs_slab slab;
+ char *ix;
+ size_t ix_size;
+};
+
+typedef struct hkvs_extfile hkvs_extfile;
+struct hkvs_extfile {
+ uint64_t fi;
+ char *addr;
+ size_t size;
+ uint32_t gen;
+};
+
+struct hkvs {
+ hkvs_io *io;
+
+ char *root_param;
+ size_t root_param_size;
+
+ hkvs_table **tables;
+ size_t n_tables;
+ size_t alloc_tables;
+
+ hkvs_extfile *extfiles;
+ size_t n_extfiles;
+ size_t alloc_extfiles;
+ uint32_t extfile_gen;
+
+ hkvs_slab slabs[HKVS_N_SLABS];
+};
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_extfile_create(hkvs *c, uint64_t *pfi, char **paddr, size_t size, bool pin);
+
+HKVS_INTERNAL HKVS_MUSTCHECK bool hkvs_extfile_find(hkvs *c, uint64_t fi, size_t *pslot);
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_extfile_open(hkvs *c, uint64_t fi, size_t *pslot, size_t want_size);
+
+HKVS_INTERNAL void hkvs_extfile_close(hkvs *c, size_t slot);
+
+HKVS_INLINE void hkvs_extfile_pin(hkvs *c, hkvs_extfile *f) {
+ f->gen = c->extfile_gen;
+}
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_resize_param(hkvs *c);
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_slab_init(hkvs_slab *s, hkvs_io *io, char *param, size_t elt_size);
+
+HKVS_INLINE void hkvs_slab_destroy(hkvs_slab *s, hkvs_io *io) {
+ uint64_t fi = hkvs_read_u64(s->param);
+ if(fi) {
+ io->close(io, fi, s->addr, s->map_size);
+ }
+}
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_slab_alloc(hkvs *c, hkvs_slab *s, size_t *pi);
+
+HKVS_INTERNAL void hkvs_slab_discard(hkvs_slab *s, size_t id);
+
+HKVS_INLINE HKVS_PURE size_t hkvs_slab_size(hkvs_slab *s) {
+ uint64_t r = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_ALLOC);
+ hkvs_assume(r <= SIZE_MAX / 2);
+ return (size_t)r;
+}
+
+HKVS_INLINE char *hkvs_slab_mark_at(hkvs_slab *s, size_t i) {
+ hkvs_assume(i <= SIZE_MAX / 2);
+ hkvs_assume(i < hkvs_slab_size(s));
+ size_t j = i & ~(HKVS_SLAB_BLOCK - 1);
+ return s->addr + j * s->elt_size + i;
+}
+
+HKVS_INLINE char *hkvs_slab_at(hkvs_slab *s, size_t i) {
+ hkvs_assume(i <= SIZE_MAX / 2);
+ hkvs_assume(i < hkvs_slab_size(s));
+ size_t j = i & ~(HKVS_SLAB_BLOCK - 1);
+ return s->addr + i * s->elt_size + j + HKVS_SLAB_BLOCK;
+}
+
+HKVS_INLINE uint8_t hkvs_slab_mark(hkvs_slab *s, size_t i) {
+ return hkvs_read_u8(hkvs_slab_mark_at(s, i));
+}
+
+HKVS_INLINE void hkvs_slab_set_mark(hkvs_slab *s, size_t i, uint8_t v) {
+ hkvs_write_u8(hkvs_slab_mark_at(s, i), v);
+}
+
+HKVS_INTERNAL HKVS_CONST uint8_t hkvs_slab_for_size(size_t data_size);
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_table_new(hkvs_table **t, hkvs_io *io, char *param);
+
+HKVS_INLINE HKVS_PURE uint32_t hkvs_table__id(hkvs_table *t) {
+ return hkvs_read_u32(t->slab.param + HKVS_PARAM_TABLE_ID);
+}
+
+HKVS_INLINE HKVS_PURE uint8_t hkvs_table__key_size(hkvs_table *t) {
+ return hkvs_read_u8(t->slab.param + HKVS_PARAM_TABLE_KEYSZ);
+}
+
+HKVS_INLINE uint8_t hkvs_table__inl_size(hkvs_table *t) {
+ size_t r = t->slab.elt_size;
+ size_t key_size = hkvs_table__key_size(t);
+ hkvs_assume(key_size <= r);
+ r -= key_size;
+ if(r > 248) r = 248;
+ return (uint8_t)r;
+}
+
+HKVS_INLINE HKVS_PURE uint8_t hkvs_table__ix_bits(hkvs_table *t) {
+ return hkvs_read_u8(t->slab.param + HKVS_PARAM_TABLE_IXBIT);
+}
+
+HKVS_INLINE HKVS_PURE const char *hkvs_table__ix_key(hkvs_table *t) {
+ return t->slab.param + HKVS_PARAM_TABLE_IXKEY;
+}
+
+HKVS_INTERNAL int hkvs_record__value(hkvs *c, hkvs_table *t, size_t id, char *key, size_t key_size, int notfound, char **pvalue, size_t *psize);
+
+HKVS_INLINE void hkvs_table_free(hkvs_table *t, hkvs_io *io) {
+ hkvs_slab_destroy(&t->slab, io);
+ uint64_t ix_fi = hkvs_read_u64(t->slab.param + HKVS_PARAM_TABLE_IXFI);
+ io->close(io, ix_fi, t->ix, t->ix_size);
+ free(t);
+}
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_ix_build(hkvs_table *t, hkvs_io *io);
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_ix_add(hkvs *c, hkvs_table *t, size_t id);
+HKVS_INTERNAL void hkvs_ix_remove(hkvs_table *t, size_t id);
diff --git a/lib/internal.h b/lib/internal.h
new file mode 100644
index 0000000..3ad3ac5
--- /dev/null
+++ b/lib/internal.h
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#pragma once
+
+#define HKVS_API extern __attribute__((__visibility__("protected"), __nothrow__))
+#define HKVS_INTERNAL extern __attribute__((__visibility__("hidden"), __nothrow__))
+
+#define HKVS_PURE __attribute__((__pure__))
+
+#define HKVS_CONST __attribute__((__const__))
+
+#define HKVS_NORETURN __attribute__((__noreturn__, __cold__))
+
+#define HKVS_FALLTHROUGH __attribute__((__fallthrough__))
+
+#include <hkvs.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+HKVS_INLINE void hkvs_assume(bool cond) {
+ if(!cond) {
+#ifdef HKVS_FUZZING
+ hkvs_die("assumption failed");
+#else
+ __builtin_unreachable();
+#endif
+ }
+}
+
+HKVS_INLINE uint8_t hkvs_read_u8(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return *p;
+}
+
+HKVS_INLINE void hkvs_write_u8(char *pv, uint8_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = v;
+}
+HKVS_INLINE uint16_t hkvs_read_u16(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return (uint16_t)(
+ (uint16_t)p[0] |
+ (uint16_t)p[1] << 8
+ );
+}
+
+HKVS_INLINE void hkvs_write_u16(char *pv, uint16_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = (uint8_t)(v >> 0);
+ p[1] = (uint8_t)(v >> 8);
+}
+
+HKVS_INLINE uint32_t hkvs_read_u32(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return (uint32_t)(
+ (uint32_t)p[0] |
+ (uint32_t)p[1] << 8 |
+ (uint32_t)p[2] << 16 |
+ (uint32_t)p[3] << 24
+ );
+}
+
+HKVS_INLINE void hkvs_write_u32(char *pv, uint32_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = (uint8_t)(v >> 0);
+ p[1] = (uint8_t)(v >> 8);
+ p[2] = (uint8_t)(v >> 16);
+ p[3] = (uint8_t)(v >> 24);
+}
+
+HKVS_INLINE uint64_t hkvs_read_u64(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return (uint64_t)(
+ (uint64_t)p[0] |
+ (uint64_t)p[1] << 8 |
+ (uint64_t)p[2] << 16 |
+ (uint64_t)p[3] << 24 |
+ (uint64_t)p[4] << 32 |
+ (uint64_t)p[5] << 40 |
+ (uint64_t)p[6] << 48 |
+ (uint64_t)p[7] << 56
+ );
+}
+
+HKVS_INLINE void hkvs_write_u64(char *pv, uint64_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = (uint8_t)(v >> 0);
+ p[1] = (uint8_t)(v >> 8);
+ p[2] = (uint8_t)(v >> 16);
+ p[3] = (uint8_t)(v >> 24);
+ p[4] = (uint8_t)(v >> 32);
+ p[5] = (uint8_t)(v >> 40);
+ p[6] = (uint8_t)(v >> 48);
+ p[7] = (uint8_t)(v >> 56);
+}
+
+HKVS_INTERNAL HKVS_NORETURN void hkvs_die(const char *msg);
+
+HKVS_INTERNAL HKVS_NORETURN void hkvs_die_ext(const char *func, const char *file, int line, const char *msg);
+
+#define hkvs_die(msg) hkvs_die_ext(__func__, __FILE__, __LINE__, msg)
+
+HKVS_INTERNAL HKVS_PURE uint64_t hkvs_hash(const char *hk, const char *data, size_t data_size);
diff --git a/lib/io_dirfd.c b/lib/io_dirfd.c
new file mode 100644
index 0000000..252658c
--- /dev/null
+++ b/lib/io_dirfd.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#define _GNU_SOURCE
+#include <hkvs.h>
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/random.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+typedef struct hkvs_dirfd hkvs_dirfd;
+struct hkvs_dirfd {
+ hkvs_io base;
+ int fd;
+ uint64_t create_fi;
+};
+
+static const char *const base32 = "ABCDEFGHIJKLMNOPQRSTUVWXTZ234567";
+
+HKVS_INLINE bool hkvs_dirfd_check_size(size_t size) {
+ return (size_t)(off64_t)size == size && (off64_t)size >= 0;
+}
+
+static void hkvs_dirfd_name(char *name, uint64_t fi) {
+ name[0] = 'd';
+ name[1] = 'a';
+ name[2] = 't';
+ name[3] = 'a';
+ name[4] = '.';
+ name[18] = 0;
+ {
+ uint64_t val = fi;
+ for(size_t i = 0; i < 13; i++) {
+ name[17 - i] = base32[val % 32];
+ val /= 32;
+ }
+ }
+}
+
+static void hkvs_dirfd_destroy(hkvs_io *iov) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+ (void) close(io->fd);
+ free(io);
+}
+
+static int hkvs_dirfd_random(hkvs_io *iov, char *buf, size_t n) {
+ (void)iov;
+ while(n) {
+ ssize_t r = getrandom(buf, n, 0);
+ if(r < 0) return -errno;
+ size_t k = (size_t)r;
+ buf += k;
+ n -= k;
+ }
+ return 0;
+}
+
+static int hkvs_dirfd_open(hkvs_io *iov, uint64_t fi, char **pmem, size_t *psize) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+
+ int r;
+ if(fi == 0) {
+ r = openat(io->fd, "main", O_RDWR | O_CREAT | O_CLOEXEC, 0666);
+ } else {
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ r = openat(io->fd, name, O_RDWR | O_CLOEXEC, 0666);
+ }
+ if(r < 0) return -errno;
+ int fd = r;
+
+ off64_t fsize = lseek64(fd, 0, SEEK_END);
+ size_t size = (size_t)fsize;
+ if((off64_t)size != fsize) {
+ r = -EFBIG;
+ goto end;
+ }
+
+ void *addr;
+ if(size == 0) {
+ addr = NULL;
+ } else {
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if(addr == MAP_FAILED) {
+ r = -errno;
+ goto end;
+ }
+ }
+
+ *psize = size;
+ *pmem = addr;
+ r = 0;
+
+end:
+ close(fd);
+ return r;
+}
+
+static int hkvs_dirfd_create(hkvs_io *iov, uint64_t *pfi, char **pmem, size_t size) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+ uint64_t fi = io->create_fi;
+ if(fi == 0) abort();
+
+ if(!hkvs_dirfd_check_size(size)) return -EFBIG;
+
+ int fd;
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ while(true) {
+ int r = openat(io->fd, name, O_RDWR | O_CREAT | O_EXCL | O_CLOEXEC, 0666);
+ if(r >= 0) {
+ fd = r;
+ break;
+ }
+ r = errno;
+ if(r != EEXIST) return -r;
+ fi++;
+ uint64_t val = fi;
+ char *p = name + 17;
+ while(true) {
+ *p = base32[val % 32];
+ if(val % 32 != 0) break;
+ val /= 32;
+ p--;
+ }
+ }
+
+ io->create_fi = fi + 1;
+
+ int r = ftruncate64(fd, (off64_t)size);
+ if(r < 0) {
+ r = -errno;
+ goto fail;
+ return r;
+ }
+
+ void *addr;
+ if(size == 0) {
+ addr = NULL;
+ } else {
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if(addr == MAP_FAILED) {
+ r = -errno;
+ goto fail;
+ }
+ }
+
+ *pfi = fi;
+ *pmem = addr;
+ r = 0;
+
+end:
+ close(fd);
+ return r;
+
+fail:
+ (void) unlinkat(io->fd, name, 0);
+ goto end;
+}
+
+static void hkvs_dirfd_close(hkvs_io *iov, uint64_t fi, char *pmem, size_t size) {
+ (void)iov;
+ (void)fi;
+
+ if(size == 0) return;
+
+ int r = munmap(pmem, size);
+ if(r < 0) {
+ r = errno;
+ // TODO
+ (void) r;
+ abort();
+ }
+}
+
+static int hkvs_dirfd_resize(hkvs_io *iov, uint64_t fi, char **pmem, size_t old_size, size_t new_size) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+
+ if(!hkvs_dirfd_check_size(new_size)) return -EFBIG;
+
+ int r;
+ if(fi == 0) {
+ r = openat(io->fd, "main", O_RDWR | O_CREAT | O_CLOEXEC, 0666);
+ } else {
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ r = openat(io->fd, name, O_RDWR | O_CLOEXEC, 0666);
+ }
+ if(r < 0) return -errno;
+ int fd = r;
+
+ void *addr = *pmem;
+ if(new_size > old_size) {
+ r = ftruncate64(fd, (off64_t)new_size);
+ if(r < 0) goto fail;
+
+ if(addr) {
+ addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
+ } else {
+ addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ }
+ if(addr == MAP_FAILED) {
+ r = -errno;
+ (void) ftruncate64(fd, (off64_t)old_size);
+ goto end;
+ }
+ *pmem = addr;
+ } else if(new_size > old_size) {
+ if(new_size == 0) {
+ munmap(addr, old_size);
+ addr = NULL;
+ } else {
+ addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
+ if(addr == MAP_FAILED) goto fail;
+ }
+
+ r = ftruncate64(fd, (off64_t)old_size);
+ if(r < 0) {
+ // TODO
+ (void) r;
+ }
+
+ *pmem = addr;
+ }
+
+ r = 0;
+
+end:
+ close(fd);
+ return r;
+
+fail:
+ r = -errno;
+ goto end;
+}
+
+static void hkvs_dirfd_unlink(hkvs_io *iov, uint64_t fi) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+
+ if(fi == 0) abort();
+
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ int r = unlinkat(io->fd, name, 0);
+ if(r < 0) {
+ r = errno;
+ // TODO
+ (void) r;
+ } else {
+ if(fi < io->create_fi) {
+ io->create_fi = fi;
+ }
+ }
+}
+
+hkvs_io *hkvs_io_new_unsafe_dirfd(int fd) {
+ hkvs_dirfd *io = malloc(sizeof(*io));
+ if(!io) return NULL;
+ io->base = (hkvs_io){
+ .destroy = &hkvs_dirfd_destroy,
+ .random = &hkvs_dirfd_random,
+ .open = &hkvs_dirfd_open,
+ .create = &hkvs_dirfd_create,
+ .resize = &hkvs_dirfd_resize,
+ .close = &hkvs_dirfd_close,
+ .unlink = &hkvs_dirfd_unlink,
+ };
+ io->fd = fd;
+ io->create_fi = 1;
+ return &io->base;
+}
diff --git a/lib/io_mem.c b/lib/io_mem.c
new file mode 100644
index 0000000..4f27c02
--- /dev/null
+++ b/lib/io_mem.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "internal.h"
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+typedef struct hkvs_iomem_file hkvs_iomem_file;
+struct hkvs_iomem_file {
+ uintptr_t data_mode;
+ size_t size;
+};
+
+typedef struct hkvs_iomem hkvs_iomem;
+struct hkvs_iomem {
+ hkvs_io base;
+ uint64_t rand_ctr;
+ char rand_key[16];
+ size_t n_files;
+ struct hkvs_iomem_file *files;
+};
+
+static void hkvs_iomem_destroy(hkvs_io *iov) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+ size_t n = io->n_files;
+ for(size_t i = 0; i < n; i++) {
+ struct hkvs_iomem_file *f = io->files + i;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ free(p);
+ }
+ free(io->files);
+ free(io);
+}
+
+static int hkvs_iomem_random(hkvs_io *iov, char *buf, size_t n) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+ while(true) {
+ uint64_t h = hkvs_hash(io->rand_key, (const char*)&io->rand_ctr, sizeof(io->rand_ctr));
+ io->rand_ctr++;
+ if(n > 8) {
+ memcpy(buf, &h, 8);
+ buf += 8;
+ n -= 8;
+ } else {
+ memcpy(buf, &h, n);
+ return 0;
+ }
+ }
+}
+
+static int hkvs_iomem_open(hkvs_io *iov, uint64_t fi, char **pmem, size_t *psz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi >= io->n_files) return -ENOENT;
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode == 0) {
+ if(fi != 0) return -ENOENT;
+ } else if(mode != 1) {
+ hkvs_die("bad file state");
+ }
+
+ f->data_mode = (uintptr_t)p + 2;
+ *pmem = p;
+ *psz = f->size;
+ return 0;
+}
+
+static int hkvs_iomem_create(hkvs_io *iov, uint64_t *pfi, char **pmem, size_t sz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ hkvs_iomem_file *f = io->files;
+ size_t n = io->n_files;
+ for(size_t i = 1; i < n; i++) {
+ if(!f[i].data_mode) {
+ n = i;
+ f += n;
+ goto done;
+ }
+ }
+
+ f = realloc(f, (n + 1) * sizeof(hkvs_iomem_file));
+ if(!f) return -ENOMEM;
+ io->files = f;
+ io->n_files = n + 1;
+ f += n;
+ f->data_mode = 0;
+
+done:;
+ char *p = malloc(sz);
+ if(!p) return -ENOMEM;
+ memset(p, 0, sz);
+
+ f->data_mode = (uintptr_t)p + 2;
+ f->size = sz;
+ *pfi = n;
+ *pmem = p;
+ return 0;
+}
+
+static int hkvs_iomem_resize(hkvs_io *iov, uint64_t fi, char **pmem, size_t old_sz, size_t new_sz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi > io->n_files) hkvs_die("bad file index");
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode != 2) hkvs_die("bad file state");
+ if(*pmem != p) hkvs_die("bad file mem");
+ if(old_sz != f->size) hkvs_die("bad file old_size");
+
+ p = realloc(p, new_sz);
+ if(new_sz && !p) return -ENOMEM;
+
+ f->data_mode = (uintptr_t)p + 2;
+ f->size = new_sz;
+ *pmem = p;
+ return 0;
+}
+
+static void hkvs_iomem_close(hkvs_io *iov, uint64_t fi, char *mem, size_t sz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi > io->n_files) hkvs_die("bad file index");
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode != 2) hkvs_die("bad file state");
+ if(mem != p) hkvs_die("bad file mem");
+ if(sz != f->size) hkvs_die("bad file size");
+
+ f->data_mode = (uintptr_t)p + 1;
+}
+
+static void hkvs_iomem_unlink(hkvs_io *iov, uint64_t fi) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi > io->n_files) hkvs_die("bad file index");
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode != 1) hkvs_die("bad file state");
+
+ free(p);
+ f->data_mode = 0;
+ f->size = 0;
+}
+
+hkvs_io *hkvs_io_new_mem(uint64_t rand_seed) {
+ hkvs_iomem_file *files = malloc(sizeof(hkvs_iomem_file));
+ if(!files) return NULL;
+ files[0].data_mode = 0;
+ files[0].size = 0;
+
+ hkvs_iomem *io = malloc(sizeof(*io));
+ if(!io) {
+ free(files);
+ return NULL;
+ }
+ io->base = (hkvs_io){
+ .destroy = &hkvs_iomem_destroy,
+ .random = &hkvs_iomem_random,
+ .open = &hkvs_iomem_open,
+ .create = &hkvs_iomem_create,
+ .close = &hkvs_iomem_close,
+ .resize = &hkvs_iomem_resize,
+ .unlink = &hkvs_iomem_unlink,
+ };
+ hkvs_write_u64(io->rand_key, rand_seed);
+ hkvs_write_u64(io->rand_key + 8, rand_seed);
+ io->rand_ctr = 0;
+ io->n_files = 1;
+ io->files = files;
+ return &io->base;
+}
diff --git a/lib/iter.c b/lib/iter.c
new file mode 100644
index 0000000..ff27a68
--- /dev/null
+++ b/lib/iter.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+void hkvs_iter_open(hkvs *c, hkvs_table *t, hkvs_iter *it) {
+ (void)c;
+
+ it->priv1 = 0;
+ it->priv2 = hkvs_slab_size(&t->slab);
+}
+
+void hkvs_iter_seek(hkvs *c, hkvs_table *t, hkvs_iter *it, hkvs_rid id) {
+ (void)c;
+
+ size_t n = hkvs_slab_size(&t->slab);
+ it->priv1 = id.id >= n ? SIZE_MAX : (size_t)id.id;
+ it->priv2 = n;
+}
+
+hkvs_rid hkvs_iter_next(hkvs *c, hkvs_table *t, hkvs_iter *it, const char **pkey) {
+ (void)c;
+
+ size_t id = it->priv1;
+ size_t n = it->priv2;
+ if(id >= n) return HKVS_INVALID_RID;
+ while(true) {
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark != 0) {
+ it->priv1 = id + 1;
+ if(pkey) *pkey = hkvs_slab_at(&t->slab, id);
+ return (hkvs_rid){id};
+ }
+ id++;
+ if(id == n) {
+ it->priv1 = SIZE_MAX;
+ return HKVS_INVALID_RID;
+ }
+ }
+}
+
+void hkvs_iter_delete(hkvs *c, hkvs_table *t, hkvs_iter *it) {
+ hkvs_record_delete(c, t, (hkvs_rid){it->priv1 - 1});
+}
diff --git a/lib/ix.c b/lib/ix.c
new file mode 100644
index 0000000..0a6af19
--- /dev/null
+++ b/lib/ix.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+static bool hkvs_ix_add_fast(char *ix, size_t id, const char *key, size_t key_size, uint8_t bits, const char *hk);
+
+static int hkvs_ix_build_try(hkvs_table *t, hkvs_io *io, uint8_t ix_bits) {
+ if(ix_bits > 59) return -EFBIG;
+ uint64_t ix_size_64 = HKVS_IX_SIZE(ix_bits);
+ if(ix_size_64 > SIZE_MAX) return -EFBIG;
+ size_t ix_size = (size_t)ix_size_64;
+
+ char ix_key[HKVS_IX_KEY_SIZE];
+
+ int r = io->random(io, ix_key, HKVS_IX_KEY_SIZE);
+ if(r < 0) return r;
+
+ char *ix;
+ uint64_t fi;
+ r = io->create(io, &fi, &ix, ix_size);
+ if(r < 0) return r;
+
+ size_t key_size = hkvs_read_u8(t->slab.param + HKVS_PARAM_TABLE_KEYSZ);
+ size_t n = hkvs_slab_size(&t->slab);
+ for(size_t i = 0; i < n; i++) {
+ if(hkvs_slab_mark(&t->slab, i) == 0) continue;
+ const char *key = hkvs_slab_at(&t->slab, i);
+ if(!hkvs_ix_add_fast(ix, i, key, key_size, ix_bits, ix_key)) {
+ io->close(io, fi, ix, ix_size);
+ io->unlink(io, fi);
+ return 0;
+ }
+ }
+
+ uint64_t old_fi = hkvs_read_u64(t->slab.param + HKVS_PARAM_TABLE_IXFI);
+ if(old_fi) {
+ if(t->ix) {
+ io->close(io, old_fi, t->ix, t->ix_size);
+ }
+ io->unlink(io, old_fi);
+ }
+
+ t->ix = ix;
+ t->ix_size = ix_size;
+
+ hkvs_write_u8(t->slab.param + HKVS_PARAM_TABLE_IXBIT, ix_bits);
+ hkvs_write_u64(t->slab.param + HKVS_PARAM_TABLE_IXFI, fi);
+ memcpy(t->slab.param + HKVS_PARAM_TABLE_IXKEY, ix_key, HKVS_IX_KEY_SIZE);
+
+ return 1;
+}
+
+int hkvs_ix_build(hkvs_table *t, hkvs_io *io) {
+ size_t count = 0;
+ {
+ size_t n = hkvs_slab_size(&t->slab);
+ for(size_t i = 0; i < n; i++) {
+ if(hkvs_slab_mark(&t->slab, i)) count++;
+ }
+ }
+
+ uint8_t bits = 4;
+ if(count > 0) {
+ size_t load = count - 1;
+ while(load >= 13) {
+ load /= 2;
+ bits++;
+ }
+ }
+
+ size_t attempts = 0;
+ while(true) {
+ int r = hkvs_ix_build_try(t, io, bits);
+ if(r < 0) return r;
+ if(r) return 0;
+
+ attempts++;
+ if(attempts == 8 || attempts == 64) {
+ bits++;
+ }
+ }
+}
+
+int hkvs_ix_add(hkvs *c, hkvs_table *t, size_t id) {
+ const char *key = hkvs_slab_at(&t->slab, id);
+
+ char *ix = t->ix;
+ uint8_t bits = hkvs_table__ix_bits(t);
+ size_t key_size = hkvs_table__key_size(t);
+ const char *hk = hkvs_table__ix_key(t);
+
+ if(hkvs_ix_add_fast(ix, id, key, key_size, bits, hk)) return 0;
+
+ return hkvs_ix_build(t, c->io);
+}
+
+hkvs_rid hkvs_record_find(hkvs *c, hkvs_table *t, const char *key) {
+ (void)c;
+
+ const char *hk = hkvs_table__ix_key(t);
+ size_t key_size = hkvs_table__key_size(t);
+ uint64_t hv = hkvs_hash(hk, key, key_size);
+
+ uint8_t bits = hkvs_table__ix_bits(t);
+ size_t entries = hkvs_slab_size(&t->slab);
+ char *ix = t->ix;
+
+ size_t size = (size_t)1 << bits;
+ size_t mask = size - 1;
+
+ for(size_t disp = 0; disp < HKVS_IX_MAX_DISP; disp++) {
+ size_t at = ((size_t)hv + disp) & mask;
+ char *ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v = hkvs_read_u64(ent);
+ if(v == 0) return HKVS_INVALID_RID;
+ if(v > entries) goto fail;
+ uint64_t h = hkvs_read_u64(ent + 8);
+ if(h != hv) {
+ if(((at - h) & mask) < disp) break;
+ continue;
+ }
+
+ size_t id = (size_t)(v - 1);
+ char *found = hkvs_slab_at(&t->slab, id);
+ if(!memcmp(key, found, key_size)) {
+ return (hkvs_rid){id};
+ }
+ }
+
+fail:
+ return HKVS_INVALID_RID;
+}
+
+void hkvs_ix_remove(hkvs_table *t, size_t id) {
+ const char *key = hkvs_slab_at(&t->slab, id);
+
+ uint8_t bits = hkvs_table__ix_bits(t);
+ const char *hk = hkvs_table__ix_key(t);
+ size_t key_size = hkvs_table__key_size(t);
+ char *ix = t->ix;
+
+ uint64_t hv = hkvs_hash(hk, key, key_size);
+ size_t size = (size_t)1 << bits;
+ size_t mask = size - 1;
+
+ uint64_t vv = (uint64_t)id + 1;
+
+ char *ent;
+ for(size_t disp = 0; disp < HKVS_IX_MAX_DISP; disp++) {
+ size_t at = ((size_t)hv + disp) & mask;
+ ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v = hkvs_read_u64(ent);
+ if(v == vv) {
+ if(hkvs_read_u64(ent + 8) != hv) goto corrupted;
+ while(true) {
+ size_t next = (at + 1) & mask;
+ char *e2 = ix + next * HKVS_IX_ENTRY;
+ v = hkvs_read_u64(e2);
+ if(v == 0) break;
+ uint64_t h = hkvs_read_u64(e2 + 8);
+ if((h & mask) == next) break;
+ hkvs_write_u64(ent, v);
+ hkvs_write_u64(ent + 8, h);
+ at = next;
+ ent = e2;
+ }
+ hkvs_write_u64(ent, 0);
+ hkvs_write_u64(ent + 8, 0);
+ return;
+ }
+ if(v == 0) goto corrupted;
+ }
+
+corrupted:
+ hkvs_die("database corrupted");
+}
+
+bool hkvs_ix_add_fast(char *ix, size_t id, const char *key, size_t key_size, uint8_t bits, const char *hk) {
+ uint64_t h_new = hkvs_hash(hk, key, key_size);
+ size_t size = (size_t)1 << bits;
+ size_t mask = size - 1;
+
+ uint64_t v_new = (uint64_t)id + 1;
+
+ for(size_t disp = 0; disp < HKVS_IX_MAX_DISP; disp++) {
+ size_t at = ((size_t)h_new + disp) & mask;
+ char *ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v_disp = hkvs_read_u64(ent);
+ if(v_disp == 0) {
+ hkvs_write_u64(ent, v_new);
+ hkvs_write_u64(ent + 8, h_new);
+ return true;
+ }
+ uint64_t h_disp = hkvs_read_u64(ent + 8);
+ if(((at - h_disp) & mask) < disp) {
+ hkvs_write_u64(ent, v_new);
+ hkvs_write_u64(ent + 8, h_new);
+ while(true) {
+ at = (at + 1) & mask;
+ ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v2 = hkvs_read_u64(ent);
+ uint64_t h2 = hkvs_read_u64(ent + 8);
+ hkvs_write_u64(ent, v_disp);
+ hkvs_write_u64(ent + 8, h_disp);
+ if(v2 == 0) return true;
+ v_disp = v2;
+ h_disp = h2;
+ if(((at - h_disp) & mask) >= HKVS_IX_MAX_DISP - 1) return false;
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
diff --git a/lib/record.c b/lib/record.c
new file mode 100644
index 0000000..c9c36d6
--- /dev/null
+++ b/lib/record.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+const char *hkvs_record_key(hkvs *c, hkvs_table *t, hkvs_rid rid) {
+ (void)c;
+
+ if(rid.id > hkvs_slab_size(&t->slab)) return NULL;
+ size_t id = (size_t)rid.id;
+ if(hkvs_slab_mark(&t->slab, id) == 0) return NULL;
+ return hkvs_slab_at(&t->slab, id);
+}
+
+int hkvs_record_value(hkvs *c, hkvs_table *t, hkvs_rid rid, char **pvalue, size_t *psize) {
+ if(rid.id > hkvs_slab_size(&t->slab)) return 0;
+ size_t id = (size_t)rid.id;
+ size_t key_size = hkvs_table__key_size(t);
+ char *key = hkvs_slab_at(&t->slab, id);
+ return hkvs_record__value(c, t, id, key, key_size, 0, pvalue, psize);
+}
+
+int hkvs_record__value(hkvs *c, hkvs_table *t, size_t id, char *key, size_t key_size, int notfound, char **pvalue, size_t *psize) {
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark == 0) return notfound;
+
+ char *p = key + key_size;
+ size_t size;
+
+ if(mark <= HKVS_MARK_MAX_SMALL) {
+ size = mark - 1;
+ if(!pvalue) goto ret_novalue;
+ goto ret_value;
+ }
+
+ uint64_t size_64 = hkvs_read_u64(p);
+ if(size_64 > SIZE_MAX) return -EFBIG;
+ size = (size_t)size_64;
+
+ if(!pvalue) goto ret_novalue;
+
+ uint64_t val = hkvs_read_u64(p + 8);
+ switch(mark) {
+ case HKVS_MARK_EXT:
+ {
+ size_t slot;
+ int r = hkvs_extfile_open(c, val, &slot, size);
+ if(r < 0) return r;
+
+ hkvs_extfile *f = &c->extfiles[slot];
+ hkvs_extfile_pin(c, f);
+ p = f->addr;
+ break;
+ }
+
+ case HKVS_MARK_SLAB:
+ {
+ uint8_t slab = val & 255;
+ val >>= 8;
+
+ if(slab >= HKVS_N_SLABS) return -EBADMSG;
+ hkvs_slab *s = &c->slabs[slab];
+ if(size > s->elt_size) return -EBADMSG;
+
+ if(val > hkvs_slab_size(s)) return -EBADMSG;
+ size_t sid = (size_t)val;
+
+ p = hkvs_slab_at(s, sid);
+ break;
+ }
+
+ default:
+ return -EBADMSG;
+ }
+
+ret_value:
+ *pvalue = p;
+ret_novalue:
+ if(psize) *psize = size;
+ return 1;
+}
+
+int hkvs_record_resize(hkvs *c, hkvs_table *t, hkvs_rid rid, char **pvalue, size_t new_size, size_t keep_size) {
+ hkvs_put_records(c);
+
+ if(rid.id > hkvs_slab_size(&t->slab)) goto wtf;
+ size_t id = (size_t)rid.id;
+ size_t key_size = hkvs_table__key_size(t);
+ uint8_t inl_size = hkvs_table__inl_size(t);
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark == 0) goto wtf;
+ char *key = hkvs_slab_at(&t->slab, id);
+ char *p = key + key_size;
+
+ uint8_t new_slab = hkvs_slab_for_size(new_size);
+
+ uint64_t old_fi = 0;
+ size_t old_fi_slot;
+
+ hkvs_slab *old_s = NULL;
+ size_t old_sid;
+
+ char *old_value;
+ size_t old_size;
+
+ if(mark < HKVS_MARK_MAX_SMALL) {
+ if(new_size <= inl_size) {
+ hkvs_slab_set_mark(&t->slab, id, (uint8_t)(new_size + 1));
+ if(pvalue) *pvalue = p;
+ return 0;
+ }
+ old_value = p;
+ old_size = mark - 1;
+ } else {
+ uint64_t val = hkvs_read_u64(p);
+ if(val > SIZE_MAX) return -EFBIG;
+ old_size = (size_t)val;
+ val = hkvs_read_u64(p + 8);
+
+ switch(mark) {
+ case HKVS_MARK_EXT:
+ {
+ old_fi = val;
+
+ int r = hkvs_extfile_open(c, old_fi, &old_fi_slot, old_size);
+ if(r < 0) return r;
+ hkvs_extfile *f = &c->extfiles[old_fi_slot];
+ old_value = f->addr;
+
+ if(new_slab >= HKVS_N_SLABS && keep_size <= HKVS_REWRITE_THRESHOLD) {
+ r = c->io->resize(c->io, old_fi, &f->addr, f->size, new_size);
+ if(r < 0) return r;
+ f->size = new_size;
+ hkvs_write_u64(p, new_size);
+ if(pvalue) {
+ hkvs_extfile_pin(c, f);
+ *pvalue = f->addr;
+ }
+ return 0;
+ }
+
+ break;
+ }
+
+ case HKVS_MARK_SLAB:
+ {
+ uint8_t slab = val & 255;
+ val >>= 8;
+
+ if(slab >= HKVS_N_SLABS) return -EBADMSG;
+ old_s = &c->slabs[slab];
+ if(old_size > old_s->elt_size) return -EBADMSG;
+
+ if(val > hkvs_slab_size(old_s)) return -EBADMSG;
+ old_sid = (size_t)val;
+
+ old_value = hkvs_slab_at(old_s, old_sid);
+
+ if(slab == new_slab) {
+ hkvs_write_u64(p, new_size);
+ if(pvalue) *pvalue = old_value;
+ return 0;
+ }
+ break;
+ }
+
+ default:
+ return -EBADMSG;
+ }
+ }
+
+ if(old_size < keep_size) keep_size = old_size;
+ if(new_size < keep_size) keep_size = new_size;
+
+ char *new_value;
+
+ if(new_size <= inl_size) {
+ hkvs_slab_set_mark(&t->slab, id, (uint8_t)(new_size + 1));
+ new_value = p;
+ } else {
+ new_slab = hkvs_slab_for_size(new_size);
+ hkvs_write_u64(p, new_size);
+ if(new_slab < HKVS_N_SLABS) {
+ hkvs_slab *s = &c->slabs[new_slab];
+ size_t sid;
+ int r = hkvs_slab_alloc(c, s, &sid);
+ if(r < 0) return r;
+ hkvs_slab_set_mark(s, sid, 1);
+ new_value = hkvs_slab_at(s, sid);
+ hkvs_write_u64(p + 8, ((uint64_t)sid << 8) | new_slab);
+ hkvs_slab_set_mark(&t->slab, id, HKVS_MARK_SLAB);
+ } else {
+ char *addr;
+ uint64_t new_fi;
+ int r = hkvs_extfile_create(c, &new_fi, &addr, new_size, !!pvalue);
+ if(r < 0) return r;
+ new_value = addr;
+ hkvs_write_u64(p + 8, new_fi);
+ hkvs_slab_set_mark(&t->slab, id, HKVS_MARK_EXT);
+ }
+ }
+
+ memcpy(new_value, old_value, keep_size);
+ if(pvalue) *pvalue = new_value;
+
+ if(old_fi) {
+ hkvs_extfile_close(c, old_fi_slot);
+ c->io->unlink(c->io, old_fi);
+ }
+
+ if(old_s) {
+ hkvs_slab_discard(old_s, old_sid);
+ }
+
+ return 0;
+
+wtf:
+ hkvs_die("record does not exist");
+}
+
+void hkvs_record_delete(hkvs *c, hkvs_table *t, hkvs_rid rid) {
+ if(rid.id > hkvs_slab_size(&t->slab)) goto wtf;
+ size_t id = (size_t)rid.id;
+ size_t key_size = hkvs_table__key_size(t);
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark == 0) goto wtf;
+ char *key = hkvs_slab_at(&t->slab, id);
+ char *p = key + key_size;
+
+ uint64_t unlink_fi = 0;
+ hkvs_slab *s = NULL;
+ size_t sid;
+
+ switch(mark) {
+ case HKVS_MARK_SLAB:
+ {
+ uint64_t val = hkvs_read_u64(p + 8);
+ uint8_t slab = val & 255;
+ val >>= 8;
+
+ if(slab >= HKVS_N_SLABS) goto corrupted;
+ s = &c->slabs[slab];
+
+ if(val >= hkvs_slab_size(s)) goto corrupted;
+ sid = (size_t)val;
+ break;
+ }
+
+ case HKVS_MARK_EXT:
+ {
+ uint64_t fi = hkvs_read_u64(p + 8);
+ if(fi == 0) goto corrupted;
+
+ size_t slot;
+ if(hkvs_extfile_find(c, fi, &slot)) {
+ hkvs_extfile_close(c, slot);
+ }
+
+ unlink_fi = fi;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ hkvs_ix_remove(t, id);
+ hkvs_slab_discard(&t->slab, id);
+
+ if(unlink_fi) {
+ c->io->unlink(c->io, unlink_fi);
+ }
+
+ if(s) {
+ hkvs_slab_discard(s, sid);
+ }
+
+ return;
+
+corrupted:
+ hkvs_die("database corrupted");
+
+wtf:
+ hkvs_die("record does not exist");
+}
+
+int hkvs_record_append(hkvs *c, hkvs_table *t, hkvs_rid *prid, const char *key, char **pvalue, size_t size) {
+ size_t key_size = hkvs_table__key_size(t);
+ uint8_t inl_size = hkvs_table__inl_size(t);
+
+ size_t id;
+ int r = hkvs_slab_alloc(c, &t->slab, &id);
+ if(r < 0) return r;
+
+ char *r_key = hkvs_slab_at(&t->slab, id);
+ memcpy(r_key, key, key_size);
+
+ char *p = r_key + key_size;
+ char *mark = hkvs_slab_mark_at(&t->slab, id);
+
+ hkvs_slab *s = NULL;
+ size_t sid;
+ char *value;
+
+ uint64_t ext_fi = 0;
+
+ if(size <= inl_size) {
+ hkvs_write_u8(mark, (uint8_t)(size + 1));
+ value = p;
+ } else {
+ uint8_t slab = hkvs_slab_for_size(size);
+ hkvs_write_u64(p, size);
+ if(slab < HKVS_N_SLABS) {
+ s = &c->slabs[slab];
+ r = hkvs_slab_alloc(c, s, &sid);
+ if(r < 0) return r;
+ value = hkvs_slab_at(s, sid);
+ hkvs_slab_set_mark(s, sid, 1);
+
+ hkvs_write_u64(p + 8, (uint64_t)sid << 8 | slab);
+ hkvs_write_u8(mark, HKVS_MARK_SLAB);
+ } else {
+ char *addr;
+ r = hkvs_extfile_create(c, &ext_fi, &addr, size, !!pvalue);
+ value = addr;
+
+ hkvs_write_u64(p + 8, ext_fi);
+ hkvs_write_u8(mark, HKVS_MARK_EXT);
+ }
+ }
+
+ r = hkvs_ix_add(c, t, id);
+ if(r >= 0) {
+ if(prid) *prid = (hkvs_rid){id};
+ if(pvalue) *pvalue = value;
+ return 1;
+ }
+
+ hkvs_write_u8(mark, 0);
+
+ if(s) {
+ hkvs_slab_discard(s, sid);
+ }
+
+ if(ext_fi) {
+ size_t slot = --c->n_extfiles;
+ hkvs_io *io = c->io;
+ io->close(io, ext_fi, c->extfiles[slot].addr, c->extfiles[slot].size);
+ io->unlink(io, ext_fi);
+ }
+
+ return r;
+}
diff --git a/lib/slab.c b/lib/slab.c
new file mode 100644
index 0000000..2d2d8e1
--- /dev/null
+++ b/lib/slab.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+#include <string.h>
+#include <stdlib.h>
+
+static size_t hkvs_slab_size_to_count(size_t elt_size, size_t size) {
+ size_t block_size = (elt_size + 1) * HKVS_SLAB_BLOCK;
+
+ size_t blocks = size / block_size;
+ size %= block_size;
+ size_t rest = 0;
+ if(size > HKVS_SLAB_BLOCK) {
+ rest = (size - HKVS_SLAB_BLOCK) / elt_size;
+ }
+
+ return blocks * HKVS_SLAB_BLOCK + rest;
+}
+
+static size_t hkvs_slab_count_to_size(size_t elt_size, size_t count) {
+ size_t size;
+ if(__builtin_mul_overflow(count, elt_size + 1, &size)) return SIZE_MAX;
+ if(__builtin_add_overflow(size, (-count) % HKVS_SLAB_BLOCK, &size)) return SIZE_MAX;
+ return size;
+}
+
+int hkvs_slab_init(hkvs_slab *s, hkvs_io *io, char *param, size_t elt_size) {
+ s->param = param;
+ s->elt_size = elt_size;
+
+ uint64_t fi = hkvs_read_u64(param + HKVS_PARAM_SLAB_FI);
+ if(fi == 0) {
+ s->addr = NULL;
+ s->map_size = 0;
+ return 0;
+ }
+
+ int r = io->open(io, fi, &s->addr, &s->map_size);
+ if(r < 0) return r;
+
+ uint64_t alloc_count = hkvs_read_u64(param + HKVS_PARAM_SLAB_ALLOC);
+ if(alloc_count > hkvs_slab_size_to_count(elt_size, s->map_size)) {
+ io->close(io, fi, s->addr, s->map_size);
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int hkvs_slab_alloc(hkvs *c, hkvs_slab *s, size_t *pi) {
+ size_t alloc_count = hkvs_slab_size(s);
+
+ uint64_t at_64 = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_FREE);
+ if(at_64 < alloc_count) {
+ size_t at = (size_t)at_64;
+
+ char *p = hkvs_slab_at(s, at);
+
+ for(size_t i = 1; i < HKVS_SLAB_FREEPTRS; i++) {
+ uint64_t v = hkvs_read_u64(p + i * 8);
+ if(v <= at) return -EBADMSG;
+ if(v < alloc_count) {
+ size_t id = (size_t)v;
+ hkvs_write_u64(p + i * 8, UINT64_MAX);
+ *pi = id;
+ return 0;
+ }
+ }
+
+ at_64 = hkvs_read_u64(p);
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FREE, at_64);
+
+ *pi = at;
+ return 0;
+ }
+
+ if(at_64 != UINT64_MAX) {
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FREE, UINT64_MAX);
+ }
+
+ size_t cap_count = hkvs_slab_size_to_count(s->elt_size, s->map_size);
+ hkvs_assume(alloc_count <= cap_count);
+ if(alloc_count == cap_count) {
+ size_t grow_count = cap_count + (cap_count >> 3) + 1;
+ size_t grow_size;
+ while(true) {
+ grow_size = hkvs_slab_count_to_size(s->elt_size, grow_count);
+ if(grow_size != 0) break;
+ size_t dec = (grow_count - cap_count) / 2;
+ if(dec == 0) return -EFBIG;
+ grow_count -= dec;
+ }
+
+ hkvs_io *io = c->io;
+ uint64_t fi = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_FI);
+ if(fi == 0) {
+ int r = io->create(io, &fi, &s->addr, grow_size);
+ if(r < 0) return r;
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FI, fi);
+ } else {
+ int r = io->resize(io, fi, &s->addr, s->map_size, grow_size);
+ if(r < 0) return r;
+ }
+ s->map_size = grow_size;
+ }
+
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_ALLOC, alloc_count + 1);
+ *pi = alloc_count;
+ return 0;
+}
+
+void hkvs_slab_discard(hkvs_slab *s, size_t id) {
+ size_t alloc_count = hkvs_slab_size(s);
+ hkvs_assume(id < alloc_count);
+ if(id == alloc_count - 1) {
+ do {
+ alloc_count--;
+ } while(alloc_count > 0 && hkvs_slab_mark(s, alloc_count - 1) == 0);
+
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_ALLOC, alloc_count);
+ return;
+ }
+
+ hkvs_slab_set_mark(s, id, 0);
+
+ uint64_t free_64 = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_FREE);
+ if(id < free_64) {
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FREE, id);
+ char *p = hkvs_slab_at(s, id);
+ hkvs_write_u64(p, free_64);
+ for(size_t i = 1; i < HKVS_SLAB_FREEPTRS; i++) {
+ hkvs_write_u64(p + i * 8, UINT64_MAX);
+ }
+ return;
+ }
+
+ char *p = hkvs_slab_at(s, (size_t)free_64);
+ uint64_t v = hkvs_read_u64(p + 8);
+
+ if(v >= alloc_count) {
+ size_t i = 2;
+ while(true) {
+ v = hkvs_read_u64(p + i * 8);
+ if(v < alloc_count) break;
+ i++;
+ if(i == HKVS_SLAB_FREEPTRS) {
+ hkvs_write_u64(p + 8, id);
+ return;
+ }
+ }
+ while(v < id) {
+ hkvs_write_u64(p + (i - 1) * 8, v);
+ i++;
+ if(i == HKVS_SLAB_FREEPTRS) break;
+ v = hkvs_read_u64(p + i * 8);
+ }
+ hkvs_write_u64(p + (i - 1) * 8, id);
+ return;
+ }
+
+ if(hkvs_read_u64(p + (HKVS_SLAB_FREEPTRS - 1) * 8) >= alloc_count) {
+ size_t i = 1;
+ while(v < id) {
+ i++;
+ v = hkvs_read_u64(p + i * 8);
+ }
+ uint64_t v2 = id;
+ while(true) {
+ hkvs_write_u64(p + i * 8, v2);
+ if(v >= alloc_count) break;
+ v2 = v;
+ i++;
+ v = hkvs_read_u64(p + i * 8);
+ }
+ return;
+ }
+
+ if(v < id) {
+ uint64_t v2 = id;
+ id = (size_t)v;
+ size_t i = 2;
+ do {
+ v = hkvs_read_u64(p + i * 8);
+ if(v >= v2) break;
+ hkvs_write_u64(p + (i - 1) * 8, v);
+ i++;
+ } while(i < HKVS_SLAB_FREEPTRS);
+ hkvs_write_u64(p + (i - 1) * 8, v2);
+ }
+
+ char *p2 = hkvs_slab_at(s, id);
+ memcpy(p2, p, HKVS_SLAB_FREEPTRS * 8);
+
+ hkvs_write_u64(p, id);
+ for(size_t i = 1; i < HKVS_SLAB_FREEPTRS; i++) {
+ hkvs_write_u64(p + i * 8, UINT64_MAX);
+ }
+ return;
+}
diff --git a/lib/table.c b/lib/table.c
new file mode 100644
index 0000000..331a865
--- /dev/null
+++ b/lib/table.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+int hkvs_table_new(hkvs_table **pt, hkvs_io *io, char *param) {
+ size_t elt_size = hkvs_read_u16(param + HKVS_PARAM_TABLE_ELTSZ);
+ if(!elt_size) {
+ *pt = NULL;
+ return 0;
+ }
+
+ size_t key_size = hkvs_read_u8(param + HKVS_PARAM_TABLE_KEYSZ);
+ if(key_size > elt_size) return -EBADMSG;
+
+ hkvs_table *t = malloc(sizeof(hkvs_table));
+ if(!t) return -ENOMEM;
+
+ int r = hkvs_slab_init(&t->slab, io, param, elt_size);
+ if(r < 0) goto fail_free;
+
+ uint64_t ix_fi = hkvs_read_u64(param + HKVS_PARAM_TABLE_IXFI);
+ if(ix_fi != 0) {
+ uint8_t ix_bits = hkvs_read_u8(param + HKVS_PARAM_TABLE_IXBIT);
+ if(ix_bits < 4 || ix_bits > 59) goto ix_unlink;
+
+ r = io->open(io, ix_fi, &t->ix, &t->ix_size);
+ if(r < 0) {
+ if(r == -ENOENT) goto ix_dropped;
+ return r;
+ }
+ if(t->ix_size < HKVS_IX_SIZE(ix_bits)) {
+ goto ix_build;
+ }
+ }
+
+ret:
+ *pt = t;
+ return 1;
+
+ix_unlink:
+ io->unlink(io, ix_fi);
+
+ix_dropped:
+ hkvs_write_u64(param + HKVS_PARAM_TABLE_IXFI, 0);
+
+ix_build:
+ r = hkvs_ix_build(t, io);
+ if(r < 0) goto fail_close;
+
+ goto ret;
+
+fail_close:
+ hkvs_slab_destroy(&t->slab, io);
+
+fail_free:
+ free(t);
+ return r;
+}
+
+hkvs_table *hkvs_table_at(hkvs *c, size_t i) {
+ size_t n_tables = c->n_tables;
+ if(i >= n_tables) return NULL;
+
+ hkvs_table **tables = c->tables;
+ for(size_t j = 0; j < n_tables; j++) {
+ hkvs_table *t = tables[j];
+ if(!t) continue;
+ if(!i--) return t;
+ }
+ return NULL;
+}
+
+uint32_t hkvs_table_id(hkvs *c, hkvs_table *t) {
+ (void)c;
+ return hkvs_table__id(t);
+}
+
+size_t hkvs_table_key_size(hkvs *c, hkvs_table *t) {
+ (void)c;
+ return hkvs_table__key_size(t);
+}
+
+int hkvs_table_delete(hkvs *c, uint32_t id) {
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(!t) continue;
+ if(hkvs_table_id(c, t) == id) {
+ tables[i] = NULL;
+
+ char *param = t->slab.param;
+ hkvs_io *io = c->io;
+
+ hkvs_table_free(t, io);
+
+ uint64_t fi = hkvs_read_u64(param + HKVS_PARAM_SLAB_FI);
+ uint64_t ix_fi = hkvs_read_u64(param + HKVS_PARAM_TABLE_IXFI);
+ memset(param, 0, HKVS_PSIZE_TABLE);
+
+ assert(fi && ix_fi);
+
+ io->unlink(io, ix_fi);
+ io->unlink(io, fi);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+hkvs_table *hkvs_table_open(hkvs *c, uint32_t id, size_t key_size) {
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(!t) continue;
+ if(hkvs_table__id(t) == id) {
+ if(hkvs_table__key_size(t) != key_size) return NULL;
+ return t;
+ }
+ }
+ return NULL;
+}
+
+HKVS_INLINE size_t hkvs_elt_size(size_t key_size, size_t val_size_hint) {
+ size_t elt_size;
+
+ if(val_size_hint >= 16 && val_size_hint <= 248) {
+ elt_size = key_size + val_size_hint;
+ elt_size += (-elt_size) % 64;
+ if(elt_size - key_size <= 248) return elt_size;
+ }
+
+ elt_size = key_size + 16;
+ elt_size += (-elt_size) % 64;
+ return elt_size;
+}
+
+
+int hkvs_table_create(hkvs *c, hkvs_table **pt, uint32_t id, size_t key_size_z, size_t val_size_hint) {
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(!t) continue;
+ if(hkvs_table__id(t) == id) {
+ if(hkvs_table__key_size(t) != key_size_z) return -EEXIST;
+ *pt = t;
+ return 0;
+ }
+ }
+
+ if(key_size_z > 255) return -EINVAL;
+ uint8_t key_size = (uint8_t)key_size_z;
+ size_t elt_size = hkvs_elt_size(key_size, val_size_hint);
+
+ size_t slot;
+ for(slot = 0; slot < n_tables; slot++) {
+ if(!tables[slot]) break;
+ }
+ if(slot == n_tables) {
+ if(c->alloc_tables == n_tables) {
+ size_t new_alloc = c->alloc_tables;
+ new_alloc += (new_alloc >> 2) + 1;
+ tables = realloc(tables, new_alloc * sizeof(hkvs_table*));
+ if(!tables) return -ENOMEM;
+ c->tables = tables;
+ c->alloc_tables = new_alloc;
+ }
+
+ c->tables[n_tables++] = NULL;
+ c->n_tables = n_tables;
+ int r = hkvs_resize_param(c);
+ if(r < 0) {
+ c->n_tables = --n_tables;
+ return r;
+ }
+ }
+
+ char *param = c->root_param + HKVS_POFF_TABLE + slot * HKVS_PSIZE_TABLE;
+ memset(param, 0, HKVS_PSIZE_TABLE);
+
+ hkvs_table *t = malloc(sizeof(hkvs_table));
+ if(!t) return -ENOMEM;
+
+ hkvs_write_u16(param + HKVS_PARAM_TABLE_ELTSZ, (uint16_t)elt_size);
+ hkvs_write_u8(param + HKVS_PARAM_TABLE_KEYSZ, key_size);
+ hkvs_write_u32(param + HKVS_PARAM_TABLE_ID, id);
+
+ hkvs_io *io = c->io;
+
+ int r = hkvs_slab_init(&t->slab, io, param, elt_size);
+ if(r < 0) goto fail_free;
+
+ r = hkvs_ix_build(t, io);
+ if(r < 0) goto fail_close;
+
+ c->tables[slot] = t;
+
+ *pt = t;
+ return 1;
+
+fail_close:
+ hkvs_slab_destroy(&t->slab, io);
+
+ uint64_t fi = hkvs_read_u64(param + HKVS_PARAM_SLAB_FI);
+ io->unlink(io, fi);
+
+fail_free:
+ free(t);
+ return r;
+}
diff --git a/lib/util.c b/lib/util.c
new file mode 100644
index 0000000..560671c
--- /dev/null
+++ b/lib/util.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+int hkvs_record_set(hkvs *c, hkvs_table *t, hkvs_rid rid, const char *value, size_t size) {
+ char *ptr;
+ int r = hkvs_record_resize(c, t, rid, &ptr, size, 0);
+ if(r < 0) return r;
+ memcpy(ptr, value, size);
+ hkvs_put_records(c);
+ return 0;
+}
+
+int hkvs_record_update(hkvs *c, hkvs_table *t, hkvs_rid *prid, const char *key, const char *value, size_t size) {
+ hkvs_rid rid = hkvs_record_find(c, t, key);
+ if(hkvs_rid_ok(rid)) {
+ int r = hkvs_record_set(c, t, rid, value, size);
+ if(r < 0) return 0;
+ *prid = rid;
+ return 0;
+ } else {
+ char *ptr;
+ int r = hkvs_record_append(c, t, &rid, key, &ptr, size);
+ if(r < 0) return r;
+ memcpy(ptr, value, size);
+ hkvs_put_records(c);
+ *prid = rid;
+ return 1;
+ }
+}
diff --git a/meson.build b/meson.build
new file mode 100644
index 0000000..8c64ffe
--- /dev/null
+++ b/meson.build
@@ -0,0 +1,138 @@
+project(
+ 'hkvs', 'c',
+ default_options: [
+ 'warning_level=3',
+ 'c_std=gnu18',
+ ],
+)
+
+cc = meson.get_compiler('c')
+
+fuzz = get_option('fuzz')
+
+if fuzz == 'none'
+ fuzz_deps = []
+elif fuzz == 'libFuzzer'
+ fuzz_deps = [declare_dependency(
+ link_args: ['-fsanitize=fuzzer'],
+ )]
+ add_project_arguments(['-fsanitize=fuzzer-no-link', '-DFUZZING'], language: 'c')
+ add_project_link_arguments(['-fsanitize=fuzzer-no-link'], language: 'c')
+endif
+
+warning_flags = [
+ '-Werror=conversion',
+ '-Werror=sign-conversion',
+ '-Werror=float-conversion',
+ '-Werror=old-style-definition',
+ '-Werror=strict-prototypes',
+ '-Werror=missing-prototypes',
+ '-Werror=implicit-function-declaration',
+ '-Werror=missing-declarations',
+ '-Werror=return-type',
+ '-Werror=pointer-arith',
+ '-Werror=vla',
+ '-Werror=incompatible-pointer-types',
+ '-Werror=int-conversion',
+ '-Werror=int-conversion',
+ '-Werror=format=2',
+ '-Werror=overflow',
+ '-Werror=shift-count-overflow',
+ '-Werror=init-self',
+ '-Werror=shadow',
+ '-Werror=redundant-decls',
+ '-Werror=missing-field-initializers',
+ '-Werror=write-strings',
+ '-Werror=date-time',
+ '-Werror=nested-externs',
+ '-Werror=endif-labels',
+ '-Werror=undef',
+ '-Wmissing-include-dirs',
+ '-Wfloat-equal',
+ '-Winline',
+ '-Wmissing-noreturn',
+]
+
+if cc.get_id() == 'gcc'
+ warning_flags += [
+ '-Werror=implicit-fallthrough=5',
+ '-Werror=strict-aliasing=3',
+ '-Werror=old-style-declaration',
+ '-Werror=shift-overflow=2',
+ '-Wlogical-op',
+ '-Wsuggest-attribute=noreturn',
+ '-Wsuggest-attribute=const',
+ '-Wsuggest-attribute=pure',
+ '-Wsuggest-attribute=malloc',
+ '-Wsuggest-attribute=format',
+ '-Wmissing-format-attribute',
+ '-Wsuggest-attribute=cold',
+ ]
+elif cc.get_id() == 'clang'
+ warning_flags += [
+ '-Werror=missing-variable-declarations',
+ '-Werror=implicit-fallthrough',
+ '-Werror=shift-overflow',
+ ]
+else
+ warning('Unknown C compiler')
+endif
+
+add_project_arguments(warning_flags, language : 'c')
+
+hkvs_include = include_directories('include')
+
+hkvs_lib = library(
+ 'hkvs',
+ [
+ 'lib/die.c',
+ 'lib/db.c',
+ 'lib/table.c',
+ 'lib/slab.c',
+ 'lib/extfile.c',
+ 'lib/hash.c',
+ 'lib/ix.c',
+ 'lib/record.c',
+ 'lib/util.c',
+ 'lib/iter.c',
+ 'lib/io_mem.c',
+ 'lib/io_dirfd.c',
+ ],
+ implicit_include_directories: false,
+ include_directories: hkvs_include,
+)
+
+hkvs = declare_dependency(
+ include_directories: hkvs_include,
+ link_with: hkvs_lib
+)
+
+examples = [
+ 'write', 'print',
+]
+
+tests_fuzz = [
+ 'simple_ops',
+]
+
+foreach i: examples
+ executable(
+ 'example-' + i,
+ 'examples/' + i + '.c',
+ implicit_include_directories: false,
+ dependencies: [hkvs],
+ install: false,
+ )
+endforeach
+
+if fuzz != 'none'
+ foreach i: tests_fuzz
+ executable(
+ 'fuzz-' + i,
+ 'fuzz/' + i + '.c',
+ implicit_include_directories: false,
+ dependencies: [hkvs] + fuzz_deps,
+ install: false,
+ )
+ endforeach
+endif
diff --git a/meson_options.txt b/meson_options.txt
new file mode 100644
index 0000000..d49a8d3
--- /dev/null
+++ b/meson_options.txt
@@ -0,0 +1 @@
+option('fuzz', type : 'combo', choices: ['none', 'libFuzzer'])