aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/db.c180
-rw-r--r--lib/die.c21
-rw-r--r--lib/extfile.c150
-rw-r--r--lib/hash.c78
-rw-r--r--lib/impl.h209
-rw-r--r--lib/internal.h106
-rw-r--r--lib/io_dirfd.c276
-rw-r--r--lib/io_mem.c181
-rw-r--r--lib/iter.c43
-rw-r--r--lib/ix.c215
-rw-r--r--lib/record.c352
-rw-r--r--lib/slab.c199
-rw-r--r--lib/table.c212
-rw-r--r--lib/util.c30
14 files changed, 2252 insertions, 0 deletions
diff --git a/lib/db.c b/lib/db.c
new file mode 100644
index 0000000..81cce47
--- /dev/null
+++ b/lib/db.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+#ifdef FUZZING
+static const size_t hkvs_slab_sizes[HKVS_N_SLABS] = {
+ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24,
+ 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48,
+ 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72,
+ 74, 76, 78, 80,
+};
+#else
+static const size_t hkvs_slab_sizes[HKVS_N_SLABS] = {
+ (2 << 6), (3 << 6), (4 << 6), (5 << 6), (6 << 6), (7 << 6),
+ (8 << 6), (9 << 6), (10 << 6), (11 << 6), (12 << 6), (14 << 6),
+ (8 << 7), (9 << 7), (10 << 7), (11 << 7), (12 << 7), (14 << 7),
+ (8 << 8), (9 << 8), (10 << 8), (11 << 8), (12 << 8), (14 << 8),
+ (8 << 9), (9 << 9), (10 << 9), (11 << 9), (12 << 9), (14 << 9),
+ (8 << 10), (9 << 10), (10 << 10), (11 << 10), (12 << 10), (14 << 10),
+ (8 << 11), (9 << 11), (10 << 11), (11 << 11),
+};
+#endif
+
+int hkvs_new(hkvs **pc, hkvs_io *io) {
+ int r;
+ bool created = false;
+
+ hkvs *c = malloc(sizeof(*c));
+ if(!c) return -ENOMEM;
+
+ c->io = io;
+
+ r = io->open(io, 0, &c->root_param, &c->root_param_size);
+ if(r < 0) {
+ goto fail_free;
+ }
+
+ char *at = c->root_param;
+ size_t param_size = c->root_param_size;
+ if(param_size == 0) {
+ r = io->resize(io, 0, &c->root_param, param_size, HKVS_POFF_TABLE);
+ if(r < 0) goto fail_close;
+ at = c->root_param;
+ param_size = HKVS_POFF_TABLE;
+ c->root_param_size = param_size;
+ memset(at, 0, HKVS_POFF_TABLE);
+ hkvs_write_u64(at + HKVS_PARAM_HEADER_MAGIC, HKVS_MAGIC);
+ created = true;
+ } else if(param_size < HKVS_PSIZE_HEADER || hkvs_read_u64(at + HKVS_PARAM_HEADER_MAGIC) != HKVS_MAGIC) {
+ r = -EBADMSG;
+ goto fail_close;
+ }
+
+ uint64_t version = hkvs_read_u64(at + HKVS_PARAM_HEADER_VERSION);
+ if(version != 0) {
+ r = -ENOTSUP;
+ goto fail_close;
+ }
+
+ uint64_t n_tables_u64 = hkvs_read_u64(at + 16);
+ at += HKVS_PSIZE_HEADER;
+
+ if(param_size < HKVS_POFF_TABLE || n_tables_u64 > (param_size - HKVS_POFF_TABLE) / HKVS_PSIZE_TABLE) {
+ r = -EBADMSG;
+ goto fail_close;
+ }
+ size_t n_tables = (size_t)n_tables_u64;
+
+ for(size_t i = 0; i < HKVS_N_SLABS; i++) {
+ r = hkvs_slab_init(&c->slabs[i], io, at, hkvs_slab_sizes[i]);
+ at += HKVS_PSIZE_SLAB;
+ if(r < 0) goto fail_slabs;
+ }
+
+ hkvs_table **tables = calloc(n_tables, sizeof(hkvs_table*));
+ c->tables = tables;
+ c->n_tables = n_tables;
+ c->alloc_tables = n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ r = hkvs_table_new(&tables[i], io, at);
+ at += HKVS_PSIZE_TABLE;
+ if(r < 0) goto fail_tables;
+ }
+
+ c->extfiles = NULL;
+ c->n_extfiles = 0;
+ c->alloc_extfiles = 0;
+ c->extfile_gen = 0;
+
+ *pc = c;
+ return created;
+
+fail_tables:
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(t) hkvs_table_free(t, io);
+ }
+ free(tables);
+
+fail_slabs:;
+ size_t n_slabs = (size_t)(at - (char*)c->root_param) / HKVS_PSIZE_SLAB;
+ if(n_slabs > HKVS_N_SLABS) n_slabs = HKVS_N_SLABS;
+ while(n_slabs--) {
+ hkvs_slab_destroy(&c->slabs[n_slabs], io);
+ }
+
+fail_close:
+ io->close(io, 0, c->root_param, c->root_param_size);
+fail_free:
+ free(c);
+ io->destroy(io);
+ return r;
+}
+
+int hkvs_resize_param(hkvs *c) {
+ if(c->n_tables > (SIZE_MAX - HKVS_POFF_TABLE) / HKVS_PSIZE_TABLE) return -EFBIG;
+
+ size_t param_size = HKVS_POFF_TABLE + c->n_tables * HKVS_PSIZE_TABLE;
+ int r = c->io->resize(c->io, 0, &c->root_param, c->root_param_size, param_size);
+ if(r < 0) return r;
+ c->root_param_size = param_size;
+
+ char *at = c->root_param;
+ if(hkvs_read_u64(at + HKVS_PARAM_HEADER_MAGIC) != HKVS_MAGIC || hkvs_read_u64(at + HKVS_PARAM_HEADER_VERSION) != 0) {
+ hkvs_die("magic corrupted");
+ }
+ hkvs_write_u64(at + 16, c->n_tables);
+ at += HKVS_PSIZE_HEADER;
+
+ for(size_t i = 0; i < HKVS_N_SLABS; i++) {
+ c->slabs[i].param = at;
+ at += HKVS_PSIZE_SLAB;
+ }
+
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(t) t->slab.param = at;
+ at += HKVS_PSIZE_TABLE;
+ }
+
+ return 0;
+}
+
+void hkvs_free(hkvs *c) {
+ hkvs_io *io = c->io;
+
+ hkvs_extfile *ext = c->extfiles;
+ size_t n_ext = c->n_extfiles;
+ for(size_t i = 0; i < n_ext; i++) {
+ io->close(io, ext[i].fi, ext[i].addr, ext[i].size);
+ }
+ free(ext);
+
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(t) hkvs_table_free(t, io);
+ }
+ free(tables);
+
+ for(size_t i = 0; i < HKVS_N_SLABS; i++) {
+ hkvs_slab_destroy(&c->slabs[i], io);
+ }
+
+ io->close(io, 0, c->root_param, c->root_param_size);
+ free(c);
+ io->destroy(io);
+ return;
+}
+
+uint8_t hkvs_slab_for_size(size_t data_size) {
+ // TODO: faster
+ for(uint8_t i = 0; i < HKVS_N_SLABS; i++) {
+ if(data_size <= hkvs_slab_sizes[i]) return i;
+ }
+ return 255;
+}
diff --git a/lib/die.c b/lib/die.c
new file mode 100644
index 0000000..f581951
--- /dev/null
+++ b/lib/die.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#undef hkvs_die
+void hkvs_die(const char *msg) {
+ hkvs_die_ext(NULL, NULL, 0, msg);
+}
+
+void hkvs_die_ext(const char *func, const char *file, int line, const char *msg) {
+ (void)file;
+ if(func) {
+ fprintf(stderr, "Internal error: [%s:%d] %s\n", func, line, msg);
+ } else {
+ fprintf(stderr, "Internal error: %s\n", msg);
+ }
+ abort();
+}
diff --git a/lib/extfile.c b/lib/extfile.c
new file mode 100644
index 0000000..fdd94eb
--- /dev/null
+++ b/lib/extfile.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+#define MAX_GEN ((uint32_t)2147483648u)
+
+static bool hkvs_extfile_gc(hkvs *c) {
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+
+ uint32_t gen = c->extfile_gen;
+ uint32_t max = 0;
+ for(size_t i = 0; i < n; i++) {
+ uint32_t diff = gen - ext[i].gen;
+ if(diff > max) max = diff;
+ }
+ if(max == 0) return false;
+
+ if(max >= MAX_GEN) max = MAX_GEN;
+
+ hkvs_io *io = c->io;
+ size_t i = 0;
+ for(size_t j = 0; j < n; j++) {
+ uint32_t diff = gen - ext[j].gen;
+ if(diff >= max) {
+ io->close(io, ext[j].fi, ext[j].addr, ext[j].size);
+ } else {
+ ext[i++] = ext[j];
+ }
+ }
+
+ if(n > 64 && i < n / 2) {
+ ext = realloc(ext, i * sizeof(hkvs_extfile));
+ if(ext) {
+ c->extfiles = ext;
+ c->alloc_extfiles = i;
+ }
+ }
+
+ c->n_extfiles = i;
+ return true;
+}
+
+static int hkvs_extfile_reserve_slow(hkvs *c) {
+ if(c->n_extfiles > 16) {
+ if(hkvs_extfile_gc(c)) return 0;
+ }
+
+ size_t n = c->n_extfiles;
+ hkvs_extfile *ext = c->extfiles;
+ n += (n >> 2) + 1;
+ ext = realloc(ext, n * sizeof(hkvs_extfile));
+ if(!ext) return -ENOMEM;
+ c->extfiles = ext;
+ c->alloc_extfiles = n;
+ return 0;
+}
+
+HKVS_INLINE int hkvs_extfile_reserve(hkvs *c) {
+ if(c->n_extfiles != c->alloc_extfiles) return 0;
+ return hkvs_extfile_reserve_slow(c);
+}
+
+int hkvs_extfile_create(hkvs *c, uint64_t *pfi, char **paddr, size_t size, bool pin) {
+ int r = hkvs_extfile_reserve(c);
+ if(r < 0) return r;
+
+ uint64_t fi;
+ char *addr;
+ r = c->io->create(c->io, &fi, &addr, size);
+ if(r < 0) return r;
+ if(fi == 0) hkvs_die("created file 0");
+
+ c->extfiles[c->n_extfiles++] = (hkvs_extfile) {
+ .fi = fi,
+ .addr = addr,
+ .size = size,
+ .gen = pin ? c->extfile_gen : c->extfile_gen - 1,
+ };
+
+ *pfi = fi;
+ *paddr = addr;
+
+ return 0;
+}
+
+HKVS_MUSTCHECK bool hkvs_extfile_find(hkvs *c, uint64_t fi, size_t *pslot) {
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+ for(size_t i = 0; i < n; i++) {
+ if(ext[i].fi == fi) {
+ *pslot = i;
+ return true;
+ }
+ }
+ return false;
+}
+
+HKVS_MUSTCHECK int hkvs_extfile_open(hkvs *c, uint64_t fi, size_t *pslot, size_t want_size) {
+ if(fi == 0) return -EBADMSG;
+
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+ for(size_t i = 0; i < n; i++) {
+ if(ext[i].fi == fi) {
+ if(ext[i].size < want_size) return -EBADMSG;
+ *pslot = i;
+ return 0;
+ }
+ }
+
+ int r = hkvs_extfile_reserve(c);
+ if(r < 0) return 0;
+
+ char *addr;
+ size_t size;
+ r = c->io->open(c->io, fi, &addr, &size);
+ if(r < 0) return r;
+
+ size_t slot = c->n_extfiles++;
+ c->extfiles[slot] = (hkvs_extfile) {
+ .fi = fi,
+ .addr = addr,
+ .size = size,
+ .gen = c->extfile_gen - 1,
+ };
+
+ if(size < want_size) return -EBADMSG;
+
+ *pslot = slot;
+ return 0;
+}
+
+void hkvs_extfile_close(hkvs *c, size_t slot) {
+ hkvs_extfile *ext = c->extfiles;
+ size_t n = c->n_extfiles;
+ hkvs_assume(slot < n);
+ n--;
+ c->n_extfiles = n;
+
+ c->io->close(c->io, ext[slot].fi, ext[slot].addr, ext[slot].size);
+ if(slot != n) ext[slot] = ext[n];
+}
+
+void hkvs_put_records(hkvs *c) {
+ c->extfile_gen++;
+ if(c->extfile_gen % MAX_GEN == 0) {
+ hkvs_extfile_gc(c);
+ }
+}
diff --git a/lib/hash.c b/lib/hash.c
new file mode 100644
index 0000000..6a969a8
--- /dev/null
+++ b/lib/hash.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+#define rotl(v, x) (((v) << (x)) | ((v) >> (64 - (x))))
+
+#define ROUND do { \
+ v0 += v1; \
+ v2 += v3; \
+ v1 = rotl(v1, 13); \
+ v3 = rotl(v3, 16); \
+ v1 ^= v0; \
+ v3 ^= v2; \
+ v0 = rotl(v0, 32); \
+ v2 += v1; \
+ v0 += v3; \
+ v1 = rotl(v1, 17); \
+ v3 = rotl(v3, 21); \
+ v1 ^= v2; \
+ v3 ^= v0; \
+ v2 = rotl(v2, 32); \
+} while(0)
+
+uint64_t hkvs_hash(const char *hk, const char *data, size_t data_size) {
+ uint64_t hk0 = hkvs_read_u64(hk + 0);
+ uint64_t hk1 = hkvs_read_u64(hk + 8);
+ uint64_t v0 = (uint64_t)0x736f6d6570736575 ^ hk0;
+ uint64_t v1 = (uint64_t)0x646f72616e646f6d ^ hk1;
+ uint64_t v2 = (uint64_t)0x6c7967656e657261 ^ hk0;
+ uint64_t v3 = (uint64_t)0x7465646279746573 ^ hk1;
+ uint64_t last = (uint64_t)data_size << 56;
+ while(data_size >= 8) {
+ uint64_t m = hkvs_read_u64(data);
+ v3 ^= m;
+ ROUND;
+ v0 ^= m;
+ data += 8;
+ data_size -= 8;
+ }
+
+ switch(data_size) {
+ case 7:
+ last |= (uint64_t)(uint8_t)data[6] << 48;
+ HKVS_FALLTHROUGH;
+ case 6:
+ last |= (uint64_t)(uint8_t)data[5] << 40;
+ HKVS_FALLTHROUGH;
+ case 5:
+ last |= (uint64_t)(uint8_t)data[4] << 32;
+ HKVS_FALLTHROUGH;
+ case 4:
+ last |= (uint64_t)(uint8_t)data[3] << 24;
+ HKVS_FALLTHROUGH;
+ case 3:
+ last |= (uint64_t)(uint8_t)data[2] << 16;
+ HKVS_FALLTHROUGH;
+ case 2:
+ last |= (uint64_t)(uint8_t)data[1] << 8;
+ HKVS_FALLTHROUGH;
+ case 1:
+ last |= (uint64_t)(uint8_t)data[0];
+ HKVS_FALLTHROUGH;
+ case 0:
+ break;
+ default:
+ __builtin_unreachable();
+ }
+
+ v3 ^= last;
+ ROUND;
+ v0 ^= last;
+
+ v2 ^= 0xff;
+ ROUND;
+ ROUND;
+ ROUND;
+ return v0 ^ v1 ^ v2 ^ v3;
+}
diff --git a/lib/impl.h b/lib/impl.h
new file mode 100644
index 0000000..47a05ca
--- /dev/null
+++ b/lib/impl.h
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#pragma once
+#include "internal.h"
+
+#ifdef FUZZING
+#define HKVS_SLAB_BLOCK ((size_t)16)
+#else
+#define HKVS_SLAB_BLOCK ((size_t)4096)
+#endif
+
+#define HKVS_REWRITE_THRESHOLD ((size_t)8192)
+
+#define HKVS_SLAB_FREEPTRS ((size_t)8)
+
+#define HKVS_N_SLABS ((uint8_t)40)
+
+#define HKVS_MARK_MAX_SMALL ((uint8_t)249)
+#define HKVS_MARK_EXT ((uint8_t)255)
+#define HKVS_MARK_SLAB ((uint8_t)254)
+
+#define HKVS_IX_ENTRY ((size_t)16)
+#define HKVS_IX_KEY_SIZE ((size_t)16)
+#define HKVS_IX_MAX_DISP ((size_t)6)
+#define HKVS_IX_SIZE(bits) ((uint64_t)HKVS_IX_ENTRY << (bits))
+
+#define HKVS_PSIZE_HEADER ((size_t)64)
+#define HKVS_PARAM_HEADER_MAGIC ((size_t)0)
+#define HKVS_PARAM_HEADER_VERSION ((size_t)8)
+/*
+ * header {
+ * magic: u64,
+ * version: u64,
+ * app_id: u64,
+ * app_ver: u64,
+ * pad: [u64; 4],
+ * }
+ */
+
+#define HKVS_POFF_SLAB HKVS_PSIZE_HEADER
+#define HKVS_PSIZE_SLAB ((size_t)32)
+#define HKVS_PARAM_SLAB_FI ((size_t)0)
+#define HKVS_PARAM_SLAB_ALLOC ((size_t)8)
+#define HKVS_PARAM_SLAB_FREE ((size_t)16)
+/*
+ * slab {
+ * fi: u64,
+ * alloc_count: u64,
+ * free: u64,
+ * pad: u64,
+ * }
+ */
+
+#define HKVS_POFF_TABLE ((size_t)(HKVS_POFF_SLAB + HKVS_N_SLABS * HKVS_PSIZE_SLAB))
+#define HKVS_PSIZE_TABLE ((size_t)64)
+#define HKVS_PARAM_TABLE_ELTSZ ((size_t)32)
+#define HKVS_PARAM_TABLE_KEYSZ ((size_t)34)
+#define HKVS_PARAM_TABLE_IXBIT ((size_t)35)
+#define HKVS_PARAM_TABLE_ID ((size_t)36)
+#define HKVS_PARAM_TABLE_IXFI ((size_t)40)
+#define HKVS_PARAM_TABLE_IXKEY ((size_t)48)
+/* table {
+ * slab;
+ * elt_size: u16,
+ * key_size: u8,
+ * ix_bits: u8,
+ * id: u32,
+ * ix_fi: u64,
+ * ix_key: [u64; 2],
+ * }
+ */
+
+typedef struct hkvs_slab hkvs_slab;
+struct hkvs_slab {
+ char *param;
+ char *addr;
+ size_t map_size;
+ size_t elt_size;
+};
+
+typedef struct hkvs_table hkvs_table;
+struct hkvs_table {
+ hkvs_slab slab;
+ char *ix;
+ size_t ix_size;
+};
+
+typedef struct hkvs_extfile hkvs_extfile;
+struct hkvs_extfile {
+ uint64_t fi;
+ char *addr;
+ size_t size;
+ uint32_t gen;
+};
+
+struct hkvs {
+ hkvs_io *io;
+
+ char *root_param;
+ size_t root_param_size;
+
+ hkvs_table **tables;
+ size_t n_tables;
+ size_t alloc_tables;
+
+ hkvs_extfile *extfiles;
+ size_t n_extfiles;
+ size_t alloc_extfiles;
+ uint32_t extfile_gen;
+
+ hkvs_slab slabs[HKVS_N_SLABS];
+};
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_extfile_create(hkvs *c, uint64_t *pfi, char **paddr, size_t size, bool pin);
+
+HKVS_INTERNAL HKVS_MUSTCHECK bool hkvs_extfile_find(hkvs *c, uint64_t fi, size_t *pslot);
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_extfile_open(hkvs *c, uint64_t fi, size_t *pslot, size_t want_size);
+
+HKVS_INTERNAL void hkvs_extfile_close(hkvs *c, size_t slot);
+
+HKVS_INLINE void hkvs_extfile_pin(hkvs *c, hkvs_extfile *f) {
+ f->gen = c->extfile_gen;
+}
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_resize_param(hkvs *c);
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_slab_init(hkvs_slab *s, hkvs_io *io, char *param, size_t elt_size);
+
+HKVS_INLINE void hkvs_slab_destroy(hkvs_slab *s, hkvs_io *io) {
+ uint64_t fi = hkvs_read_u64(s->param);
+ if(fi) {
+ io->close(io, fi, s->addr, s->map_size);
+ }
+}
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_slab_alloc(hkvs *c, hkvs_slab *s, size_t *pi);
+
+HKVS_INTERNAL void hkvs_slab_discard(hkvs_slab *s, size_t id);
+
+HKVS_INLINE HKVS_PURE size_t hkvs_slab_size(hkvs_slab *s) {
+ uint64_t r = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_ALLOC);
+ hkvs_assume(r <= SIZE_MAX / 2);
+ return (size_t)r;
+}
+
+HKVS_INLINE char *hkvs_slab_mark_at(hkvs_slab *s, size_t i) {
+ hkvs_assume(i <= SIZE_MAX / 2);
+ hkvs_assume(i < hkvs_slab_size(s));
+ size_t j = i & ~(HKVS_SLAB_BLOCK - 1);
+ return s->addr + j * s->elt_size + i;
+}
+
+HKVS_INLINE char *hkvs_slab_at(hkvs_slab *s, size_t i) {
+ hkvs_assume(i <= SIZE_MAX / 2);
+ hkvs_assume(i < hkvs_slab_size(s));
+ size_t j = i & ~(HKVS_SLAB_BLOCK - 1);
+ return s->addr + i * s->elt_size + j + HKVS_SLAB_BLOCK;
+}
+
+HKVS_INLINE uint8_t hkvs_slab_mark(hkvs_slab *s, size_t i) {
+ return hkvs_read_u8(hkvs_slab_mark_at(s, i));
+}
+
+HKVS_INLINE void hkvs_slab_set_mark(hkvs_slab *s, size_t i, uint8_t v) {
+ hkvs_write_u8(hkvs_slab_mark_at(s, i), v);
+}
+
+HKVS_INTERNAL HKVS_CONST uint8_t hkvs_slab_for_size(size_t data_size);
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_table_new(hkvs_table **t, hkvs_io *io, char *param);
+
+HKVS_INLINE HKVS_PURE uint32_t hkvs_table__id(hkvs_table *t) {
+ return hkvs_read_u32(t->slab.param + HKVS_PARAM_TABLE_ID);
+}
+
+HKVS_INLINE HKVS_PURE uint8_t hkvs_table__key_size(hkvs_table *t) {
+ return hkvs_read_u8(t->slab.param + HKVS_PARAM_TABLE_KEYSZ);
+}
+
+HKVS_INLINE uint8_t hkvs_table__inl_size(hkvs_table *t) {
+ size_t r = t->slab.elt_size;
+ size_t key_size = hkvs_table__key_size(t);
+ hkvs_assume(key_size <= r);
+ r -= key_size;
+ if(r > 248) r = 248;
+ return (uint8_t)r;
+}
+
+HKVS_INLINE HKVS_PURE uint8_t hkvs_table__ix_bits(hkvs_table *t) {
+ return hkvs_read_u8(t->slab.param + HKVS_PARAM_TABLE_IXBIT);
+}
+
+HKVS_INLINE HKVS_PURE const char *hkvs_table__ix_key(hkvs_table *t) {
+ return t->slab.param + HKVS_PARAM_TABLE_IXKEY;
+}
+
+HKVS_INTERNAL int hkvs_record__value(hkvs *c, hkvs_table *t, size_t id, char *key, size_t key_size, int notfound, char **pvalue, size_t *psize);
+
+HKVS_INLINE void hkvs_table_free(hkvs_table *t, hkvs_io *io) {
+ hkvs_slab_destroy(&t->slab, io);
+ uint64_t ix_fi = hkvs_read_u64(t->slab.param + HKVS_PARAM_TABLE_IXFI);
+ io->close(io, ix_fi, t->ix, t->ix_size);
+ free(t);
+}
+
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_ix_build(hkvs_table *t, hkvs_io *io);
+HKVS_INTERNAL HKVS_MUSTCHECK int hkvs_ix_add(hkvs *c, hkvs_table *t, size_t id);
+HKVS_INTERNAL void hkvs_ix_remove(hkvs_table *t, size_t id);
diff --git a/lib/internal.h b/lib/internal.h
new file mode 100644
index 0000000..3ad3ac5
--- /dev/null
+++ b/lib/internal.h
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#pragma once
+
+#define HKVS_API extern __attribute__((__visibility__("protected"), __nothrow__))
+#define HKVS_INTERNAL extern __attribute__((__visibility__("hidden"), __nothrow__))
+
+#define HKVS_PURE __attribute__((__pure__))
+
+#define HKVS_CONST __attribute__((__const__))
+
+#define HKVS_NORETURN __attribute__((__noreturn__, __cold__))
+
+#define HKVS_FALLTHROUGH __attribute__((__fallthrough__))
+
+#include <hkvs.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+HKVS_INLINE void hkvs_assume(bool cond) {
+ if(!cond) {
+#ifdef HKVS_FUZZING
+ hkvs_die("assumption failed");
+#else
+ __builtin_unreachable();
+#endif
+ }
+}
+
+HKVS_INLINE uint8_t hkvs_read_u8(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return *p;
+}
+
+HKVS_INLINE void hkvs_write_u8(char *pv, uint8_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = v;
+}
+HKVS_INLINE uint16_t hkvs_read_u16(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return (uint16_t)(
+ (uint16_t)p[0] |
+ (uint16_t)p[1] << 8
+ );
+}
+
+HKVS_INLINE void hkvs_write_u16(char *pv, uint16_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = (uint8_t)(v >> 0);
+ p[1] = (uint8_t)(v >> 8);
+}
+
+HKVS_INLINE uint32_t hkvs_read_u32(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return (uint32_t)(
+ (uint32_t)p[0] |
+ (uint32_t)p[1] << 8 |
+ (uint32_t)p[2] << 16 |
+ (uint32_t)p[3] << 24
+ );
+}
+
+HKVS_INLINE void hkvs_write_u32(char *pv, uint32_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = (uint8_t)(v >> 0);
+ p[1] = (uint8_t)(v >> 8);
+ p[2] = (uint8_t)(v >> 16);
+ p[3] = (uint8_t)(v >> 24);
+}
+
+HKVS_INLINE uint64_t hkvs_read_u64(const char *pv) {
+ const uint8_t *p = (const uint8_t*)pv;
+ return (uint64_t)(
+ (uint64_t)p[0] |
+ (uint64_t)p[1] << 8 |
+ (uint64_t)p[2] << 16 |
+ (uint64_t)p[3] << 24 |
+ (uint64_t)p[4] << 32 |
+ (uint64_t)p[5] << 40 |
+ (uint64_t)p[6] << 48 |
+ (uint64_t)p[7] << 56
+ );
+}
+
+HKVS_INLINE void hkvs_write_u64(char *pv, uint64_t v) {
+ uint8_t *p = (uint8_t*)pv;
+ p[0] = (uint8_t)(v >> 0);
+ p[1] = (uint8_t)(v >> 8);
+ p[2] = (uint8_t)(v >> 16);
+ p[3] = (uint8_t)(v >> 24);
+ p[4] = (uint8_t)(v >> 32);
+ p[5] = (uint8_t)(v >> 40);
+ p[6] = (uint8_t)(v >> 48);
+ p[7] = (uint8_t)(v >> 56);
+}
+
+HKVS_INTERNAL HKVS_NORETURN void hkvs_die(const char *msg);
+
+HKVS_INTERNAL HKVS_NORETURN void hkvs_die_ext(const char *func, const char *file, int line, const char *msg);
+
+#define hkvs_die(msg) hkvs_die_ext(__func__, __FILE__, __LINE__, msg)
+
+HKVS_INTERNAL HKVS_PURE uint64_t hkvs_hash(const char *hk, const char *data, size_t data_size);
diff --git a/lib/io_dirfd.c b/lib/io_dirfd.c
new file mode 100644
index 0000000..252658c
--- /dev/null
+++ b/lib/io_dirfd.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#define _GNU_SOURCE
+#include <hkvs.h>
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/random.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+typedef struct hkvs_dirfd hkvs_dirfd;
+struct hkvs_dirfd {
+ hkvs_io base;
+ int fd;
+ uint64_t create_fi;
+};
+
+static const char *const base32 = "ABCDEFGHIJKLMNOPQRSTUVWXTZ234567";
+
+HKVS_INLINE bool hkvs_dirfd_check_size(size_t size) {
+ return (size_t)(off64_t)size == size && (off64_t)size >= 0;
+}
+
+static void hkvs_dirfd_name(char *name, uint64_t fi) {
+ name[0] = 'd';
+ name[1] = 'a';
+ name[2] = 't';
+ name[3] = 'a';
+ name[4] = '.';
+ name[18] = 0;
+ {
+ uint64_t val = fi;
+ for(size_t i = 0; i < 13; i++) {
+ name[17 - i] = base32[val % 32];
+ val /= 32;
+ }
+ }
+}
+
+static void hkvs_dirfd_destroy(hkvs_io *iov) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+ (void) close(io->fd);
+ free(io);
+}
+
+static int hkvs_dirfd_random(hkvs_io *iov, char *buf, size_t n) {
+ (void)iov;
+ while(n) {
+ ssize_t r = getrandom(buf, n, 0);
+ if(r < 0) return -errno;
+ size_t k = (size_t)r;
+ buf += k;
+ n -= k;
+ }
+ return 0;
+}
+
+static int hkvs_dirfd_open(hkvs_io *iov, uint64_t fi, char **pmem, size_t *psize) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+
+ int r;
+ if(fi == 0) {
+ r = openat(io->fd, "main", O_RDWR | O_CREAT | O_CLOEXEC, 0666);
+ } else {
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ r = openat(io->fd, name, O_RDWR | O_CLOEXEC, 0666);
+ }
+ if(r < 0) return -errno;
+ int fd = r;
+
+ off64_t fsize = lseek64(fd, 0, SEEK_END);
+ size_t size = (size_t)fsize;
+ if((off64_t)size != fsize) {
+ r = -EFBIG;
+ goto end;
+ }
+
+ void *addr;
+ if(size == 0) {
+ addr = NULL;
+ } else {
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if(addr == MAP_FAILED) {
+ r = -errno;
+ goto end;
+ }
+ }
+
+ *psize = size;
+ *pmem = addr;
+ r = 0;
+
+end:
+ close(fd);
+ return r;
+}
+
+static int hkvs_dirfd_create(hkvs_io *iov, uint64_t *pfi, char **pmem, size_t size) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+ uint64_t fi = io->create_fi;
+ if(fi == 0) abort();
+
+ if(!hkvs_dirfd_check_size(size)) return -EFBIG;
+
+ int fd;
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ while(true) {
+ int r = openat(io->fd, name, O_RDWR | O_CREAT | O_EXCL | O_CLOEXEC, 0666);
+ if(r >= 0) {
+ fd = r;
+ break;
+ }
+ r = errno;
+ if(r != EEXIST) return -r;
+ fi++;
+ uint64_t val = fi;
+ char *p = name + 17;
+ while(true) {
+ *p = base32[val % 32];
+ if(val % 32 != 0) break;
+ val /= 32;
+ p--;
+ }
+ }
+
+ io->create_fi = fi + 1;
+
+ int r = ftruncate64(fd, (off64_t)size);
+ if(r < 0) {
+ r = -errno;
+ goto fail;
+ return r;
+ }
+
+ void *addr;
+ if(size == 0) {
+ addr = NULL;
+ } else {
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if(addr == MAP_FAILED) {
+ r = -errno;
+ goto fail;
+ }
+ }
+
+ *pfi = fi;
+ *pmem = addr;
+ r = 0;
+
+end:
+ close(fd);
+ return r;
+
+fail:
+ (void) unlinkat(io->fd, name, 0);
+ goto end;
+}
+
+static void hkvs_dirfd_close(hkvs_io *iov, uint64_t fi, char *pmem, size_t size) {
+ (void)iov;
+ (void)fi;
+
+ if(size == 0) return;
+
+ int r = munmap(pmem, size);
+ if(r < 0) {
+ r = errno;
+ // TODO
+ (void) r;
+ abort();
+ }
+}
+
+static int hkvs_dirfd_resize(hkvs_io *iov, uint64_t fi, char **pmem, size_t old_size, size_t new_size) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+
+ if(!hkvs_dirfd_check_size(new_size)) return -EFBIG;
+
+ int r;
+ if(fi == 0) {
+ r = openat(io->fd, "main", O_RDWR | O_CREAT | O_CLOEXEC, 0666);
+ } else {
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ r = openat(io->fd, name, O_RDWR | O_CLOEXEC, 0666);
+ }
+ if(r < 0) return -errno;
+ int fd = r;
+
+ void *addr = *pmem;
+ if(new_size > old_size) {
+ r = ftruncate64(fd, (off64_t)new_size);
+ if(r < 0) goto fail;
+
+ if(addr) {
+ addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
+ } else {
+ addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ }
+ if(addr == MAP_FAILED) {
+ r = -errno;
+ (void) ftruncate64(fd, (off64_t)old_size);
+ goto end;
+ }
+ *pmem = addr;
+ } else if(new_size > old_size) {
+ if(new_size == 0) {
+ munmap(addr, old_size);
+ addr = NULL;
+ } else {
+ addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
+ if(addr == MAP_FAILED) goto fail;
+ }
+
+ r = ftruncate64(fd, (off64_t)old_size);
+ if(r < 0) {
+ // TODO
+ (void) r;
+ }
+
+ *pmem = addr;
+ }
+
+ r = 0;
+
+end:
+ close(fd);
+ return r;
+
+fail:
+ r = -errno;
+ goto end;
+}
+
+static void hkvs_dirfd_unlink(hkvs_io *iov, uint64_t fi) {
+ hkvs_dirfd *io = (hkvs_dirfd*)iov;
+
+ if(fi == 0) abort();
+
+ char name[32];
+ hkvs_dirfd_name(name, fi);
+ int r = unlinkat(io->fd, name, 0);
+ if(r < 0) {
+ r = errno;
+ // TODO
+ (void) r;
+ } else {
+ if(fi < io->create_fi) {
+ io->create_fi = fi;
+ }
+ }
+}
+
+hkvs_io *hkvs_io_new_unsafe_dirfd(int fd) {
+ hkvs_dirfd *io = malloc(sizeof(*io));
+ if(!io) return NULL;
+ io->base = (hkvs_io){
+ .destroy = &hkvs_dirfd_destroy,
+ .random = &hkvs_dirfd_random,
+ .open = &hkvs_dirfd_open,
+ .create = &hkvs_dirfd_create,
+ .resize = &hkvs_dirfd_resize,
+ .close = &hkvs_dirfd_close,
+ .unlink = &hkvs_dirfd_unlink,
+ };
+ io->fd = fd;
+ io->create_fi = 1;
+ return &io->base;
+}
diff --git a/lib/io_mem.c b/lib/io_mem.c
new file mode 100644
index 0000000..4f27c02
--- /dev/null
+++ b/lib/io_mem.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "internal.h"
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+typedef struct hkvs_iomem_file hkvs_iomem_file;
+struct hkvs_iomem_file {
+ uintptr_t data_mode;
+ size_t size;
+};
+
+typedef struct hkvs_iomem hkvs_iomem;
+struct hkvs_iomem {
+ hkvs_io base;
+ uint64_t rand_ctr;
+ char rand_key[16];
+ size_t n_files;
+ struct hkvs_iomem_file *files;
+};
+
+static void hkvs_iomem_destroy(hkvs_io *iov) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+ size_t n = io->n_files;
+ for(size_t i = 0; i < n; i++) {
+ struct hkvs_iomem_file *f = io->files + i;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ free(p);
+ }
+ free(io->files);
+ free(io);
+}
+
+static int hkvs_iomem_random(hkvs_io *iov, char *buf, size_t n) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+ while(true) {
+ uint64_t h = hkvs_hash(io->rand_key, (const char*)&io->rand_ctr, sizeof(io->rand_ctr));
+ io->rand_ctr++;
+ if(n > 8) {
+ memcpy(buf, &h, 8);
+ buf += 8;
+ n -= 8;
+ } else {
+ memcpy(buf, &h, n);
+ return 0;
+ }
+ }
+}
+
+static int hkvs_iomem_open(hkvs_io *iov, uint64_t fi, char **pmem, size_t *psz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi >= io->n_files) return -ENOENT;
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode == 0) {
+ if(fi != 0) return -ENOENT;
+ } else if(mode != 1) {
+ hkvs_die("bad file state");
+ }
+
+ f->data_mode = (uintptr_t)p + 2;
+ *pmem = p;
+ *psz = f->size;
+ return 0;
+}
+
+static int hkvs_iomem_create(hkvs_io *iov, uint64_t *pfi, char **pmem, size_t sz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ hkvs_iomem_file *f = io->files;
+ size_t n = io->n_files;
+ for(size_t i = 1; i < n; i++) {
+ if(!f[i].data_mode) {
+ n = i;
+ f += n;
+ goto done;
+ }
+ }
+
+ f = realloc(f, (n + 1) * sizeof(hkvs_iomem_file));
+ if(!f) return -ENOMEM;
+ io->files = f;
+ io->n_files = n + 1;
+ f += n;
+ f->data_mode = 0;
+
+done:;
+ char *p = malloc(sz);
+ if(!p) return -ENOMEM;
+ memset(p, 0, sz);
+
+ f->data_mode = (uintptr_t)p + 2;
+ f->size = sz;
+ *pfi = n;
+ *pmem = p;
+ return 0;
+}
+
+static int hkvs_iomem_resize(hkvs_io *iov, uint64_t fi, char **pmem, size_t old_sz, size_t new_sz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi > io->n_files) hkvs_die("bad file index");
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode != 2) hkvs_die("bad file state");
+ if(*pmem != p) hkvs_die("bad file mem");
+ if(old_sz != f->size) hkvs_die("bad file old_size");
+
+ p = realloc(p, new_sz);
+ if(new_sz && !p) return -ENOMEM;
+
+ f->data_mode = (uintptr_t)p + 2;
+ f->size = new_sz;
+ *pmem = p;
+ return 0;
+}
+
+static void hkvs_iomem_close(hkvs_io *iov, uint64_t fi, char *mem, size_t sz) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi > io->n_files) hkvs_die("bad file index");
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode != 2) hkvs_die("bad file state");
+ if(mem != p) hkvs_die("bad file mem");
+ if(sz != f->size) hkvs_die("bad file size");
+
+ f->data_mode = (uintptr_t)p + 1;
+}
+
+static void hkvs_iomem_unlink(hkvs_io *iov, uint64_t fi) {
+ hkvs_iomem *io = (hkvs_iomem*)iov;
+
+ if(fi > io->n_files) hkvs_die("bad file index");
+
+ hkvs_iomem_file *f = io->files + (size_t)fi;
+ uintptr_t mode = f->data_mode & 3;
+ char *p = (char*)(f->data_mode - mode);
+ if(mode != 1) hkvs_die("bad file state");
+
+ free(p);
+ f->data_mode = 0;
+ f->size = 0;
+}
+
+hkvs_io *hkvs_io_new_mem(uint64_t rand_seed) {
+ hkvs_iomem_file *files = malloc(sizeof(hkvs_iomem_file));
+ if(!files) return NULL;
+ files[0].data_mode = 0;
+ files[0].size = 0;
+
+ hkvs_iomem *io = malloc(sizeof(*io));
+ if(!io) {
+ free(files);
+ return NULL;
+ }
+ io->base = (hkvs_io){
+ .destroy = &hkvs_iomem_destroy,
+ .random = &hkvs_iomem_random,
+ .open = &hkvs_iomem_open,
+ .create = &hkvs_iomem_create,
+ .close = &hkvs_iomem_close,
+ .resize = &hkvs_iomem_resize,
+ .unlink = &hkvs_iomem_unlink,
+ };
+ hkvs_write_u64(io->rand_key, rand_seed);
+ hkvs_write_u64(io->rand_key + 8, rand_seed);
+ io->rand_ctr = 0;
+ io->n_files = 1;
+ io->files = files;
+ return &io->base;
+}
diff --git a/lib/iter.c b/lib/iter.c
new file mode 100644
index 0000000..ff27a68
--- /dev/null
+++ b/lib/iter.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+void hkvs_iter_open(hkvs *c, hkvs_table *t, hkvs_iter *it) {
+ (void)c;
+
+ it->priv1 = 0;
+ it->priv2 = hkvs_slab_size(&t->slab);
+}
+
+void hkvs_iter_seek(hkvs *c, hkvs_table *t, hkvs_iter *it, hkvs_rid id) {
+ (void)c;
+
+ size_t n = hkvs_slab_size(&t->slab);
+ it->priv1 = id.id >= n ? SIZE_MAX : (size_t)id.id;
+ it->priv2 = n;
+}
+
+hkvs_rid hkvs_iter_next(hkvs *c, hkvs_table *t, hkvs_iter *it, const char **pkey) {
+ (void)c;
+
+ size_t id = it->priv1;
+ size_t n = it->priv2;
+ if(id >= n) return HKVS_INVALID_RID;
+ while(true) {
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark != 0) {
+ it->priv1 = id + 1;
+ if(pkey) *pkey = hkvs_slab_at(&t->slab, id);
+ return (hkvs_rid){id};
+ }
+ id++;
+ if(id == n) {
+ it->priv1 = SIZE_MAX;
+ return HKVS_INVALID_RID;
+ }
+ }
+}
+
+void hkvs_iter_delete(hkvs *c, hkvs_table *t, hkvs_iter *it) {
+ hkvs_record_delete(c, t, (hkvs_rid){it->priv1 - 1});
+}
diff --git a/lib/ix.c b/lib/ix.c
new file mode 100644
index 0000000..0a6af19
--- /dev/null
+++ b/lib/ix.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+static bool hkvs_ix_add_fast(char *ix, size_t id, const char *key, size_t key_size, uint8_t bits, const char *hk);
+
+static int hkvs_ix_build_try(hkvs_table *t, hkvs_io *io, uint8_t ix_bits) {
+ if(ix_bits > 59) return -EFBIG;
+ uint64_t ix_size_64 = HKVS_IX_SIZE(ix_bits);
+ if(ix_size_64 > SIZE_MAX) return -EFBIG;
+ size_t ix_size = (size_t)ix_size_64;
+
+ char ix_key[HKVS_IX_KEY_SIZE];
+
+ int r = io->random(io, ix_key, HKVS_IX_KEY_SIZE);
+ if(r < 0) return r;
+
+ char *ix;
+ uint64_t fi;
+ r = io->create(io, &fi, &ix, ix_size);
+ if(r < 0) return r;
+
+ size_t key_size = hkvs_read_u8(t->slab.param + HKVS_PARAM_TABLE_KEYSZ);
+ size_t n = hkvs_slab_size(&t->slab);
+ for(size_t i = 0; i < n; i++) {
+ if(hkvs_slab_mark(&t->slab, i) == 0) continue;
+ const char *key = hkvs_slab_at(&t->slab, i);
+ if(!hkvs_ix_add_fast(ix, i, key, key_size, ix_bits, ix_key)) {
+ io->close(io, fi, ix, ix_size);
+ io->unlink(io, fi);
+ return 0;
+ }
+ }
+
+ uint64_t old_fi = hkvs_read_u64(t->slab.param + HKVS_PARAM_TABLE_IXFI);
+ if(old_fi) {
+ if(t->ix) {
+ io->close(io, old_fi, t->ix, t->ix_size);
+ }
+ io->unlink(io, old_fi);
+ }
+
+ t->ix = ix;
+ t->ix_size = ix_size;
+
+ hkvs_write_u8(t->slab.param + HKVS_PARAM_TABLE_IXBIT, ix_bits);
+ hkvs_write_u64(t->slab.param + HKVS_PARAM_TABLE_IXFI, fi);
+ memcpy(t->slab.param + HKVS_PARAM_TABLE_IXKEY, ix_key, HKVS_IX_KEY_SIZE);
+
+ return 1;
+}
+
+int hkvs_ix_build(hkvs_table *t, hkvs_io *io) {
+ size_t count = 0;
+ {
+ size_t n = hkvs_slab_size(&t->slab);
+ for(size_t i = 0; i < n; i++) {
+ if(hkvs_slab_mark(&t->slab, i)) count++;
+ }
+ }
+
+ uint8_t bits = 4;
+ if(count > 0) {
+ size_t load = count - 1;
+ while(load >= 13) {
+ load /= 2;
+ bits++;
+ }
+ }
+
+ size_t attempts = 0;
+ while(true) {
+ int r = hkvs_ix_build_try(t, io, bits);
+ if(r < 0) return r;
+ if(r) return 0;
+
+ attempts++;
+ if(attempts == 8 || attempts == 64) {
+ bits++;
+ }
+ }
+}
+
+int hkvs_ix_add(hkvs *c, hkvs_table *t, size_t id) {
+ const char *key = hkvs_slab_at(&t->slab, id);
+
+ char *ix = t->ix;
+ uint8_t bits = hkvs_table__ix_bits(t);
+ size_t key_size = hkvs_table__key_size(t);
+ const char *hk = hkvs_table__ix_key(t);
+
+ if(hkvs_ix_add_fast(ix, id, key, key_size, bits, hk)) return 0;
+
+ return hkvs_ix_build(t, c->io);
+}
+
+hkvs_rid hkvs_record_find(hkvs *c, hkvs_table *t, const char *key) {
+ (void)c;
+
+ const char *hk = hkvs_table__ix_key(t);
+ size_t key_size = hkvs_table__key_size(t);
+ uint64_t hv = hkvs_hash(hk, key, key_size);
+
+ uint8_t bits = hkvs_table__ix_bits(t);
+ size_t entries = hkvs_slab_size(&t->slab);
+ char *ix = t->ix;
+
+ size_t size = (size_t)1 << bits;
+ size_t mask = size - 1;
+
+ for(size_t disp = 0; disp < HKVS_IX_MAX_DISP; disp++) {
+ size_t at = ((size_t)hv + disp) & mask;
+ char *ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v = hkvs_read_u64(ent);
+ if(v == 0) return HKVS_INVALID_RID;
+ if(v > entries) goto fail;
+ uint64_t h = hkvs_read_u64(ent + 8);
+ if(h != hv) {
+ if(((at - h) & mask) < disp) break;
+ continue;
+ }
+
+ size_t id = (size_t)(v - 1);
+ char *found = hkvs_slab_at(&t->slab, id);
+ if(!memcmp(key, found, key_size)) {
+ return (hkvs_rid){id};
+ }
+ }
+
+fail:
+ return HKVS_INVALID_RID;
+}
+
+void hkvs_ix_remove(hkvs_table *t, size_t id) {
+ const char *key = hkvs_slab_at(&t->slab, id);
+
+ uint8_t bits = hkvs_table__ix_bits(t);
+ const char *hk = hkvs_table__ix_key(t);
+ size_t key_size = hkvs_table__key_size(t);
+ char *ix = t->ix;
+
+ uint64_t hv = hkvs_hash(hk, key, key_size);
+ size_t size = (size_t)1 << bits;
+ size_t mask = size - 1;
+
+ uint64_t vv = (uint64_t)id + 1;
+
+ char *ent;
+ for(size_t disp = 0; disp < HKVS_IX_MAX_DISP; disp++) {
+ size_t at = ((size_t)hv + disp) & mask;
+ ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v = hkvs_read_u64(ent);
+ if(v == vv) {
+ if(hkvs_read_u64(ent + 8) != hv) goto corrupted;
+ while(true) {
+ size_t next = (at + 1) & mask;
+ char *e2 = ix + next * HKVS_IX_ENTRY;
+ v = hkvs_read_u64(e2);
+ if(v == 0) break;
+ uint64_t h = hkvs_read_u64(e2 + 8);
+ if((h & mask) == next) break;
+ hkvs_write_u64(ent, v);
+ hkvs_write_u64(ent + 8, h);
+ at = next;
+ ent = e2;
+ }
+ hkvs_write_u64(ent, 0);
+ hkvs_write_u64(ent + 8, 0);
+ return;
+ }
+ if(v == 0) goto corrupted;
+ }
+
+corrupted:
+ hkvs_die("database corrupted");
+}
+
+bool hkvs_ix_add_fast(char *ix, size_t id, const char *key, size_t key_size, uint8_t bits, const char *hk) {
+ uint64_t h_new = hkvs_hash(hk, key, key_size);
+ size_t size = (size_t)1 << bits;
+ size_t mask = size - 1;
+
+ uint64_t v_new = (uint64_t)id + 1;
+
+ for(size_t disp = 0; disp < HKVS_IX_MAX_DISP; disp++) {
+ size_t at = ((size_t)h_new + disp) & mask;
+ char *ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v_disp = hkvs_read_u64(ent);
+ if(v_disp == 0) {
+ hkvs_write_u64(ent, v_new);
+ hkvs_write_u64(ent + 8, h_new);
+ return true;
+ }
+ uint64_t h_disp = hkvs_read_u64(ent + 8);
+ if(((at - h_disp) & mask) < disp) {
+ hkvs_write_u64(ent, v_new);
+ hkvs_write_u64(ent + 8, h_new);
+ while(true) {
+ at = (at + 1) & mask;
+ ent = ix + at * HKVS_IX_ENTRY;
+ uint64_t v2 = hkvs_read_u64(ent);
+ uint64_t h2 = hkvs_read_u64(ent + 8);
+ hkvs_write_u64(ent, v_disp);
+ hkvs_write_u64(ent + 8, h_disp);
+ if(v2 == 0) return true;
+ v_disp = v2;
+ h_disp = h2;
+ if(((at - h_disp) & mask) >= HKVS_IX_MAX_DISP - 1) return false;
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
diff --git a/lib/record.c b/lib/record.c
new file mode 100644
index 0000000..c9c36d6
--- /dev/null
+++ b/lib/record.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+const char *hkvs_record_key(hkvs *c, hkvs_table *t, hkvs_rid rid) {
+ (void)c;
+
+ if(rid.id > hkvs_slab_size(&t->slab)) return NULL;
+ size_t id = (size_t)rid.id;
+ if(hkvs_slab_mark(&t->slab, id) == 0) return NULL;
+ return hkvs_slab_at(&t->slab, id);
+}
+
+int hkvs_record_value(hkvs *c, hkvs_table *t, hkvs_rid rid, char **pvalue, size_t *psize) {
+ if(rid.id > hkvs_slab_size(&t->slab)) return 0;
+ size_t id = (size_t)rid.id;
+ size_t key_size = hkvs_table__key_size(t);
+ char *key = hkvs_slab_at(&t->slab, id);
+ return hkvs_record__value(c, t, id, key, key_size, 0, pvalue, psize);
+}
+
+int hkvs_record__value(hkvs *c, hkvs_table *t, size_t id, char *key, size_t key_size, int notfound, char **pvalue, size_t *psize) {
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark == 0) return notfound;
+
+ char *p = key + key_size;
+ size_t size;
+
+ if(mark <= HKVS_MARK_MAX_SMALL) {
+ size = mark - 1;
+ if(!pvalue) goto ret_novalue;
+ goto ret_value;
+ }
+
+ uint64_t size_64 = hkvs_read_u64(p);
+ if(size_64 > SIZE_MAX) return -EFBIG;
+ size = (size_t)size_64;
+
+ if(!pvalue) goto ret_novalue;
+
+ uint64_t val = hkvs_read_u64(p + 8);
+ switch(mark) {
+ case HKVS_MARK_EXT:
+ {
+ size_t slot;
+ int r = hkvs_extfile_open(c, val, &slot, size);
+ if(r < 0) return r;
+
+ hkvs_extfile *f = &c->extfiles[slot];
+ hkvs_extfile_pin(c, f);
+ p = f->addr;
+ break;
+ }
+
+ case HKVS_MARK_SLAB:
+ {
+ uint8_t slab = val & 255;
+ val >>= 8;
+
+ if(slab >= HKVS_N_SLABS) return -EBADMSG;
+ hkvs_slab *s = &c->slabs[slab];
+ if(size > s->elt_size) return -EBADMSG;
+
+ if(val > hkvs_slab_size(s)) return -EBADMSG;
+ size_t sid = (size_t)val;
+
+ p = hkvs_slab_at(s, sid);
+ break;
+ }
+
+ default:
+ return -EBADMSG;
+ }
+
+ret_value:
+ *pvalue = p;
+ret_novalue:
+ if(psize) *psize = size;
+ return 1;
+}
+
+int hkvs_record_resize(hkvs *c, hkvs_table *t, hkvs_rid rid, char **pvalue, size_t new_size, size_t keep_size) {
+ hkvs_put_records(c);
+
+ if(rid.id > hkvs_slab_size(&t->slab)) goto wtf;
+ size_t id = (size_t)rid.id;
+ size_t key_size = hkvs_table__key_size(t);
+ uint8_t inl_size = hkvs_table__inl_size(t);
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark == 0) goto wtf;
+ char *key = hkvs_slab_at(&t->slab, id);
+ char *p = key + key_size;
+
+ uint8_t new_slab = hkvs_slab_for_size(new_size);
+
+ uint64_t old_fi = 0;
+ size_t old_fi_slot;
+
+ hkvs_slab *old_s = NULL;
+ size_t old_sid;
+
+ char *old_value;
+ size_t old_size;
+
+ if(mark < HKVS_MARK_MAX_SMALL) {
+ if(new_size <= inl_size) {
+ hkvs_slab_set_mark(&t->slab, id, (uint8_t)(new_size + 1));
+ if(pvalue) *pvalue = p;
+ return 0;
+ }
+ old_value = p;
+ old_size = mark - 1;
+ } else {
+ uint64_t val = hkvs_read_u64(p);
+ if(val > SIZE_MAX) return -EFBIG;
+ old_size = (size_t)val;
+ val = hkvs_read_u64(p + 8);
+
+ switch(mark) {
+ case HKVS_MARK_EXT:
+ {
+ old_fi = val;
+
+ int r = hkvs_extfile_open(c, old_fi, &old_fi_slot, old_size);
+ if(r < 0) return r;
+ hkvs_extfile *f = &c->extfiles[old_fi_slot];
+ old_value = f->addr;
+
+ if(new_slab >= HKVS_N_SLABS && keep_size <= HKVS_REWRITE_THRESHOLD) {
+ r = c->io->resize(c->io, old_fi, &f->addr, f->size, new_size);
+ if(r < 0) return r;
+ f->size = new_size;
+ hkvs_write_u64(p, new_size);
+ if(pvalue) {
+ hkvs_extfile_pin(c, f);
+ *pvalue = f->addr;
+ }
+ return 0;
+ }
+
+ break;
+ }
+
+ case HKVS_MARK_SLAB:
+ {
+ uint8_t slab = val & 255;
+ val >>= 8;
+
+ if(slab >= HKVS_N_SLABS) return -EBADMSG;
+ old_s = &c->slabs[slab];
+ if(old_size > old_s->elt_size) return -EBADMSG;
+
+ if(val > hkvs_slab_size(old_s)) return -EBADMSG;
+ old_sid = (size_t)val;
+
+ old_value = hkvs_slab_at(old_s, old_sid);
+
+ if(slab == new_slab) {
+ hkvs_write_u64(p, new_size);
+ if(pvalue) *pvalue = old_value;
+ return 0;
+ }
+ break;
+ }
+
+ default:
+ return -EBADMSG;
+ }
+ }
+
+ if(old_size < keep_size) keep_size = old_size;
+ if(new_size < keep_size) keep_size = new_size;
+
+ char *new_value;
+
+ if(new_size <= inl_size) {
+ hkvs_slab_set_mark(&t->slab, id, (uint8_t)(new_size + 1));
+ new_value = p;
+ } else {
+ new_slab = hkvs_slab_for_size(new_size);
+ hkvs_write_u64(p, new_size);
+ if(new_slab < HKVS_N_SLABS) {
+ hkvs_slab *s = &c->slabs[new_slab];
+ size_t sid;
+ int r = hkvs_slab_alloc(c, s, &sid);
+ if(r < 0) return r;
+ hkvs_slab_set_mark(s, sid, 1);
+ new_value = hkvs_slab_at(s, sid);
+ hkvs_write_u64(p + 8, ((uint64_t)sid << 8) | new_slab);
+ hkvs_slab_set_mark(&t->slab, id, HKVS_MARK_SLAB);
+ } else {
+ char *addr;
+ uint64_t new_fi;
+ int r = hkvs_extfile_create(c, &new_fi, &addr, new_size, !!pvalue);
+ if(r < 0) return r;
+ new_value = addr;
+ hkvs_write_u64(p + 8, new_fi);
+ hkvs_slab_set_mark(&t->slab, id, HKVS_MARK_EXT);
+ }
+ }
+
+ memcpy(new_value, old_value, keep_size);
+ if(pvalue) *pvalue = new_value;
+
+ if(old_fi) {
+ hkvs_extfile_close(c, old_fi_slot);
+ c->io->unlink(c->io, old_fi);
+ }
+
+ if(old_s) {
+ hkvs_slab_discard(old_s, old_sid);
+ }
+
+ return 0;
+
+wtf:
+ hkvs_die("record does not exist");
+}
+
+void hkvs_record_delete(hkvs *c, hkvs_table *t, hkvs_rid rid) {
+ if(rid.id > hkvs_slab_size(&t->slab)) goto wtf;
+ size_t id = (size_t)rid.id;
+ size_t key_size = hkvs_table__key_size(t);
+ uint8_t mark = hkvs_slab_mark(&t->slab, id);
+ if(mark == 0) goto wtf;
+ char *key = hkvs_slab_at(&t->slab, id);
+ char *p = key + key_size;
+
+ uint64_t unlink_fi = 0;
+ hkvs_slab *s = NULL;
+ size_t sid;
+
+ switch(mark) {
+ case HKVS_MARK_SLAB:
+ {
+ uint64_t val = hkvs_read_u64(p + 8);
+ uint8_t slab = val & 255;
+ val >>= 8;
+
+ if(slab >= HKVS_N_SLABS) goto corrupted;
+ s = &c->slabs[slab];
+
+ if(val >= hkvs_slab_size(s)) goto corrupted;
+ sid = (size_t)val;
+ break;
+ }
+
+ case HKVS_MARK_EXT:
+ {
+ uint64_t fi = hkvs_read_u64(p + 8);
+ if(fi == 0) goto corrupted;
+
+ size_t slot;
+ if(hkvs_extfile_find(c, fi, &slot)) {
+ hkvs_extfile_close(c, slot);
+ }
+
+ unlink_fi = fi;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ hkvs_ix_remove(t, id);
+ hkvs_slab_discard(&t->slab, id);
+
+ if(unlink_fi) {
+ c->io->unlink(c->io, unlink_fi);
+ }
+
+ if(s) {
+ hkvs_slab_discard(s, sid);
+ }
+
+ return;
+
+corrupted:
+ hkvs_die("database corrupted");
+
+wtf:
+ hkvs_die("record does not exist");
+}
+
+int hkvs_record_append(hkvs *c, hkvs_table *t, hkvs_rid *prid, const char *key, char **pvalue, size_t size) {
+ size_t key_size = hkvs_table__key_size(t);
+ uint8_t inl_size = hkvs_table__inl_size(t);
+
+ size_t id;
+ int r = hkvs_slab_alloc(c, &t->slab, &id);
+ if(r < 0) return r;
+
+ char *r_key = hkvs_slab_at(&t->slab, id);
+ memcpy(r_key, key, key_size);
+
+ char *p = r_key + key_size;
+ char *mark = hkvs_slab_mark_at(&t->slab, id);
+
+ hkvs_slab *s = NULL;
+ size_t sid;
+ char *value;
+
+ uint64_t ext_fi = 0;
+
+ if(size <= inl_size) {
+ hkvs_write_u8(mark, (uint8_t)(size + 1));
+ value = p;
+ } else {
+ uint8_t slab = hkvs_slab_for_size(size);
+ hkvs_write_u64(p, size);
+ if(slab < HKVS_N_SLABS) {
+ s = &c->slabs[slab];
+ r = hkvs_slab_alloc(c, s, &sid);
+ if(r < 0) return r;
+ value = hkvs_slab_at(s, sid);
+ hkvs_slab_set_mark(s, sid, 1);
+
+ hkvs_write_u64(p + 8, (uint64_t)sid << 8 | slab);
+ hkvs_write_u8(mark, HKVS_MARK_SLAB);
+ } else {
+ char *addr;
+ r = hkvs_extfile_create(c, &ext_fi, &addr, size, !!pvalue);
+ value = addr;
+
+ hkvs_write_u64(p + 8, ext_fi);
+ hkvs_write_u8(mark, HKVS_MARK_EXT);
+ }
+ }
+
+ r = hkvs_ix_add(c, t, id);
+ if(r >= 0) {
+ if(prid) *prid = (hkvs_rid){id};
+ if(pvalue) *pvalue = value;
+ return 1;
+ }
+
+ hkvs_write_u8(mark, 0);
+
+ if(s) {
+ hkvs_slab_discard(s, sid);
+ }
+
+ if(ext_fi) {
+ size_t slot = --c->n_extfiles;
+ hkvs_io *io = c->io;
+ io->close(io, ext_fi, c->extfiles[slot].addr, c->extfiles[slot].size);
+ io->unlink(io, ext_fi);
+ }
+
+ return r;
+}
diff --git a/lib/slab.c b/lib/slab.c
new file mode 100644
index 0000000..2d2d8e1
--- /dev/null
+++ b/lib/slab.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+#include <string.h>
+#include <stdlib.h>
+
+static size_t hkvs_slab_size_to_count(size_t elt_size, size_t size) {
+ size_t block_size = (elt_size + 1) * HKVS_SLAB_BLOCK;
+
+ size_t blocks = size / block_size;
+ size %= block_size;
+ size_t rest = 0;
+ if(size > HKVS_SLAB_BLOCK) {
+ rest = (size - HKVS_SLAB_BLOCK) / elt_size;
+ }
+
+ return blocks * HKVS_SLAB_BLOCK + rest;
+}
+
+static size_t hkvs_slab_count_to_size(size_t elt_size, size_t count) {
+ size_t size;
+ if(__builtin_mul_overflow(count, elt_size + 1, &size)) return SIZE_MAX;
+ if(__builtin_add_overflow(size, (-count) % HKVS_SLAB_BLOCK, &size)) return SIZE_MAX;
+ return size;
+}
+
+int hkvs_slab_init(hkvs_slab *s, hkvs_io *io, char *param, size_t elt_size) {
+ s->param = param;
+ s->elt_size = elt_size;
+
+ uint64_t fi = hkvs_read_u64(param + HKVS_PARAM_SLAB_FI);
+ if(fi == 0) {
+ s->addr = NULL;
+ s->map_size = 0;
+ return 0;
+ }
+
+ int r = io->open(io, fi, &s->addr, &s->map_size);
+ if(r < 0) return r;
+
+ uint64_t alloc_count = hkvs_read_u64(param + HKVS_PARAM_SLAB_ALLOC);
+ if(alloc_count > hkvs_slab_size_to_count(elt_size, s->map_size)) {
+ io->close(io, fi, s->addr, s->map_size);
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+int hkvs_slab_alloc(hkvs *c, hkvs_slab *s, size_t *pi) {
+ size_t alloc_count = hkvs_slab_size(s);
+
+ uint64_t at_64 = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_FREE);
+ if(at_64 < alloc_count) {
+ size_t at = (size_t)at_64;
+
+ char *p = hkvs_slab_at(s, at);
+
+ for(size_t i = 1; i < HKVS_SLAB_FREEPTRS; i++) {
+ uint64_t v = hkvs_read_u64(p + i * 8);
+ if(v <= at) return -EBADMSG;
+ if(v < alloc_count) {
+ size_t id = (size_t)v;
+ hkvs_write_u64(p + i * 8, UINT64_MAX);
+ *pi = id;
+ return 0;
+ }
+ }
+
+ at_64 = hkvs_read_u64(p);
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FREE, at_64);
+
+ *pi = at;
+ return 0;
+ }
+
+ if(at_64 != UINT64_MAX) {
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FREE, UINT64_MAX);
+ }
+
+ size_t cap_count = hkvs_slab_size_to_count(s->elt_size, s->map_size);
+ hkvs_assume(alloc_count <= cap_count);
+ if(alloc_count == cap_count) {
+ size_t grow_count = cap_count + (cap_count >> 3) + 1;
+ size_t grow_size;
+ while(true) {
+ grow_size = hkvs_slab_count_to_size(s->elt_size, grow_count);
+ if(grow_size != 0) break;
+ size_t dec = (grow_count - cap_count) / 2;
+ if(dec == 0) return -EFBIG;
+ grow_count -= dec;
+ }
+
+ hkvs_io *io = c->io;
+ uint64_t fi = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_FI);
+ if(fi == 0) {
+ int r = io->create(io, &fi, &s->addr, grow_size);
+ if(r < 0) return r;
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FI, fi);
+ } else {
+ int r = io->resize(io, fi, &s->addr, s->map_size, grow_size);
+ if(r < 0) return r;
+ }
+ s->map_size = grow_size;
+ }
+
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_ALLOC, alloc_count + 1);
+ *pi = alloc_count;
+ return 0;
+}
+
+void hkvs_slab_discard(hkvs_slab *s, size_t id) {
+ size_t alloc_count = hkvs_slab_size(s);
+ hkvs_assume(id < alloc_count);
+ if(id == alloc_count - 1) {
+ do {
+ alloc_count--;
+ } while(alloc_count > 0 && hkvs_slab_mark(s, alloc_count - 1) == 0);
+
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_ALLOC, alloc_count);
+ return;
+ }
+
+ hkvs_slab_set_mark(s, id, 0);
+
+ uint64_t free_64 = hkvs_read_u64(s->param + HKVS_PARAM_SLAB_FREE);
+ if(id < free_64) {
+ hkvs_write_u64(s->param + HKVS_PARAM_SLAB_FREE, id);
+ char *p = hkvs_slab_at(s, id);
+ hkvs_write_u64(p, free_64);
+ for(size_t i = 1; i < HKVS_SLAB_FREEPTRS; i++) {
+ hkvs_write_u64(p + i * 8, UINT64_MAX);
+ }
+ return;
+ }
+
+ char *p = hkvs_slab_at(s, (size_t)free_64);
+ uint64_t v = hkvs_read_u64(p + 8);
+
+ if(v >= alloc_count) {
+ size_t i = 2;
+ while(true) {
+ v = hkvs_read_u64(p + i * 8);
+ if(v < alloc_count) break;
+ i++;
+ if(i == HKVS_SLAB_FREEPTRS) {
+ hkvs_write_u64(p + 8, id);
+ return;
+ }
+ }
+ while(v < id) {
+ hkvs_write_u64(p + (i - 1) * 8, v);
+ i++;
+ if(i == HKVS_SLAB_FREEPTRS) break;
+ v = hkvs_read_u64(p + i * 8);
+ }
+ hkvs_write_u64(p + (i - 1) * 8, id);
+ return;
+ }
+
+ if(hkvs_read_u64(p + (HKVS_SLAB_FREEPTRS - 1) * 8) >= alloc_count) {
+ size_t i = 1;
+ while(v < id) {
+ i++;
+ v = hkvs_read_u64(p + i * 8);
+ }
+ uint64_t v2 = id;
+ while(true) {
+ hkvs_write_u64(p + i * 8, v2);
+ if(v >= alloc_count) break;
+ v2 = v;
+ i++;
+ v = hkvs_read_u64(p + i * 8);
+ }
+ return;
+ }
+
+ if(v < id) {
+ uint64_t v2 = id;
+ id = (size_t)v;
+ size_t i = 2;
+ do {
+ v = hkvs_read_u64(p + i * 8);
+ if(v >= v2) break;
+ hkvs_write_u64(p + (i - 1) * 8, v);
+ i++;
+ } while(i < HKVS_SLAB_FREEPTRS);
+ hkvs_write_u64(p + (i - 1) * 8, v2);
+ }
+
+ char *p2 = hkvs_slab_at(s, id);
+ memcpy(p2, p, HKVS_SLAB_FREEPTRS * 8);
+
+ hkvs_write_u64(p, id);
+ for(size_t i = 1; i < HKVS_SLAB_FREEPTRS; i++) {
+ hkvs_write_u64(p + i * 8, UINT64_MAX);
+ }
+ return;
+}
diff --git a/lib/table.c b/lib/table.c
new file mode 100644
index 0000000..331a865
--- /dev/null
+++ b/lib/table.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+int hkvs_table_new(hkvs_table **pt, hkvs_io *io, char *param) {
+ size_t elt_size = hkvs_read_u16(param + HKVS_PARAM_TABLE_ELTSZ);
+ if(!elt_size) {
+ *pt = NULL;
+ return 0;
+ }
+
+ size_t key_size = hkvs_read_u8(param + HKVS_PARAM_TABLE_KEYSZ);
+ if(key_size > elt_size) return -EBADMSG;
+
+ hkvs_table *t = malloc(sizeof(hkvs_table));
+ if(!t) return -ENOMEM;
+
+ int r = hkvs_slab_init(&t->slab, io, param, elt_size);
+ if(r < 0) goto fail_free;
+
+ uint64_t ix_fi = hkvs_read_u64(param + HKVS_PARAM_TABLE_IXFI);
+ if(ix_fi != 0) {
+ uint8_t ix_bits = hkvs_read_u8(param + HKVS_PARAM_TABLE_IXBIT);
+ if(ix_bits < 4 || ix_bits > 59) goto ix_unlink;
+
+ r = io->open(io, ix_fi, &t->ix, &t->ix_size);
+ if(r < 0) {
+ if(r == -ENOENT) goto ix_dropped;
+ return r;
+ }
+ if(t->ix_size < HKVS_IX_SIZE(ix_bits)) {
+ goto ix_build;
+ }
+ }
+
+ret:
+ *pt = t;
+ return 1;
+
+ix_unlink:
+ io->unlink(io, ix_fi);
+
+ix_dropped:
+ hkvs_write_u64(param + HKVS_PARAM_TABLE_IXFI, 0);
+
+ix_build:
+ r = hkvs_ix_build(t, io);
+ if(r < 0) goto fail_close;
+
+ goto ret;
+
+fail_close:
+ hkvs_slab_destroy(&t->slab, io);
+
+fail_free:
+ free(t);
+ return r;
+}
+
+hkvs_table *hkvs_table_at(hkvs *c, size_t i) {
+ size_t n_tables = c->n_tables;
+ if(i >= n_tables) return NULL;
+
+ hkvs_table **tables = c->tables;
+ for(size_t j = 0; j < n_tables; j++) {
+ hkvs_table *t = tables[j];
+ if(!t) continue;
+ if(!i--) return t;
+ }
+ return NULL;
+}
+
+uint32_t hkvs_table_id(hkvs *c, hkvs_table *t) {
+ (void)c;
+ return hkvs_table__id(t);
+}
+
+size_t hkvs_table_key_size(hkvs *c, hkvs_table *t) {
+ (void)c;
+ return hkvs_table__key_size(t);
+}
+
+int hkvs_table_delete(hkvs *c, uint32_t id) {
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(!t) continue;
+ if(hkvs_table_id(c, t) == id) {
+ tables[i] = NULL;
+
+ char *param = t->slab.param;
+ hkvs_io *io = c->io;
+
+ hkvs_table_free(t, io);
+
+ uint64_t fi = hkvs_read_u64(param + HKVS_PARAM_SLAB_FI);
+ uint64_t ix_fi = hkvs_read_u64(param + HKVS_PARAM_TABLE_IXFI);
+ memset(param, 0, HKVS_PSIZE_TABLE);
+
+ assert(fi && ix_fi);
+
+ io->unlink(io, ix_fi);
+ io->unlink(io, fi);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+hkvs_table *hkvs_table_open(hkvs *c, uint32_t id, size_t key_size) {
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(!t) continue;
+ if(hkvs_table__id(t) == id) {
+ if(hkvs_table__key_size(t) != key_size) return NULL;
+ return t;
+ }
+ }
+ return NULL;
+}
+
+HKVS_INLINE size_t hkvs_elt_size(size_t key_size, size_t val_size_hint) {
+ size_t elt_size;
+
+ if(val_size_hint >= 16 && val_size_hint <= 248) {
+ elt_size = key_size + val_size_hint;
+ elt_size += (-elt_size) % 64;
+ if(elt_size - key_size <= 248) return elt_size;
+ }
+
+ elt_size = key_size + 16;
+ elt_size += (-elt_size) % 64;
+ return elt_size;
+}
+
+
+int hkvs_table_create(hkvs *c, hkvs_table **pt, uint32_t id, size_t key_size_z, size_t val_size_hint) {
+ hkvs_table **tables = c->tables;
+ size_t n_tables = c->n_tables;
+ for(size_t i = 0; i < n_tables; i++) {
+ hkvs_table *t = tables[i];
+ if(!t) continue;
+ if(hkvs_table__id(t) == id) {
+ if(hkvs_table__key_size(t) != key_size_z) return -EEXIST;
+ *pt = t;
+ return 0;
+ }
+ }
+
+ if(key_size_z > 255) return -EINVAL;
+ uint8_t key_size = (uint8_t)key_size_z;
+ size_t elt_size = hkvs_elt_size(key_size, val_size_hint);
+
+ size_t slot;
+ for(slot = 0; slot < n_tables; slot++) {
+ if(!tables[slot]) break;
+ }
+ if(slot == n_tables) {
+ if(c->alloc_tables == n_tables) {
+ size_t new_alloc = c->alloc_tables;
+ new_alloc += (new_alloc >> 2) + 1;
+ tables = realloc(tables, new_alloc * sizeof(hkvs_table*));
+ if(!tables) return -ENOMEM;
+ c->tables = tables;
+ c->alloc_tables = new_alloc;
+ }
+
+ c->tables[n_tables++] = NULL;
+ c->n_tables = n_tables;
+ int r = hkvs_resize_param(c);
+ if(r < 0) {
+ c->n_tables = --n_tables;
+ return r;
+ }
+ }
+
+ char *param = c->root_param + HKVS_POFF_TABLE + slot * HKVS_PSIZE_TABLE;
+ memset(param, 0, HKVS_PSIZE_TABLE);
+
+ hkvs_table *t = malloc(sizeof(hkvs_table));
+ if(!t) return -ENOMEM;
+
+ hkvs_write_u16(param + HKVS_PARAM_TABLE_ELTSZ, (uint16_t)elt_size);
+ hkvs_write_u8(param + HKVS_PARAM_TABLE_KEYSZ, key_size);
+ hkvs_write_u32(param + HKVS_PARAM_TABLE_ID, id);
+
+ hkvs_io *io = c->io;
+
+ int r = hkvs_slab_init(&t->slab, io, param, elt_size);
+ if(r < 0) goto fail_free;
+
+ r = hkvs_ix_build(t, io);
+ if(r < 0) goto fail_close;
+
+ c->tables[slot] = t;
+
+ *pt = t;
+ return 1;
+
+fail_close:
+ hkvs_slab_destroy(&t->slab, io);
+
+ uint64_t fi = hkvs_read_u64(param + HKVS_PARAM_SLAB_FI);
+ io->unlink(io, fi);
+
+fail_free:
+ free(t);
+ return r;
+}
diff --git a/lib/util.c b/lib/util.c
new file mode 100644
index 0000000..560671c
--- /dev/null
+++ b/lib/util.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "impl.h"
+
+int hkvs_record_set(hkvs *c, hkvs_table *t, hkvs_rid rid, const char *value, size_t size) {
+ char *ptr;
+ int r = hkvs_record_resize(c, t, rid, &ptr, size, 0);
+ if(r < 0) return r;
+ memcpy(ptr, value, size);
+ hkvs_put_records(c);
+ return 0;
+}
+
+int hkvs_record_update(hkvs *c, hkvs_table *t, hkvs_rid *prid, const char *key, const char *value, size_t size) {
+ hkvs_rid rid = hkvs_record_find(c, t, key);
+ if(hkvs_rid_ok(rid)) {
+ int r = hkvs_record_set(c, t, rid, value, size);
+ if(r < 0) return 0;
+ *prid = rid;
+ return 0;
+ } else {
+ char *ptr;
+ int r = hkvs_record_append(c, t, &rid, key, &ptr, size);
+ if(r < 0) return r;
+ memcpy(ptr, value, size);
+ hkvs_put_records(c);
+ *prid = rid;
+ return 1;
+ }
+}