Diffstat (limited to 'src/include')
-rw-r--r--   src/include/liburing.h            221
-rw-r--r--   src/include/liburing/barrier.h     87
-rw-r--r--   src/include/liburing/compat.h       8
-rw-r--r--   src/include/liburing/io_uring.h   146
4 files changed, 462 insertions, 0 deletions
diff --git a/src/include/liburing.h b/src/include/liburing.h
new file mode 100644
index 0000000..fb78cd3
--- /dev/null
+++ b/src/include/liburing.h
@@ -0,0 +1,221 @@
+#ifndef LIB_URING_H
+#define LIB_URING_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/uio.h>
+#include <signal.h>
+#include <string.h>
+#include <inttypes.h>
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+#include "liburing/barrier.h"
+
+/*
+ * Library interface to io_uring
+ */
+struct io_uring_sq {
+	unsigned *khead;
+	unsigned *ktail;
+	unsigned *kring_mask;
+	unsigned *kring_entries;
+	unsigned *kflags;
+	unsigned *kdropped;
+	unsigned *array;
+	struct io_uring_sqe *sqes;
+
+	unsigned sqe_head;
+	unsigned sqe_tail;
+
+	size_t ring_sz;
+	void *ring_ptr;
+};
+
+struct io_uring_cq {
+	unsigned *khead;
+	unsigned *ktail;
+	unsigned *kring_mask;
+	unsigned *kring_entries;
+	unsigned *koverflow;
+	struct io_uring_cqe *cqes;
+
+	size_t ring_sz;
+	void *ring_ptr;
+};
+
+struct io_uring {
+	struct io_uring_sq sq;
+	struct io_uring_cq cq;
+	unsigned flags;
+	int ring_fd;
+};
+
+/*
+ * System calls
+ */
+extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
+extern int io_uring_enter(unsigned fd, unsigned to_submit,
+	unsigned min_complete, unsigned flags, sigset_t *sig);
+extern int io_uring_register(int fd, unsigned int opcode, const void *arg,
+	unsigned int nr_args);
+
+/*
+ * Library interface
+ */
+extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+	unsigned flags);
+extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+	struct io_uring *ring);
+extern void io_uring_queue_exit(struct io_uring *ring);
+extern int io_uring_peek_cqe(struct io_uring *ring,
+	struct io_uring_cqe **cqe_ptr);
+extern int io_uring_wait_cqe(struct io_uring *ring,
+	struct io_uring_cqe **cqe_ptr);
+extern int io_uring_submit(struct io_uring *ring);
+extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
+extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
+
+extern int io_uring_register_buffers(struct io_uring *ring,
+					const struct iovec *iovecs,
+					unsigned nr_iovecs);
+extern int io_uring_unregister_buffers(struct io_uring *ring);
+extern int io_uring_register_files(struct io_uring *ring, const int *files,
+					unsigned nr_files);
+extern int io_uring_unregister_files(struct io_uring *ring);
+extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
+extern int io_uring_unregister_eventfd(struct io_uring *ring);
+
+#define io_uring_for_each_cqe(ring, head, cqe)				\
+	/* smp_load_acquire() enforces the order of tail and CQE reads. */ \
+	for (head = *(ring)->cq.khead;					\
+	     (cqe = (head != smp_load_acquire((ring)->cq.ktail) ?	\
+		&(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \
+	     head++)							\
+
+
+/*
+ * Must be called after io_uring_for_each_cqe()
+ */
+static inline void io_uring_cq_advance(struct io_uring *ring,
+				       unsigned nr)
+{
+	if (nr) {
+		struct io_uring_cq *cq = &ring->cq;
+
+		/*
+		 * Ensure that the kernel only sees the new value of the head
+		 * index after the CQEs have been read.
+		 */
+		smp_store_release(cq->khead, *cq->khead + nr);
+	}
+}
+
+/*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+				     struct io_uring_cqe *cqe)
+{
+	if (cqe)
+		io_uring_cq_advance(ring, 1);
+}
+
+/*
+ * Command prep helpers
+ */
+static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
+{
+	sqe->user_data = (unsigned long) data;
+}
+
+static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe)
+{
+	return (void *) (uintptr_t) cqe->user_data;
+}
+
+static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
+					  unsigned flags)
+{
+	sqe->flags = flags;
+}
+
+static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
+				    const void *addr, unsigned len,
+				    off_t offset)
+{
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->opcode = op;
+	sqe->fd = fd;
+	sqe->off = offset;
+	sqe->addr = (unsigned long) addr;
+	sqe->len = len;
+}
+
+static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
+				       const struct iovec *iovecs,
+				       unsigned nr_vecs, off_t offset)
+{
+	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
+					    void *buf, unsigned nbytes,
+					    off_t offset)
+{
+	io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
+}
+
+static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
+					const struct iovec *iovecs,
+					unsigned nr_vecs, off_t offset)
+{
+	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
+					     const void *buf, unsigned nbytes,
+					     off_t offset)
+{
+	io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
+}
+
+static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
+					  short poll_mask)
+{
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->opcode = IORING_OP_POLL_ADD;
+	sqe->fd = fd;
+	sqe->poll_events = poll_mask;
+}
+
+static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
+					     void *user_data)
+{
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->opcode = IORING_OP_POLL_REMOVE;
+	sqe->addr = (unsigned long) user_data;
+}
+
+static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
+				       unsigned fsync_flags)
+{
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->opcode = IORING_OP_FSYNC;
+	sqe->fd = fd;
+	sqe->fsync_flags = fsync_flags;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+	memset(sqe, 0, sizeof(*sqe));
+	sqe->opcode = IORING_OP_NOP;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
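liburing.h above is the entire user-facing surface of the library: ring setup and teardown, SQE preparation helpers, submission, and completion reaping. A minimal sketch of the intended call flow, assuming a readable file named "testfile" (the file name, buffer size, and queue depth are illustrative, and error handling is abbreviated):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include "liburing.h"

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct iovec iov;
	char buf[4096];
	int fd, ret;

	/* One SQ/CQ pair, 8 entries deep, no special setup flags. */
	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return 1;

	fd = open("testfile", O_RDONLY);	/* hypothetical input file */
	if (fd < 0)
		return 1;

	/* Grab a free SQE, describe the read, attach our cookie. */
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_readv(sqe, fd, &iov, 1, 0);
	io_uring_sqe_set_data(sqe, buf);

	/* Tell the kernel about it, then block for the completion. */
	io_uring_submit(&ring);
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret)
		printf("res=%d data=%p\n", cqe->res, io_uring_cqe_get_data(cqe));
	io_uring_cqe_seen(&ring, cqe);

	close(fd);
	io_uring_queue_exit(&ring);
	return 0;
}

For draining several completions at once, io_uring_for_each_cqe() pairs with a single io_uring_cq_advance() after the loop (handle_cqe() is a hypothetical per-completion handler):

	unsigned head, seen = 0;

	io_uring_for_each_cqe(&ring, head, cqe) {
		handle_cqe(cqe);
		seen++;
	}
	io_uring_cq_advance(&ring, seen);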
diff --git a/src/include/liburing/barrier.h b/src/include/liburing/barrier.h
new file mode 100644
index 0000000..98be9e5
--- /dev/null
+++ b/src/include/liburing/barrier.h
@@ -0,0 +1,87 @@
+#ifndef LIBURING_BARRIER_H
+#define LIBURING_BARRIER_H
+
+/*
+From the kernel documentation file refcount-vs-atomic.rst:
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+*/
+
+/* From tools/include/linux/compiler.h */
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+/* From tools/virtio/linux/compiler.h */
+#define WRITE_ONCE(var, val) \
+	(*((volatile __typeof(val) *)(&(var))) = (val))
+#define READ_ONCE(var) (*((volatile __typeof(var) *)(&(var))))
+
+
+#if defined(__x86_64__) || defined(__i386__)
+/* Adapted from arch/x86/include/asm/barrier.h */
+#define mb()	asm volatile("mfence" ::: "memory")
+#define rmb()	asm volatile("lfence" ::: "memory")
+#define wmb()	asm volatile("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#if defined(__i386__)
+#define smp_mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory", "cc")
+#else
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#endif
+
+#define smp_store_release(p, v)			\
+do {						\
+	barrier();				\
+	WRITE_ONCE(*(p), (v));			\
+} while (0)
+
+#define smp_load_acquire(p)			\
+({						\
+	__typeof(*p) ___p1 = READ_ONCE(*(p));	\
+	barrier();				\
+	___p1;					\
+})
+#else /* defined(__x86_64__) || defined(__i386__) */
+/*
+ * Add arch appropriate definitions. Be safe and use full barriers for
+ * archs we don't have support for.
+ */
+#define smp_rmb() __sync_synchronize()
+#define smp_wmb() __sync_synchronize()
+#endif /* defined(__x86_64__) || defined(__i386__) */
+
+/* From tools/include/asm/barrier.h */
+
+#ifndef smp_store_release
+# define smp_store_release(p, v)		\
+do {						\
+	smp_mb();				\
+	WRITE_ONCE(*p, v);			\
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p)			\
+({						\
+	__typeof(*p) ___p1 = READ_ONCE(*p);	\
+	smp_mb();				\
+	___p1;					\
+})
+#endif
+
+#endif /* defined(LIBURING_BARRIER_H) */
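The release/acquire pair above is what lets the application and the kernel share the ring indices without locks: a producer fills a slot and only then publishes the new tail with smp_store_release(); a consumer reads the tail with smp_load_acquire(), which orders its subsequent slot reads after that load. A sketch of the pattern against a hypothetical single-producer/single-consumer ring (struct shared_ring, ring_produce(), and ring_consume() are illustrative names, not library code):

#include "liburing/barrier.h"

struct shared_ring {
	unsigned head;		/* consumer-owned index */
	unsigned tail;		/* producer-owned index */
	unsigned mask;		/* ring_entries - 1, entries a power of 2 */
	int slots[];
};

/* Producer: write the entry before publishing the new tail. */
static void ring_produce(struct shared_ring *r, int value)
{
	unsigned tail = r->tail;

	r->slots[tail & r->mask] = value;	/* fill the slot first */
	smp_store_release(&r->tail, tail + 1);	/* then make it visible */
}

/* Consumer: acquire the tail; slot reads are ordered after it. */
static int ring_consume(struct shared_ring *r, int *value)
{
	unsigned head = r->head;

	if (head == smp_load_acquire(&r->tail))
		return 0;			/* ring empty */
	*value = r->slots[head & r->mask];
	smp_store_release(&r->head, head + 1);	/* allow slot reuse */
	return 1;
}

This is the same shape as io_uring_for_each_cqe() (acquire on ktail) and io_uring_cq_advance() (release on khead) in liburing.h above.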
diff --git a/src/include/liburing/compat.h b/src/include/liburing/compat.h
new file mode 100644
index 0000000..d322499
--- /dev/null
+++ b/src/include/liburing/compat.h
@@ -0,0 +1,8 @@
+#ifndef LIBURING_COMPAT_H
+#define LIBURING_COMPAT_H
+
+#if !defined(CONFIG_HAVE_KERNEL_RWF_T)
+typedef int __kernel_rwf_t;
+#endif
+
+#endif
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
new file mode 100644
index 0000000..a61c4a6
--- /dev/null
+++ b/src/include/liburing/io_uring.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Header file for the io_uring interface.
+ *
+ * Copyright (C) 2019 Jens Axboe
+ * Copyright (C) 2019 Christoph Hellwig
+ */
+#ifndef LINUX_IO_URING_H
+#define LINUX_IO_URING_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+
+/*
+ * IO submission data structure (Submission Queue Entry)
+ */
+struct io_uring_sqe {
+	__u8	opcode;		/* type of operation for this sqe */
+	__u8	flags;		/* IOSQE_ flags */
+	__u16	ioprio;		/* ioprio for the request */
+	__s32	fd;		/* file descriptor to do IO on */
+	__u64	off;		/* offset into file */
+	__u64	addr;		/* pointer to buffer or iovecs */
+	__u32	len;		/* buffer size or number of iovecs */
+	union {
+		__kernel_rwf_t	rw_flags;
+		__u32		fsync_flags;
+		__u16		poll_events;
+		__u32		sync_range_flags;
+		__u32		msg_flags;
+	};
+	__u64	user_data;	/* data to be passed back at completion time */
+	union {
+		__u16	buf_index;	/* index into fixed buffers, if used */
+		__u64	__pad2[3];
+	};
+};
+
+/*
+ * sqe->flags
+ */
+#define IOSQE_FIXED_FILE	(1U << 0)	/* use fixed fileset */
+#define IOSQE_IO_DRAIN		(1U << 1)	/* issue after inflight IO */
+#define IOSQE_IO_LINK		(1U << 2)	/* next IO depends on this one */
+
+/*
+ * io_uring_setup() flags
+ */
+#define IORING_SETUP_IOPOLL	(1U << 0)	/* io_context is polled */
+#define IORING_SETUP_SQPOLL	(1U << 1)	/* SQ poll thread */
+#define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
+
+#define IORING_OP_NOP		0
+#define IORING_OP_READV		1
+#define IORING_OP_WRITEV	2
+#define IORING_OP_FSYNC		3
+#define IORING_OP_READ_FIXED	4
+#define IORING_OP_WRITE_FIXED	5
+#define IORING_OP_POLL_ADD	6
+#define IORING_OP_POLL_REMOVE	7
+#define IORING_OP_SYNC_FILE_RANGE	8
+#define IORING_OP_SENDMSG	9
+#define IORING_OP_RECVMSG	10
+
+/*
+ * sqe->fsync_flags
+ */
+#define IORING_FSYNC_DATASYNC	(1U << 0)
+
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+	__u64	user_data;	/* sqe->data submission passed back */
+	__s32	res;		/* result code for this event */
+	__u32	flags;
+};
+
+/*
+ * Magic offsets for the application to mmap the data it needs
+ */
+#define IORING_OFF_SQ_RING	0ULL
+#define IORING_OFF_CQ_RING	0x8000000ULL
+#define IORING_OFF_SQES		0x10000000ULL
+
+/*
+ * Filled with the offset for mmap(2)
+ */
+struct io_sqring_offsets {
+	__u32 head;
+	__u32 tail;
+	__u32 ring_mask;
+	__u32 ring_entries;
+	__u32 flags;
+	__u32 dropped;
+	__u32 array;
+	__u32 resv1;
+	__u64 resv2;
+};
+
+/*
+ * sq_ring->flags
+ */
+#define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
+
+struct io_cqring_offsets {
+	__u32 head;
+	__u32 tail;
+	__u32 ring_mask;
+	__u32 ring_entries;
+	__u32 overflow;
+	__u32 cqes;
+	__u64 resv[2];
+};
+
+/*
+ * io_uring_enter(2) flags
+ */
+#define IORING_ENTER_GETEVENTS	(1U << 0)
+#define IORING_ENTER_SQ_WAKEUP	(1U << 1)
+
+/*
+ * Passed in for io_uring_setup(2). Copied back with updated info on success
+ */
+struct io_uring_params {
+	__u32 sq_entries;
+	__u32 cq_entries;
+	__u32 flags;
+	__u32 sq_thread_cpu;
+	__u32 sq_thread_idle;
+	__u32 resv[5];
+	struct io_sqring_offsets sq_off;
+	struct io_cqring_offsets cq_off;
+};
+
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+#define IORING_REGISTER_BUFFERS		0
+#define IORING_UNREGISTER_BUFFERS	1
+#define IORING_REGISTER_FILES		2
+#define IORING_UNREGISTER_FILES		3
+#define IORING_REGISTER_EVENTFD		4
+#define IORING_UNREGISTER_EVENTFD	5
+
+#endif
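The IORING_OFF_* constants and the sq_off/cq_off structures returned in io_uring_params describe where each ring field lives inside the regions the application mmaps from the fd that io_uring_setup(2) returns. A sketch of that mapping sequence, roughly what io_uring_queue_mmap() does internally (setup_rings() is an illustrative name; error handling and the bookkeeping of the resulting pointers are omitted):

#include <string.h>
#include <sys/mman.h>
#include "liburing.h"

static int setup_rings(unsigned entries)
{
	struct io_uring_params p;
	struct io_uring_sqe *sqes;
	struct io_uring_cqe *cqes;
	unsigned *sq_head, *sq_tail, *sq_array;
	void *sq_ptr, *cq_ptr;
	size_t sq_sz, cq_sz;
	int fd;

	memset(&p, 0, sizeof(p));
	fd = io_uring_setup(entries, &p);	/* syscall wrapper from liburing.h */
	if (fd < 0)
		return fd;

	/* SQ ring: head/tail indices plus the array of sqe indexes. */
	sq_sz = p.sq_off.array + p.sq_entries * sizeof(__u32);
	sq_ptr = mmap(NULL, sq_sz, PROT_READ | PROT_WRITE,
		      MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	sq_head = (unsigned *) ((char *) sq_ptr + p.sq_off.head);
	sq_tail = (unsigned *) ((char *) sq_ptr + p.sq_off.tail);
	sq_array = (unsigned *) ((char *) sq_ptr + p.sq_off.array);

	/* The SQEs themselves live in a separate mapping. */
	sqes = mmap(NULL, p.sq_entries * sizeof(struct io_uring_sqe),
		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		    fd, IORING_OFF_SQES);

	/* CQ ring: indices plus the CQE array at cq_off.cqes. */
	cq_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
	cq_ptr = mmap(NULL, cq_sz, PROT_READ | PROT_WRITE,
		      MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	cqes = (struct io_uring_cqe *) ((char *) cq_ptr + p.cq_off.cqes);

	/* A real implementation stores these in io_uring_sq/io_uring_cq. */
	(void) sq_head; (void) sq_tail; (void) sq_array;
	(void) sqes; (void) cqes;
	return fd;
}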