From c31c7ec4bcd7bb0d7b28897d730431c02b9d4ea1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 24 Jul 2019 09:24:50 +0100 Subject: src/Makefile: keep private headers in <liburing/*.h> It is not possible to install barrier.h and compat.h into the top-level /usr/include directly since they are likely to conflict with other software. io_uring.h could be confused with the system's kernel header file. Put liburing headers into <liburing/*.h> so there is no chance of conflicts or confusion. Existing applications continue to build successfully since the location of <liburing.h> is unchanged. In-tree examples and tests require modification because src/liburing.h is moved to src/include/liburing.h. Signed-off-by: Stefan Hajnoczi Signed-off-by: Jens Axboe --- examples/Makefile | 2 +- examples/io_uring-cp.c | 2 +- examples/io_uring-test.c | 2 +- examples/link-cp.c | 2 +- liburing.spec | 3 +- src/Makefile | 14 +-- src/barrier.h | 87 ---------------- src/compat.h | 8 -- src/include/liburing.h | 221 ++++++++++++++++++++++++++++++++++++++++ src/include/liburing/barrier.h | 87 ++++++++++++++++ src/include/liburing/compat.h | 8 ++ src/include/liburing/io_uring.h | 146 ++++++++++++++++++++++++++ src/io_uring.h | 146 -------------------------- src/liburing.h | 221 ---------------------------------------- src/queue.c | 6 +- src/register.c | 4 +- src/setup.c | 4 +- src/syscall.c | 4 +- test/Makefile | 2 +- test/cq-full.c | 2 +- test/eeed8b54e0df-test.c | 2 +- test/fsync.c | 2 +- test/io_uring_enter.c | 4 +- test/io_uring_register.c | 2 +- test/io_uring_setup.c | 2 +- test/link.c | 2 +- test/nop.c | 2 +- test/poll-cancel.c | 2 +- test/poll.c | 2 +- test/ring-leak.c | 2 +- test/send_recvmsg.c | 2 +- test/sq-full.c | 2 +- 32 files changed, 499 insertions(+), 498 deletions(-) delete mode 100644 src/barrier.h delete mode 100644 src/compat.h create mode 100644 src/include/liburing.h create mode 100644 src/include/liburing/barrier.h create mode 100644 src/include/liburing/compat.h create mode 100644 
src/include/liburing/io_uring.h delete mode 100644 src/io_uring.h delete mode 100644 src/liburing.h diff --git a/examples/Makefile b/examples/Makefile index ed73fcd..1539ecc 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,5 +1,5 @@ CFLAGS ?= -g -O2 -override CFLAGS += -Wall -D_GNU_SOURCE -L../src/ +override CFLAGS += -Wall -D_GNU_SOURCE -L../src/ -I../src/include/ all_targets += io_uring-test io_uring-cp link-cp diff --git a/examples/io_uring-cp.c b/examples/io_uring-cp.c index 97f61aa..adb7b29 100644 --- a/examples/io_uring-cp.c +++ b/examples/io_uring-cp.c @@ -12,7 +12,7 @@ #include #include #include -#include "../src/liburing.h" +#include "liburing.h" #define QD 64 #define BS (32*1024) diff --git a/examples/io_uring-test.c b/examples/io_uring-test.c index 0b975ad..4f5ebf6 100644 --- a/examples/io_uring-test.c +++ b/examples/io_uring-test.c @@ -9,7 +9,7 @@ #include #include #include -#include "../src/liburing.h" +#include "liburing.h" #define QD 4 diff --git a/examples/link-cp.c b/examples/link-cp.c index a4c02e5..af80a2e 100644 --- a/examples/link-cp.c +++ b/examples/link-cp.c @@ -13,7 +13,7 @@ #include #include #include -#include "../src/liburing.h" +#include "liburing.h" #define QD 64 #define BS (32*1024) diff --git a/liburing.spec b/liburing.spec index 189a16a..e577a8f 100644 --- a/liburing.spec +++ b/liburing.spec @@ -47,7 +47,8 @@ make install DESTDIR=$RPM_BUILD_ROOT %files devel %defattr(-,root,root) -%attr(0644,root,root) %{_includedir}/* +%attr(0755,root,root) %{_includedir}/liburing/* +%attr(0644,root,root) %{_includedir}/liburing.h %attr(0755,root,root) %{_libdir}/liburing.so %attr(0644,root,root) %{_libdir}/liburing.a %attr(0644,root,root) %{_libdir}/pkgconfig/* diff --git a/src/Makefile b/src/Makefile index aa93199..cbd3fda 100644 --- a/src/Makefile +++ b/src/Makefile @@ -3,7 +3,7 @@ includedir ?= $(prefix)/include libdir ?= $(prefix)/lib CFLAGS ?= -g -fomit-frame-pointer -O2 -override CFLAGS += -Wall -I. 
+override CFLAGS += -Wall -Iinclude/ SO_CFLAGS=-shared -fPIC $(CFLAGS) L_CFLAGS=$(CFLAGS) LINK_FLAGS= @@ -27,7 +27,7 @@ liburing_srcs := setup.c queue.c syscall.c register.c liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs)) liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs)) -$(liburing_objs) $(liburing_sobjs): io_uring.h +$(liburing_objs) $(liburing_sobjs): include/liburing/io_uring.h %.os: %.c $(CC) $(SO_CFLAGS) -c -o $@ $< @@ -46,10 +46,10 @@ $(libname): $(liburing_sobjs) liburing.map $(CC) $(SO_CFLAGS) -Wl,--version-script=liburing.map -Wl,-soname=$(soname) -o $@ $(liburing_sobjs) $(LINK_FLAGS) install: $(all_targets) - install -D -m 644 io_uring.h $(includedir)/io_uring.h - install -D -m 644 liburing.h $(includedir)/liburing.h - install -D -m 644 compat.h $(includedir)/compat.h - install -D -m 644 barrier.h $(includedir)/barrier.h + install -D -m 644 include/liburing/io_uring.h $(includedir)/liburing/io_uring.h + install -D -m 644 include/liburing.h $(includedir)/liburing.h + install -D -m 644 include/liburing/compat.h $(includedir)/liburing/compat.h + install -D -m 644 include/liburing/barrier.h $(includedir)/liburing/barrier.h install -D -m 644 liburing.a $(libdir)/liburing.a ifeq ($(ENABLE_SHARED),1) install -D -m 755 $(libname) $(libdir)/$(libname) @@ -57,7 +57,7 @@ ifeq ($(ENABLE_SHARED),1) ln -sf $(libname) $(libdir)/liburing.so endif -$(liburing_objs): liburing.h +$(liburing_objs): include/liburing.h clean: rm -f $(all_targets) $(liburing_objs) $(liburing_sobjs) $(soname).new diff --git a/src/barrier.h b/src/barrier.h deleted file mode 100644 index 98be9e5..0000000 --- a/src/barrier.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef LIBURING_BARRIER_H -#define LIBURING_BARRIER_H - -/* -From the kernel documentation file refcount-vs-atomic.rst: - -A RELEASE memory ordering guarantees that all prior loads and -stores (all po-earlier instructions) on the same CPU are completed -before the operation. 
It also guarantees that all po-earlier -stores on the same CPU and all propagated stores from other CPUs -must propagate to all other CPUs before the release operation -(A-cumulative property). This is implemented using -:c:func:`smp_store_release`. - -An ACQUIRE memory ordering guarantees that all post loads and -stores (all po-later instructions) on the same CPU are -completed after the acquire operation. It also guarantees that all -po-later stores on the same CPU must propagate to all other CPUs -after the acquire operation executes. This is implemented using -:c:func:`smp_acquire__after_ctrl_dep`. -*/ - -/* From tools/include/linux/compiler.h */ -/* Optimization barrier */ -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") - -/* From tools/virtio/linux/compiler.h */ -#define WRITE_ONCE(var, val) \ - (*((volatile __typeof(val) *)(&(var))) = (val)) -#define READ_ONCE(var) (*((volatile __typeof(var) *)(&(var)))) - - -#if defined(__x86_64__) || defined(__i386__) -/* Adapted from arch/x86/include/asm/barrier.h */ -#define mb() asm volatile("mfence" ::: "memory") -#define rmb() asm volatile("lfence" ::: "memory") -#define wmb() asm volatile("sfence" ::: "memory") -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#if defined(__i386__) -#define smp_mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory", "cc") -#else -#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") -#endif - -#define smp_store_release(p, v) \ -do { \ - barrier(); \ - WRITE_ONCE(*(p), (v)); \ -} while (0) - -#define smp_load_acquire(p) \ -({ \ - __typeof(*p) ___p1 = READ_ONCE(*(p)); \ - barrier(); \ - ___p1; \ -}) -#else /* defined(__x86_64__) || defined(__i386__) */ -/* - * Add arch appropriate definitions. Be safe and use full barriers for - * archs we don't have support for. 
- */ -#define smp_rmb() __sync_synchronize() -#define smp_wmb() __sync_synchronize() -#endif /* defined(__x86_64__) || defined(__i386__) */ - -/* From tools/include/asm/barrier.h */ - -#ifndef smp_store_release -# define smp_store_release(p, v) \ -do { \ - smp_mb(); \ - WRITE_ONCE(*p, v); \ -} while (0) -#endif - -#ifndef smp_load_acquire -# define smp_load_acquire(p) \ -({ \ - __typeof(*p) ___p1 = READ_ONCE(*p); \ - smp_mb(); \ - ___p1; \ -}) -#endif - -#endif /* defined(LIBURING_BARRIER_H) */ diff --git a/src/compat.h b/src/compat.h deleted file mode 100644 index d322499..0000000 --- a/src/compat.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef LIBURING_COMPAT_H -#define LIBURING_COMPAT_H - -#if !defined(CONFIG_HAVE_KERNEL_RWF_T) -typedef int __kernel_rwf_t; -#endif - -#endif diff --git a/src/include/liburing.h b/src/include/liburing.h new file mode 100644 index 0000000..fb78cd3 --- /dev/null +++ b/src/include/liburing.h @@ -0,0 +1,221 @@ +#ifndef LIB_URING_H +#define LIB_URING_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include "liburing/compat.h" +#include "liburing/io_uring.h" +#include "liburing/barrier.h" + +/* + * Library interface to io_uring + */ +struct io_uring_sq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *kflags; + unsigned *kdropped; + unsigned *array; + struct io_uring_sqe *sqes; + + unsigned sqe_head; + unsigned sqe_tail; + + size_t ring_sz; + void *ring_ptr; +}; + +struct io_uring_cq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *koverflow; + struct io_uring_cqe *cqes; + + size_t ring_sz; + void *ring_ptr; +}; + +struct io_uring { + struct io_uring_sq sq; + struct io_uring_cq cq; + unsigned flags; + int ring_fd; +}; + +/* + * System calls + */ +extern int io_uring_setup(unsigned entries, struct io_uring_params *p); +extern int io_uring_enter(unsigned fd, unsigned to_submit, + unsigned 
min_complete, unsigned flags, sigset_t *sig); +extern int io_uring_register(int fd, unsigned int opcode, const void *arg, + unsigned int nr_args); + +/* + * Library interface + */ +extern int io_uring_queue_init(unsigned entries, struct io_uring *ring, + unsigned flags); +extern int io_uring_queue_mmap(int fd, struct io_uring_params *p, + struct io_uring *ring); +extern void io_uring_queue_exit(struct io_uring *ring); +extern int io_uring_peek_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr); +extern int io_uring_wait_cqe(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr); +extern int io_uring_submit(struct io_uring *ring); +extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr); +extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring); + +extern int io_uring_register_buffers(struct io_uring *ring, + const struct iovec *iovecs, + unsigned nr_iovecs); +extern int io_uring_unregister_buffers(struct io_uring *ring); +extern int io_uring_register_files(struct io_uring *ring, const int *files, + unsigned nr_files); +extern int io_uring_unregister_files(struct io_uring *ring); +extern int io_uring_register_eventfd(struct io_uring *ring, int fd); +extern int io_uring_unregister_eventfd(struct io_uring *ring); + +#define io_uring_for_each_cqe(ring, head, cqe) \ + /* smp_load_acquire() enforces the order of tail and CQE reads. */ \ + for (head = *(ring)->cq.khead; \ + (cqe = (head != smp_load_acquire((ring)->cq.ktail) ? \ + &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \ + head++) \ + + +/* + * Must be called after io_uring_for_each_cqe() + */ +static inline void io_uring_cq_advance(struct io_uring *ring, + unsigned nr) +{ + if (nr) { + struct io_uring_cq *cq = &ring->cq; + + /* + * Ensure that the kernel only sees the new value of the head + * index after the CQEs have been read. 
+ */ + smp_store_release(cq->khead, *cq->khead + nr); + } +} + +/* + * Must be called after io_uring_{peek,wait}_cqe() after the cqe has + * been processed by the application. + */ +static inline void io_uring_cqe_seen(struct io_uring *ring, + struct io_uring_cqe *cqe) +{ + if (cqe) + io_uring_cq_advance(ring, 1); +} + +/* + * Command prep helpers + */ +static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data) +{ + sqe->user_data = (unsigned long) data; +} + +static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe) +{ + return (void *) (uintptr_t) cqe->user_data; +} + +static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe, + unsigned flags) +{ + sqe->flags = flags; +} + +static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, + const void *addr, unsigned len, + off_t offset) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = op; + sqe->fd = fd; + sqe->off = offset; + sqe->addr = (unsigned long) addr; + sqe->len = len; +} + +static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, + const struct iovec *iovecs, + unsigned nr_vecs, off_t offset) +{ + io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); +} + +static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, + void *buf, unsigned nbytes, + off_t offset) +{ + io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset); +} + +static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, + const struct iovec *iovecs, + unsigned nr_vecs, off_t offset) +{ + io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset); +} + +static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, + const void *buf, unsigned nbytes, + off_t offset) +{ + io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); +} + +static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, + short poll_mask) +{ + memset(sqe, 0, sizeof(*sqe)); + 
sqe->opcode = IORING_OP_POLL_ADD; + sqe->fd = fd; + sqe->poll_events = poll_mask; +} + +static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe, + void *user_data) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_POLL_REMOVE; + sqe->addr = (unsigned long) user_data; +} + +static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, + unsigned fsync_flags) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_FSYNC; + sqe->fd = fd; + sqe->fsync_flags = fsync_flags; +} + +static inline void io_uring_prep_nop(struct io_uring_sqe *sqe) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = IORING_OP_NOP; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/include/liburing/barrier.h b/src/include/liburing/barrier.h new file mode 100644 index 0000000..98be9e5 --- /dev/null +++ b/src/include/liburing/barrier.h @@ -0,0 +1,87 @@ +#ifndef LIBURING_BARRIER_H +#define LIBURING_BARRIER_H + +/* +From the kernel documentation file refcount-vs-atomic.rst: + +A RELEASE memory ordering guarantees that all prior loads and +stores (all po-earlier instructions) on the same CPU are completed +before the operation. It also guarantees that all po-earlier +stores on the same CPU and all propagated stores from other CPUs +must propagate to all other CPUs before the release operation +(A-cumulative property). This is implemented using +:c:func:`smp_store_release`. + +An ACQUIRE memory ordering guarantees that all post loads and +stores (all po-later instructions) on the same CPU are +completed after the acquire operation. It also guarantees that all +po-later stores on the same CPU must propagate to all other CPUs +after the acquire operation executes. This is implemented using +:c:func:`smp_acquire__after_ctrl_dep`. 
+*/ + +/* From tools/include/linux/compiler.h */ +/* Optimization barrier */ +/* The "volatile" is due to gcc bugs */ +#define barrier() __asm__ __volatile__("": : :"memory") + +/* From tools/virtio/linux/compiler.h */ +#define WRITE_ONCE(var, val) \ + (*((volatile __typeof(val) *)(&(var))) = (val)) +#define READ_ONCE(var) (*((volatile __typeof(var) *)(&(var)))) + + +#if defined(__x86_64__) || defined(__i386__) +/* Adapted from arch/x86/include/asm/barrier.h */ +#define mb() asm volatile("mfence" ::: "memory") +#define rmb() asm volatile("lfence" ::: "memory") +#define wmb() asm volatile("sfence" ::: "memory") +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#if defined(__i386__) +#define smp_mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory", "cc") +#else +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") +#endif + +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*(p), (v)); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + __typeof(*p) ___p1 = READ_ONCE(*(p)); \ + barrier(); \ + ___p1; \ +}) +#else /* defined(__x86_64__) || defined(__i386__) */ +/* + * Add arch appropriate definitions. Be safe and use full barriers for + * archs we don't have support for. 
+ */ +#define smp_rmb() __sync_synchronize() +#define smp_wmb() __sync_synchronize() +#endif /* defined(__x86_64__) || defined(__i386__) */ + +/* From tools/include/asm/barrier.h */ + +#ifndef smp_store_release +# define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ +({ \ + __typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_mb(); \ + ___p1; \ +}) +#endif + +#endif /* defined(LIBURING_BARRIER_H) */ diff --git a/src/include/liburing/compat.h b/src/include/liburing/compat.h new file mode 100644 index 0000000..d322499 --- /dev/null +++ b/src/include/liburing/compat.h @@ -0,0 +1,8 @@ +#ifndef LIBURING_COMPAT_H +#define LIBURING_COMPAT_H + +#if !defined(CONFIG_HAVE_KERNEL_RWF_T) +typedef int __kernel_rwf_t; +#endif + +#endif diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h new file mode 100644 index 0000000..a61c4a6 --- /dev/null +++ b/src/include/liburing/io_uring.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Header file for the io_uring interface. 
+ * + * Copyright (C) 2019 Jens Axboe + * Copyright (C) 2019 Christoph Hellwig + */ +#ifndef LINUX_IO_URING_H +#define LINUX_IO_URING_H + +#include +#include + +/* + * IO submission data structure (Submission Queue Entry) + */ +struct io_uring_sqe { + __u8 opcode; /* type of operation for this sqe */ + __u8 flags; /* IOSQE_ flags */ + __u16 ioprio; /* ioprio for the request */ + __s32 fd; /* file descriptor to do IO on */ + __u64 off; /* offset into file */ + __u64 addr; /* pointer to buffer or iovecs */ + __u32 len; /* buffer size or number of iovecs */ + union { + __kernel_rwf_t rw_flags; + __u32 fsync_flags; + __u16 poll_events; + __u32 sync_range_flags; + __u32 msg_flags; + }; + __u64 user_data; /* data to be passed back at completion time */ + union { + __u16 buf_index; /* index into fixed buffers, if used */ + __u64 __pad2[3]; + }; +}; + +/* + * sqe->flags + */ +#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */ +#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */ +#define IOSQE_IO_LINK (1U << 2) /* next IO depends on this one */ + +/* + * io_uring_setup() flags + */ +#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ +#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ +#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ + +#define IORING_OP_NOP 0 +#define IORING_OP_READV 1 +#define IORING_OP_WRITEV 2 +#define IORING_OP_FSYNC 3 +#define IORING_OP_READ_FIXED 4 +#define IORING_OP_WRITE_FIXED 5 +#define IORING_OP_POLL_ADD 6 +#define IORING_OP_POLL_REMOVE 7 +#define IORING_OP_SYNC_FILE_RANGE 8 +#define IORING_OP_SENDMSG 9 +#define IORING_OP_RECVMSG 10 + +/* + * sqe->fsync_flags + */ +#define IORING_FSYNC_DATASYNC (1U << 0) + +/* + * IO completion data structure (Completion Queue Entry) + */ +struct io_uring_cqe { + __u64 user_data; /* sqe->data submission passed back */ + __s32 res; /* result code for this event */ + __u32 flags; +}; + +/* + * Magic offsets for the application to mmap the data it needs + */ 
+#define IORING_OFF_SQ_RING 0ULL +#define IORING_OFF_CQ_RING 0x8000000ULL +#define IORING_OFF_SQES 0x10000000ULL + +/* + * Filled with the offset for mmap(2) + */ +struct io_sqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 flags; + __u32 dropped; + __u32 array; + __u32 resv1; + __u64 resv2; +}; + +/* + * sq_ring->flags + */ +#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ + +struct io_cqring_offsets { + __u32 head; + __u32 tail; + __u32 ring_mask; + __u32 ring_entries; + __u32 overflow; + __u32 cqes; + __u64 resv[2]; +}; + +/* + * io_uring_enter(2) flags + */ +#define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) + +/* + * Passed in for io_uring_setup(2). Copied back with updated info on success + */ +struct io_uring_params { + __u32 sq_entries; + __u32 cq_entries; + __u32 flags; + __u32 sq_thread_cpu; + __u32 sq_thread_idle; + __u32 resv[5]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +/* + * io_uring_register(2) opcodes and arguments + */ +#define IORING_REGISTER_BUFFERS 0 +#define IORING_UNREGISTER_BUFFERS 1 +#define IORING_REGISTER_FILES 2 +#define IORING_UNREGISTER_FILES 3 +#define IORING_REGISTER_EVENTFD 4 +#define IORING_UNREGISTER_EVENTFD 5 + +#endif diff --git a/src/io_uring.h b/src/io_uring.h deleted file mode 100644 index a61c4a6..0000000 --- a/src/io_uring.h +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Header file for the io_uring interface. 
- * - * Copyright (C) 2019 Jens Axboe - * Copyright (C) 2019 Christoph Hellwig - */ -#ifndef LINUX_IO_URING_H -#define LINUX_IO_URING_H - -#include -#include - -/* - * IO submission data structure (Submission Queue Entry) - */ -struct io_uring_sqe { - __u8 opcode; /* type of operation for this sqe */ - __u8 flags; /* IOSQE_ flags */ - __u16 ioprio; /* ioprio for the request */ - __s32 fd; /* file descriptor to do IO on */ - __u64 off; /* offset into file */ - __u64 addr; /* pointer to buffer or iovecs */ - __u32 len; /* buffer size or number of iovecs */ - union { - __kernel_rwf_t rw_flags; - __u32 fsync_flags; - __u16 poll_events; - __u32 sync_range_flags; - __u32 msg_flags; - }; - __u64 user_data; /* data to be passed back at completion time */ - union { - __u16 buf_index; /* index into fixed buffers, if used */ - __u64 __pad2[3]; - }; -}; - -/* - * sqe->flags - */ -#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */ -#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */ -#define IOSQE_IO_LINK (1U << 2) /* next IO depends on this one */ - -/* - * io_uring_setup() flags - */ -#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ -#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ -#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ - -#define IORING_OP_NOP 0 -#define IORING_OP_READV 1 -#define IORING_OP_WRITEV 2 -#define IORING_OP_FSYNC 3 -#define IORING_OP_READ_FIXED 4 -#define IORING_OP_WRITE_FIXED 5 -#define IORING_OP_POLL_ADD 6 -#define IORING_OP_POLL_REMOVE 7 -#define IORING_OP_SYNC_FILE_RANGE 8 -#define IORING_OP_SENDMSG 9 -#define IORING_OP_RECVMSG 10 - -/* - * sqe->fsync_flags - */ -#define IORING_FSYNC_DATASYNC (1U << 0) - -/* - * IO completion data structure (Completion Queue Entry) - */ -struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ - __s32 res; /* result code for this event */ - __u32 flags; -}; - -/* - * Magic offsets for the application to mmap the data it needs - */ 
-#define IORING_OFF_SQ_RING 0ULL -#define IORING_OFF_CQ_RING 0x8000000ULL -#define IORING_OFF_SQES 0x10000000ULL - -/* - * Filled with the offset for mmap(2) - */ -struct io_sqring_offsets { - __u32 head; - __u32 tail; - __u32 ring_mask; - __u32 ring_entries; - __u32 flags; - __u32 dropped; - __u32 array; - __u32 resv1; - __u64 resv2; -}; - -/* - * sq_ring->flags - */ -#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ - -struct io_cqring_offsets { - __u32 head; - __u32 tail; - __u32 ring_mask; - __u32 ring_entries; - __u32 overflow; - __u32 cqes; - __u64 resv[2]; -}; - -/* - * io_uring_enter(2) flags - */ -#define IORING_ENTER_GETEVENTS (1U << 0) -#define IORING_ENTER_SQ_WAKEUP (1U << 1) - -/* - * Passed in for io_uring_setup(2). Copied back with updated info on success - */ -struct io_uring_params { - __u32 sq_entries; - __u32 cq_entries; - __u32 flags; - __u32 sq_thread_cpu; - __u32 sq_thread_idle; - __u32 resv[5]; - struct io_sqring_offsets sq_off; - struct io_cqring_offsets cq_off; -}; - -/* - * io_uring_register(2) opcodes and arguments - */ -#define IORING_REGISTER_BUFFERS 0 -#define IORING_UNREGISTER_BUFFERS 1 -#define IORING_REGISTER_FILES 2 -#define IORING_UNREGISTER_FILES 3 -#define IORING_REGISTER_EVENTFD 4 -#define IORING_UNREGISTER_EVENTFD 5 - -#endif diff --git a/src/liburing.h b/src/liburing.h deleted file mode 100644 index a350a01..0000000 --- a/src/liburing.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef LIB_URING_H -#define LIB_URING_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include -#include "compat.h" -#include "io_uring.h" -#include "barrier.h" - -/* - * Library interface to io_uring - */ -struct io_uring_sq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *kflags; - unsigned *kdropped; - unsigned *array; - struct io_uring_sqe *sqes; - - unsigned sqe_head; - unsigned sqe_tail; - - size_t ring_sz; - void *ring_ptr; -}; - -struct 
io_uring_cq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *koverflow; - struct io_uring_cqe *cqes; - - size_t ring_sz; - void *ring_ptr; -}; - -struct io_uring { - struct io_uring_sq sq; - struct io_uring_cq cq; - unsigned flags; - int ring_fd; -}; - -/* - * System calls - */ -extern int io_uring_setup(unsigned entries, struct io_uring_params *p); -extern int io_uring_enter(unsigned fd, unsigned to_submit, - unsigned min_complete, unsigned flags, sigset_t *sig); -extern int io_uring_register(int fd, unsigned int opcode, const void *arg, - unsigned int nr_args); - -/* - * Library interface - */ -extern int io_uring_queue_init(unsigned entries, struct io_uring *ring, - unsigned flags); -extern int io_uring_queue_mmap(int fd, struct io_uring_params *p, - struct io_uring *ring); -extern void io_uring_queue_exit(struct io_uring *ring); -extern int io_uring_peek_cqe(struct io_uring *ring, - struct io_uring_cqe **cqe_ptr); -extern int io_uring_wait_cqe(struct io_uring *ring, - struct io_uring_cqe **cqe_ptr); -extern int io_uring_submit(struct io_uring *ring); -extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr); -extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring); - -extern int io_uring_register_buffers(struct io_uring *ring, - const struct iovec *iovecs, - unsigned nr_iovecs); -extern int io_uring_unregister_buffers(struct io_uring *ring); -extern int io_uring_register_files(struct io_uring *ring, const int *files, - unsigned nr_files); -extern int io_uring_unregister_files(struct io_uring *ring); -extern int io_uring_register_eventfd(struct io_uring *ring, int fd); -extern int io_uring_unregister_eventfd(struct io_uring *ring); - -#define io_uring_for_each_cqe(ring, head, cqe) \ - /* smp_load_acquire() enforces the order of tail and CQE reads. */ \ - for (head = *(ring)->cq.khead; \ - (cqe = (head != smp_load_acquire((ring)->cq.ktail) ? 
\ - &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \ - head++) \ - - -/* - * Must be called after io_uring_for_each_cqe() - */ -static inline void io_uring_cq_advance(struct io_uring *ring, - unsigned nr) -{ - if (nr) { - struct io_uring_cq *cq = &ring->cq; - - /* - * Ensure that the kernel only sees the new value of the head - * index after the CQEs have been read. - */ - smp_store_release(cq->khead, *cq->khead + nr); - } -} - -/* - * Must be called after io_uring_{peek,wait}_cqe() after the cqe has - * been processed by the application. - */ -static inline void io_uring_cqe_seen(struct io_uring *ring, - struct io_uring_cqe *cqe) -{ - if (cqe) - io_uring_cq_advance(ring, 1); -} - -/* - * Command prep helpers - */ -static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data) -{ - sqe->user_data = (unsigned long) data; -} - -static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe) -{ - return (void *) (uintptr_t) cqe->user_data; -} - -static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe, - unsigned flags) -{ - sqe->flags = flags; -} - -static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, - const void *addr, unsigned len, - off_t offset) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = op; - sqe->fd = fd; - sqe->off = offset; - sqe->addr = (unsigned long) addr; - sqe->len = len; -} - -static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, - const struct iovec *iovecs, - unsigned nr_vecs, off_t offset) -{ - io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); -} - -static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, - void *buf, unsigned nbytes, - off_t offset) -{ - io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset); -} - -static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, - const struct iovec *iovecs, - unsigned nr_vecs, off_t offset) -{ - io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, 
iovecs, nr_vecs, offset); -} - -static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, - const void *buf, unsigned nbytes, - off_t offset) -{ - io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); -} - -static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, - short poll_mask) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_POLL_ADD; - sqe->fd = fd; - sqe->poll_events = poll_mask; -} - -static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe, - void *user_data) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_POLL_REMOVE; - sqe->addr = (unsigned long) user_data; -} - -static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, - unsigned fsync_flags) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_FSYNC; - sqe->fd = fd; - sqe->fsync_flags = fsync_flags; -} - -static inline void io_uring_prep_nop(struct io_uring_sqe *sqe) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_NOP; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/queue.c b/src/queue.c index 72b2293..74a077f 100644 --- a/src/queue.c +++ b/src/queue.c @@ -6,10 +6,10 @@ #include #include -#include "compat.h" -#include "io_uring.h" +#include "liburing/compat.h" +#include "liburing/io_uring.h" #include "liburing.h" -#include "barrier.h" +#include "liburing/barrier.h" static int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr, int wait) diff --git a/src/register.c b/src/register.c index 7561575..f5fc196 100644 --- a/src/register.c +++ b/src/register.c @@ -5,8 +5,8 @@ #include #include -#include "compat.h" -#include "io_uring.h" +#include "liburing/compat.h" +#include "liburing/io_uring.h" #include "liburing.h" int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs, diff --git a/src/setup.c b/src/setup.c index 343a317..47b0deb 100644 --- a/src/setup.c +++ b/src/setup.c @@ -5,8 +5,8 @@ #include #include -#include 
"compat.h" -#include "io_uring.h" +#include "liburing/compat.h" +#include "liburing/io_uring.h" #include "liburing.h" static int io_uring_mmap(int fd, struct io_uring_params *p, diff --git a/src/syscall.c b/src/syscall.c index d0c58cf..3fd8713 100644 --- a/src/syscall.c +++ b/src/syscall.c @@ -5,8 +5,8 @@ #include #include #include -#include "compat.h" -#include "io_uring.h" +#include "liburing/compat.h" +#include "liburing/io_uring.h" #ifdef __alpha__ /* diff --git a/test/Makefile b/test/Makefile index 98f863c..4d056f8 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,5 +1,5 @@ CFLAGS ?= -g -O2 -override CFLAGS += -Wall -D_GNU_SOURCE -L../src/ +override CFLAGS += -Wall -D_GNU_SOURCE -L../src/ -I../src/include/ all_targets += poll poll-cancel ring-leak fsync io_uring_setup io_uring_register \ io_uring_enter nop sq-full cq-full 35fa71a030ca-test \ diff --git a/test/cq-full.c b/test/cq-full.c index 82c5a65..25fa42c 100644 --- a/test/cq-full.c +++ b/test/cq-full.c @@ -9,7 +9,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" static int queue_n_nops(struct io_uring *ring, int n) { diff --git a/test/eeed8b54e0df-test.c b/test/eeed8b54e0df-test.c index 9083d3e..84237d5 100644 --- a/test/eeed8b54e0df-test.c +++ b/test/eeed8b54e0df-test.c @@ -9,7 +9,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" #define BLOCK 4096 diff --git a/test/fsync.c b/test/fsync.c index 44264f4..e6e0898 100644 --- a/test/fsync.c +++ b/test/fsync.c @@ -9,7 +9,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" static int test_single_fsync(struct io_uring *ring) { diff --git a/test/io_uring_enter.c b/test/io_uring_enter.c index b25afd5..c2030c1 100644 --- a/test/io_uring_enter.c +++ b/test/io_uring_enter.c @@ -22,8 +22,8 @@ #include #include #include -#include "../src/liburing.h" -#include "../src/barrier.h" +#include "liburing.h" +#include "liburing/barrier.h" #define IORING_MAX_ENTRIES 4096 diff --git 
a/test/io_uring_register.c b/test/io_uring_register.c index 32e5217..59c8a86 100644 --- a/test/io_uring_register.c +++ b/test/io_uring_register.c @@ -21,7 +21,7 @@ #include #include #include -#include "../src/liburing.h" +#include "liburing.h" static int pagesize; static rlim_t mlock_limit; diff --git a/test/io_uring_setup.c b/test/io_uring_setup.c index 09e16e5..2dd3763 100644 --- a/test/io_uring_setup.c +++ b/test/io_uring_setup.c @@ -13,7 +13,7 @@ #include #include #include -#include "../src/liburing.h" +#include "liburing.h" /* * Attempt the call with the given args. Return 0 when expect matches diff --git a/test/link.c b/test/link.c index e7ca3e3..603b507 100644 --- a/test/link.c +++ b/test/link.c @@ -9,7 +9,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" /* * Test failing head of chain, and dependent getting -ECANCELED diff --git a/test/nop.c b/test/nop.c index 8e6bfb0..1373695 100644 --- a/test/nop.c +++ b/test/nop.c @@ -9,7 +9,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" static int test_single_nop(struct io_uring *ring) { diff --git a/test/poll-cancel.c b/test/poll-cancel.c index 19efc5f..4761569 100644 --- a/test/poll-cancel.c +++ b/test/poll-cancel.c @@ -12,7 +12,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" struct poll_data { unsigned is_poll; diff --git a/test/poll.c b/test/poll.c index d22d9c5..ed424fc 100644 --- a/test/poll.c +++ b/test/poll.c @@ -11,7 +11,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" static void sig_alrm(int sig) { diff --git a/test/ring-leak.c b/test/ring-leak.c index 99466e4..02b06f9 100644 --- a/test/ring-leak.c +++ b/test/ring-leak.c @@ -21,7 +21,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" static int __io_uring_register_files(int ring_fd, int fd1, int fd2) { diff --git a/test/send_recvmsg.c b/test/send_recvmsg.c index 9187906..ada6559 100644 --- a/test/send_recvmsg.c +++ 
b/test/send_recvmsg.c @@ -9,7 +9,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" static char str[] = "This is a test of sendmsg and recvmsg over io_uring!"; diff --git a/test/sq-full.c b/test/sq-full.c index 5bf7f72..3fbe0a5 100644 --- a/test/sq-full.c +++ b/test/sq-full.c @@ -9,7 +9,7 @@ #include #include -#include "../src/liburing.h" +#include "liburing.h" int main(int argc, char *argv[]) { -- cgit