summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBart Van Assche <bvanassche@acm.org>2019-07-01 14:42:32 -0700
committerJens Axboe <axboe@kernel.dk>2019-07-02 07:33:52 -0600
commitecefd7958eb32602df07f12e9808598b2c2de84b (patch)
tree789bcf30a838d99b02237568dae0b215cc13f944
parentbbb30995a0b4c9e3489aa5d66d1807425734b791 (diff)
Fix the use of memory barriers
Introduce the smp_load_acquire() and smp_store_release() macros. Fix synchronization in io_uring_cq_advance() and __io_uring_get_cqe(). Remove a superfluous local variable, if-test and write barrier from __io_uring_submit(). Remove a superfluous barrier from test/io_uring_enter.c. Cc: Stefan Hajnoczi <stefanha@redhat.com> Cc: Roman Penyaev <rpenyaev@suse.de> Signed-off-by: Bart Van Assche <bvanassche@acm.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--man/io_uring_setup.26
-rw-r--r--src/barrier.h87
-rw-r--r--src/liburing.h15
-rw-r--r--src/queue.c30
-rw-r--r--test/io_uring_enter.c8
5 files changed, 107 insertions, 39 deletions
diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index ebaee2d..9ab0143 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -97,7 +97,11 @@ call with the following code sequence:
.in +4n
.EX
-read_barrier();
+/*
+ * Ensure that the wakeup flag is read after the tail pointer has been
+ * written.
+ */
+smp_mb();
if (*sq_ring->flags & IORING_SQ_NEED_WAKEUP)
io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP);
.EE
diff --git a/src/barrier.h b/src/barrier.h
index ef00f67..e1a407f 100644
--- a/src/barrier.h
+++ b/src/barrier.h
@@ -1,16 +1,95 @@
#ifndef LIBURING_BARRIER_H
#define LIBURING_BARRIER_H
+/*
+From the kernel documentation file refcount-vs-atomic.rst:
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+*/
+
+/* From tools/include/linux/compiler.h */
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+/* From tools/virtio/linux/compiler.h */
+#define WRITE_ONCE(var, val) \
+ (*((volatile typeof(val) *)(&(var))) = (val))
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+
#if defined(__x86_64) || defined(__i386__)
-#define read_barrier() __asm__ __volatile__("":::"memory")
-#define write_barrier() __asm__ __volatile__("":::"memory")
+/* From tools/arch/x86/include/asm/barrier.h */
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb() asm volatile("mfence" ::: "memory")
+#define rmb() asm volatile("lfence" ::: "memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#endif
+
+#if defined(__x86_64__)
+#define smp_store_release(p, v) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*(p), (v)); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*(p)); \
+ barrier(); \
+ ___p1; \
+})
+#endif /* defined(__x86_64__) */
#else
/*
* Add arch appropriate definitions. Be safe and use full barriers for
* archs we don't have support for.
*/
-#define read_barrier() __sync_synchronize()
-#define write_barrier() __sync_synchronize()
+#define smp_rmb() __sync_synchronize()
+#define smp_wmb() __sync_synchronize()
+#endif
+
+/* From tools/include/asm/barrier.h */
+
+#ifndef smp_store_release
+# define smp_store_release(p, v) \
+do { \
+ smp_mb(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ smp_mb(); \
+ ___p1; \
+})
#endif
#endif
diff --git a/src/liburing.h b/src/liburing.h
index d3fcd15..a350a01 100644
--- a/src/liburing.h
+++ b/src/liburing.h
@@ -88,11 +88,10 @@ extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
extern int io_uring_unregister_eventfd(struct io_uring *ring);
#define io_uring_for_each_cqe(ring, head, cqe) \
+ /* smp_load_acquire() enforces the order of tail and CQE reads. */ \
for (head = *(ring)->cq.khead; \
- /* See read_barrier() explanation in __io_uring_get_cqe() */ \
- ({read_barrier(); \
- cqe = (head != *(ring)->cq.ktail ? \
- &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL);}); \
+ (cqe = (head != smp_load_acquire((ring)->cq.ktail) ? \
+ &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \
head++) \
@@ -105,13 +104,11 @@ static inline void io_uring_cq_advance(struct io_uring *ring,
if (nr) {
struct io_uring_cq *cq = &ring->cq;
- (*cq->khead) += nr;
-
/*
- * Ensure that the kernel sees our new head, the kernel has
- * the matching read barrier.
+ * Ensure that the kernel only sees the new value of the head
+ * index after the CQEs have been read.
*/
- write_barrier();
+ smp_store_release(cq->khead, *cq->khead + nr);
}
}
diff --git a/src/queue.c b/src/queue.c
index bec363f..72b2293 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -77,7 +77,7 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
{
struct io_uring_sq *sq = &ring->sq;
const unsigned mask = *sq->kring_mask;
- unsigned ktail, ktail_next, submitted, to_submit;
+ unsigned ktail, submitted, to_submit;
unsigned flags;
int ret;
@@ -88,15 +88,11 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
* Fill in sqes that we have queued up, adding them to the kernel ring
*/
submitted = 0;
- ktail = ktail_next = *sq->ktail;
+ ktail = *sq->ktail;
to_submit = sq->sqe_tail - sq->sqe_head;
while (to_submit--) {
- ktail_next++;
- read_barrier();
-
sq->array[ktail & mask] = sq->sqe_head & mask;
- ktail = ktail_next;
-
+ ktail++;
sq->sqe_head++;
submitted++;
}
@@ -104,21 +100,11 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
if (!submitted)
return 0;
- if (*sq->ktail != ktail) {
- /*
- * First write barrier ensures that the SQE stores are updated
- * with the tail update. This is needed so that the kernel
- * will never see a tail update without the preceeding sQE
- * stores being done.
- */
- write_barrier();
- *sq->ktail = ktail;
- /*
- * The kernel has the matching read barrier for reading the
- * SQ tail.
- */
- write_barrier();
- }
+ /*
+ * Ensure that the kernel sees the SQE updates before it sees the tail
+ * update.
+ */
+ smp_store_release(sq->ktail, ktail);
flags = 0;
if (wait_nr || sq_ring_needs_enter(ring, &flags)) {
diff --git a/test/io_uring_enter.c b/test/io_uring_enter.c
index d6e407e..b25afd5 100644
--- a/test/io_uring_enter.c
+++ b/test/io_uring_enter.c
@@ -262,9 +262,11 @@ main(int argc, char **argv)
ktail = *sq->ktail;
sq->array[ktail & mask] = index;
++ktail;
- write_barrier();
- *sq->ktail = ktail;
- write_barrier();
+ /*
+ * Ensure that the kernel sees the SQE update before it sees the tail
+ * update.
+ */
+ smp_store_release(sq->ktail, ktail);
ret = io_uring_enter(ring.ring_fd, 1, 0, 0, NULL);
/* now check to see if our sqe was dropped */