Diffstat (limited to 'src')
-rw-r--r--  src/barrier.h    87
-rw-r--r--  src/liburing.h   15
-rw-r--r--  src/queue.c      30
3 files changed, 97 insertions, 35 deletions
diff --git a/src/barrier.h b/src/barrier.h
index ef00f67..e1a407f 100644
--- a/src/barrier.h
+++ b/src/barrier.h
@@ -1,16 +1,95 @@
#ifndef LIBURING_BARRIER_H
#define LIBURING_BARRIER_H
+/*
+From the kernel documentation file refcount-vs-atomic.rst:
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+*/
+
+/* From tools/include/linux/compiler.h */
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+/* From tools/virtio/linux/compiler.h */
+#define WRITE_ONCE(var, val) \
+ (*((volatile typeof(val) *)(&(var))) = (val))
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+
#if defined(__x86_64) || defined(__i386__)
-#define read_barrier() __asm__ __volatile__("":::"memory")
-#define write_barrier() __asm__ __volatile__("":::"memory")
+/* From tools/arch/x86/include/asm/barrier.h */
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb() asm volatile("mfence" ::: "memory")
+#define rmb() asm volatile("lfence" ::: "memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#endif
+
+#if defined(__x86_64__)
+#define smp_store_release(p, v) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*(p), (v)); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*(p)); \
+ barrier(); \
+ ___p1; \
+})
+#endif /* defined(__x86_64__) */
#else
/*
* Add arch appropriate definitions. Be safe and use full barriers for
* archs we don't have support for.
*/
-#define read_barrier() __sync_synchronize()
-#define write_barrier() __sync_synchronize()
+#define smp_rmb() __sync_synchronize()
+#define smp_wmb() __sync_synchronize()
+#endif
+
+/* From tools/include/asm/barrier.h */
+
+#ifndef smp_store_release
+# define smp_store_release(p, v) \
+do { \
+ smp_mb(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ smp_mb(); \
+ ___p1; \
+})
#endif
#endif
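
Illustration (not part of this commit): a minimal sketch of how the
release/acquire pair defined above is intended to be used. The
`payload` and `ready` variables and the producer/consumer split are
assumptions for the example, not liburing code.

static int payload;
static unsigned ready;

/* Producer: the release guarantees payload is visible before ready. */
static void publish(void)
{
	payload = 42;
	smp_store_release(&ready, 1);
}

/*
 * Consumer: the acquire pairs with the release above; if ready is
 * observed as 1, payload is guaranteed to read 42.
 */
static int try_consume(void)
{
	if (smp_load_acquire(&ready))
		return payload;
	return -1;
}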
diff --git a/src/liburing.h b/src/liburing.h
index d3fcd15..a350a01 100644
--- a/src/liburing.h
+++ b/src/liburing.h
@@ -88,11 +88,10 @@ extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
extern int io_uring_unregister_eventfd(struct io_uring *ring);
#define io_uring_for_each_cqe(ring, head, cqe) \
+ /* smp_load_acquire() enforces the order of tail and CQE reads. */ \
for (head = *(ring)->cq.khead; \
- /* See read_barrier() explanation in __io_uring_get_cqe() */ \
- ({read_barrier(); \
- cqe = (head != *(ring)->cq.ktail ? \
- &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL);}); \
+ (cqe = (head != smp_load_acquire((ring)->cq.ktail) ? \
+ &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \
head++) \
@@ -105,13 +104,11 @@ static inline void io_uring_cq_advance(struct io_uring *ring,
if (nr) {
struct io_uring_cq *cq = &ring->cq;
- (*cq->khead) += nr;
-
/*
- * Ensure that the kernel sees our new head, the kernel has
- * the matching read barrier.
+ * Ensure that the kernel only sees the new value of the head
+ * index after the CQEs have been read.
*/
- write_barrier();
+ smp_store_release(cq->khead, *cq->khead + nr);
}
}
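
Usage sketch (not part of this commit): draining the CQ ring with the
macro above, then retiring the whole batch with a single release store
via io_uring_cq_advance(). Assumes `ring` was initialized with
io_uring_queue_init() and some requests were already submitted.

static unsigned reap_all(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	unsigned head, seen = 0;

	io_uring_for_each_cqe(ring, head, cqe) {
		/* cqe->res holds the result of the completed request */
		seen++;
	}
	/* One khead update for the whole batch, with release semantics. */
	io_uring_cq_advance(ring, seen);
	return seen;
}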
diff --git a/src/queue.c b/src/queue.c
index bec363f..72b2293 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -77,7 +77,7 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
{
struct io_uring_sq *sq = &ring->sq;
const unsigned mask = *sq->kring_mask;
- unsigned ktail, ktail_next, submitted, to_submit;
+ unsigned ktail, submitted, to_submit;
unsigned flags;
int ret;
@@ -88,15 +88,11 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
* Fill in sqes that we have queued up, adding them to the kernel ring
*/
submitted = 0;
- ktail = ktail_next = *sq->ktail;
+ ktail = *sq->ktail;
to_submit = sq->sqe_tail - sq->sqe_head;
while (to_submit--) {
- ktail_next++;
- read_barrier();
-
sq->array[ktail & mask] = sq->sqe_head & mask;
- ktail = ktail_next;
-
+ ktail++;
sq->sqe_head++;
submitted++;
}
@@ -104,21 +100,11 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
if (!submitted)
return 0;
- if (*sq->ktail != ktail) {
- /*
- * First write barrier ensures that the SQE stores are updated
- * with the tail update. This is needed so that the kernel
- * will never see a tail update without the preceding SQE
- * stores being done.
- */
- write_barrier();
- *sq->ktail = ktail;
- /*
- * The kernel has the matching read barrier for reading the
- * SQ tail.
- */
- write_barrier();
- }
+ /*
+ * Ensure that the kernel sees the SQE updates before it sees the tail
+ * update.
+ */
+ smp_store_release(sq->ktail, ktail);
flags = 0;
if (wait_nr || sq_ring_needs_enter(ring, &flags)) {
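
For contrast with the removed double-write_barrier() pattern: a sketch
of the same publish idiom on a hypothetical ring (RING_SIZE, the slot
array, and all names here are assumptions, not liburing API). The
consumer side pairs this release store with an acquire load of the
tail before reading the slots, which is what the kernel does with the
SQ tail above.

#define RING_SIZE	128
#define RING_MASK	(RING_SIZE - 1)

static void ring_publish(unsigned *ktail, unsigned *array,
			 unsigned tail, unsigned entry)
{
	/* Fill the slot first... */
	array[tail & RING_MASK] = entry;
	/* ...then publish: the slot store cannot pass the tail update. */
	smp_store_release(ktail, tail + 1);
}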