summaryrefslogblamecommitdiff
path: root/src/io_uring.c
blob: 72059147d02cebebcbf65438f8bf22ad1afa2a02 (plain) (tree)
1
2
3
4
5
6
7
8






                      
                   



                     
                                                                    
                                                                             

                                              


                      
                        



                                         
                                                          

                              

                              




                                                                       
                       



                                      



                 

                                                       
                                                  
                                                          
 
                                                                               




                                                       
                                                   
                                                           
 
                                                                               


  
                                                              
  
                                   
   
                                          
 
                                           











                                                              
                                         


                         
                                                                              


                                        
                                             




                                             
                                                              

                                   
                               












                                   

                                                          


  
                                                                       


                                                                        
                                               
   
                                                            
 
                                           

                                         

          
                            
           
                                                     

                            


                                                        





















                                                                           

                                                           
                                                              

                                                 





                                               
                                                                                   



                                                                           
                                                                              






                                                         
                                        
                 


  

                                                                 
   
                                                                                
 
                                 
                    
 



                                         


                          
                                       
                                                          


                                   

 
                                               
 


                                           
                                                                           

                                       
                             
 
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

#include "compat.h"
#include "io_uring.h"
#include "liburing.h"
#include "barrier.h"

static int __io_uring_get_completion(int fd, struct io_uring_cq *cq,
				     struct io_uring_cqe **cqe_ptr, int wait)
{
	const unsigned mask = *cq->kring_mask;
	unsigned head;
	int ret;

	*cqe_ptr = NULL;
	head = *cq->khead;
	do {
		read_barrier();
		if (head != *cq->ktail) {
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		if (!wait)
			break;
		ret = io_uring_enter(fd, 0, 1, IORING_ENTER_GETEVENTS);
		if (ret < 0)
			return -errno;
	} while (1);

	if (*cqe_ptr) {
		*cq->khead = head + 1;
		write_barrier();
	}

	return 0;
}

/*
 * Return an IO completion, if one is readily available
 */
int io_uring_get_completion(struct io_uring *ring,
			    struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_completion(ring->ring_fd, &ring->cq, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary
 */
int io_uring_wait_completion(struct io_uring *ring,
			     struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_completion(ring->ring_fd, &ring->cq, cqe_ptr, 1);
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted
 */
int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, ktail_next, submitted;

	/*
	 * If we have pending IO in the kring, submit it first
	 */
	read_barrier();
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}

	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	/*
	 * Fill in sqes that we have queued up, adding them to the kernel ring
	 */
	submitted = 0;
	ktail = ktail_next = *sq->ktail;
	while (sq->sqe_head < sq->sqe_tail) {
		ktail_next++;
		read_barrier();
		if (ktail_next == *sq->khead)
			break;

		sq->array[ktail & mask] = sq->sqe_head & mask;
		ktail = ktail_next;

		sq->sqe_head++;
		submitted++;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		write_barrier();
		*sq->ktail = ktail;
		write_barrier();
	}

submit:
	return io_uring_enter(ring->ring_fd, submitted, 0,
				IORING_ENTER_GETEVENTS);
}

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned next = sq->sqe_tail + 1;
	struct io_uring_sqe *sqe;

	/*
	 * All sqes are used
	 */
	if (next - sq->sqe_head > *sq->kring_entries)
		return NULL;

	sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
	sq->sqe_tail = next;
	return sqe;
}

static int io_uring_mmap(int fd, struct io_uring_params *p,
			 struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	void *ptr;
	int ret;

	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
	ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (ptr == MAP_FAILED)
		return -errno;
	sq->khead = ptr + p->sq_off.head;
	sq->ktail = ptr + p->sq_off.tail;
	sq->kring_mask = ptr + p->sq_off.ring_mask;
	sq->kring_entries = ptr + p->sq_off.ring_entries;
	sq->kflags = ptr + p->sq_off.flags;
	sq->kdropped = ptr + p->sq_off.dropped;
	sq->array = ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe),
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_POPULATE, fd,
				IORING_OFF_SQES);
	if (sq->sqes == MAP_FAILED) {
		ret = -errno;
err:
		munmap(sq->khead, sq->ring_sz);
		return ret;
	}

	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
	ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	if (ptr == MAP_FAILED) {
		ret = -errno;
		munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
		goto err;
	}
	cq->khead = ptr + p->cq_off.head;
	cq->ktail = ptr + p->cq_off.tail;
	cq->kring_mask = ptr + p->cq_off.ring_mask;
	cq->kring_entries = ptr + p->cq_off.ring_entries;
	cq->koverflow = ptr + p->cq_off.overflow;
	cq->cqes = ptr + p->cq_off.cqes;
	return 0;
}

/*
 * Returns -1 on error, or zero on success. On success, 'ring'
 * contains the necessary information to read/write to the rings.
 */
int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
{
	struct io_uring_params p;
	int fd, ret;

	memset(&p, 0, sizeof(p));
	p.flags = flags;

	fd = io_uring_setup(entries, &p);
	if (fd < 0)
		return fd;

	memset(ring, 0, sizeof(*ring));
	ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
	if (!ret)
		ring->ring_fd = fd;
	return ret;
}

void io_uring_queue_exit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	struct io_uring_cq *cq = &ring->cq;

	munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
	munmap(sq->khead, sq->ring_sz);
	munmap(cq->khead, cq->ring_sz);
	close(ring->ring_fd);
}