summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--man/io_uring_enter.2378
1 files changed, 378 insertions, 0 deletions
diff --git a/man/io_uring_enter.2 b/man/io_uring_enter.2
new file mode 100644
index 0000000..dc81fc4
--- /dev/null
+++ b/man/io_uring_enter.2
@@ -0,0 +1,378 @@
+.\" Copyright (C) 2019 Jens Axboe <axboe@kernel.dk>
+.\" Copyright (C) 2019 Red Hat, Inc.
+.\"
+.\" %%%LICENSE_START(LGPL_V2.1)
+.\" This file is distributed according to the GNU Lesser General Public License.
+.\" %%%LICENSE_END
+.\"
+.TH IO_URING_ENTER 2 2019-01-22 "Linux" "Linux Programmer's Manual"
+.SH NAME
+io_uring_enter \- initiate and/or complete asynchronous I/O
+.SH SYNOPSIS
+.nf
+.BR "#include <linux/io_uring.h>"
+.PP
+.BI "int io_uring_enter(unsigned int " fd ", unsigned int " to_submit ,
+.BI " unsigned int " min_complete ", unsigned int " flags );
+.fi
+.PP
+.SH DESCRIPTION
+.PP
+.BR io_uring_enter ()
+is used to initiate and complete I/O using the shared submission and
+completion queues set up by a call to
+.BR io_uring_setup(2).
+A single call can both submit new I/O and wait for completions of I/O
+initiated by this call or previous calls to
+.BR io_uring_enter ().
+
+.I fd
+is the file descriptor returned by
+.BR io_uring_setup(2).
+.I to_submit
+specifies the number of I/Os to submit from the submission queue. If
+the
+.B IORING_ENTER_GETEVENTS
+bit is set in
+.I flags,
+then the system call will attempt to wait for
+.I min_complete
+I/O completions. Note that it is valid to specify
+.B IORING_ENTER_GETEVENTS
+in
+.I flags
+and pass in
+.I min_complete
+as 0 at the same time. This allows the kernel to return already
+completed events without waiting. This is useful only when the
+io_uring instance was configured for polling (by specifying the
+.B IORING_SETUP_IOPOLL
+flag in
+.BR io_uring_setup(2)),
+as for IRQ driven I/O, the application can just check the completion
+queue without entering the kernel.
+
+Submission queue entries are represented using the following data
+structure:
+.PP
+.in +4n
+.EX
+/*
+ * IO submission data structure (Submission Queue Entry)
+ */
+struct io_uring_sqe {
+ __u8 opcode; /* type of operation for this sqe */
+ __u8 flags; /* IOSQE_ flags */
+ __u16 ioprio; /* ioprio for the request */
+ __s32 fd; /* file descriptor to do IO on */
+ __u64 off; /* offset into file */
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u32 len; /* buffer size or number of iovecs */
+ union {
+ __kernel_rwf_t rw_flags;
+ __u32 fsync_flags;
+ __u16 poll_events;
+ };
+ __u64 user_data; /* data passed back at completion time */
+ union {
+ __u16 buf_index; /* index into fixed buffers, if used */
+ __u64 __pad2[3];
+ };
+};
+.EE
+.in
+.PP
+The
+.I opcode
+describes the operation to be performed. It can be one of:
+.TP
+.BR IORING_OP_NOP
+Do not perform any I/O. This is useful for testing the performance of
+the io_uring implementation itself.
+.TP
+.BR IORING_OP_READV
+.TP
+.BR IORING_OP_WRITEV
+Vectored read and write operations, similar to
+.BR preadv2(2)
+and
+.BR pwritev2(2).
+
+.TP
+.BR IORING_OP_READ_FIXED
+.TP
+.BR IORING_OP_WRITE_FIXED
+Read from or write to pre-mapped buffers. See
+.BR io_uring_register(2)
+for details on how to set up a context for fixed reads and writes.
+
+.TP
+.BR IORING_OP_FSYNC
+File sync. See also
+.BR fsync(2).
+Note that, while I/O is initiated in the order in which it appears in
+the submission queue, completions are unordered. For example, an
+application which places a write I/O followed by an fsync in the
+submission queue cannot expect the fsync to apply to the write. The
+two operations execute in parallel, so the fsync may complete before
+the write is issued to the storage. The same is also true for
+previously issued writes that have not completed prior to the fsync.
+
+.TP
+.BR IORING_OP_POLL_ADD
+Poll the
+.I fd
+specified in the submission queue entry for the events
+specified in the
+.I poll_events
+field. Unlike poll or epoll without
+.B EPOLLONESHOT,
+this interface always works in one shot mode. That is, once the poll
+operation is completed, it will have to be resubmitted.
+
+.TP
+.BR IORING_OP_POLL_REMOVE
+Remove an existing poll request. If found, the
+.I res
+field of the
+.I struct io_uring_cqe
+will contain 0. If not found,
+.I res
+will contain
+.BR \-ENOENT .
+
+.PP
+The
+.I flags
+field is a bit mask. Currently, the only supported flag is
+.B IOSQE_FIXED_FILE.
+This flag must be specified for io_uring instances that registered
+files using the
+.BR io_uring_register(2)
+system call. When specified,
+.I fd
+contains an index into the files array registered with the io_uring
+instance.
+
+.I ioprio
+specifies the I/O priority. See
+.BR ioprio_get(2)
+for a description of Linux I/O priorities.
+
+.I fd
+specifies the file descriptor against which the operation will be
+performed, with the exception noted above.
+
+If the operation is one of
+.B IORING_OP_READ_FIXED
+or
+.B IORING_OP_WRITE_FIXED,
+.I addr
+and
+.I len
+must fall within the buffer located at
+.I buf_index
+in the fixed buffer array. If the operation is either
+.B IORING_OP_READV
+or
+.B IORING_OP_WRITEV,
+then
+.I addr
+points to an iovec array of
+.I len
+entries.
+
+.I rw_flags,
+specified for read and write operations, contains a bitwise OR of
+per-I/O flags, as described in the
+.BR preadv2(2)
+man page.
+
+The
+.I fsync_flags
+bit mask may contain either 0, for a normal file integrity sync, or
+.B IORING_FSYNC_DATASYNC
+to provide data sync only semantics. See the descriptions of
+.B O_SYNC
+and
+.B O_DSYNC
+in the
+.BR open(2)
+manual page for more information.
+
+The bits that may be set in
+.I poll_events
+are defined in \fI<poll.h>\fP, and documented in
+.BR poll(2).
+
+.I user_data
+is an application-supplied value that will be copied into
+the completion queue entry (see below).
+.I buf_index
+is an index into an array of fixed buffers, and is only valid if fixed
+buffers were registered.
+.PP
+Once the submission queue entry is initialized, I/O is submitted by
+placing the index of the submission queue entry into the tail of the
+submission queue. After one or more indexes are added to the queue,
+and the queue tail is advanced, the io_uring_enter(2) system call can
+be invoked to initiate the I/O.
+
+Completions use the following data structure:
+.PP
+.in +4n
+.EX
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+ __u64 user_data; /* sqe->data submission passed back */
+ __s32 res; /* result code for this event */
+ __u32 flags;
+};
+.EE
+.in
+.PP
+.I user_data
+is copied from the field of the same name in the submission queue
+entry. The primary use case is to store data that the application
+will need to access upon completion of this particular I/O. The
+.I flags
+bit mask may contain 0 or more of the following values, ORed together:
+.TP
+.BR IOCQE_FLAG_CACHEHIT
+The page(s) associated with the buffered I/O operation were present in
+the page cache.
+.PP
+.I res
+is the operation-specific result.
+For read and write opcodes, the
+return values match those documented in the
+.BR preadv2(2)
+and
+.BR pwritev2(2)
+man pages.
+Return codes for the io_uring-specific opcodes are documented in the
+description of the opcodes above.
+.PP
+.SH RETURN VALUE
+.BR io_uring_enter ()
+returns the number of I/Os successfully submitted. This can be zero
+if
+.I to_submit
+was zero, if there were invalid entries in the submission queue, or if
+the submission queue was empty.
+
+On error, -1 is returned and
+.I errno
+is set appropriately.
+.PP
+.SH ERRORS
+.TP
+.BR EAGAIN
+The kernel was unable to allocate memory for the request.
+.TP
+.BR EBADF
+The
+.I fd
+field in the submission queue entry is invalid, or the
+.B IOSQE_FIXED_FILE
+flag was set in the submission queue entry, but no files were registered
+with the io_uring instance.
+.TP
+.BR EBUSY
+The io_uring instance is being acted on by another thread.
+.TP
+.BR EFAULT
+The buffer is outside of the process' accessible address space.
+.TP
+.BR EFAULT
+.B IORING_OP_READ_FIXED
+or
+.B IORING_OP_WRITE_FIXED
+was specified in the
+.I opcode
+field of the submission queue entry, but either buffers were not
+registered for this io_uring instance, or the address range described
+by
+.I addr
+and
+.I len
+does not fit within the buffer registered at
+.IR buf_index .
+.TP
+.BR EINVAL
+The
+.I index
+member of the submission queue entry is invalid.
+.TP
+.BR EINVAL
+The
+.I flags
+field or
+.I opcode
+in a submission queue entry is invalid.
+.TP
+.BR EINVAL
+.B IORING_OP_NOP
+was specified in the submission queue entry, but the io_uring context
+was set up for polling
+.RB ( IORING_SETUP_IOPOLL
+was specified in the call to io_uring_setup(2)).
+.TP
+.BR EINVAL
+.B IORING_OP_READV
+or
+.B IORING_OP_WRITEV
+was specified in the submission queue entry, but the io_uring instance
+has fixed buffers registered.
+.TP
+.BR EINVAL
+.B IORING_OP_READ_FIXED
+or
+.B IORING_OP_WRITE_FIXED
+was specified in the submission queue entry, and the
+.I buf_index
+is invalid.
+.TP
+.BR EINVAL
+.B IORING_OP_READV, IORING_OP_WRITEV, IORING_OP_READ_FIXED,
+.B IORING_OP_WRITE_FIXED
+or
+.B IORING_OP_FSYNC
+was specified in the submission queue entry, but the io_uring instance
+was configured for IOPOLLing, or any of
+.I addr, ioprio, off, len,
+or
+.I buf_index
+was set in the submission queue entry.
+.TP
+.BR EINVAL
+.B IORING_OP_POLL_ADD
+or
+.B IORING_OP_POLL_REMOVE
+was specified in the
+.I opcode
+field of the submission queue entry, but the io_uring instance was
+configured for busy-wait polling
+.RB ( IORING_SETUP_IOPOLL ),
+or any of
+.I ioprio, off, len
+or
+.I buf_index
+was non-zero in the submission queue entry.
+.TP
+.BR EINVAL
+.B IORING_OP_POLL_ADD
+was specified in the
+.I opcode
+field of the submission queue entry, and the
+.I addr
+field was non-zero.
+.TP
+.BR ENXIO
+The io_uring instance is in the process of being torn down.
+.TP
+.BR EOPNOTSUPP
+.I fd
+does not refer to an io_uring instance.