The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/sys_aio.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: sys_aio.c,v 1.48 2020/05/23 23:42:43 ad Exp $  */
    2 
    3 /*
    4  * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org>
    5  * All rights reserved.
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * Implementation of POSIX asynchronous I/O.
   31  * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.48 2020/05/23 23:42:43 ad Exp $");
   36 
   37 #ifdef _KERNEL_OPT
   38 #include "opt_ddb.h"
   39 #endif
   40 
   41 #include <sys/param.h>
   42 #include <sys/condvar.h>
   43 #include <sys/file.h>
   44 #include <sys/filedesc.h>
   45 #include <sys/kernel.h>
   46 #include <sys/kmem.h>
   47 #include <sys/lwp.h>
   48 #include <sys/mutex.h>
   49 #include <sys/pool.h>
   50 #include <sys/proc.h>
   51 #include <sys/queue.h>
   52 #include <sys/signal.h>
   53 #include <sys/signalvar.h>
   54 #include <sys/syscall.h>
   55 #include <sys/syscallargs.h>
   56 #include <sys/syscallvar.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/systm.h>
   59 #include <sys/types.h>
   60 #include <sys/vnode.h>
   61 #include <sys/atomic.h>
   62 #include <sys/module.h>
   63 #include <sys/buf.h>
   64 
   65 #include <uvm/uvm_extern.h>
   66 
   67 MODULE(MODULE_CLASS_MISC, aio, NULL);
   68 
   69 /*
   70  * System-wide limits and counter of AIO operations.
   71  */
   72 u_int                   aio_listio_max = AIO_LISTIO_MAX;
   73 static u_int            aio_max = AIO_MAX;
   74 static u_int            aio_jobs_count;
   75 
   76 static struct pool      aio_job_pool;
   77 static struct pool      aio_lio_pool;
   78 static void *           aio_ehook;
   79 
   80 static void             aio_worker(void *);
   81 static void             aio_process(struct aio_job *);
   82 static void             aio_sendsig(struct proc *, struct sigevent *);
   83 static int              aio_enqueue_job(int, void *, struct lio_req *);
   84 static void             aio_exit(proc_t *, void *);
   85 
   86 static int              sysctl_aio_listio_max(SYSCTLFN_PROTO);
   87 static int              sysctl_aio_max(SYSCTLFN_PROTO);
   88 
   89 static const struct syscall_package aio_syscalls[] = {
   90         { SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
   91         { SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
   92         { SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync },
   93         { SYS_aio_read, 0, (sy_call_t *)sys_aio_read },
   94         { SYS_aio_return, 0, (sy_call_t *)sys_aio_return },
   95         { SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 },
   96         { SYS_aio_write, 0, (sy_call_t *)sys_aio_write },
   97         { SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio },
   98         { 0, 0, NULL },
   99 };
  100 
  101 /*
  102  * Tear down all AIO state.
  103  */
  104 static int
  105 aio_fini(bool interface)
  106 {
  107         int error;
  108         proc_t *p;
  109 
  110         if (interface) {
  111                 /* Stop syscall activity. */
  112                 error = syscall_disestablish(NULL, aio_syscalls);
  113                 if (error != 0)
  114                         return error;
  115                 /* Abort if any processes are using AIO. */
  116                 mutex_enter(&proc_lock);
  117                 PROCLIST_FOREACH(p, &allproc) {
  118                         if (p->p_aio != NULL)
  119                                 break;
  120                 }
  121                 mutex_exit(&proc_lock);
  122                 if (p != NULL) {
  123                         error = syscall_establish(NULL, aio_syscalls);
  124                         KASSERT(error == 0);
  125                         return EBUSY;
  126                 }
  127         }
  128 
  129         KASSERT(aio_jobs_count == 0);
  130         exithook_disestablish(aio_ehook);
  131         pool_destroy(&aio_job_pool);
  132         pool_destroy(&aio_lio_pool);
  133         return 0;
  134 }
  135 
  136 /*
  137  * Initialize global AIO state.
  138  */
  139 static int
  140 aio_init(void)
  141 {
  142         int error;
  143 
  144         pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0,
  145             "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE);
  146         pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0,
  147             "aio_lio_pool", &pool_allocator_nointr, IPL_NONE);
  148         aio_ehook = exithook_establish(aio_exit, NULL);
  149 
  150         error = syscall_establish(NULL, aio_syscalls);
  151         if (error != 0)
  152                 (void)aio_fini(false);
  153         return error;
  154 }
  155 
  156 /*
  157  * Module interface.
  158  */
  159 static int
  160 aio_modcmd(modcmd_t cmd, void *arg)
  161 {
  162 
  163         switch (cmd) {
  164         case MODULE_CMD_INIT:
  165                 return aio_init();
  166         case MODULE_CMD_FINI:
  167                 return aio_fini(true);
  168         default:
  169                 return ENOTTY;
  170         }
  171 }
  172 
  173 /*
  174  * Initialize Asynchronous I/O data structures for the process.
  175  */
  176 static int
  177 aio_procinit(struct proc *p)
  178 {
  179         struct aioproc *aio;
  180         struct lwp *l;
  181         int error;
  182         vaddr_t uaddr;
  183 
  184         /* Allocate and initialize AIO structure */
  185         aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP);
  186 
  187         /* Initialize queue and their synchronization structures */
  188         mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE);
  189         cv_init(&aio->aio_worker_cv, "aiowork");
  190         cv_init(&aio->done_cv, "aiodone");
  191         TAILQ_INIT(&aio->jobs_queue);
  192 
  193         /*
  194          * Create an AIO worker thread.
  195          * XXX: Currently, AIO thread is not protected against user's actions.
  196          */
  197         uaddr = uvm_uarea_alloc();
  198         if (uaddr == 0) {
  199                 aio_exit(p, aio);
  200                 return EAGAIN;
  201         }
  202         error = lwp_create(curlwp, p, uaddr, 0, NULL, 0, aio_worker,
  203             NULL, &l, curlwp->l_class, &curlwp->l_sigmask, &curlwp->l_sigstk);
  204         if (error != 0) {
  205                 uvm_uarea_free(uaddr);
  206                 aio_exit(p, aio);
  207                 return error;
  208         }
  209 
  210         /* Recheck if we are really first */
  211         mutex_enter(p->p_lock);
  212         if (p->p_aio) {
  213                 mutex_exit(p->p_lock);
  214                 aio_exit(p, aio);
  215                 lwp_exit(l);
  216                 return 0;
  217         }
  218         p->p_aio = aio;
  219 
  220         /* Complete the initialization of thread, and run it */
  221         aio->aio_worker = l;
  222         lwp_lock(l);
  223         lwp_changepri(l, MAXPRI_USER);
  224         setrunnable(l);
  225         /* LWP now unlocked */
  226         mutex_exit(p->p_lock);
  227 
  228         return 0;
  229 }
  230 
  231 /*
  232  * Exit of Asynchronous I/O subsystem of process.
  233  */
  234 static void
  235 aio_exit(struct proc *p, void *cookie)
  236 {
  237         struct aio_job *a_job;
  238         struct aioproc *aio;
  239 
  240         if (cookie != NULL)
  241                 aio = cookie;
  242         else if ((aio = p->p_aio) == NULL)
  243                 return;
  244 
  245         /* Free AIO queue */
  246         while (!TAILQ_EMPTY(&aio->jobs_queue)) {
  247                 a_job = TAILQ_FIRST(&aio->jobs_queue);
  248                 TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
  249                 pool_put(&aio_job_pool, a_job);
  250                 atomic_dec_uint(&aio_jobs_count);
  251         }
  252 
  253         /* Destroy and free the entire AIO data structure */
  254         cv_destroy(&aio->aio_worker_cv);
  255         cv_destroy(&aio->done_cv);
  256         mutex_destroy(&aio->aio_mtx);
  257         kmem_free(aio, sizeof(struct aioproc));
  258 }
  259 
  260 /*
  261  * AIO worker thread and processor.
  262  */
  263 static void
  264 aio_worker(void *arg)
  265 {
  266         struct proc *p = curlwp->l_proc;
  267         struct aioproc *aio = p->p_aio;
  268         struct aio_job *a_job;
  269         struct lio_req *lio;
  270         sigset_t oss, nss;
  271         int error __diagused, refcnt;
  272 
  273         /*
  274          * Make an empty signal mask, so it
  275          * handles only SIGKILL and SIGSTOP.
  276          */
  277         sigfillset(&nss);
  278         mutex_enter(p->p_lock);
  279         error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss);
  280         mutex_exit(p->p_lock);
  281         KASSERT(error == 0);
  282 
  283         for (;;) {
  284                 /*
  285                  * Loop for each job in the queue.  If there
  286                  * are no jobs then sleep.
  287                  */
  288                 mutex_enter(&aio->aio_mtx);
  289                 while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) {
  290                         if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) {
  291                                 /*
  292                                  * Thread was interrupted - check for
  293                                  * pending exit or suspend.
  294                                  */
  295                                 mutex_exit(&aio->aio_mtx);
  296                                 lwp_userret(curlwp);
  297                                 mutex_enter(&aio->aio_mtx);
  298                         }
  299                 }
  300 
  301                 /* Take the job from the queue */
  302                 aio->curjob = a_job;
  303                 TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
  304 
  305                 atomic_dec_uint(&aio_jobs_count);
  306                 aio->jobs_count--;
  307 
  308                 mutex_exit(&aio->aio_mtx);
  309 
  310                 /* Process an AIO operation */
  311                 aio_process(a_job);
  312 
  313                 /* Copy data structure back to the user-space */
  314                 (void)copyout(&a_job->aiocbp, a_job->aiocb_uptr,
  315                     sizeof(struct aiocb));
  316 
  317                 mutex_enter(&aio->aio_mtx);
  318                 KASSERT(aio->curjob == a_job);
  319                 aio->curjob = NULL;
  320 
  321                 /* Decrease a reference counter, if there is a LIO structure */
  322                 lio = a_job->lio;
  323                 refcnt = (lio != NULL ? --lio->refcnt : -1);
  324 
  325                 /* Notify all suspenders */
  326                 cv_broadcast(&aio->done_cv);
  327                 mutex_exit(&aio->aio_mtx);
  328 
  329                 /* Send a signal, if any */
  330                 aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
  331 
  332                 /* Destroy the LIO structure */
  333                 if (refcnt == 0) {
  334                         aio_sendsig(p, &lio->sig);
  335                         pool_put(&aio_lio_pool, lio);
  336                 }
  337 
  338                 /* Destroy the job */
  339                 pool_put(&aio_job_pool, a_job);
  340         }
  341 
  342         /* NOTREACHED */
  343 }
  344 
  345 static void
  346 aio_process(struct aio_job *a_job)
  347 {
  348         struct proc *p = curlwp->l_proc;
  349         struct aiocb *aiocbp = &a_job->aiocbp;
  350         struct file *fp;
  351         int fd = aiocbp->aio_fildes;
  352         int error = 0;
  353 
  354         KASSERT(a_job->aio_op != 0);
  355 
  356         if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) {
  357                 struct iovec aiov;
  358                 struct uio auio;
  359 
  360                 if (aiocbp->aio_nbytes > SSIZE_MAX) {
  361                         error = EINVAL;
  362                         goto done;
  363                 }
  364 
  365                 fp = fd_getfile(fd);
  366                 if (fp == NULL) {
  367                         error = EBADF;
  368                         goto done;
  369                 }
  370 
  371                 aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf;
  372                 aiov.iov_len = aiocbp->aio_nbytes;
  373                 auio.uio_iov = &aiov;
  374                 auio.uio_iovcnt = 1;
  375                 auio.uio_resid = aiocbp->aio_nbytes;
  376                 auio.uio_vmspace = p->p_vmspace;
  377 
  378                 if (a_job->aio_op & AIO_READ) {
  379                         /*
  380                          * Perform a Read operation
  381                          */
  382                         KASSERT((a_job->aio_op & AIO_WRITE) == 0);
  383 
  384                         if ((fp->f_flag & FREAD) == 0) {
  385                                 fd_putfile(fd);
  386                                 error = EBADF;
  387                                 goto done;
  388                         }
  389                         auio.uio_rw = UIO_READ;
  390                         error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset,
  391                             &auio, fp->f_cred, FOF_UPDATE_OFFSET);
  392                 } else {
  393                         /*
  394                          * Perform a Write operation
  395                          */
  396                         KASSERT(a_job->aio_op & AIO_WRITE);
  397 
  398                         if ((fp->f_flag & FWRITE) == 0) {
  399                                 fd_putfile(fd);
  400                                 error = EBADF;
  401                                 goto done;
  402                         }
  403                         auio.uio_rw = UIO_WRITE;
  404                         error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset,
  405                             &auio, fp->f_cred, FOF_UPDATE_OFFSET);
  406                 }
  407                 fd_putfile(fd);
  408 
  409                 /* Store the result value */
  410                 a_job->aiocbp.aio_nbytes -= auio.uio_resid;
  411                 a_job->aiocbp._retval = (error == 0) ?
  412                     a_job->aiocbp.aio_nbytes : -1;
  413 
  414         } else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) {
  415                 /*
  416                  * Perform a file Sync operation
  417                  */
  418                 struct vnode *vp;
  419 
  420                 if ((error = fd_getvnode(fd, &fp)) != 0)
  421                         goto done;
  422 
  423                 if ((fp->f_flag & FWRITE) == 0) {
  424                         fd_putfile(fd);
  425                         error = EBADF;
  426                         goto done;
  427                 }
  428 
  429                 vp = fp->f_vnode;
  430                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  431                 if (a_job->aio_op & AIO_DSYNC) {
  432                         error = VOP_FSYNC(vp, fp->f_cred,
  433                             FSYNC_WAIT | FSYNC_DATAONLY, 0, 0);
  434                 } else if (a_job->aio_op & AIO_SYNC) {
  435                         error = VOP_FSYNC(vp, fp->f_cred,
  436                             FSYNC_WAIT, 0, 0);
  437                 }
  438                 VOP_UNLOCK(vp);
  439                 fd_putfile(fd);
  440 
  441                 /* Store the result value */
  442                 a_job->aiocbp._retval = (error == 0) ? 0 : -1;
  443 
  444         } else
  445                 panic("aio_process: invalid operation code\n");
  446 
  447 done:
  448         /* Job is done, set the error, if any */
  449         a_job->aiocbp._errno = error;
  450         a_job->aiocbp._state = JOB_DONE;
  451 }
  452 
  453 /*
  454  * Send AIO signal.
  455  */
  456 static void
  457 aio_sendsig(struct proc *p, struct sigevent *sig)
  458 {
  459         ksiginfo_t ksi;
  460 
  461         if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE)
  462                 return;
  463 
  464         KSI_INIT(&ksi);
  465         ksi.ksi_signo = sig->sigev_signo;
  466         ksi.ksi_code = SI_ASYNCIO;
  467         ksi.ksi_value = sig->sigev_value;
  468         mutex_enter(&proc_lock);
  469         kpsignal(p, &ksi, NULL);
  470         mutex_exit(&proc_lock);
  471 }
  472 
  473 /*
  474  * Enqueue the job.
  475  */
  476 static int
  477 aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
  478 {
  479         struct proc *p = curlwp->l_proc;
  480         struct aioproc *aio;
  481         struct aio_job *a_job;
  482         struct aiocb aiocbp;
  483         struct sigevent *sig;
  484         int error;
  485 
  486         /* Non-accurate check for the limit */
  487         if (aio_jobs_count + 1 > aio_max)
  488                 return EAGAIN;
  489 
  490         /* Get the data structure from user-space */
  491         error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb));
  492         if (error)
  493                 return error;
  494 
  495         /* Check if signal is set, and validate it */
  496         sig = &aiocbp.aio_sigevent;
  497         if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG ||
  498             sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA)
  499                 return EINVAL;
  500 
  501         /* Buffer and byte count */
  502         if (((AIO_SYNC | AIO_DSYNC) & op) == 0)
  503                 if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX)
  504                         return EINVAL;
  505 
  506         /* Check the opcode, if LIO_NOP - simply ignore */
  507         if (op == AIO_LIO) {
  508                 KASSERT(lio != NULL);
  509                 if (aiocbp.aio_lio_opcode == LIO_WRITE)
  510                         op = AIO_WRITE;
  511                 else if (aiocbp.aio_lio_opcode == LIO_READ)
  512                         op = AIO_READ;
  513                 else
  514                         return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : EINVAL;
  515         } else {
  516                 KASSERT(lio == NULL);
  517         }
  518 
  519         /*
  520          * Look for already existing job.  If found - the job is in-progress.
  521          * According to POSIX this is invalid, so return the error.
  522          */
  523         aio = p->p_aio;
  524         if (aio) {
  525                 mutex_enter(&aio->aio_mtx);
  526                 TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
  527                         if (a_job->aiocb_uptr != aiocb_uptr)
  528                                 continue;
  529                         mutex_exit(&aio->aio_mtx);
  530                         return EINVAL;
  531                 }
  532                 mutex_exit(&aio->aio_mtx);
  533         }
  534 
  535         /*
  536          * Check if AIO structure is initialized, if not - initialize it.
  537          * In LIO case, we did that already.  We will recheck this with
  538          * the lock in aio_procinit().
  539          */
  540         if (lio == NULL && p->p_aio == NULL)
  541                 if (aio_procinit(p))
  542                         return EAGAIN;
  543         aio = p->p_aio;
  544 
  545         /*
  546          * Set the state with errno, and copy data
  547          * structure back to the user-space.
  548          */
  549         aiocbp._state = JOB_WIP;
  550         aiocbp._errno = EINPROGRESS;
  551         aiocbp._retval = -1;
  552         error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb));
  553         if (error)
  554                 return error;
  555 
  556         /* Allocate and initialize a new AIO job */
  557         a_job = pool_get(&aio_job_pool, PR_WAITOK | PR_ZERO);
  558 
  559         /*
  560          * Set the data.
  561          * Store the user-space pointer for searching.  Since we
  562          * are storing only per proc pointers - it is safe.
  563          */
  564         memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb));
  565         a_job->aiocb_uptr = aiocb_uptr;
  566         a_job->aio_op |= op;
  567         a_job->lio = lio;
  568 
  569         /*
  570          * Add the job to the queue, update the counters, and
  571          * notify the AIO worker thread to handle the job.
  572          */
  573         mutex_enter(&aio->aio_mtx);
  574 
  575         /* Fail, if the limit was reached */
  576         if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max ||
  577             aio->jobs_count >= aio_listio_max) {
  578                 atomic_dec_uint(&aio_jobs_count);
  579                 mutex_exit(&aio->aio_mtx);
  580                 pool_put(&aio_job_pool, a_job);
  581                 return EAGAIN;
  582         }
  583 
  584         TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list);
  585         aio->jobs_count++;
  586         if (lio)
  587                 lio->refcnt++;
  588         cv_signal(&aio->aio_worker_cv);
  589 
  590         mutex_exit(&aio->aio_mtx);
  591 
  592         /*
  593          * One would handle the errors only with aio_error() function.
  594          * This way is appropriate according to POSIX.
  595          */
  596         return 0;
  597 }
  598 
  599 /*
  600  * Syscall functions.
  601  */
  602 
  603 int
  604 sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
  605     register_t *retval)
  606 {
  607         /* {
  608                 syscallarg(int) fildes;
  609                 syscallarg(struct aiocb *) aiocbp;
  610         } */
  611         struct proc *p = l->l_proc;
  612         struct aioproc *aio;
  613         struct aio_job *a_job;
  614         struct aiocb *aiocbp_ptr;
  615         struct lio_req *lio;
  616         struct filedesc *fdp = p->p_fd;
  617         unsigned int cn, errcnt, fildes;
  618         fdtab_t *dt;
  619 
  620         TAILQ_HEAD(, aio_job) tmp_jobs_list;
  621 
  622         /* Check for invalid file descriptor */
  623         fildes = (unsigned int)SCARG(uap, fildes);
  624         dt = atomic_load_consume(&fdp->fd_dt);
  625         if (fildes >= dt->dt_nfiles)
  626                 return EBADF;
  627         if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL)
  628                 return EBADF;
  629 
  630         /* Check if AIO structure is initialized */
  631         if (p->p_aio == NULL) {
  632                 *retval = AIO_NOTCANCELED;
  633                 return 0;
  634         }
  635 
  636         aio = p->p_aio;
  637         aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp);
  638 
  639         mutex_enter(&aio->aio_mtx);
  640 
  641         /* Cancel the jobs, and remove them from the queue */
  642         cn = 0;
  643         TAILQ_INIT(&tmp_jobs_list);
  644         TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
  645                 if (aiocbp_ptr) {
  646                         if (aiocbp_ptr != a_job->aiocb_uptr)
  647                                 continue;
  648                         if (fildes != a_job->aiocbp.aio_fildes) {
  649                                 mutex_exit(&aio->aio_mtx);
  650                                 return EBADF;
  651                         }
  652                 } else if (a_job->aiocbp.aio_fildes != fildes)
  653                         continue;
  654 
  655                 TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
  656                 TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list);
  657 
  658                 /* Decrease the counters */
  659                 atomic_dec_uint(&aio_jobs_count);
  660                 aio->jobs_count--;
  661                 lio = a_job->lio;
  662                 if (lio != NULL && --lio->refcnt != 0)
  663                         a_job->lio = NULL;
  664 
  665                 cn++;
  666                 if (aiocbp_ptr)
  667                         break;
  668         }
  669 
  670         /* There are canceled jobs */
  671         if (cn)
  672                 *retval = AIO_CANCELED;
  673 
  674         /* We cannot cancel current job */
  675         a_job = aio->curjob;
  676         if (a_job && ((a_job->aiocbp.aio_fildes == fildes) ||
  677             (a_job->aiocb_uptr == aiocbp_ptr)))
  678                 *retval = AIO_NOTCANCELED;
  679 
  680         mutex_exit(&aio->aio_mtx);
  681 
  682         /* Free the jobs after the lock */
  683         errcnt = 0;
  684         while (!TAILQ_EMPTY(&tmp_jobs_list)) {
  685                 a_job = TAILQ_FIRST(&tmp_jobs_list);
  686                 TAILQ_REMOVE(&tmp_jobs_list, a_job, list);
  687                 /* Set the errno and copy structures back to the user-space */
  688                 a_job->aiocbp._errno = ECANCELED;
  689                 a_job->aiocbp._state = JOB_DONE;
  690                 if (copyout(&a_job->aiocbp, a_job->aiocb_uptr,
  691                     sizeof(struct aiocb)))
  692                         errcnt++;
  693                 /* Send a signal if any */
  694                 aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
  695                 if (a_job->lio) {
  696                         lio = a_job->lio;
  697                         aio_sendsig(p, &lio->sig);
  698                         pool_put(&aio_lio_pool, lio);
  699                 }
  700                 pool_put(&aio_job_pool, a_job);
  701         }
  702 
  703         if (errcnt)
  704                 return EFAULT;
  705 
  706         /* Set a correct return value */
  707         if (*retval == 0)
  708                 *retval = AIO_ALLDONE;
  709 
  710         return 0;
  711 }
  712 
  713 int
  714 sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap,
  715     register_t *retval)
  716 {
  717         /* {
  718                 syscallarg(const struct aiocb *) aiocbp;
  719         } */
  720         struct proc *p = l->l_proc;
  721         struct aioproc *aio = p->p_aio;
  722         struct aiocb aiocbp;
  723         int error;
  724 
  725         if (aio == NULL)
  726                 return EINVAL;
  727 
  728         error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
  729         if (error)
  730                 return error;
  731 
  732         if (aiocbp._state == JOB_NONE)
  733                 return EINVAL;
  734 
  735         *retval = aiocbp._errno;
  736 
  737         return 0;
  738 }
  739 
  740 int
  741 sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap,
  742     register_t *retval)
  743 {
  744         /* {
  745                 syscallarg(int) op;
  746                 syscallarg(struct aiocb *) aiocbp;
  747         } */
  748         int op = SCARG(uap, op);
  749 
  750         if ((op != O_DSYNC) && (op != O_SYNC))
  751                 return EINVAL;
  752 
  753         op = O_DSYNC ? AIO_DSYNC : AIO_SYNC;
  754 
  755         return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL);
  756 }
  757 
  758 int
  759 sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap,
  760     register_t *retval)
  761 {
  762         /* {
  763                 syscallarg(struct aiocb *) aiocbp;
  764         } */
  765 
  766         return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL);
  767 }
  768 
  769 int
  770 sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap,
  771     register_t *retval)
  772 {
  773         /* {
  774                 syscallarg(struct aiocb *) aiocbp;
  775         } */
  776         struct proc *p = l->l_proc;
  777         struct aioproc *aio = p->p_aio;
  778         struct aiocb aiocbp;
  779         int error;
  780 
  781         if (aio == NULL)
  782                 return EINVAL;
  783 
  784         error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
  785         if (error)
  786                 return error;
  787 
  788         if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE)
  789                 return EINVAL;
  790 
  791         *retval = aiocbp._retval;
  792 
  793         /* Reset the internal variables */
  794         aiocbp._errno = 0;
  795         aiocbp._retval = -1;
  796         aiocbp._state = JOB_NONE;
  797         error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb));
  798 
  799         return error;
  800 }
  801 
  802 int
  803 sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap,
  804     register_t *retval)
  805 {
  806         /* {
  807                 syscallarg(const struct aiocb *const[]) list;
  808                 syscallarg(int) nent;
  809                 syscallarg(const struct timespec *) timeout;
  810         } */
  811         struct aiocb **list;
  812         struct timespec ts;
  813         int error, nent;
  814 
  815         nent = SCARG(uap, nent);
  816         if (nent <= 0 || nent > aio_listio_max)
  817                 return EAGAIN;
  818 
  819         if (SCARG(uap, timeout)) {
  820                 /* Convert timespec to ticks */
  821                 error = copyin(SCARG(uap, timeout), &ts,
  822                     sizeof(struct timespec));
  823                 if (error)
  824                         return error;
  825         }
  826 
  827         list = kmem_alloc(nent * sizeof(*list), KM_SLEEP);
  828         error = copyin(SCARG(uap, list), list, nent * sizeof(*list));
  829         if (error)
  830                 goto out;
  831         error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL);
  832 out:
  833         kmem_free(list, nent * sizeof(*list));
  834         return error;
  835 }
  836 
  837 int
  838 aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent,
  839     struct timespec *ts)
  840 {
  841         struct proc *p = l->l_proc;
  842         struct aioproc *aio;
  843         struct aio_job *a_job;
  844         int i, error, timo;
  845 
  846         if (p->p_aio == NULL)
  847                 return EAGAIN;
  848         aio = p->p_aio;
  849 
  850         if (ts) {
  851                 timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
  852                 if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0)
  853                         timo = 1;
  854                 if (timo <= 0)
  855                         return EAGAIN;
  856         } else
  857                 timo = 0;
  858 
  859         mutex_enter(&aio->aio_mtx);
  860         for (;;) {
  861                 for (i = 0; i < nent; i++) {
  862 
  863                         /* Skip NULL entries */
  864                         if (aiocbp_list[i] == NULL)
  865                                 continue;
  866 
  867                         /* Skip current job */
  868                         if (aio->curjob) {
  869                                 a_job = aio->curjob;
  870                                 if (a_job->aiocb_uptr == aiocbp_list[i])
  871                                         continue;
  872                         }
  873 
  874                         /* Look for a job in the queue */
  875                         TAILQ_FOREACH(a_job, &aio->jobs_queue, list)
  876                                 if (a_job->aiocb_uptr == aiocbp_list[i])
  877                                         break;
  878 
  879                         if (a_job == NULL) {
  880                                 struct aiocb aiocbp;
  881 
  882                                 mutex_exit(&aio->aio_mtx);
  883 
  884                                 /* Check if the job is done. */
  885                                 error = copyin(aiocbp_list[i], &aiocbp,
  886                                     sizeof(struct aiocb));
  887                                 if (error == 0 && aiocbp._state != JOB_DONE) {
  888                                         mutex_enter(&aio->aio_mtx);
  889                                         continue;
  890                                 }
  891                                 return error;
  892                         }
  893                 }
  894 
  895                 /* Wait for a signal or when timeout occurs */
  896                 error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo);
  897                 if (error) {
  898                         if (error == EWOULDBLOCK)
  899                                 error = EAGAIN;
  900                         break;
  901                 }
  902         }
  903         mutex_exit(&aio->aio_mtx);
  904         return error;
  905 }
  906 
  907 int
  908 sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap,
  909     register_t *retval)
  910 {
  911         /* {
  912                 syscallarg(struct aiocb *) aiocbp;
  913         } */
  914 
  915         return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL);
  916 }
  917 
  918 int
  919 sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap,
  920     register_t *retval)
  921 {
  922         /* {
  923                 syscallarg(int) mode;
  924                 syscallarg(struct aiocb *const[]) list;
  925                 syscallarg(int) nent;
  926                 syscallarg(struct sigevent *) sig;
  927         } */
  928         struct proc *p = l->l_proc;
  929         struct aioproc *aio;
  930         struct aiocb **aiocbp_list;
  931         struct lio_req *lio;
  932         int i, error, errcnt, mode, nent;
  933 
  934         mode = SCARG(uap, mode);
  935         nent = SCARG(uap, nent);
  936 
  937         /* Non-accurate checks for the limit and invalid values */
  938         if (nent < 1 || nent > aio_listio_max)
  939                 return EINVAL;
  940         if (aio_jobs_count + nent > aio_max)
  941                 return EAGAIN;
  942 
  943         /* Check if AIO structure is initialized, if not - initialize it */
  944         if (p->p_aio == NULL)
  945                 if (aio_procinit(p))
  946                         return EAGAIN;
  947         aio = p->p_aio;
  948 
  949         /* Create a LIO structure */
  950         lio = pool_get(&aio_lio_pool, PR_WAITOK);
  951         lio->refcnt = 1;
  952         error = 0;
  953 
  954         switch (mode) {
  955         case LIO_WAIT:
  956                 memset(&lio->sig, 0, sizeof(struct sigevent));
  957                 break;
  958         case LIO_NOWAIT:
  959                 /* Check for signal, validate it */
  960                 if (SCARG(uap, sig)) {
  961                         struct sigevent *sig = &lio->sig;
  962 
  963                         error = copyin(SCARG(uap, sig), &lio->sig,
  964                             sizeof(struct sigevent));
  965                         if (error == 0 &&
  966                             (sig->sigev_signo < 0 ||
  967                             sig->sigev_signo >= NSIG ||
  968                             sig->sigev_notify < SIGEV_NONE ||
  969                             sig->sigev_notify > SIGEV_SA))
  970                                 error = EINVAL;
  971                 } else
  972                         memset(&lio->sig, 0, sizeof(struct sigevent));
  973                 break;
  974         default:
  975                 error = EINVAL;
  976                 break;
  977         }
  978 
  979         if (error != 0) {
  980                 pool_put(&aio_lio_pool, lio);
  981                 return error;
  982         }
  983 
  984         /* Get the list from user-space */
  985         aiocbp_list = kmem_alloc(nent * sizeof(*aiocbp_list), KM_SLEEP);
  986         error = copyin(SCARG(uap, list), aiocbp_list,
  987             nent * sizeof(*aiocbp_list));
  988         if (error) {
  989                 mutex_enter(&aio->aio_mtx);
  990                 goto err;
  991         }
  992 
  993         /* Enqueue all jobs */
  994         errcnt = 0;
  995         for (i = 0; i < nent; i++) {
  996                 error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio);
  997                 /*
  998                  * According to POSIX, in such error case it may
  999                  * fail with other I/O operations initiated.
 1000                  */
 1001                 if (error)
 1002                         errcnt++;
 1003         }
 1004 
 1005         mutex_enter(&aio->aio_mtx);
 1006 
 1007         /* Return an error, if any */
 1008         if (errcnt) {
 1009                 error = EIO;
 1010                 goto err;
 1011         }
 1012 
 1013         if (mode == LIO_WAIT) {
 1014                 /*
 1015                  * Wait for AIO completion.  In such case,
 1016                  * the LIO structure will be freed here.
 1017                  */
 1018                 while (lio->refcnt > 1 && error == 0)
 1019                         error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx);
 1020                 if (error)
 1021                         error = EINTR;
 1022         }
 1023 
 1024 err:
 1025         if (--lio->refcnt != 0)
 1026                 lio = NULL;
 1027         mutex_exit(&aio->aio_mtx);
 1028         if (lio != NULL) {
 1029                 aio_sendsig(p, &lio->sig);
 1030                 pool_put(&aio_lio_pool, lio);
 1031         }
 1032         kmem_free(aiocbp_list, nent * sizeof(*aiocbp_list));
 1033         return error;
 1034 }
 1035 
 1036 /*
 1037  * SysCtl
 1038  */
 1039 
 1040 static int
 1041 sysctl_aio_listio_max(SYSCTLFN_ARGS)
 1042 {
 1043         struct sysctlnode node;
 1044         int error, newsize;
 1045 
 1046         node = *rnode;
 1047         node.sysctl_data = &newsize;
 1048 
 1049         newsize = aio_listio_max;
 1050         error = sysctl_lookup(SYSCTLFN_CALL(&node));
 1051         if (error || newp == NULL)
 1052                 return error;
 1053 
 1054         if (newsize < 1 || newsize > aio_max)
 1055                 return EINVAL;
 1056         aio_listio_max = newsize;
 1057 
 1058         return 0;
 1059 }
 1060 
 1061 static int
 1062 sysctl_aio_max(SYSCTLFN_ARGS)
 1063 {
 1064         struct sysctlnode node;
 1065         int error, newsize;
 1066 
 1067         node = *rnode;
 1068         node.sysctl_data = &newsize;
 1069 
 1070         newsize = aio_max;
 1071         error = sysctl_lookup(SYSCTLFN_CALL(&node));
 1072         if (error || newp == NULL)
 1073                 return error;
 1074 
 1075         if (newsize < 1 || newsize < aio_listio_max)
 1076                 return EINVAL;
 1077         aio_max = newsize;
 1078 
 1079         return 0;
 1080 }
 1081 
 1082 SYSCTL_SETUP(sysctl_aio_init, "aio sysctl")
 1083 {
 1084         int rv;
 1085 
 1086         rv = sysctl_createv(clog, 0, NULL, NULL,
 1087                 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
 1088                 CTLTYPE_INT, "posix_aio",
 1089                 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
 1090                              "Asynchronous I/O option to which the "
 1091                              "system attempts to conform"),
 1092                 NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0,
 1093                 CTL_KERN, CTL_CREATE, CTL_EOL);
 1094 
 1095         if (rv != 0)
 1096                 return;
 1097 
 1098         rv = sysctl_createv(clog, 0, NULL, NULL,
 1099                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
 1100                 CTLTYPE_INT, "aio_listio_max",
 1101                 SYSCTL_DESCR("Maximum number of asynchronous I/O "
 1102                              "operations in a single list I/O call"),
 1103                 sysctl_aio_listio_max, 0, &aio_listio_max, 0,
 1104                 CTL_KERN, CTL_CREATE, CTL_EOL);
 1105 
 1106         if (rv != 0)
 1107                 return;
 1108 
 1109         rv = sysctl_createv(clog, 0, NULL, NULL,
 1110                 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
 1111                 CTLTYPE_INT, "aio_max",
 1112                 SYSCTL_DESCR("Maximum number of asynchronous I/O "
 1113                              "operations"),
 1114                 sysctl_aio_max, 0, &aio_max, 0,
 1115                 CTL_KERN, CTL_CREATE, CTL_EOL);
 1116 
 1117         return;
 1118 }
 1119 
 1120 /*
 1121  * Debugging
 1122  */
 1123 #if defined(DDB)
 1124 void
 1125 aio_print_jobs(void (*pr)(const char *, ...))
 1126 {
 1127         struct proc *p = curlwp->l_proc;
 1128         struct aioproc *aio;
 1129         struct aio_job *a_job;
 1130         struct aiocb *aiocbp;
 1131 
 1132         if (p == NULL) {
 1133                 (*pr)("AIO: We are not in the processes right now.\n");
 1134                 return;
 1135         }
 1136 
 1137         aio = p->p_aio;
 1138         if (aio == NULL) {
 1139                 (*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid);
 1140                 return;
 1141         }
 1142 
 1143         (*pr)("AIO: PID = %d\n", p->p_pid);
 1144         (*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count);
 1145         (*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count);
 1146 
 1147         if (aio->curjob) {
 1148                 a_job = aio->curjob;
 1149                 (*pr)("\nAIO current job:\n");
 1150                 (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
 1151                     a_job->aio_op, a_job->aiocbp._errno,
 1152                     a_job->aiocbp._state, a_job->aiocb_uptr);
 1153                 aiocbp = &a_job->aiocbp;
 1154                 (*pr)("   fd = %d, offset = %u, buf = %p, nbytes = %u\n",
 1155                     aiocbp->aio_fildes, aiocbp->aio_offset,
 1156                     aiocbp->aio_buf, aiocbp->aio_nbytes);
 1157         }
 1158 
 1159         (*pr)("\nAIO queue:\n");
 1160         TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
 1161                 (*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
 1162                     a_job->aio_op, a_job->aiocbp._errno,
 1163                     a_job->aiocbp._state, a_job->aiocb_uptr);
 1164                 aiocbp = &a_job->aiocbp;
 1165                 (*pr)("   fd = %d, offset = %u, buf = %p, nbytes = %u\n",
 1166                     aiocbp->aio_fildes, aiocbp->aio_offset,
 1167                     aiocbp->aio_buf, aiocbp->aio_nbytes);
 1168         }
 1169 }
 1170 #endif /* defined(DDB) */

Cache object: 1ab96b89be44b534eb94142e5f71b09b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.