FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_aio.c


    1 /*-
    2  * Copyright (c) 1997 John S. Dyson.  All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. John S. Dyson's name may not be used to endorse or promote products
   10  *    derived from this software without specific prior written permission.
   11  *
   12  * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
   13  * bad that happens because of using this software isn't the responsibility
   14  * of the author.  This software is distributed AS-IS.
   15  */
   16 
   17 /*
   18  * This file contains support for the POSIX 1003.1B AIO/LIO facility.
   19  */
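
For orientation, here is a minimal userland sketch of the facility implemented
below (a hedged example, not taken from this file; error handling trimmed):

	#include <aio.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		char buf[512];
		struct aiocb iocb;
		ssize_t done;
		int fd;

		fd = open("/tmp/aio-demo", O_RDWR | O_CREAT, 0644);
		memset(&iocb, 0, sizeof(iocb));
		iocb.aio_fildes = fd;
		iocb.aio_buf = buf;
		iocb.aio_nbytes = sizeof(buf);
		iocb.aio_offset = 0;

		aio_read(&iocb);			/* queue the request */
		while (aio_error(&iocb) == EINPROGRESS)	/* poll until done */
			usleep(1000);
		done = aio_return(&iocb);		/* reap the result */
		printf("read %zd bytes\n", done);
		close(fd);
		return (0);
	}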
   20 
   21 #include <sys/cdefs.h>
   22 __FBSDID("$FreeBSD: releng/11.0/sys/kern/vfs_aio.c 303787 2016-08-05 22:23:04Z jhb $");
   23 
   24 #include "opt_compat.h"
   25 
   26 #include <sys/param.h>
   27 #include <sys/systm.h>
   28 #include <sys/malloc.h>
   29 #include <sys/bio.h>
   30 #include <sys/buf.h>
   31 #include <sys/capsicum.h>
   32 #include <sys/eventhandler.h>
   33 #include <sys/sysproto.h>
   34 #include <sys/filedesc.h>
   35 #include <sys/kernel.h>
   36 #include <sys/module.h>
   37 #include <sys/kthread.h>
   38 #include <sys/fcntl.h>
   39 #include <sys/file.h>
   40 #include <sys/limits.h>
   41 #include <sys/lock.h>
   42 #include <sys/mutex.h>
   43 #include <sys/unistd.h>
   44 #include <sys/posix4.h>
   45 #include <sys/proc.h>
   46 #include <sys/resourcevar.h>
   47 #include <sys/signalvar.h>
   48 #include <sys/protosw.h>
   49 #include <sys/rwlock.h>
   50 #include <sys/sema.h>
   51 #include <sys/socket.h>
   52 #include <sys/socketvar.h>
   53 #include <sys/syscall.h>
   54 #include <sys/sysent.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/syslog.h>
   57 #include <sys/sx.h>
   58 #include <sys/taskqueue.h>
   59 #include <sys/vnode.h>
   60 #include <sys/conf.h>
   61 #include <sys/event.h>
   62 #include <sys/mount.h>
   63 #include <geom/geom.h>
   64 
   65 #include <machine/atomic.h>
   66 
   67 #include <vm/vm.h>
   68 #include <vm/vm_page.h>
   69 #include <vm/vm_extern.h>
   70 #include <vm/pmap.h>
   71 #include <vm/vm_map.h>
   72 #include <vm/vm_object.h>
   73 #include <vm/uma.h>
   74 #include <sys/aio.h>
   75 
   76 /*
   77  * Counter for allocating reference ids to new jobs.  Wrapped to 1 on
   78  * overflow. (XXX will be removed soon.)
   79  */
   80 static u_long jobrefid;
   81 
   82 /*
   83  * Counter for aio_fsync.
   84  */
   85 static uint64_t jobseqno;
   86 
   87 #ifndef MAX_AIO_PER_PROC
   88 #define MAX_AIO_PER_PROC        32
   89 #endif
   90 
   91 #ifndef MAX_AIO_QUEUE_PER_PROC
   92 #define MAX_AIO_QUEUE_PER_PROC  256 /* Bigger than AIO_LISTIO_MAX */
   93 #endif
   94 
   95 #ifndef MAX_AIO_QUEUE
   96 #define MAX_AIO_QUEUE           1024 /* Bigger than AIO_LISTIO_MAX */
   97 #endif
   98 
   99 #ifndef MAX_BUF_AIO
  100 #define MAX_BUF_AIO             16
  101 #endif
  102 
  103 FEATURE(aio, "Asynchronous I/O");
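
The FEATURE() macro above publishes a kern.features.aio sysctl node.  A
userland program can probe for it with feature_present(3); a small sketch:

	#include <unistd.h>

	/* Non-zero when the aio facility is compiled in or loaded. */
	static int
	have_aio(void)
	{
		return (feature_present("aio"));
	}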
  104 
  105 static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list");
  106 
  107 static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0,
  108     "Async IO management");
  109 
  110 static int enable_aio_unsafe = 0;
  111 SYSCTL_INT(_vfs_aio, OID_AUTO, enable_unsafe, CTLFLAG_RW, &enable_aio_unsafe, 0,
  112     "Permit asynchronous IO on all file types, not just known-safe types");
  113 
  114 static unsigned int unsafe_warningcnt = 1;
  115 SYSCTL_UINT(_vfs_aio, OID_AUTO, unsafe_warningcnt, CTLFLAG_RW,
  116     &unsafe_warningcnt, 0,
  117     "Warnings that will be triggered upon failed IO requests on unsafe files");
  118 
  119 static int max_aio_procs = MAX_AIO_PROCS;
  120 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0,
   121     "Maximum number of kernel processes to use for handling async IO");
  122 
  123 static int num_aio_procs = 0;
  124 SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0,
  125     "Number of presently active kernel processes for async IO");
  126 
  127 /*
  128  * The code will adjust the actual number of AIO processes towards this
  129  * number when it gets a chance.
  130  */
  131 static int target_aio_procs = TARGET_AIO_PROCS;
  132 SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs,
  133     0,
  134     "Preferred number of ready kernel processes for async IO");
  135 
  136 static int max_queue_count = MAX_AIO_QUEUE;
  137 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0,
  138     "Maximum number of aio requests to queue, globally");
  139 
  140 static int num_queue_count = 0;
  141 SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0,
  142     "Number of queued aio requests");
  143 
  144 static int num_buf_aio = 0;
  145 SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0,
  146     "Number of aio requests presently handled by the buf subsystem");
  147 
  148 /* Number of async I/O processes in the process of being started */
  149 /* XXX This should be local to aio_aqueue() */
  150 static int num_aio_resv_start = 0;
  151 
  152 static int aiod_lifetime;
  153 SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0,
  154     "Maximum lifetime for idle aiod");
  155 
  156 static int max_aio_per_proc = MAX_AIO_PER_PROC;
  157 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc,
  158     0,
  159     "Maximum active aio requests per process (stored in the process)");
  160 
  161 static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;
  162 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW,
  163     &max_aio_queue_per_proc, 0,
  164     "Maximum queued aio requests per process (stored in the process)");
  165 
  166 static int max_buf_aio = MAX_BUF_AIO;
  167 SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0,
  168     "Maximum buf aio requests per process (stored in the process)");
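
All of the knobs above live under vfs.aio.  A hedged sketch of reading one of
them programmatically with sysctlbyname(3), using a name declared above:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	static void
	show_aio_limit(void)
	{
		int val;
		size_t len = sizeof(val);

		/* vfs.aio.max_aio_queue_per_proc is defined above. */
		if (sysctlbyname("vfs.aio.max_aio_queue_per_proc", &val, &len,
		    NULL, 0) == 0)
			printf("per-process queue limit: %d\n", val);
	}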
  169 
  170 #ifdef COMPAT_FREEBSD6
  171 typedef struct oaiocb {
  172         int     aio_fildes;             /* File descriptor */
  173         off_t   aio_offset;             /* File offset for I/O */
  174         volatile void *aio_buf;         /* I/O buffer in process space */
  175         size_t  aio_nbytes;             /* Number of bytes for I/O */
  176         struct  osigevent aio_sigevent; /* Signal to deliver */
  177         int     aio_lio_opcode;         /* LIO opcode */
  178         int     aio_reqprio;            /* Request priority -- ignored */
  179         struct  __aiocb_private _aiocb_private;
  180 } oaiocb_t;
  181 #endif
  182 
  183 /*
   184  * Below is a key of locks used to protect each member of struct kaiocb,
   185  * aioliojob, and kaioinfo, and any backends.
   186  *
   187  * * - need not be protected
   188  * a - locked by kaioinfo lock
   189  * b - locked by backend lock; the backend lock can be null in some cases
   190  *     (for example, for BIO requests), in which case the proc lock is
   191  *     reused
   192  * c - locked by aio_job_mtx, the lock for the generic file I/O backend.
   193  */
  194 
  195 /*
  196  * If the routine that services an AIO request blocks while running in an
  197  * AIO kernel process it can starve other I/O requests.  BIO requests
  198  * queued via aio_qphysio() complete in GEOM and do not use AIO kernel
  199  * processes at all.  Socket I/O requests use a separate pool of
  200  * kprocs and also force non-blocking I/O.  Other file I/O requests
  201  * use the generic fo_read/fo_write operations which can block.  The
  202  * fsync and mlock operations can also block while executing.  Ideally
  203  * none of these requests would block while executing.
  204  *
  205  * Note that the service routines cannot toggle O_NONBLOCK in the file
  206  * structure directly while handling a request due to races with
  207  * userland threads.
  208  */
  209 
  210 /* jobflags */
  211 #define KAIOCB_QUEUEING         0x01
  212 #define KAIOCB_CANCELLED        0x02
  213 #define KAIOCB_CANCELLING       0x04
  214 #define KAIOCB_CHECKSYNC        0x08
  215 #define KAIOCB_CLEARED          0x10
  216 #define KAIOCB_FINISHED         0x20
  217 
  218 /*
  219  * AIO process info
  220  */
  221 #define AIOP_FREE       0x1                     /* proc on free queue */
  222 
  223 struct aioproc {
  224         int     aioprocflags;                   /* (c) AIO proc flags */
  225         TAILQ_ENTRY(aioproc) list;              /* (c) list of processes */
  226         struct  proc *aioproc;                  /* (*) the AIO proc */
  227 };
  228 
  229 /*
  230  * data-structure for lio signal management
  231  */
  232 struct aioliojob {
  233         int     lioj_flags;                     /* (a) listio flags */
   234         int     lioj_count;                     /* (a) count of jobs in this lio */
   235         int     lioj_finished_count;            /* (a) count of finished jobs */
  236         struct  sigevent lioj_signal;           /* (a) signal on all I/O done */
  237         TAILQ_ENTRY(aioliojob) lioj_list;       /* (a) lio list */
  238         struct  knlist klist;                   /* (a) list of knotes */
  239         ksiginfo_t lioj_ksi;                    /* (a) Realtime signal info */
  240 };
  241 
  242 #define LIOJ_SIGNAL             0x1     /* signal on all done (lio) */
  243 #define LIOJ_SIGNAL_POSTED      0x2     /* signal has been posted */
  244 #define LIOJ_KEVENT_POSTED      0x4     /* kevent triggered */
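
Each struct aioliojob above tracks one lio_listio(2) submission, and
lioj_signal carries the batch-completion sigevent.  A hedged userland sketch
of the call it backs (two jobs, completion signalled via SIGUSR1):

	#include <aio.h>
	#include <signal.h>
	#include <string.h>

	/* Submit two prepared control blocks as a single batch. */
	static int
	submit_pair(struct aiocb *a, struct aiocb *b)
	{
		struct aiocb *list[2] = { a, b };
		struct sigevent sigev;

		memset(&sigev, 0, sizeof(sigev));
		sigev.sigev_notify = SIGEV_SIGNAL;
		sigev.sigev_signo = SIGUSR1;
		return (lio_listio(LIO_NOWAIT, list, 2, &sigev));
	}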
  245 
  246 /*
  247  * per process aio data structure
  248  */
  249 struct kaioinfo {
  250         struct  mtx kaio_mtx;           /* the lock to protect this struct */
  251         int     kaio_flags;             /* (a) per process kaio flags */
  252         int     kaio_maxactive_count;   /* (*) maximum number of AIOs */
  253         int     kaio_active_count;      /* (c) number of currently used AIOs */
   254         int     kaio_qallowed_count;    /* (*) maximum size of AIO queue */
  255         int     kaio_count;             /* (a) size of AIO queue */
  256         int     kaio_ballowed_count;    /* (*) maximum number of buffers */
  257         int     kaio_buffer_count;      /* (a) number of physio buffers */
  258         TAILQ_HEAD(,kaiocb) kaio_all;   /* (a) all AIOs in a process */
  259         TAILQ_HEAD(,kaiocb) kaio_done;  /* (a) done queue for process */
  260         TAILQ_HEAD(,aioliojob) kaio_liojoblist; /* (a) list of lio jobs */
  261         TAILQ_HEAD(,kaiocb) kaio_jobqueue;      /* (a) job queue for process */
  262         TAILQ_HEAD(,kaiocb) kaio_syncqueue;     /* (a) queue for aio_fsync */
  263         TAILQ_HEAD(,kaiocb) kaio_syncready;  /* (a) second q for aio_fsync */
  264         struct  task kaio_task;         /* (*) task to kick aio processes */
  265         struct  task kaio_sync_task;    /* (*) task to schedule fsync jobs */
  266 };
  267 
  268 #define AIO_LOCK(ki)            mtx_lock(&(ki)->kaio_mtx)
  269 #define AIO_UNLOCK(ki)          mtx_unlock(&(ki)->kaio_mtx)
  270 #define AIO_LOCK_ASSERT(ki, f)  mtx_assert(&(ki)->kaio_mtx, (f))
  271 #define AIO_MTX(ki)             (&(ki)->kaio_mtx)
  272 
  273 #define KAIO_RUNDOWN    0x1     /* process is being run down */
  274 #define KAIO_WAKEUP     0x2     /* wakeup process when AIO completes */
  275 
  276 /*
  277  * Operations used to interact with userland aio control blocks.
  278  * Different ABIs provide their own operations.
  279  */
  280 struct aiocb_ops {
  281         int     (*copyin)(struct aiocb *ujob, struct aiocb *kjob);
  282         long    (*fetch_status)(struct aiocb *ujob);
  283         long    (*fetch_error)(struct aiocb *ujob);
  284         int     (*store_status)(struct aiocb *ujob, long status);
  285         int     (*store_error)(struct aiocb *ujob, long error);
  286         int     (*store_kernelinfo)(struct aiocb *ujob, long jobref);
  287         int     (*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob);
  288 };
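
The native table (aiocb_ops, near the end of this file) fills these hooks with
direct copyin/fuword/suword accessors.  As a sketch of how another ABI might
plug in, with entirely hypothetical helper names:

	/*
	 * Hypothetical 32-bit compat table: each hook would translate
	 * between the compat userland layout and the native struct aiocb.
	 */
	static struct aiocb_ops aiocb32_ops_example = {
		.copyin = aiocb32_copyin,		/* hypothetical */
		.fetch_status = aiocb32_fetch_status,	/* hypothetical */
		.fetch_error = aiocb32_fetch_error,	/* hypothetical */
		.store_status = aiocb32_store_status,	/* hypothetical */
		.store_error = aiocb32_store_error,	/* hypothetical */
		.store_kernelinfo = aiocb32_store_kernelinfo,
		.store_aiocb = aiocb32_store_aiocb,	/* hypothetical */
	};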
  289 
  290 static TAILQ_HEAD(,aioproc) aio_freeproc;               /* (c) Idle daemons */
  291 static struct sema aio_newproc_sem;
  292 static struct mtx aio_job_mtx;
  293 static TAILQ_HEAD(,kaiocb) aio_jobs;                    /* (c) Async job list */
  294 static struct unrhdr *aiod_unr;
  295 
  296 void            aio_init_aioinfo(struct proc *p);
  297 static int      aio_onceonly(void);
  298 static int      aio_free_entry(struct kaiocb *job);
  299 static void     aio_process_rw(struct kaiocb *job);
  300 static void     aio_process_sync(struct kaiocb *job);
  301 static void     aio_process_mlock(struct kaiocb *job);
  302 static void     aio_schedule_fsync(void *context, int pending);
  303 static int      aio_newproc(int *);
  304 int             aio_aqueue(struct thread *td, struct aiocb *ujob,
  305                     struct aioliojob *lio, int type, struct aiocb_ops *ops);
  306 static int      aio_queue_file(struct file *fp, struct kaiocb *job);
  307 static void     aio_physwakeup(struct bio *bp);
  308 static void     aio_proc_rundown(void *arg, struct proc *p);
  309 static void     aio_proc_rundown_exec(void *arg, struct proc *p,
  310                     struct image_params *imgp);
  311 static int      aio_qphysio(struct proc *p, struct kaiocb *job);
  312 static void     aio_daemon(void *param);
  313 static void     aio_bio_done_notify(struct proc *userp, struct kaiocb *job);
  314 static bool     aio_clear_cancel_function_locked(struct kaiocb *job);
  315 static int      aio_kick(struct proc *userp);
  316 static void     aio_kick_nowait(struct proc *userp);
  317 static void     aio_kick_helper(void *context, int pending);
  318 static int      filt_aioattach(struct knote *kn);
  319 static void     filt_aiodetach(struct knote *kn);
  320 static int      filt_aio(struct knote *kn, long hint);
  321 static int      filt_lioattach(struct knote *kn);
  322 static void     filt_liodetach(struct knote *kn);
  323 static int      filt_lio(struct knote *kn, long hint);
  324 
  325 /*
  326  * Zones for:
  327  *      kaio    Per process async io info
  328  *      aiop    async io process data
  329  *      aiocb   async io jobs
  330  *      aiol    list io job pointer - internal to aio_suspend XXX
  331  *      aiolio  list io jobs
  332  */
  333 static uma_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone, aiolio_zone;
  334 
  335 /* kqueue filters for aio */
  336 static struct filterops aio_filtops = {
  337         .f_isfd = 0,
  338         .f_attach = filt_aioattach,
  339         .f_detach = filt_aiodetach,
  340         .f_event = filt_aio,
  341 };
  342 static struct filterops lio_filtops = {
  343         .f_isfd = 0,
  344         .f_attach = filt_lioattach,
  345         .f_detach = filt_liodetach,
  346         .f_event = filt_lio
  347 };
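
These filterops implement EVFILT_AIO and EVFILT_LIO.  From userland, a job's
completion can be routed to a kqueue by filling in the sigevent before
submission; a minimal hedged sketch:

	#include <sys/event.h>
	#include <aio.h>

	/* Post an EVFILT_AIO event on kq when this read completes. */
	static int
	queue_read_kq(int kq, struct aiocb *iocb)
	{
		iocb->aio_sigevent.sigev_notify = SIGEV_KEVENT;
		iocb->aio_sigevent.sigev_notify_kqueue = kq;
		iocb->aio_sigevent.sigev_value.sival_ptr = iocb;
		return (aio_read(iocb));
	}

The kevent(2) returned for the completed job carries the aiocb pointer as its
ident, after which the result is reaped with aio_return().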
  348 
  349 static eventhandler_tag exit_tag, exec_tag;
  350 
  351 TASKQUEUE_DEFINE_THREAD(aiod_kick);
  352 
  353 /*
  354  * Main operations function for use as a kernel module.
  355  */
  356 static int
  357 aio_modload(struct module *module, int cmd, void *arg)
  358 {
  359         int error = 0;
  360 
  361         switch (cmd) {
  362         case MOD_LOAD:
  363                 aio_onceonly();
  364                 break;
  365         case MOD_SHUTDOWN:
  366                 break;
  367         default:
  368                 error = EOPNOTSUPP;
  369                 break;
  370         }
  371         return (error);
  372 }
  373 
  374 static moduledata_t aio_mod = {
  375         "aio",
  376         &aio_modload,
  377         NULL
  378 };
  379 
  380 DECLARE_MODULE(aio, aio_mod, SI_SUB_VFS, SI_ORDER_ANY);
  381 MODULE_VERSION(aio, 1);
  382 
  383 /*
  384  * Startup initialization
  385  */
  386 static int
  387 aio_onceonly(void)
  388 {
  389 
  390         exit_tag = EVENTHANDLER_REGISTER(process_exit, aio_proc_rundown, NULL,
  391             EVENTHANDLER_PRI_ANY);
  392         exec_tag = EVENTHANDLER_REGISTER(process_exec, aio_proc_rundown_exec,
  393             NULL, EVENTHANDLER_PRI_ANY);
  394         kqueue_add_filteropts(EVFILT_AIO, &aio_filtops);
  395         kqueue_add_filteropts(EVFILT_LIO, &lio_filtops);
  396         TAILQ_INIT(&aio_freeproc);
  397         sema_init(&aio_newproc_sem, 0, "aio_new_proc");
  398         mtx_init(&aio_job_mtx, "aio_job", NULL, MTX_DEF);
  399         TAILQ_INIT(&aio_jobs);
  400         aiod_unr = new_unrhdr(1, INT_MAX, NULL);
  401         kaio_zone = uma_zcreate("AIO", sizeof(struct kaioinfo), NULL, NULL,
  402             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  403         aiop_zone = uma_zcreate("AIOP", sizeof(struct aioproc), NULL,
  404             NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  405         aiocb_zone = uma_zcreate("AIOCB", sizeof(struct kaiocb), NULL, NULL,
  406             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
   407         aiol_zone = uma_zcreate("AIOL", AIO_LISTIO_MAX * sizeof(intptr_t), NULL,
  408             NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  409         aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL,
  410             NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  411         aiod_lifetime = AIOD_LIFETIME_DEFAULT;
  412         jobrefid = 1;
  413         p31b_setcfg(CTL_P1003_1B_ASYNCHRONOUS_IO, _POSIX_ASYNCHRONOUS_IO);
  414         p31b_setcfg(CTL_P1003_1B_AIO_LISTIO_MAX, AIO_LISTIO_MAX);
  415         p31b_setcfg(CTL_P1003_1B_AIO_MAX, MAX_AIO_QUEUE);
  416         p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, 0);
  417 
  418         return (0);
  419 }
  420 
  421 /*
  422  * Init the per-process aioinfo structure.  The aioinfo limits are set
  423  * per-process for user limit (resource) management.
  424  */
  425 void
  426 aio_init_aioinfo(struct proc *p)
  427 {
  428         struct kaioinfo *ki;
  429 
  430         ki = uma_zalloc(kaio_zone, M_WAITOK);
  431         mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF | MTX_NEW);
  432         ki->kaio_flags = 0;
  433         ki->kaio_maxactive_count = max_aio_per_proc;
  434         ki->kaio_active_count = 0;
  435         ki->kaio_qallowed_count = max_aio_queue_per_proc;
  436         ki->kaio_count = 0;
  437         ki->kaio_ballowed_count = max_buf_aio;
  438         ki->kaio_buffer_count = 0;
  439         TAILQ_INIT(&ki->kaio_all);
  440         TAILQ_INIT(&ki->kaio_done);
  441         TAILQ_INIT(&ki->kaio_jobqueue);
  442         TAILQ_INIT(&ki->kaio_liojoblist);
  443         TAILQ_INIT(&ki->kaio_syncqueue);
  444         TAILQ_INIT(&ki->kaio_syncready);
  445         TASK_INIT(&ki->kaio_task, 0, aio_kick_helper, p);
  446         TASK_INIT(&ki->kaio_sync_task, 0, aio_schedule_fsync, ki);
  447         PROC_LOCK(p);
  448         if (p->p_aioinfo == NULL) {
  449                 p->p_aioinfo = ki;
  450                 PROC_UNLOCK(p);
  451         } else {
  452                 PROC_UNLOCK(p);
  453                 mtx_destroy(&ki->kaio_mtx);
  454                 uma_zfree(kaio_zone, ki);
  455         }
  456 
  457         while (num_aio_procs < MIN(target_aio_procs, max_aio_procs))
  458                 aio_newproc(NULL);
  459 }
  460 
  461 static int
  462 aio_sendsig(struct proc *p, struct sigevent *sigev, ksiginfo_t *ksi)
  463 {
  464         struct thread *td;
  465         int error;
  466 
  467         error = sigev_findtd(p, sigev, &td);
  468         if (error)
  469                 return (error);
  470         if (!KSI_ONQ(ksi)) {
  471                 ksiginfo_set_sigev(ksi, sigev);
  472                 ksi->ksi_code = SI_ASYNCIO;
  473                 ksi->ksi_flags |= KSI_EXT | KSI_INS;
  474                 tdsendsignal(p, td, ksi->ksi_signo, ksi);
  475         }
  476         PROC_UNLOCK(p);
  477         return (error);
  478 }
  479 
  480 /*
  481  * Free a job entry.  Wait for completion if it is currently active, but don't
  482  * delay forever.  If we delay, we return a flag that says that we have to
  483  * restart the queue scan.
  484  */
  485 static int
  486 aio_free_entry(struct kaiocb *job)
  487 {
  488         struct kaioinfo *ki;
  489         struct aioliojob *lj;
  490         struct proc *p;
  491 
  492         p = job->userproc;
  493         MPASS(curproc == p);
  494         ki = p->p_aioinfo;
  495         MPASS(ki != NULL);
  496 
  497         AIO_LOCK_ASSERT(ki, MA_OWNED);
  498         MPASS(job->jobflags & KAIOCB_FINISHED);
  499 
  500         atomic_subtract_int(&num_queue_count, 1);
  501 
  502         ki->kaio_count--;
  503         MPASS(ki->kaio_count >= 0);
  504 
  505         TAILQ_REMOVE(&ki->kaio_done, job, plist);
  506         TAILQ_REMOVE(&ki->kaio_all, job, allist);
  507 
  508         lj = job->lio;
  509         if (lj) {
  510                 lj->lioj_count--;
  511                 lj->lioj_finished_count--;
  512 
  513                 if (lj->lioj_count == 0) {
  514                         TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
  515                         /* lio is going away, we need to destroy any knotes */
  516                         knlist_delete(&lj->klist, curthread, 1);
  517                         PROC_LOCK(p);
  518                         sigqueue_take(&lj->lioj_ksi);
  519                         PROC_UNLOCK(p);
  520                         uma_zfree(aiolio_zone, lj);
  521                 }
  522         }
  523 
  524         /* job is going away, we need to destroy any knotes */
  525         knlist_delete(&job->klist, curthread, 1);
  526         PROC_LOCK(p);
  527         sigqueue_take(&job->ksi);
  528         PROC_UNLOCK(p);
  529 
  530         AIO_UNLOCK(ki);
  531 
  532         /*
  533          * The thread argument here is used to find the owning process
  534          * and is also passed to fo_close() which may pass it to various
  535          * places such as devsw close() routines.  Because of that, we
  536          * need a thread pointer from the process owning the job that is
  537          * persistent and won't disappear out from under us or move to
  538          * another process.
  539          *
  540          * Currently, all the callers of this function call it to remove
  541          * a kaiocb from the current process' job list either via a
  542          * syscall or due to the current process calling exit() or
  543          * execve().  Thus, we know that p == curproc.  We also know that
  544          * curthread can't exit since we are curthread.
  545          *
  546          * Therefore, we use curthread as the thread to pass to
  547          * knlist_delete().  This does mean that it is possible for the
  548          * thread pointer at close time to differ from the thread pointer
  549          * at open time, but this is already true of file descriptors in
  550          * a multithreaded process.
  551          */
  552         if (job->fd_file)
  553                 fdrop(job->fd_file, curthread);
  554         crfree(job->cred);
  555         uma_zfree(aiocb_zone, job);
  556         AIO_LOCK(ki);
  557 
  558         return (0);
  559 }
  560 
  561 static void
  562 aio_proc_rundown_exec(void *arg, struct proc *p,
  563     struct image_params *imgp __unused)
  564 {
  565         aio_proc_rundown(arg, p);
  566 }
  567 
  568 static int
  569 aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job)
  570 {
  571         aio_cancel_fn_t *func;
  572         int cancelled;
  573 
  574         AIO_LOCK_ASSERT(ki, MA_OWNED);
  575         if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED))
  576                 return (0);
  577         MPASS((job->jobflags & KAIOCB_CANCELLING) == 0);
  578         job->jobflags |= KAIOCB_CANCELLED;
  579 
  580         func = job->cancel_fn;
  581 
  582         /*
   583          * If there is no cancel routine, just leave the job marked as
   584          * cancelled.  The job should be in active use by a caller, who
   585          * will complete it normally or notice the cancellation when it
   586          * fails to install a cancel routine.
  587          */
  588         if (func == NULL)
  589                 return (0);
  590 
  591         /*
  592          * Set the CANCELLING flag so that aio_complete() will defer
  593          * completions of this job.  This prevents the job from being
  594          * freed out from under the cancel callback.  After the
  595          * callback any deferred completion (whether from the callback
  596          * or any other source) will be completed.
  597          */
  598         job->jobflags |= KAIOCB_CANCELLING;
  599         AIO_UNLOCK(ki);
  600         func(job);
  601         AIO_LOCK(ki);
  602         job->jobflags &= ~KAIOCB_CANCELLING;
  603         if (job->jobflags & KAIOCB_FINISHED) {
  604                 cancelled = job->uaiocb._aiocb_private.error == ECANCELED;
  605                 TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist);
  606                 aio_bio_done_notify(p, job);
  607         } else {
  608                 /*
  609                  * The cancel callback might have scheduled an
  610                  * operation to cancel this request, but it is
  611                  * only counted as cancelled if the request is
  612                  * cancelled when the callback returns.
  613                  */
  614                 cancelled = 0;
  615         }
  616         return (cancelled);
  617 }
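
The CANCELLED/CANCELLING handshake above is the contract a backend's cancel
routine runs under.  A hedged sketch of a minimal cancel_fn honoring it, with
a hypothetical backend queue and lock (the socket backend follows this shape):

	static void
	example_cancel_fn(struct kaiocb *job)
	{

		mtx_lock(&example_queue_mtx);	/* hypothetical backend lock */
		if (!aio_cancel_cleared(job))
			TAILQ_REMOVE(&example_jobq, job, list); /* hypothetical */
		mtx_unlock(&example_queue_mtx);
		aio_cancel(job);		/* completes with ECANCELED */
	}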
  618 
  619 /*
  620  * Rundown the jobs for a given process.
  621  */
  622 static void
  623 aio_proc_rundown(void *arg, struct proc *p)
  624 {
  625         struct kaioinfo *ki;
  626         struct aioliojob *lj;
  627         struct kaiocb *job, *jobn;
  628 
  629         KASSERT(curthread->td_proc == p,
  630             ("%s: called on non-curproc", __func__));
  631         ki = p->p_aioinfo;
  632         if (ki == NULL)
  633                 return;
  634 
  635         AIO_LOCK(ki);
  636         ki->kaio_flags |= KAIO_RUNDOWN;
  637 
  638 restart:
  639 
  640         /*
   641          * Try to cancel all pending requests.  This code simulates
   642          * aio_cancel() on each pending I/O request.
  643          */
  644         TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) {
  645                 aio_cancel_job(p, ki, job);
  646         }
  647 
  648         /* Wait for all running I/O to be finished */
  649         if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) {
  650                 ki->kaio_flags |= KAIO_WAKEUP;
  651                 msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz);
  652                 goto restart;
  653         }
  654 
  655         /* Free all completed I/O requests. */
  656         while ((job = TAILQ_FIRST(&ki->kaio_done)) != NULL)
  657                 aio_free_entry(job);
  658 
  659         while ((lj = TAILQ_FIRST(&ki->kaio_liojoblist)) != NULL) {
  660                 if (lj->lioj_count == 0) {
  661                         TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
  662                         knlist_delete(&lj->klist, curthread, 1);
  663                         PROC_LOCK(p);
  664                         sigqueue_take(&lj->lioj_ksi);
  665                         PROC_UNLOCK(p);
  666                         uma_zfree(aiolio_zone, lj);
  667                 } else {
  668                         panic("LIO job not cleaned up: C:%d, FC:%d\n",
  669                             lj->lioj_count, lj->lioj_finished_count);
  670                 }
  671         }
  672         AIO_UNLOCK(ki);
  673         taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_task);
  674         taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_sync_task);
  675         mtx_destroy(&ki->kaio_mtx);
  676         uma_zfree(kaio_zone, ki);
  677         p->p_aioinfo = NULL;
  678 }
  679 
  680 /*
  681  * Select a job to run (called by an AIO daemon).
  682  */
  683 static struct kaiocb *
  684 aio_selectjob(struct aioproc *aiop)
  685 {
  686         struct kaiocb *job;
  687         struct kaioinfo *ki;
  688         struct proc *userp;
  689 
  690         mtx_assert(&aio_job_mtx, MA_OWNED);
  691 restart:
  692         TAILQ_FOREACH(job, &aio_jobs, list) {
  693                 userp = job->userproc;
  694                 ki = userp->p_aioinfo;
  695 
  696                 if (ki->kaio_active_count < ki->kaio_maxactive_count) {
  697                         TAILQ_REMOVE(&aio_jobs, job, list);
  698                         if (!aio_clear_cancel_function(job))
  699                                 goto restart;
  700 
  701                         /* Account for currently active jobs. */
  702                         ki->kaio_active_count++;
  703                         break;
  704                 }
  705         }
  706         return (job);
  707 }
  708 
  709 /*
  710  * Move all data to a permanent storage device.  This code
  711  * simulates the fsync syscall.
  712  */
  713 static int
  714 aio_fsync_vnode(struct thread *td, struct vnode *vp)
  715 {
  716         struct mount *mp;
  717         int error;
  718 
  719         if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
  720                 goto drop;
  721         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  722         if (vp->v_object != NULL) {
  723                 VM_OBJECT_WLOCK(vp->v_object);
  724                 vm_object_page_clean(vp->v_object, 0, 0, 0);
  725                 VM_OBJECT_WUNLOCK(vp->v_object);
  726         }
  727         error = VOP_FSYNC(vp, MNT_WAIT, td);
  728 
  729         VOP_UNLOCK(vp, 0);
  730         vn_finished_write(mp);
  731 drop:
  732         return (error);
  733 }
  734 
  735 /*
  736  * The AIO processing activity for LIO_READ/LIO_WRITE.  This is the code that
  737  * does the I/O request for the non-physio version of the operations.  The
  738  * normal vn operations are used, and this code should work in all instances
  739  * for every type of file, including pipes, sockets, fifos, and regular files.
  740  *
   741  * XXX I don't think this works well for sockets, pipes, and fifos.
  742  */
  743 static void
  744 aio_process_rw(struct kaiocb *job)
  745 {
  746         struct ucred *td_savedcred;
  747         struct thread *td;
  748         struct aiocb *cb;
  749         struct file *fp;
  750         struct uio auio;
  751         struct iovec aiov;
  752         ssize_t cnt;
  753         long msgsnd_st, msgsnd_end;
  754         long msgrcv_st, msgrcv_end;
  755         long oublock_st, oublock_end;
  756         long inblock_st, inblock_end;
  757         int error;
  758 
  759         KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ ||
  760             job->uaiocb.aio_lio_opcode == LIO_WRITE,
  761             ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
  762 
  763         aio_switch_vmspace(job);
  764         td = curthread;
  765         td_savedcred = td->td_ucred;
  766         td->td_ucred = job->cred;
  767         cb = &job->uaiocb;
  768         fp = job->fd_file;
  769 
  770         aiov.iov_base = (void *)(uintptr_t)cb->aio_buf;
  771         aiov.iov_len = cb->aio_nbytes;
  772 
  773         auio.uio_iov = &aiov;
  774         auio.uio_iovcnt = 1;
  775         auio.uio_offset = cb->aio_offset;
  776         auio.uio_resid = cb->aio_nbytes;
  777         cnt = cb->aio_nbytes;
  778         auio.uio_segflg = UIO_USERSPACE;
  779         auio.uio_td = td;
  780 
  781         msgrcv_st = td->td_ru.ru_msgrcv;
  782         msgsnd_st = td->td_ru.ru_msgsnd;
  783         inblock_st = td->td_ru.ru_inblock;
  784         oublock_st = td->td_ru.ru_oublock;
  785 
  786         /*
  787          * aio_aqueue() acquires a reference to the file that is
  788          * released in aio_free_entry().
  789          */
  790         if (cb->aio_lio_opcode == LIO_READ) {
  791                 auio.uio_rw = UIO_READ;
  792                 if (auio.uio_resid == 0)
  793                         error = 0;
  794                 else
  795                         error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td);
  796         } else {
  797                 if (fp->f_type == DTYPE_VNODE)
  798                         bwillwrite();
  799                 auio.uio_rw = UIO_WRITE;
  800                 error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td);
  801         }
  802         msgrcv_end = td->td_ru.ru_msgrcv;
  803         msgsnd_end = td->td_ru.ru_msgsnd;
  804         inblock_end = td->td_ru.ru_inblock;
  805         oublock_end = td->td_ru.ru_oublock;
  806 
  807         job->msgrcv = msgrcv_end - msgrcv_st;
  808         job->msgsnd = msgsnd_end - msgsnd_st;
  809         job->inblock = inblock_end - inblock_st;
  810         job->outblock = oublock_end - oublock_st;
  811 
   812         if (error != 0 && auio.uio_resid != cnt) {
   813                 if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
   814                         error = 0;
   815                 if (error == EPIPE && cb->aio_lio_opcode == LIO_WRITE) {
  816                         PROC_LOCK(job->userproc);
  817                         kern_psignal(job->userproc, SIGPIPE);
  818                         PROC_UNLOCK(job->userproc);
  819                 }
  820         }
  821 
  822         cnt -= auio.uio_resid;
  823         td->td_ucred = td_savedcred;
  824         if (error)
  825                 aio_complete(job, -1, error);
  826         else
  827                 aio_complete(job, cnt, 0);
  828 }
  829 
  830 static void
  831 aio_process_sync(struct kaiocb *job)
  832 {
  833         struct thread *td = curthread;
  834         struct ucred *td_savedcred = td->td_ucred;
  835         struct file *fp = job->fd_file;
  836         int error = 0;
  837 
  838         KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC,
  839             ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
  840 
  841         td->td_ucred = job->cred;
  842         if (fp->f_vnode != NULL)
  843                 error = aio_fsync_vnode(td, fp->f_vnode);
  844         td->td_ucred = td_savedcred;
  845         if (error)
  846                 aio_complete(job, -1, error);
  847         else
  848                 aio_complete(job, 0, 0);
  849 }
  850 
  851 static void
  852 aio_process_mlock(struct kaiocb *job)
  853 {
  854         struct aiocb *cb = &job->uaiocb;
  855         int error;
  856 
  857         KASSERT(job->uaiocb.aio_lio_opcode == LIO_MLOCK,
  858             ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
  859 
  860         aio_switch_vmspace(job);
  861         error = vm_mlock(job->userproc, job->cred,
  862             __DEVOLATILE(void *, cb->aio_buf), cb->aio_nbytes);
  863         if (error)
  864                 aio_complete(job, -1, error);
  865         else
  866                 aio_complete(job, 0, 0);
  867 }
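
aio_process_mlock() above services aio_mlock(2).  The userland side is just a
control block carrying a buffer and length; a hedged sketch:

	#include <aio.h>
	#include <string.h>

	/* Ask the kernel to wire [addr, addr + len) asynchronously. */
	static int
	wire_async(void *addr, size_t len, struct aiocb *iocb)
	{
		memset(iocb, 0, sizeof(*iocb));
		iocb->aio_buf = addr;
		iocb->aio_nbytes = len;
		return (aio_mlock(iocb));
	}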
  868 
  869 static void
  870 aio_bio_done_notify(struct proc *userp, struct kaiocb *job)
  871 {
  872         struct aioliojob *lj;
  873         struct kaioinfo *ki;
  874         struct kaiocb *sjob, *sjobn;
  875         int lj_done;
  876         bool schedule_fsync;
  877 
  878         ki = userp->p_aioinfo;
  879         AIO_LOCK_ASSERT(ki, MA_OWNED);
  880         lj = job->lio;
  881         lj_done = 0;
  882         if (lj) {
  883                 lj->lioj_finished_count++;
  884                 if (lj->lioj_count == lj->lioj_finished_count)
  885                         lj_done = 1;
  886         }
  887         TAILQ_INSERT_TAIL(&ki->kaio_done, job, plist);
  888         MPASS(job->jobflags & KAIOCB_FINISHED);
  889 
  890         if (ki->kaio_flags & KAIO_RUNDOWN)
  891                 goto notification_done;
  892 
  893         if (job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
  894             job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID)
  895                 aio_sendsig(userp, &job->uaiocb.aio_sigevent, &job->ksi);
  896 
  897         KNOTE_LOCKED(&job->klist, 1);
  898 
  899         if (lj_done) {
  900                 if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
  901                         lj->lioj_flags |= LIOJ_KEVENT_POSTED;
  902                         KNOTE_LOCKED(&lj->klist, 1);
  903                 }
  904                 if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED))
  905                     == LIOJ_SIGNAL
  906                     && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
  907                         lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) {
  908                         aio_sendsig(userp, &lj->lioj_signal, &lj->lioj_ksi);
  909                         lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
  910                 }
  911         }
  912 
  913 notification_done:
  914         if (job->jobflags & KAIOCB_CHECKSYNC) {
  915                 schedule_fsync = false;
  916                 TAILQ_FOREACH_SAFE(sjob, &ki->kaio_syncqueue, list, sjobn) {
  917                         if (job->fd_file != sjob->fd_file ||
  918                             job->seqno >= sjob->seqno)
  919                                 continue;
  920                         if (--sjob->pending > 0)
  921                                 continue;
  922                         TAILQ_REMOVE(&ki->kaio_syncqueue, sjob, list);
  923                         if (!aio_clear_cancel_function_locked(sjob))
  924                                 continue;
  925                         TAILQ_INSERT_TAIL(&ki->kaio_syncready, sjob, list);
  926                         schedule_fsync = true;
  927                 }
  928                 if (schedule_fsync)
  929                         taskqueue_enqueue(taskqueue_aiod_kick,
  930                             &ki->kaio_sync_task);
  931         }
  932         if (ki->kaio_flags & KAIO_WAKEUP) {
  933                 ki->kaio_flags &= ~KAIO_WAKEUP;
  934                 wakeup(&userp->p_aioinfo);
  935         }
  936 }
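
The KAIOCB_CHECKSYNC pass above is what lets an aio_fsync(2) job wait on the
kaio_syncqueue until every earlier job on the same file has finished.  From
userland, that ordering is what makes this idiom safe (a hedged sketch):

	#include <aio.h>
	#include <fcntl.h>
	#include <string.h>

	/* Queue a write, then an fsync that the kernel orders after it. */
	static int
	write_then_sync(int fd, void *buf, size_t len,
	    struct aiocb *wr, struct aiocb *sync)
	{
		memset(wr, 0, sizeof(*wr));
		wr->aio_fildes = fd;
		wr->aio_buf = buf;
		wr->aio_nbytes = len;
		if (aio_write(wr) != 0)
			return (-1);

		memset(sync, 0, sizeof(*sync));
		sync->aio_fildes = fd;
		return (aio_fsync(O_SYNC, sync));
	}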
  937 
  938 static void
  939 aio_schedule_fsync(void *context, int pending)
  940 {
  941         struct kaioinfo *ki;
  942         struct kaiocb *job;
  943 
  944         ki = context;
  945         AIO_LOCK(ki);
  946         while (!TAILQ_EMPTY(&ki->kaio_syncready)) {
  947                 job = TAILQ_FIRST(&ki->kaio_syncready);
  948                 TAILQ_REMOVE(&ki->kaio_syncready, job, list);
  949                 AIO_UNLOCK(ki);
  950                 aio_schedule(job, aio_process_sync);
  951                 AIO_LOCK(ki);
  952         }
  953         AIO_UNLOCK(ki);
  954 }
  955 
  956 bool
  957 aio_cancel_cleared(struct kaiocb *job)
  958 {
  959         struct kaioinfo *ki;
  960 
  961         /*
   962          * The caller should hold the same queue lock that was held
   963          * when aio_clear_cancel_function() was called and set this
   964          * flag, ensuring that this check sees an up-to-date value.
   965          * However, there is no way to assert that.
  966          */
  967         ki = job->userproc->p_aioinfo;
  968         return ((job->jobflags & KAIOCB_CLEARED) != 0);
  969 }
  970 
  971 static bool
  972 aio_clear_cancel_function_locked(struct kaiocb *job)
  973 {
  974 
  975         AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED);
  976         MPASS(job->cancel_fn != NULL);
  977         if (job->jobflags & KAIOCB_CANCELLING) {
  978                 job->jobflags |= KAIOCB_CLEARED;
  979                 return (false);
  980         }
  981         job->cancel_fn = NULL;
  982         return (true);
  983 }
  984 
  985 bool
  986 aio_clear_cancel_function(struct kaiocb *job)
  987 {
  988         struct kaioinfo *ki;
  989         bool ret;
  990 
  991         ki = job->userproc->p_aioinfo;
  992         AIO_LOCK(ki);
  993         ret = aio_clear_cancel_function_locked(job);
  994         AIO_UNLOCK(ki);
  995         return (ret);
  996 }
  997 
  998 static bool
  999 aio_set_cancel_function_locked(struct kaiocb *job, aio_cancel_fn_t *func)
 1000 {
 1001 
 1002         AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED);
 1003         if (job->jobflags & KAIOCB_CANCELLED)
 1004                 return (false);
 1005         job->cancel_fn = func;
 1006         return (true);
 1007 }
 1008 
 1009 bool
 1010 aio_set_cancel_function(struct kaiocb *job, aio_cancel_fn_t *func)
 1011 {
 1012         struct kaioinfo *ki;
 1013         bool ret;
 1014 
 1015         ki = job->userproc->p_aioinfo;
 1016         AIO_LOCK(ki);
 1017         ret = aio_set_cancel_function_locked(job, func);
 1018         AIO_UNLOCK(ki);
 1019         return (ret);
 1020 }
 1021 
 1022 void
 1023 aio_complete(struct kaiocb *job, long status, int error)
 1024 {
 1025         struct kaioinfo *ki;
 1026         struct proc *userp;
 1027 
 1028         job->uaiocb._aiocb_private.error = error;
 1029         job->uaiocb._aiocb_private.status = status;
 1030 
 1031         userp = job->userproc;
 1032         ki = userp->p_aioinfo;
 1033 
 1034         AIO_LOCK(ki);
 1035         KASSERT(!(job->jobflags & KAIOCB_FINISHED),
 1036             ("duplicate aio_complete"));
 1037         job->jobflags |= KAIOCB_FINISHED;
 1038         if ((job->jobflags & (KAIOCB_QUEUEING | KAIOCB_CANCELLING)) == 0) {
 1039                 TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist);
 1040                 aio_bio_done_notify(userp, job);
 1041         }
 1042         AIO_UNLOCK(ki);
 1043 }
 1044 
 1045 void
 1046 aio_cancel(struct kaiocb *job)
 1047 {
 1048 
 1049         aio_complete(job, -1, ECANCELED);
 1050 }
 1051 
 1052 void
 1053 aio_switch_vmspace(struct kaiocb *job)
 1054 {
 1055 
 1056         vmspace_switch_aio(job->userproc->p_vmspace);
 1057 }
 1058 
 1059 /*
  1060  * The AIO daemon.  Most of the actual work is done in aio_process_*,
  1061  * but the setup (and address space management) is done in this routine.
 1062  */
 1063 static void
 1064 aio_daemon(void *_id)
 1065 {
 1066         struct kaiocb *job;
 1067         struct aioproc *aiop;
 1068         struct kaioinfo *ki;
 1069         struct proc *p;
 1070         struct vmspace *myvm;
 1071         struct thread *td = curthread;
 1072         int id = (intptr_t)_id;
 1073 
 1074         /*
 1075          * Grab an extra reference on the daemon's vmspace so that it
 1076          * doesn't get freed by jobs that switch to a different
 1077          * vmspace.
 1078          */
 1079         p = td->td_proc;
 1080         myvm = vmspace_acquire_ref(p);
 1081 
 1082         KASSERT(p->p_textvp == NULL, ("kthread has a textvp"));
 1083 
 1084         /*
 1085          * Allocate and ready the aio control info.  There is one aiop structure
 1086          * per daemon.
 1087          */
 1088         aiop = uma_zalloc(aiop_zone, M_WAITOK);
 1089         aiop->aioproc = p;
 1090         aiop->aioprocflags = 0;
 1091 
 1092         /*
 1093          * Wakeup parent process.  (Parent sleeps to keep from blasting away
 1094          * and creating too many daemons.)
 1095          */
 1096         sema_post(&aio_newproc_sem);
 1097 
 1098         mtx_lock(&aio_job_mtx);
 1099         for (;;) {
 1100                 /*
 1101                  * Take daemon off of free queue
 1102                  */
 1103                 if (aiop->aioprocflags & AIOP_FREE) {
 1104                         TAILQ_REMOVE(&aio_freeproc, aiop, list);
 1105                         aiop->aioprocflags &= ~AIOP_FREE;
 1106                 }
 1107 
 1108                 /*
 1109                  * Check for jobs.
 1110                  */
 1111                 while ((job = aio_selectjob(aiop)) != NULL) {
 1112                         mtx_unlock(&aio_job_mtx);
 1113 
 1114                         ki = job->userproc->p_aioinfo;
 1115                         job->handle_fn(job);
 1116 
 1117                         mtx_lock(&aio_job_mtx);
 1118                         /* Decrement the active job count. */
 1119                         ki->kaio_active_count--;
 1120                 }
 1121 
 1122                 /*
 1123                  * Disconnect from user address space.
 1124                  */
 1125                 if (p->p_vmspace != myvm) {
 1126                         mtx_unlock(&aio_job_mtx);
 1127                         vmspace_switch_aio(myvm);
 1128                         mtx_lock(&aio_job_mtx);
 1129                         /*
  1130                          * We have to restart to avoid a race; we only
  1131                          * sleep if no job can be selected.
 1132                          */
 1133                         continue;
 1134                 }
 1135 
 1136                 mtx_assert(&aio_job_mtx, MA_OWNED);
 1137 
 1138                 TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
 1139                 aiop->aioprocflags |= AIOP_FREE;
 1140 
 1141                 /*
 1142                  * If daemon is inactive for a long time, allow it to exit,
 1143                  * thereby freeing resources.
 1144                  */
 1145                 if (msleep(p, &aio_job_mtx, PRIBIO, "aiordy",
 1146                     aiod_lifetime) == EWOULDBLOCK && TAILQ_EMPTY(&aio_jobs) &&
 1147                     (aiop->aioprocflags & AIOP_FREE) &&
 1148                     num_aio_procs > target_aio_procs)
 1149                         break;
 1150         }
 1151         TAILQ_REMOVE(&aio_freeproc, aiop, list);
 1152         num_aio_procs--;
 1153         mtx_unlock(&aio_job_mtx);
 1154         uma_zfree(aiop_zone, aiop);
 1155         free_unr(aiod_unr, id);
 1156         vmspace_free(myvm);
 1157 
 1158         KASSERT(p->p_vmspace == myvm,
 1159             ("AIOD: bad vmspace for exiting daemon"));
 1160         KASSERT(myvm->vm_refcnt > 1,
 1161             ("AIOD: bad vm refcnt for exiting daemon: %d", myvm->vm_refcnt));
 1162         kproc_exit(0);
 1163 }
 1164 
 1165 /*
 1166  * Create a new AIO daemon. This is mostly a kernel-thread fork routine. The
 1167  * AIO daemon modifies its environment itself.
 1168  */
 1169 static int
 1170 aio_newproc(int *start)
 1171 {
 1172         int error;
 1173         struct proc *p;
 1174         int id;
 1175 
 1176         id = alloc_unr(aiod_unr);
 1177         error = kproc_create(aio_daemon, (void *)(intptr_t)id, &p,
 1178                 RFNOWAIT, 0, "aiod%d", id);
 1179         if (error == 0) {
 1180                 /*
 1181                  * Wait until daemon is started.
 1182                  */
 1183                 sema_wait(&aio_newproc_sem);
 1184                 mtx_lock(&aio_job_mtx);
 1185                 num_aio_procs++;
 1186                 if (start != NULL)
 1187                         (*start)--;
 1188                 mtx_unlock(&aio_job_mtx);
 1189         } else {
 1190                 free_unr(aiod_unr, id);
 1191         }
 1192         return (error);
 1193 }
 1194 
 1195 /*
 1196  * Try the high-performance, low-overhead physio method for eligible
 1197  * VCHR devices.  This method doesn't use an aio helper thread, and
 1198  * thus has very low overhead.
 1199  *
 1200  * Assumes that the caller, aio_aqueue(), has incremented the file
 1201  * structure's reference count, preventing its deallocation for the
 1202  * duration of this call.
 1203  */
 1204 static int
 1205 aio_qphysio(struct proc *p, struct kaiocb *job)
 1206 {
 1207         struct aiocb *cb;
 1208         struct file *fp;
 1209         struct bio *bp;
 1210         struct buf *pbuf;
 1211         struct vnode *vp;
 1212         struct cdevsw *csw;
 1213         struct cdev *dev;
 1214         struct kaioinfo *ki;
 1215         int error, ref, poff;
 1216         vm_prot_t prot;
 1217 
 1218         cb = &job->uaiocb;
 1219         fp = job->fd_file;
 1220 
 1221         if (fp == NULL || fp->f_type != DTYPE_VNODE)
 1222                 return (-1);
 1223 
 1224         vp = fp->f_vnode;
 1225         if (vp->v_type != VCHR)
 1226                 return (-1);
 1227         if (vp->v_bufobj.bo_bsize == 0)
 1228                 return (-1);
 1229         if (cb->aio_nbytes % vp->v_bufobj.bo_bsize)
 1230                 return (-1);
 1231 
 1232         ref = 0;
 1233         csw = devvn_refthread(vp, &dev, &ref);
 1234         if (csw == NULL)
 1235                 return (ENXIO);
 1236 
 1237         if ((csw->d_flags & D_DISK) == 0) {
 1238                 error = -1;
 1239                 goto unref;
 1240         }
 1241         if (cb->aio_nbytes > dev->si_iosize_max) {
 1242                 error = -1;
 1243                 goto unref;
 1244         }
 1245 
 1246         ki = p->p_aioinfo;
 1247         poff = (vm_offset_t)cb->aio_buf & PAGE_MASK;
 1248         if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
 1249                 if (cb->aio_nbytes > MAXPHYS) {
 1250                         error = -1;
 1251                         goto unref;
 1252                 }
 1253 
 1254                 pbuf = NULL;
 1255         } else {
 1256                 if (cb->aio_nbytes > MAXPHYS - poff) {
 1257                         error = -1;
 1258                         goto unref;
 1259                 }
 1260                 if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) {
 1261                         error = -1;
 1262                         goto unref;
 1263                 }
 1264 
 1265                 job->pbuf = pbuf = (struct buf *)getpbuf(NULL);
 1266                 BUF_KERNPROC(pbuf);
 1267                 AIO_LOCK(ki);
 1268                 ki->kaio_buffer_count++;
 1269                 AIO_UNLOCK(ki);
 1270         }
 1271         job->bp = bp = g_alloc_bio();
 1272 
 1273         bp->bio_length = cb->aio_nbytes;
 1274         bp->bio_bcount = cb->aio_nbytes;
 1275         bp->bio_done = aio_physwakeup;
 1276         bp->bio_data = (void *)(uintptr_t)cb->aio_buf;
 1277         bp->bio_offset = cb->aio_offset;
 1278         bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ;
 1279         bp->bio_dev = dev;
 1280         bp->bio_caller1 = (void *)job;
 1281 
 1282         prot = VM_PROT_READ;
 1283         if (cb->aio_lio_opcode == LIO_READ)
 1284                 prot |= VM_PROT_WRITE;  /* Less backwards than it looks */
 1285         job->npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
 1286             (vm_offset_t)bp->bio_data, bp->bio_length, prot, job->pages,
 1287             nitems(job->pages));
 1288         if (job->npages < 0) {
 1289                 error = EFAULT;
 1290                 goto doerror;
 1291         }
 1292         if (pbuf != NULL) {
 1293                 pmap_qenter((vm_offset_t)pbuf->b_data,
 1294                     job->pages, job->npages);
 1295                 bp->bio_data = pbuf->b_data + poff;
 1296                 atomic_add_int(&num_buf_aio, 1);
 1297         } else {
 1298                 bp->bio_ma = job->pages;
 1299                 bp->bio_ma_n = job->npages;
 1300                 bp->bio_ma_offset = poff;
 1301                 bp->bio_data = unmapped_buf;
 1302                 bp->bio_flags |= BIO_UNMAPPED;
 1303         }
 1304 
 1305         /* Perform transfer. */
 1306         csw->d_strategy(bp);
 1307         dev_relthread(dev, ref);
 1308         return (0);
 1309 
 1310 doerror:
 1311         if (pbuf != NULL) {
 1312                 AIO_LOCK(ki);
 1313                 ki->kaio_buffer_count--;
 1314                 AIO_UNLOCK(ki);
 1315                 relpbuf(pbuf, NULL);
 1316                 job->pbuf = NULL;
 1317         }
 1318         g_destroy_bio(bp);
 1319         job->bp = NULL;
 1320 unref:
 1321         dev_relthread(dev, ref);
 1322         return (error);
 1323 }
 1324 
 1325 #ifdef COMPAT_FREEBSD6
 1326 static int
 1327 convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig)
 1328 {
 1329 
 1330         /*
 1331          * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are
 1332          * supported by AIO with the old sigevent structure.
 1333          */
 1334         nsig->sigev_notify = osig->sigev_notify;
 1335         switch (nsig->sigev_notify) {
 1336         case SIGEV_NONE:
 1337                 break;
 1338         case SIGEV_SIGNAL:
 1339                 nsig->sigev_signo = osig->__sigev_u.__sigev_signo;
 1340                 break;
 1341         case SIGEV_KEVENT:
 1342                 nsig->sigev_notify_kqueue =
 1343                     osig->__sigev_u.__sigev_notify_kqueue;
 1344                 nsig->sigev_value.sival_ptr = osig->sigev_value.sival_ptr;
 1345                 break;
 1346         default:
 1347                 return (EINVAL);
 1348         }
 1349         return (0);
 1350 }
 1351 
 1352 static int
 1353 aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
 1354 {
 1355         struct oaiocb *ojob;
 1356         int error;
 1357 
 1358         bzero(kjob, sizeof(struct aiocb));
 1359         error = copyin(ujob, kjob, sizeof(struct oaiocb));
 1360         if (error)
 1361                 return (error);
 1362         ojob = (struct oaiocb *)kjob;
 1363         return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent));
 1364 }
 1365 #endif
 1366 
 1367 static int
 1368 aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob)
 1369 {
 1370 
 1371         return (copyin(ujob, kjob, sizeof(struct aiocb)));
 1372 }
 1373 
 1374 static long
 1375 aiocb_fetch_status(struct aiocb *ujob)
 1376 {
 1377 
 1378         return (fuword(&ujob->_aiocb_private.status));
 1379 }
 1380 
 1381 static long
 1382 aiocb_fetch_error(struct aiocb *ujob)
 1383 {
 1384 
 1385         return (fuword(&ujob->_aiocb_private.error));
 1386 }
 1387 
 1388 static int
 1389 aiocb_store_status(struct aiocb *ujob, long status)
 1390 {
 1391 
 1392         return (suword(&ujob->_aiocb_private.status, status));
 1393 }
 1394 
 1395 static int
 1396 aiocb_store_error(struct aiocb *ujob, long error)
 1397 {
 1398 
 1399         return (suword(&ujob->_aiocb_private.error, error));
 1400 }
 1401 
 1402 static int
 1403 aiocb_store_kernelinfo(struct aiocb *ujob, long jobref)
 1404 {
 1405 
 1406         return (suword(&ujob->_aiocb_private.kernelinfo, jobref));
 1407 }
 1408 
 1409 static int
 1410 aiocb_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
 1411 {
 1412 
 1413         return (suword(ujobp, (long)ujob));
 1414 }
 1415 
 1416 static struct aiocb_ops aiocb_ops = {
 1417         .copyin = aiocb_copyin,
 1418         .fetch_status = aiocb_fetch_status,
 1419         .fetch_error = aiocb_fetch_error,
 1420         .store_status = aiocb_store_status,
 1421         .store_error = aiocb_store_error,
 1422         .store_kernelinfo = aiocb_store_kernelinfo,
 1423         .store_aiocb = aiocb_store_aiocb,
 1424 };
 1425 
 1426 #ifdef COMPAT_FREEBSD6
 1427 static struct aiocb_ops aiocb_ops_osigevent = {
 1428         .copyin = aiocb_copyin_old_sigevent,
 1429         .fetch_status = aiocb_fetch_status,
 1430         .fetch_error = aiocb_fetch_error,
 1431         .store_status = aiocb_store_status,
 1432         .store_error = aiocb_store_error,
 1433         .store_kernelinfo = aiocb_store_kernelinfo,
 1434         .store_aiocb = aiocb_store_aiocb,
 1435 };
 1436 #endif
 1437 
 1438 /*
  1439  * Queue a new AIO request.  The choice between the kproc-threaded path and
  1440  * the direct physio path for VCHR devices is made in this code.
 1441  */
 1442 int
 1443 aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
 1444         int type, struct aiocb_ops *ops)
 1445 {
 1446         struct proc *p = td->td_proc;
 1447         cap_rights_t rights;
 1448         struct file *fp;
 1449         struct kaiocb *job;
 1450         struct kaioinfo *ki;
 1451         struct kevent kev;
 1452         int opcode;
 1453         int error;
 1454         int fd, kqfd;
 1455         int jid;
 1456         u_short evflags;
 1457 
 1458         if (p->p_aioinfo == NULL)
 1459                 aio_init_aioinfo(p);
 1460 
 1461         ki = p->p_aioinfo;
 1462 
 1463         ops->store_status(ujob, -1);
 1464         ops->store_error(ujob, 0);
 1465         ops->store_kernelinfo(ujob, -1);
 1466 
 1467         if (num_queue_count >= max_queue_count ||
 1468             ki->kaio_count >= ki->kaio_qallowed_count) {
 1469                 ops->store_error(ujob, EAGAIN);
 1470                 return (EAGAIN);
 1471         }
 1472 
 1473         job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO);
 1474         knlist_init_mtx(&job->klist, AIO_MTX(ki));
 1475 
 1476         error = ops->copyin(ujob, &job->uaiocb);
 1477         if (error) {
 1478                 ops->store_error(ujob, error);
 1479                 uma_zfree(aiocb_zone, job);
 1480                 return (error);
 1481         }
 1482 
 1483         if (job->uaiocb.aio_nbytes > IOSIZE_MAX) {
 1484                 uma_zfree(aiocb_zone, job);
 1485                 return (EINVAL);
 1486         }
 1487 
 1488         if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT &&
 1489             job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL &&
 1490             job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID &&
 1491             job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) {
 1492                 ops->store_error(ujob, EINVAL);
 1493                 uma_zfree(aiocb_zone, job);
 1494                 return (EINVAL);
 1495         }
 1496 
 1497         if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
 1498              job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) &&
 1499                 !_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) {
 1500                 uma_zfree(aiocb_zone, job);
 1501                 return (EINVAL);
 1502         }
 1503 
 1504         ksiginfo_init(&job->ksi);
 1505 
 1506         /* Save userspace address of the job info. */
 1507         job->ujob = ujob;
 1508 
 1509         /* Get the opcode. */
 1510         if (type != LIO_NOP)
 1511                 job->uaiocb.aio_lio_opcode = type;
 1512         opcode = job->uaiocb.aio_lio_opcode;
 1513 
 1514         /*
 1515          * Validate the opcode and fetch the file object for the specified
 1516          * file descriptor.
 1517          *
 1518          * XXXRW: Moved the opcode validation up here so that we don't
 1519  * retrieve a file descriptor without knowing what the capability
 1520          * should be.
 1521          */
 1522         fd = job->uaiocb.aio_fildes;
 1523         switch (opcode) {
 1524         case LIO_WRITE:
 1525                 error = fget_write(td, fd,
 1526                     cap_rights_init(&rights, CAP_PWRITE), &fp);
 1527                 break;
 1528         case LIO_READ:
 1529                 error = fget_read(td, fd,
 1530                     cap_rights_init(&rights, CAP_PREAD), &fp);
 1531                 break;
 1532         case LIO_SYNC:
 1533                 error = fget(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp);
 1534                 break;
 1535         case LIO_MLOCK:
 1536                 fp = NULL;
 1537                 break;
 1538         case LIO_NOP:
 1539                 error = fget(td, fd, cap_rights_init(&rights), &fp);
 1540                 break;
 1541         default:
 1542                 error = EINVAL;
 1543         }
 1544         if (error) {
 1545                 uma_zfree(aiocb_zone, job);
 1546                 ops->store_error(ujob, error);
 1547                 return (error);
 1548         }
 1549 
 1550         if (opcode == LIO_SYNC && fp->f_vnode == NULL) {
 1551                 error = EINVAL;
 1552                 goto aqueue_fail;
 1553         }
 1554 
 1555         if (opcode != LIO_SYNC && job->uaiocb.aio_offset == -1LL) {
 1556                 error = EINVAL;
 1557                 goto aqueue_fail;
 1558         }
 1559 
 1560         job->fd_file = fp;
 1561 
 1562         mtx_lock(&aio_job_mtx);
 1563         jid = jobrefid++;
 1564         job->seqno = jobseqno++;
 1565         mtx_unlock(&aio_job_mtx);
 1566         error = ops->store_kernelinfo(ujob, jid);
 1567         if (error) {
 1568                 error = EINVAL;
 1569                 goto aqueue_fail;
 1570         }
 1571         job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid;
 1572 
 1573         if (opcode == LIO_NOP) {
 1574                 fdrop(fp, td);
 1575                 uma_zfree(aiocb_zone, job);
 1576                 return (0);
 1577         }
 1578 
 1579         if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT)
 1580                 goto no_kqueue;
 1581         evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags;
 1582         if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) {
 1583                 error = EINVAL;
 1584                 goto aqueue_fail;
 1585         }
 1586         kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue;
 1587         kev.ident = (uintptr_t)job->ujob;
 1588         kev.filter = EVFILT_AIO;
 1589         kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | evflags;
 1590         kev.data = (intptr_t)job;
 1591         kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr;
 1592         error = kqfd_register(kqfd, &kev, td, 1);
 1593         if (error)
 1594                 goto aqueue_fail;
 1595 
 1596 no_kqueue:
 1597 
 1598         ops->store_error(ujob, EINPROGRESS);
 1599         job->uaiocb._aiocb_private.error = EINPROGRESS;
 1600         job->userproc = p;
 1601         job->cred = crhold(td->td_ucred);
 1602         job->jobflags = KAIOCB_QUEUEING;
 1603         job->lio = lj;
 1604 
 1605         if (opcode == LIO_MLOCK) {
 1606                 aio_schedule(job, aio_process_mlock);
 1607                 error = 0;
 1608         } else if (fp->f_ops->fo_aio_queue == NULL)
 1609                 error = aio_queue_file(fp, job);
 1610         else
 1611                 error = fo_aio_queue(fp, job);
 1612         if (error)
 1613                 goto aqueue_fail;
 1614 
 1615         AIO_LOCK(ki);
 1616         job->jobflags &= ~KAIOCB_QUEUEING;
 1617         TAILQ_INSERT_TAIL(&ki->kaio_all, job, allist);
 1618         ki->kaio_count++;
 1619         if (lj)
 1620                 lj->lioj_count++;
 1621         atomic_add_int(&num_queue_count, 1);
 1622         if (job->jobflags & KAIOCB_FINISHED) {
 1623                 /*
 1624                  * The queue callback completed the request synchronously.
 1625                  * The bulk of the completion is deferred in that case
 1626                  * until this point.
 1627                  */
 1628                 aio_bio_done_notify(p, job);
 1629         } else
 1630                 TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, job, plist);
 1631         AIO_UNLOCK(ki);
 1632         return (0);
 1633 
 1634 aqueue_fail:
 1635         knlist_delete(&job->klist, curthread, 0);
 1636         if (fp)
 1637                 fdrop(fp, td);
 1638         uma_zfree(aiocb_zone, job);
 1639         ops->store_error(ujob, error);
 1640         return (error);
 1641 }
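
/*
 * Illustrative userland sketch of the request path above: queue an
 * asynchronous read whose completion is delivered through a kqueue, as
 * handled by the SIGEV_KEVENT branch in aio_aqueue().  Assumes an open
 * descriptor "fd"; error handling is elided.
 *
 *      #include <aio.h>
 *      #include <sys/event.h>
 *      #include <string.h>
 *
 *      char buf[4096];
 *      struct aiocb acb;
 *      int kq = kqueue();
 *
 *      memset(&acb, 0, sizeof(acb));
 *      acb.aio_fildes = fd;
 *      acb.aio_buf = buf;
 *      acb.aio_nbytes = sizeof(buf);
 *      acb.aio_offset = 0;
 *      acb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
 *      acb.aio_sigevent.sigev_notify_kqueue = kq;
 *      acb.aio_sigevent.sigev_value.sival_ptr = &acb;
 *      aio_read(&acb);
 */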
 1642 
 1643 static void
 1644 aio_cancel_daemon_job(struct kaiocb *job)
 1645 {
 1646 
 1647         mtx_lock(&aio_job_mtx);
 1648         if (!aio_cancel_cleared(job))
 1649                 TAILQ_REMOVE(&aio_jobs, job, list);
 1650         mtx_unlock(&aio_job_mtx);
 1651         aio_cancel(job);
 1652 }
 1653 
 1654 void
 1655 aio_schedule(struct kaiocb *job, aio_handle_fn_t *func)
 1656 {
 1657 
 1658         mtx_lock(&aio_job_mtx);
 1659         if (!aio_set_cancel_function(job, aio_cancel_daemon_job)) {
 1660                 mtx_unlock(&aio_job_mtx);
 1661                 aio_cancel(job);
 1662                 return;
 1663         }
 1664         job->handle_fn = func;
 1665         TAILQ_INSERT_TAIL(&aio_jobs, job, list);
 1666         aio_kick_nowait(job->userproc);
 1667         mtx_unlock(&aio_job_mtx);
 1668 }
 1669 
 1670 static void
 1671 aio_cancel_sync(struct kaiocb *job)
 1672 {
 1673         struct kaioinfo *ki;
 1674 
 1675         ki = job->userproc->p_aioinfo;
 1676         AIO_LOCK(ki);
 1677         if (!aio_cancel_cleared(job))
 1678                 TAILQ_REMOVE(&ki->kaio_syncqueue, job, list);
 1679         AIO_UNLOCK(ki);
 1680         aio_cancel(job);
 1681 }
 1682 
 1683 int
 1684 aio_queue_file(struct file *fp, struct kaiocb *job)
 1685 {
 1686         struct aioliojob *lj;
 1687         struct kaioinfo *ki;
 1688         struct kaiocb *job2;
 1689         struct vnode *vp;
 1690         struct mount *mp;
 1691         int error, opcode;
 1692         bool safe;
 1693 
 1694         lj = job->lio;
 1695         ki = job->userproc->p_aioinfo;
 1696         opcode = job->uaiocb.aio_lio_opcode;
 1697         if (opcode == LIO_SYNC)
 1698                 goto queueit;
 1699 
 1700         if ((error = aio_qphysio(job->userproc, job)) == 0)
 1701                 goto done;
 1702 #if 0
 1703         /*
 1704          * XXX: This means qphysio() failed with EFAULT.  The current
 1705          * behavior is to retry the operation via fo_read/fo_write.
 1706          * Wouldn't it be better to just complete the request with an
 1707          * error here?
 1708          */
 1709         if (error > 0)
 1710                 goto done;
 1711 #endif
 1712 queueit:
 1713         safe = false;
 1714         if (fp->f_type == DTYPE_VNODE) {
 1715                 vp = fp->f_vnode;
 1716                 if (vp->v_type == VREG || vp->v_type == VDIR) {
 1717                         mp = fp->f_vnode->v_mount;
 1718                         if (mp == NULL || (mp->mnt_flag & MNT_LOCAL) != 0)
 1719                                 safe = true;
 1720                 }
 1721         }
 1722         if (!(safe || enable_aio_unsafe)) {
 1723                 counted_warning(&unsafe_warningcnt,
 1724                     "is attempting to use unsafe AIO requests");
 1725                 return (EOPNOTSUPP);
 1726         }
 1727 
 1728         if (opcode == LIO_SYNC) {
 1729                 AIO_LOCK(ki);
 1730                 TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) {
 1731                         if (job2->fd_file == job->fd_file &&
 1732                             job2->uaiocb.aio_lio_opcode != LIO_SYNC &&
 1733                             job2->seqno < job->seqno) {
 1734                                 job2->jobflags |= KAIOCB_CHECKSYNC;
 1735                                 job->pending++;
 1736                         }
 1737                 }
 1738                 if (job->pending != 0) {
 1739                         if (!aio_set_cancel_function_locked(job,
 1740                                 aio_cancel_sync)) {
 1741                                 AIO_UNLOCK(ki);
 1742                                 aio_cancel(job);
 1743                                 return (0);
 1744                         }
 1745                         TAILQ_INSERT_TAIL(&ki->kaio_syncqueue, job, list);
 1746                         AIO_UNLOCK(ki);
 1747                         return (0);
 1748                 }
 1749                 AIO_UNLOCK(ki);
 1750         }
 1751 
 1752         switch (opcode) {
 1753         case LIO_READ:
 1754         case LIO_WRITE:
 1755                 aio_schedule(job, aio_process_rw);
 1756                 error = 0;
 1757                 break;
 1758         case LIO_SYNC:
 1759                 aio_schedule(job, aio_process_sync);
 1760                 error = 0;
 1761                 break;
 1762         default:
 1763                 error = EINVAL;
 1764         }
 1765 done:
 1766         return (error);
 1767 }
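
/*
 * Note on the "safe" test above: only requests backed by regular files
 * or directories on local file systems qualify; anything else that
 * falls through to the queueing path is refused with EOPNOTSUPP unless
 * unsafe AIO has been enabled administratively via the sysctl backed
 * by enable_aio_unsafe (on stock kernels this is expected to be
 * vfs.aio.enable_unsafe), e.g.:
 *
 *      # sysctl vfs.aio.enable_unsafe=1
 */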
 1768 
 1769 static void
 1770 aio_kick_nowait(struct proc *userp)
 1771 {
 1772         struct kaioinfo *ki = userp->p_aioinfo;
 1773         struct aioproc *aiop;
 1774 
 1775         mtx_assert(&aio_job_mtx, MA_OWNED);
 1776         if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
 1777                 TAILQ_REMOVE(&aio_freeproc, aiop, list);
 1778                 aiop->aioprocflags &= ~AIOP_FREE;
 1779                 wakeup(aiop->aioproc);
 1780         } else if (num_aio_resv_start + num_aio_procs < max_aio_procs &&
 1781             ki->kaio_active_count + num_aio_resv_start <
 1782             ki->kaio_maxactive_count) {
 1783                 taskqueue_enqueue(taskqueue_aiod_kick, &ki->kaio_task);
 1784         }
 1785 }
 1786 
 1787 static int
 1788 aio_kick(struct proc *userp)
 1789 {
 1790         struct kaioinfo *ki = userp->p_aioinfo;
 1791         struct aioproc *aiop;
 1792         int error, ret = 0;
 1793 
 1794         mtx_assert(&aio_job_mtx, MA_OWNED);
 1795 retryproc:
 1796         if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
 1797                 TAILQ_REMOVE(&aio_freeproc, aiop, list);
 1798                 aiop->aioprocflags &= ~AIOP_FREE;
 1799                 wakeup(aiop->aioproc);
 1800         } else if (num_aio_resv_start + num_aio_procs < max_aio_procs &&
 1801             ki->kaio_active_count + num_aio_resv_start <
 1802             ki->kaio_maxactive_count) {
 1803                 num_aio_resv_start++;
 1804                 mtx_unlock(&aio_job_mtx);
 1805                 error = aio_newproc(&num_aio_resv_start);
 1806                 mtx_lock(&aio_job_mtx);
 1807                 if (error) {
 1808                         num_aio_resv_start--;
 1809                         goto retryproc;
 1810                 }
 1811         } else {
 1812                 ret = -1;
 1813         }
 1814         return (ret);
 1815 }
 1816 
 1817 static void
 1818 aio_kick_helper(void *context, int pending)
 1819 {
 1820         struct proc *userp = context;
 1821 
 1822         mtx_lock(&aio_job_mtx);
 1823         while (--pending >= 0) {
 1824                 if (aio_kick(userp))
 1825                         break;
 1826         }
 1827         mtx_unlock(&aio_job_mtx);
 1828 }
 1829 
 1830 /*
 1831  * Support the aio_return system call; as a side effect, kernel
 1832  * resources are released.
 1833  */
 1834 static int
 1835 kern_aio_return(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops)
 1836 {
 1837         struct proc *p = td->td_proc;
 1838         struct kaiocb *job;
 1839         struct kaioinfo *ki;
 1840         long status, error;
 1841 
 1842         ki = p->p_aioinfo;
 1843         if (ki == NULL)
 1844                 return (EINVAL);
 1845         AIO_LOCK(ki);
 1846         TAILQ_FOREACH(job, &ki->kaio_done, plist) {
 1847                 if (job->ujob == ujob)
 1848                         break;
 1849         }
 1850         if (job != NULL) {
 1851                 MPASS(job->jobflags & KAIOCB_FINISHED);
 1852                 status = job->uaiocb._aiocb_private.status;
 1853                 error = job->uaiocb._aiocb_private.error;
 1854                 td->td_retval[0] = status;
 1855                 td->td_ru.ru_oublock += job->outblock;
 1856                 td->td_ru.ru_inblock += job->inblock;
 1857                 td->td_ru.ru_msgsnd += job->msgsnd;
 1858                 td->td_ru.ru_msgrcv += job->msgrcv;
 1859                 aio_free_entry(job);
 1860                 AIO_UNLOCK(ki);
 1861                 ops->store_error(ujob, error);
 1862                 ops->store_status(ujob, status);
 1863         } else {
 1864                 error = EINVAL;
 1865                 AIO_UNLOCK(ki);
 1866         }
 1867         return (error);
 1868 }
 1869 
 1870 int
 1871 sys_aio_return(struct thread *td, struct aio_return_args *uap)
 1872 {
 1873 
 1874         return (kern_aio_return(td, uap->aiocbp, &aiocb_ops));
 1875 }
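
/*
 * Illustrative userland sketch: poll a queued request with aio_error()
 * and reap it with aio_return(), the call that releases the kernel job
 * via kern_aio_return() above.  Assumes "acb" was previously queued;
 * error handling is elided.
 *
 *      #include <aio.h>
 *      #include <errno.h>
 *      #include <unistd.h>
 *
 *      ssize_t n;
 *      while (aio_error(&acb) == EINPROGRESS)
 *              usleep(1000);           /* or sleep on a kevent/signal */
 *      n = aio_return(&acb);           /* bytes transferred, or -1 */
 */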
 1876 
 1877 /*
 1878  * Allow a process to wake up when any of the I/O requests are completed.
 1879  */
 1880 static int
 1881 kern_aio_suspend(struct thread *td, int njoblist, struct aiocb **ujoblist,
 1882     struct timespec *ts)
 1883 {
 1884         struct proc *p = td->td_proc;
 1885         struct timeval atv;
 1886         struct kaioinfo *ki;
 1887         struct kaiocb *firstjob, *job;
 1888         int error, i, timo;
 1889 
 1890         timo = 0;
 1891         if (ts) {
 1892                 if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
 1893                         return (EINVAL);
 1894 
 1895                 TIMESPEC_TO_TIMEVAL(&atv, ts);
 1896                 if (itimerfix(&atv))
 1897                         return (EINVAL);
 1898                 timo = tvtohz(&atv);
 1899         }
 1900 
 1901         ki = p->p_aioinfo;
 1902         if (ki == NULL)
 1903                 return (EAGAIN);
 1904 
 1905         if (njoblist == 0)
 1906                 return (0);
 1907 
 1908         AIO_LOCK(ki);
 1909         for (;;) {
 1910                 firstjob = NULL;
 1911                 error = 0;
 1912                 TAILQ_FOREACH(job, &ki->kaio_all, allist) {
 1913                         for (i = 0; i < njoblist; i++) {
 1914                                 if (job->ujob == ujoblist[i]) {
 1915                                         if (firstjob == NULL)
 1916                                                 firstjob = job;
 1917                                         if (job->jobflags & KAIOCB_FINISHED)
 1918                                                 goto RETURN;
 1919                                 }
 1920                         }
 1921                 }
 1922                 /* All tasks were finished. */
 1923                 if (firstjob == NULL)
 1924                         break;
 1925 
 1926                 ki->kaio_flags |= KAIO_WAKEUP;
 1927                 error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH,
 1928                     "aiospn", timo);
 1929                 if (error == ERESTART)
 1930                         error = EINTR;
 1931                 if (error)
 1932                         break;
 1933         }
 1934 RETURN:
 1935         AIO_UNLOCK(ki);
 1936         return (error);
 1937 }
 1938 
 1939 int
 1940 sys_aio_suspend(struct thread *td, struct aio_suspend_args *uap)
 1941 {
 1942         struct timespec ts, *tsp;
 1943         struct aiocb **ujoblist;
 1944         int error;
 1945 
 1946         if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX)
 1947                 return (EINVAL);
 1948 
 1949         if (uap->timeout) {
 1950                 /* Get timespec struct. */
 1951                 if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0)
 1952                         return (error);
 1953                 tsp = &ts;
 1954         } else
 1955                 tsp = NULL;
 1956 
 1957         ujoblist = uma_zalloc(aiol_zone, M_WAITOK);
 1958         error = copyin(uap->aiocbp, ujoblist, uap->nent * sizeof(ujoblist[0]));
 1959         if (error == 0)
 1960                 error = kern_aio_suspend(td, uap->nent, ujoblist, tsp);
 1961         uma_zfree(aiol_zone, ujoblist);
 1962         return (error);
 1963 }
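
/*
 * Illustrative userland sketch: block until at least one of several
 * queued requests completes, with a one-second timeout, as serviced by
 * kern_aio_suspend() above.  Assumes "acb1" and "acb2" were previously
 * queued; error handling is elided.
 *
 *      #include <aio.h>
 *      #include <errno.h>
 *
 *      const struct aiocb *list[] = { &acb1, &acb2 };
 *      struct timespec ts = { 1, 0 };  /* tv_sec = 1, tv_nsec = 0 */
 *
 *      if (aio_suspend(list, 2, &ts) == -1 && errno == EAGAIN)
 *              ;                       /* timed out */
 */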
 1964 
 1965 /*
 1966  * aio_cancel cancels any non-physio aio operations not currently in
 1967  * progress.
 1968  */
 1969 int
 1970 sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap)
 1971 {
 1972         struct proc *p = td->td_proc;
 1973         struct kaioinfo *ki;
 1974         struct kaiocb *job, *jobn;
 1975         struct file *fp;
 1976         cap_rights_t rights;
 1977         int error;
 1978         int cancelled = 0;
 1979         int notcancelled = 0;
 1980         struct vnode *vp;
 1981 
 1982         /* Lookup file object. */
 1983         error = fget(td, uap->fd, cap_rights_init(&rights), &fp);
 1984         if (error)
 1985                 return (error);
 1986 
 1987         ki = p->p_aioinfo;
 1988         if (ki == NULL)
 1989                 goto done;
 1990 
 1991         if (fp->f_type == DTYPE_VNODE) {
 1992                 vp = fp->f_vnode;
 1993                 if (vn_isdisk(vp, &error)) {
 1994                         fdrop(fp, td);
 1995                         td->td_retval[0] = AIO_NOTCANCELED;
 1996                         return (0);
 1997                 }
 1998         }
 1999 
 2000         AIO_LOCK(ki);
 2001         TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) {
 2002                 if ((uap->fd == job->uaiocb.aio_fildes) &&
 2003                     ((uap->aiocbp == NULL) ||
 2004                      (uap->aiocbp == job->ujob))) {
 2005                         if (aio_cancel_job(p, ki, job)) {
 2006                                 cancelled++;
 2007                         } else {
 2008                                 notcancelled++;
 2009                         }
 2010                         if (uap->aiocbp != NULL)
 2011                                 break;
 2012                 }
 2013         }
 2014         AIO_UNLOCK(ki);
 2015 
 2016 done:
 2017         fdrop(fp, td);
 2018 
 2019         if (uap->aiocbp != NULL) {
 2020                 if (cancelled) {
 2021                         td->td_retval[0] = AIO_CANCELED;
 2022                         return (0);
 2023                 }
 2024         }
 2025 
 2026         if (notcancelled) {
 2027                 td->td_retval[0] = AIO_NOTCANCELED;
 2028                 return (0);
 2029         }
 2030 
 2031         if (cancelled) {
 2032                 td->td_retval[0] = AIO_CANCELED;
 2033                 return (0);
 2034         }
 2035 
 2036         td->td_retval[0] = AIO_ALLDONE;
 2037 
 2038         return (0);
 2039 }
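
/*
 * Illustrative userland sketch: attempt to cancel one outstanding
 * request and interpret the tri-state result produced above.  Assumes
 * "fd" and a previously queued "acb"; error handling is elided.
 *
 *      #include <aio.h>
 *
 *      switch (aio_cancel(fd, &acb)) {
 *      case AIO_CANCELED:      /* cancelled before it ran */
 *              break;
 *      case AIO_NOTCANCELED:   /* already in progress; reap it later */
 *              break;
 *      case AIO_ALLDONE:       /* already complete; aio_return() it */
 *              break;
 *      }
 */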
 2040 
 2041 /*
 2042  * aio_error is implemented at the kernel level for compatibility purposes
 2043  * only.  For a user mode async implementation, it would be best to do it in
 2044  * a userland subroutine.
 2045  */
 2046 static int
 2047 kern_aio_error(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops)
 2048 {
 2049         struct proc *p = td->td_proc;
 2050         struct kaiocb *job;
 2051         struct kaioinfo *ki;
 2052         int status;
 2053 
 2054         ki = p->p_aioinfo;
 2055         if (ki == NULL) {
 2056                 td->td_retval[0] = EINVAL;
 2057                 return (0);
 2058         }
 2059 
 2060         AIO_LOCK(ki);
 2061         TAILQ_FOREACH(job, &ki->kaio_all, allist) {
 2062                 if (job->ujob == ujob) {
 2063                         if (job->jobflags & KAIOCB_FINISHED)
 2064                                 td->td_retval[0] =
 2065                                         job->uaiocb._aiocb_private.error;
 2066                         else
 2067                                 td->td_retval[0] = EINPROGRESS;
 2068                         AIO_UNLOCK(ki);
 2069                         return (0);
 2070                 }
 2071         }
 2072         AIO_UNLOCK(ki);
 2073 
 2074         /*
 2075          * Hack for failure of aio_aqueue.
 2076          */
 2077         status = ops->fetch_status(ujob);
 2078         if (status == -1) {
 2079                 td->td_retval[0] = ops->fetch_error(ujob);
 2080                 return (0);
 2081         }
 2082 
 2083         td->td_retval[0] = EINVAL;
 2084         return (0);
 2085 }
 2086 
 2087 int
 2088 sys_aio_error(struct thread *td, struct aio_error_args *uap)
 2089 {
 2090 
 2091         return (kern_aio_error(td, uap->aiocbp, &aiocb_ops));
 2092 }
 2093 
 2094 /* syscall - asynchronous read from a file (REALTIME) */
 2095 #ifdef COMPAT_FREEBSD6
 2096 int
 2097 freebsd6_aio_read(struct thread *td, struct freebsd6_aio_read_args *uap)
 2098 {
 2099 
 2100         return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ,
 2101             &aiocb_ops_osigevent));
 2102 }
 2103 #endif
 2104 
 2105 int
 2106 sys_aio_read(struct thread *td, struct aio_read_args *uap)
 2107 {
 2108 
 2109         return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops));
 2110 }
 2111 
 2112 /* syscall - asynchronous write to a file (REALTIME) */
 2113 #ifdef COMPAT_FREEBSD6
 2114 int
 2115 freebsd6_aio_write(struct thread *td, struct freebsd6_aio_write_args *uap)
 2116 {
 2117 
 2118         return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE,
 2119             &aiocb_ops_osigevent));
 2120 }
 2121 #endif
 2122 
 2123 int
 2124 sys_aio_write(struct thread *td, struct aio_write_args *uap)
 2125 {
 2126 
 2127         return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops));
 2128 }
 2129 
 2130 int
 2131 sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap)
 2132 {
 2133 
 2134         return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops));
 2135 }
 2136 
 2137 static int
 2138 kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list,
 2139     struct aiocb **acb_list, int nent, struct sigevent *sig,
 2140     struct aiocb_ops *ops)
 2141 {
 2142         struct proc *p = td->td_proc;
 2143         struct aiocb *job;
 2144         struct kaioinfo *ki;
 2145         struct aioliojob *lj;
 2146         struct kevent kev;
 2147         int error;
 2148         int nerror;
 2149         int i;
 2150 
 2151         if ((mode != LIO_NOWAIT) && (mode != LIO_WAIT))
 2152                 return (EINVAL);
 2153 
 2154         if (nent < 0 || nent > AIO_LISTIO_MAX)
 2155                 return (EINVAL);
 2156 
 2157         if (p->p_aioinfo == NULL)
 2158                 aio_init_aioinfo(p);
 2159 
 2160         ki = p->p_aioinfo;
 2161 
 2162         lj = uma_zalloc(aiolio_zone, M_WAITOK);
 2163         lj->lioj_flags = 0;
 2164         lj->lioj_count = 0;
 2165         lj->lioj_finished_count = 0;
 2166         knlist_init_mtx(&lj->klist, AIO_MTX(ki));
 2167         ksiginfo_init(&lj->lioj_ksi);
 2168 
 2169         /*
 2170          * Setup signal.
 2171          */
 2172         if (sig && (mode == LIO_NOWAIT)) {
 2173                 bcopy(sig, &lj->lioj_signal, sizeof(lj->lioj_signal));
 2174                 if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
 2175                         /* Assume only new style KEVENT */
 2176                         kev.filter = EVFILT_LIO;
 2177                         kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1;
 2178                         kev.ident = (uintptr_t)uacb_list; /* something unique */
 2179                         kev.data = (intptr_t)lj;
 2180                         /* pass user defined sigval data */
 2181                         kev.udata = lj->lioj_signal.sigev_value.sival_ptr;
 2182                         error = kqfd_register(
 2183                             lj->lioj_signal.sigev_notify_kqueue, &kev, td, 1);
 2184                         if (error) {
 2185                                 uma_zfree(aiolio_zone, lj);
 2186                                 return (error);
 2187                         }
 2188                 } else if (lj->lioj_signal.sigev_notify == SIGEV_NONE) {
 2189                         ;
 2190                 } else if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
 2191                     lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID) {
 2192                         if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) {
 2193                                 uma_zfree(aiolio_zone, lj);
 2194                                 return (EINVAL);
 2195                         }
 2196                         lj->lioj_flags |= LIOJ_SIGNAL;
 2197                 } else {
 2198                         uma_zfree(aiolio_zone, lj);
 2199                         return (EINVAL);
 2200                 }
 2201         }
 2202 
 2203         AIO_LOCK(ki);
 2204         TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list);
 2205         /*
 2206          * Add an extra aiocb count to prevent the lio from being freed by
 2207          * other threads doing aio_waitcomplete or aio_return, and to
 2208          * prevent the event from being sent until we have queued all
 2209          * tasks.
 2210          */
 2211         lj->lioj_count = 1;
 2212         AIO_UNLOCK(ki);
 2213 
 2214         /*
 2215          * Get pointers to the list of I/O requests.
 2216          */
 2217         nerror = 0;
 2218         for (i = 0; i < nent; i++) {
 2219                 job = acb_list[i];
 2220                 if (job != NULL) {
 2221                         error = aio_aqueue(td, job, lj, LIO_NOP, ops);
 2222                         if (error != 0)
 2223                                 nerror++;
 2224                 }
 2225         }
 2226 
 2227         error = 0;
 2228         AIO_LOCK(ki);
 2229         if (mode == LIO_WAIT) {
 2230                 while (lj->lioj_count - 1 != lj->lioj_finished_count) {
 2231                         ki->kaio_flags |= KAIO_WAKEUP;
 2232                         error = msleep(&p->p_aioinfo, AIO_MTX(ki),
 2233                             PRIBIO | PCATCH, "aiospn", 0);
 2234                         if (error == ERESTART)
 2235                                 error = EINTR;
 2236                         if (error)
 2237                                 break;
 2238                 }
 2239         } else {
 2240                 if (lj->lioj_count - 1 == lj->lioj_finished_count) {
 2241                         if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
 2242                                 lj->lioj_flags |= LIOJ_KEVENT_POSTED;
 2243                                 KNOTE_LOCKED(&lj->klist, 1);
 2244                         }
 2245                         if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED))
 2246                             == LIOJ_SIGNAL
 2247                             && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
 2248                             lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) {
 2249                                 aio_sendsig(p, &lj->lioj_signal,
 2250                                             &lj->lioj_ksi);
 2251                                 lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
 2252                         }
 2253                 }
 2254         }
 2255         lj->lioj_count--;
 2256         if (lj->lioj_count == 0) {
 2257                 TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
 2258                 knlist_delete(&lj->klist, curthread, 1);
 2259                 PROC_LOCK(p);
 2260                 sigqueue_take(&lj->lioj_ksi);
 2261                 PROC_UNLOCK(p);
 2262                 AIO_UNLOCK(ki);
 2263                 uma_zfree(aiolio_zone, lj);
 2264         } else
 2265                 AIO_UNLOCK(ki);
 2266 
 2267         if (nerror)
 2268                 return (EIO);
 2269         return (error);
 2270 }
 2271 
 2272 /* syscall - list directed I/O (REALTIME) */
 2273 #ifdef COMPAT_FREEBSD6
 2274 int
 2275 freebsd6_lio_listio(struct thread *td, struct freebsd6_lio_listio_args *uap)
 2276 {
 2277         struct aiocb **acb_list;
 2278         struct sigevent *sigp, sig;
 2279         struct osigevent osig;
 2280         int error, nent;
 2281 
 2282         if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
 2283                 return (EINVAL);
 2284 
 2285         nent = uap->nent;
 2286         if (nent < 0 || nent > AIO_LISTIO_MAX)
 2287                 return (EINVAL);
 2288 
 2289         if (uap->sig && (uap->mode == LIO_NOWAIT)) {
 2290                 error = copyin(uap->sig, &osig, sizeof(osig));
 2291                 if (error)
 2292                         return (error);
 2293                 error = convert_old_sigevent(&osig, &sig);
 2294                 if (error)
 2295                         return (error);
 2296                 sigp = &sig;
 2297         } else
 2298                 sigp = NULL;
 2299 
 2300         acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
 2301         error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0]));
 2302         if (error == 0)
 2303                 error = kern_lio_listio(td, uap->mode,
 2304                     (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
 2305                     &aiocb_ops_osigevent);
 2306         free(acb_list, M_LIO);
 2307         return (error);
 2308 }
 2309 #endif
 2310 
 2311 /* syscall - list directed I/O (REALTIME) */
 2312 int
 2313 sys_lio_listio(struct thread *td, struct lio_listio_args *uap)
 2314 {
 2315         struct aiocb **acb_list;
 2316         struct sigevent *sigp, sig;
 2317         int error, nent;
 2318 
 2319         if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
 2320                 return (EINVAL);
 2321 
 2322         nent = uap->nent;
 2323         if (nent < 0 || nent > AIO_LISTIO_MAX)
 2324                 return (EINVAL);
 2325 
 2326         if (uap->sig && (uap->mode == LIO_NOWAIT)) {
 2327                 error = copyin(uap->sig, &sig, sizeof(sig));
 2328                 if (error)
 2329                         return (error);
 2330                 sigp = &sig;
 2331         } else
 2332                 sigp = NULL;
 2333 
 2334         acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
 2335         error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0]));
 2336         if (error == 0)
 2337                 error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list,
 2338                     nent, sigp, &aiocb_ops);
 2339         free(acb_list, M_LIO);
 2340         return (error);
 2341 }
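
/*
 * Illustrative userland sketch: submit a batch of requests and wait
 * for all of them, the LIO_WAIT path of kern_lio_listio() above.
 * Assumes "acb_r" and "acb_w" were initialized with aio_lio_opcode set
 * to LIO_READ and LIO_WRITE respectively; error handling is elided.
 *
 *      #include <aio.h>
 *      #include <errno.h>
 *
 *      struct aiocb *list[] = { &acb_r, &acb_w };
 *
 *      if (lio_listio(LIO_WAIT, list, 2, NULL) == -1 && errno == EIO)
 *              ;       /* at least one request failed to queue */
 */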
 2342 
 2343 static void
 2344 aio_physwakeup(struct bio *bp)
 2345 {
 2346         struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
 2347         struct proc *userp;
 2348         struct kaioinfo *ki;
 2349         size_t nbytes;
 2350         int error, nblks;
 2351 
 2352         /* Release mapping into kernel space. */
 2353         userp = job->userproc;
 2354         ki = userp->p_aioinfo;
 2355         if (job->pbuf) {
 2356                 pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages);
 2357                 relpbuf(job->pbuf, NULL);
 2358                 job->pbuf = NULL;
 2359                 atomic_subtract_int(&num_buf_aio, 1);
 2360                 AIO_LOCK(ki);
 2361                 ki->kaio_buffer_count--;
 2362                 AIO_UNLOCK(ki);
 2363         }
 2364         vm_page_unhold_pages(job->pages, job->npages);
 2365 
 2366         bp = job->bp;
 2367         job->bp = NULL;
 2368         nbytes = job->uaiocb.aio_nbytes - bp->bio_resid;
 2369         error = 0;
 2370         if (bp->bio_flags & BIO_ERROR)
 2371                 error = bp->bio_error;
 2372         nblks = btodb(nbytes);
 2373         if (job->uaiocb.aio_lio_opcode == LIO_WRITE)
 2374                 job->outblock += nblks;
 2375         else
 2376                 job->inblock += nblks;
 2377 
 2378         if (error)
 2379                 aio_complete(job, -1, error);
 2380         else
 2381                 aio_complete(job, nbytes, 0);
 2382 
 2383         g_destroy_bio(bp);
 2384 }
 2385 
 2386 /* syscall - wait for the next completion of an aio request */
 2387 static int
 2388 kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp,
 2389     struct timespec *ts, struct aiocb_ops *ops)
 2390 {
 2391         struct proc *p = td->td_proc;
 2392         struct timeval atv;
 2393         struct kaioinfo *ki;
 2394         struct kaiocb *job;
 2395         struct aiocb *ujob;
 2396         long error, status;
 2397         int timo;
 2398 
 2399         ops->store_aiocb(ujobp, NULL);
 2400 
 2401         if (ts == NULL) {
 2402                 timo = 0;
 2403         } else if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
 2404                 timo = -1;
 2405         } else {
 2406                 if ((ts->tv_nsec < 0) || (ts->tv_nsec >= 1000000000))
 2407                         return (EINVAL);
 2408 
 2409                 TIMESPEC_TO_TIMEVAL(&atv, ts);
 2410                 if (itimerfix(&atv))
 2411                         return (EINVAL);
 2412                 timo = tvtohz(&atv);
 2413         }
 2414 
 2415         if (p->p_aioinfo == NULL)
 2416                 aio_init_aioinfo(p);
 2417         ki = p->p_aioinfo;
 2418 
 2419         error = 0;
 2420         job = NULL;
 2421         AIO_LOCK(ki);
 2422         while ((job = TAILQ_FIRST(&ki->kaio_done)) == NULL) {
 2423                 if (timo == -1) {
 2424                         error = EWOULDBLOCK;
 2425                         break;
 2426                 }
 2427                 ki->kaio_flags |= KAIO_WAKEUP;
 2428                 error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH,
 2429                     "aiowc", timo);
 2430                 if (timo && error == ERESTART)
 2431                         error = EINTR;
 2432                 if (error)
 2433                         break;
 2434         }
 2435 
 2436         if (job != NULL) {
 2437                 MPASS(job->jobflags & KAIOCB_FINISHED);
 2438                 ujob = job->ujob;
 2439                 status = job->uaiocb._aiocb_private.status;
 2440                 error = job->uaiocb._aiocb_private.error;
 2441                 td->td_retval[0] = status;
 2442                 td->td_ru.ru_oublock += job->outblock;
 2443                 td->td_ru.ru_inblock += job->inblock;
 2444                 td->td_ru.ru_msgsnd += job->msgsnd;
 2445                 td->td_ru.ru_msgrcv += job->msgrcv;
 2446                 aio_free_entry(job);
 2447                 AIO_UNLOCK(ki);
 2448                 ops->store_aiocb(ujobp, ujob);
 2449                 ops->store_error(ujob, error);
 2450                 ops->store_status(ujob, status);
 2451         } else
 2452                 AIO_UNLOCK(ki);
 2453 
 2454         return (error);
 2455 }
 2456 
 2457 int
 2458 sys_aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap)
 2459 {
 2460         struct timespec ts, *tsp;
 2461         int error;
 2462 
 2463         if (uap->timeout) {
 2464                 /* Get timespec struct. */
 2465                 error = copyin(uap->timeout, &ts, sizeof(ts));
 2466                 if (error)
 2467                         return (error);
 2468                 tsp = &ts;
 2469         } else
 2470                 tsp = NULL;
 2471 
 2472         return (kern_aio_waitcomplete(td, uap->aiocbp, tsp, &aiocb_ops));
 2473 }
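
/*
 * Illustrative userland sketch: reap whichever request finishes next
 * with the FreeBSD-specific aio_waitcomplete(), which also performs
 * the aio_return()-style bookkeeping seen above.  Error handling is
 * elided.
 *
 *      #include <aio.h>
 *
 *      struct aiocb *done;
 *      ssize_t n = aio_waitcomplete(&done, NULL); /* NULL: block forever */
 */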
 2474 
 2475 static int
 2476 kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob,
 2477     struct aiocb_ops *ops)
 2478 {
 2479         struct proc *p = td->td_proc;
 2480         struct kaioinfo *ki;
 2481 
 2482         if (op != O_SYNC) /* XXX lack of O_DSYNC */
 2483                 return (EINVAL);
 2484         ki = p->p_aioinfo;
 2485         if (ki == NULL)
 2486                 aio_init_aioinfo(p);
 2487         return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops));
 2488 }
 2489 
 2490 int
 2491 sys_aio_fsync(struct thread *td, struct aio_fsync_args *uap)
 2492 {
 2493 
 2494         return (kern_aio_fsync(td, uap->op, uap->aiocbp, &aiocb_ops));
 2495 }
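
/*
 * Illustrative userland sketch: queue an asynchronous fsync of all
 * requests previously queued for "fd"; only O_SYNC is accepted by
 * kern_aio_fsync() above.  Error handling is elided.
 *
 *      #include <aio.h>
 *      #include <fcntl.h>
 *      #include <string.h>
 *
 *      struct aiocb acb;
 *      memset(&acb, 0, sizeof(acb));
 *      acb.aio_fildes = fd;
 *      aio_fsync(O_SYNC, &acb);
 */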
 2496 
 2497 /* kqueue attach function */
 2498 static int
 2499 filt_aioattach(struct knote *kn)
 2500 {
 2501         struct kaiocb *job = (struct kaiocb *)kn->kn_sdata;
 2502 
 2503         /*
 2504          * The job pointer must be validated before using it, so
 2505          * registration is restricted to the kernel; the user cannot
 2506          * set EV_FLAG1.
 2507          */
 2508         if ((kn->kn_flags & EV_FLAG1) == 0)
 2509                 return (EPERM);
 2510         kn->kn_ptr.p_aio = job;
 2511         kn->kn_flags &= ~EV_FLAG1;
 2512 
 2513         knlist_add(&job->klist, kn, 0);
 2514 
 2515         return (0);
 2516 }
 2517 
 2518 /* kqueue detach function */
 2519 static void
 2520 filt_aiodetach(struct knote *kn)
 2521 {
 2522         struct knlist *knl;
 2523 
 2524         knl = &kn->kn_ptr.p_aio->klist;
 2525         knl->kl_lock(knl->kl_lockarg);
 2526         if (!knlist_empty(knl))
 2527                 knlist_remove(knl, kn, 1);
 2528         knl->kl_unlock(knl->kl_lockarg);
 2529 }
 2530 
 2531 /* kqueue filter function */
 2532 /*ARGSUSED*/
 2533 static int
 2534 filt_aio(struct knote *kn, long hint)
 2535 {
 2536         struct kaiocb *job = kn->kn_ptr.p_aio;
 2537 
 2538         kn->kn_data = job->uaiocb._aiocb_private.error;
 2539         if (!(job->jobflags & KAIOCB_FINISHED))
 2540                 return (0);
 2541         kn->kn_flags |= EV_EOF;
 2542         return (1);
 2543 }
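
/*
 * Illustrative userland sketch of the consumer side of the EVFILT_AIO
 * filter implemented above.  After queueing requests with SIGEV_KEVENT
 * (see the sketch following aio_aqueue()), completions are collected
 * from the kqueue "kq"; kev.ident is the userland aiocb pointer and
 * kev.udata is the sigev_value supplied at submission.
 *
 *      #include <aio.h>
 *      #include <sys/event.h>
 *
 *      struct kevent kev;
 *      while (kevent(kq, NULL, 0, &kev, 1, NULL) == 1) {
 *              struct aiocb *acb = (struct aiocb *)kev.ident;
 *              (void)aio_return(acb);
 *      }
 */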
 2544 
 2545 /* kqueue attach function */
 2546 static int
 2547 filt_lioattach(struct knote *kn)
 2548 {
 2549         struct aioliojob * lj = (struct aioliojob *)kn->kn_sdata;
 2550 
 2551         /*
 2552          * The aioliojob pointer must be validated before using it, so
 2553          * registration is restricted to the kernel; the user cannot
 2554          * set EV_FLAG1.
 2555          */
 2556         if ((kn->kn_flags & EV_FLAG1) == 0)
 2557                 return (EPERM);
 2558         kn->kn_ptr.p_lio = lj;
 2559         kn->kn_flags &= ~EV_FLAG1;
 2560 
 2561         knlist_add(&lj->klist, kn, 0);
 2562 
 2563         return (0);
 2564 }
 2565 
 2566 /* kqueue detach function */
 2567 static void
 2568 filt_liodetach(struct knote *kn)
 2569 {
 2570         struct knlist *knl;
 2571 
 2572         knl = &kn->kn_ptr.p_lio->klist;
 2573         knl->kl_lock(knl->kl_lockarg);
 2574         if (!knlist_empty(knl))
 2575                 knlist_remove(knl, kn, 1);
 2576         knl->kl_unlock(knl->kl_lockarg);
 2577 }
 2578 
 2579 /* kqueue filter function */
 2580 /*ARGSUSED*/
 2581 static int
 2582 filt_lio(struct knote *kn, long hint)
 2583 {
 2584         struct aioliojob * lj = kn->kn_ptr.p_lio;
 2585 
 2586         return (lj->lioj_flags & LIOJ_KEVENT_POSTED);
 2587 }
 2588 
 2589 #ifdef COMPAT_FREEBSD32
 2590 #include <sys/mount.h>
 2591 #include <sys/socket.h>
 2592 #include <compat/freebsd32/freebsd32.h>
 2593 #include <compat/freebsd32/freebsd32_proto.h>
 2594 #include <compat/freebsd32/freebsd32_signal.h>
 2595 #include <compat/freebsd32/freebsd32_syscall.h>
 2596 #include <compat/freebsd32/freebsd32_util.h>
 2597 
 2598 struct __aiocb_private32 {
 2599         int32_t status;
 2600         int32_t error;
 2601         uint32_t kernelinfo;
 2602 };
 2603 
 2604 #ifdef COMPAT_FREEBSD6
 2605 typedef struct oaiocb32 {
 2606         int     aio_fildes;             /* File descriptor */
 2607         uint64_t aio_offset __packed;   /* File offset for I/O */
 2608         uint32_t aio_buf;               /* I/O buffer in process space */
 2609         uint32_t aio_nbytes;            /* Number of bytes for I/O */
 2610         struct  osigevent32 aio_sigevent; /* Signal to deliver */
 2611         int     aio_lio_opcode;         /* LIO opcode */
 2612         int     aio_reqprio;            /* Request priority -- ignored */
 2613         struct  __aiocb_private32 _aiocb_private;
 2614 } oaiocb32_t;
 2615 #endif
 2616 
 2617 typedef struct aiocb32 {
 2618         int32_t aio_fildes;             /* File descriptor */
 2619         uint64_t aio_offset __packed;   /* File offset for I/O */
 2620         uint32_t aio_buf;               /* I/O buffer in process space */
 2621         uint32_t aio_nbytes;            /* Number of bytes for I/O */
 2622         int     __spare__[2];
 2623         uint32_t __spare2__;
 2624         int     aio_lio_opcode;         /* LIO opcode */
 2625         int     aio_reqprio;            /* Request priority -- ignored */
 2626         struct  __aiocb_private32 _aiocb_private;
 2627         struct  sigevent32 aio_sigevent;        /* Signal to deliver */
 2628 } aiocb32_t;
 2629 
 2630 #ifdef COMPAT_FREEBSD6
 2631 static int
 2632 convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig)
 2633 {
 2634 
 2635         /*
 2636          * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are
 2637          * supported by AIO with the old sigevent structure.
 2638          */
 2639         CP(*osig, *nsig, sigev_notify);
 2640         switch (nsig->sigev_notify) {
 2641         case SIGEV_NONE:
 2642                 break;
 2643         case SIGEV_SIGNAL:
 2644                 nsig->sigev_signo = osig->__sigev_u.__sigev_signo;
 2645                 break;
 2646         case SIGEV_KEVENT:
 2647                 nsig->sigev_notify_kqueue =
 2648                     osig->__sigev_u.__sigev_notify_kqueue;
 2649                 PTRIN_CP(*osig, *nsig, sigev_value.sival_ptr);
 2650                 break;
 2651         default:
 2652                 return (EINVAL);
 2653         }
 2654         return (0);
 2655 }
 2656 
 2657 static int
 2658 aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob)
 2659 {
 2660         struct oaiocb32 job32;
 2661         int error;
 2662 
 2663         bzero(kjob, sizeof(struct aiocb));
 2664         error = copyin(ujob, &job32, sizeof(job32));
 2665         if (error)
 2666                 return (error);
 2667 
 2668         CP(job32, *kjob, aio_fildes);
 2669         CP(job32, *kjob, aio_offset);
 2670         PTRIN_CP(job32, *kjob, aio_buf);
 2671         CP(job32, *kjob, aio_nbytes);
 2672         CP(job32, *kjob, aio_lio_opcode);
 2673         CP(job32, *kjob, aio_reqprio);
 2674         CP(job32, *kjob, _aiocb_private.status);
 2675         CP(job32, *kjob, _aiocb_private.error);
 2676         PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
 2677         return (convert_old_sigevent32(&job32.aio_sigevent,
 2678             &kjob->aio_sigevent));
 2679 }
 2680 #endif
 2681 
 2682 static int
 2683 aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob)
 2684 {
 2685         struct aiocb32 job32;
 2686         int error;
 2687 
 2688         error = copyin(ujob, &job32, sizeof(job32));
 2689         if (error)
 2690                 return (error);
 2691         CP(job32, *kjob, aio_fildes);
 2692         CP(job32, *kjob, aio_offset);
 2693         PTRIN_CP(job32, *kjob, aio_buf);
 2694         CP(job32, *kjob, aio_nbytes);
 2695         CP(job32, *kjob, aio_lio_opcode);
 2696         CP(job32, *kjob, aio_reqprio);
 2697         CP(job32, *kjob, _aiocb_private.status);
 2698         CP(job32, *kjob, _aiocb_private.error);
 2699         PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo);
 2700         return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent));
 2701 }
 2702 
 2703 static long
 2704 aiocb32_fetch_status(struct aiocb *ujob)
 2705 {
 2706         struct aiocb32 *ujob32;
 2707 
 2708         ujob32 = (struct aiocb32 *)ujob;
 2709         return (fuword32(&ujob32->_aiocb_private.status));
 2710 }
 2711 
 2712 static long
 2713 aiocb32_fetch_error(struct aiocb *ujob)
 2714 {
 2715         struct aiocb32 *ujob32;
 2716 
 2717         ujob32 = (struct aiocb32 *)ujob;
 2718         return (fuword32(&ujob32->_aiocb_private.error));
 2719 }
 2720 
 2721 static int
 2722 aiocb32_store_status(struct aiocb *ujob, long status)
 2723 {
 2724         struct aiocb32 *ujob32;
 2725 
 2726         ujob32 = (struct aiocb32 *)ujob;
 2727         return (suword32(&ujob32->_aiocb_private.status, status));
 2728 }
 2729 
 2730 static int
 2731 aiocb32_store_error(struct aiocb *ujob, long error)
 2732 {
 2733         struct aiocb32 *ujob32;
 2734 
 2735         ujob32 = (struct aiocb32 *)ujob;
 2736         return (suword32(&ujob32->_aiocb_private.error, error));
 2737 }
 2738 
 2739 static int
 2740 aiocb32_store_kernelinfo(struct aiocb *ujob, long jobref)
 2741 {
 2742         struct aiocb32 *ujob32;
 2743 
 2744         ujob32 = (struct aiocb32 *)ujob;
 2745         return (suword32(&ujob32->_aiocb_private.kernelinfo, jobref));
 2746 }
 2747 
 2748 static int
 2749 aiocb32_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
 2750 {
 2751 
 2752         return (suword32(ujobp, (long)ujob));
 2753 }
 2754 
 2755 static struct aiocb_ops aiocb32_ops = {
 2756         .copyin = aiocb32_copyin,
 2757         .fetch_status = aiocb32_fetch_status,
 2758         .fetch_error = aiocb32_fetch_error,
 2759         .store_status = aiocb32_store_status,
 2760         .store_error = aiocb32_store_error,
 2761         .store_kernelinfo = aiocb32_store_kernelinfo,
 2762         .store_aiocb = aiocb32_store_aiocb,
 2763 };
 2764 
 2765 #ifdef COMPAT_FREEBSD6
 2766 static struct aiocb_ops aiocb32_ops_osigevent = {
 2767         .copyin = aiocb32_copyin_old_sigevent,
 2768         .fetch_status = aiocb32_fetch_status,
 2769         .fetch_error = aiocb32_fetch_error,
 2770         .store_status = aiocb32_store_status,
 2771         .store_error = aiocb32_store_error,
 2772         .store_kernelinfo = aiocb32_store_kernelinfo,
 2773         .store_aiocb = aiocb32_store_aiocb,
 2774 };
 2775 #endif
 2776 
 2777 int
 2778 freebsd32_aio_return(struct thread *td, struct freebsd32_aio_return_args *uap)
 2779 {
 2780 
 2781         return (kern_aio_return(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops));
 2782 }
 2783 
 2784 int
 2785 freebsd32_aio_suspend(struct thread *td, struct freebsd32_aio_suspend_args *uap)
 2786 {
 2787         struct timespec32 ts32;
 2788         struct timespec ts, *tsp;
 2789         struct aiocb **ujoblist;
 2790         uint32_t *ujoblist32;
 2791         int error, i;
 2792 
 2793         if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX)
 2794                 return (EINVAL);
 2795 
 2796         if (uap->timeout) {
 2797                 /* Get timespec struct. */
 2798                 if ((error = copyin(uap->timeout, &ts32, sizeof(ts32))) != 0)
 2799                         return (error);
 2800                 CP(ts32, ts, tv_sec);
 2801                 CP(ts32, ts, tv_nsec);
 2802                 tsp = &ts;
 2803         } else
 2804                 tsp = NULL;
 2805 
 2806         ujoblist = uma_zalloc(aiol_zone, M_WAITOK);
 2807         ujoblist32 = (uint32_t *)ujoblist;
 2808         error = copyin(uap->aiocbp, ujoblist32, uap->nent *
 2809             sizeof(ujoblist32[0]));
 2810         if (error == 0) {
 2811                 for (i = uap->nent - 1; i >= 0; i--)
 2812                         ujoblist[i] = PTRIN(ujoblist32[i]);
 2813 
 2814                 error = kern_aio_suspend(td, uap->nent, ujoblist, tsp);
 2815         }
 2816         uma_zfree(aiol_zone, ujoblist);
 2817         return (error);
 2818 }
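
/*
 * The copyin above reads the user's 32-bit pointer array into the
 * front of the same buffer that holds the expanded 64-bit pointers, so
 * the widening loop must walk from the highest index down to 0;
 * ascending order would overwrite 32-bit entries before they had been
 * read.
 */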
 2819 
 2820 int
 2821 freebsd32_aio_error(struct thread *td, struct freebsd32_aio_error_args *uap)
 2822 {
 2823 
 2824         return (kern_aio_error(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops));
 2825 }
 2826 
 2827 #ifdef COMPAT_FREEBSD6
 2828 int
 2829 freebsd6_freebsd32_aio_read(struct thread *td,
 2830     struct freebsd6_freebsd32_aio_read_args *uap)
 2831 {
 2832 
 2833         return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ,
 2834             &aiocb32_ops_osigevent));
 2835 }
 2836 #endif
 2837 
 2838 int
 2839 freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap)
 2840 {
 2841 
 2842         return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ,
 2843             &aiocb32_ops));
 2844 }
 2845 
 2846 #ifdef COMPAT_FREEBSD6
 2847 int
 2848 freebsd6_freebsd32_aio_write(struct thread *td,
 2849     struct freebsd6_freebsd32_aio_write_args *uap)
 2850 {
 2851 
 2852         return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE,
 2853             &aiocb32_ops_osigevent));
 2854 }
 2855 #endif
 2856 
 2857 int
 2858 freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap)
 2859 {
 2860 
 2861         return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE,
 2862             &aiocb32_ops));
 2863 }
 2864 
 2865 int
 2866 freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap)
 2867 {
 2868 
 2869         return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK,
 2870             &aiocb32_ops));
 2871 }
 2872 
 2873 int
 2874 freebsd32_aio_waitcomplete(struct thread *td,
 2875     struct freebsd32_aio_waitcomplete_args *uap)
 2876 {
 2877         struct timespec32 ts32;
 2878         struct timespec ts, *tsp;
 2879         int error;
 2880 
 2881         if (uap->timeout) {
 2882                 /* Get timespec struct. */
 2883                 error = copyin(uap->timeout, &ts32, sizeof(ts32));
 2884                 if (error)
 2885                         return (error);
 2886                 CP(ts32, ts, tv_sec);
 2887                 CP(ts32, ts, tv_nsec);
 2888                 tsp = &ts;
 2889         } else
 2890                 tsp = NULL;
 2891 
 2892         return (kern_aio_waitcomplete(td, (struct aiocb **)uap->aiocbp, tsp,
 2893             &aiocb32_ops));
 2894 }
 2895 
 2896 int
 2897 freebsd32_aio_fsync(struct thread *td, struct freebsd32_aio_fsync_args *uap)
 2898 {
 2899 
 2900         return (kern_aio_fsync(td, uap->op, (struct aiocb *)uap->aiocbp,
 2901             &aiocb32_ops));
 2902 }
 2903 
 2904 #ifdef COMPAT_FREEBSD6
 2905 int
 2906 freebsd6_freebsd32_lio_listio(struct thread *td,
 2907     struct freebsd6_freebsd32_lio_listio_args *uap)
 2908 {
 2909         struct aiocb **acb_list;
 2910         struct sigevent *sigp, sig;
 2911         struct osigevent32 osig;
 2912         uint32_t *acb_list32;
 2913         int error, i, nent;
 2914 
 2915         if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
 2916                 return (EINVAL);
 2917 
 2918         nent = uap->nent;
 2919         if (nent < 0 || nent > AIO_LISTIO_MAX)
 2920                 return (EINVAL);
 2921 
 2922         if (uap->sig && (uap->mode == LIO_NOWAIT)) {
 2923                 error = copyin(uap->sig, &osig, sizeof(osig));
 2924                 if (error)
 2925                         return (error);
 2926                 error = convert_old_sigevent32(&osig, &sig);
 2927                 if (error)
 2928                         return (error);
 2929                 sigp = &sig;
 2930         } else
 2931                 sigp = NULL;
 2932 
 2933         acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK);
 2934         error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t));
 2935         if (error) {
 2936                 free(acb_list32, M_LIO);
 2937                 return (error);
 2938         }
 2939         acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
 2940         for (i = 0; i < nent; i++)
 2941                 acb_list[i] = PTRIN(acb_list32[i]);
 2942         free(acb_list32, M_LIO);
 2943 
 2944         error = kern_lio_listio(td, uap->mode,
 2945             (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
 2946             &aiocb32_ops_osigevent);
 2947         free(acb_list, M_LIO);
 2948         return (error);
 2949 }
 2950 #endif
 2951 
 2952 int
 2953 freebsd32_lio_listio(struct thread *td, struct freebsd32_lio_listio_args *uap)
 2954 {
 2955         struct aiocb **acb_list;
 2956         struct sigevent *sigp, sig;
 2957         struct sigevent32 sig32;
 2958         uint32_t *acb_list32;
 2959         int error, i, nent;
 2960 
 2961         if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
 2962                 return (EINVAL);
 2963 
 2964         nent = uap->nent;
 2965         if (nent < 0 || nent > AIO_LISTIO_MAX)
 2966                 return (EINVAL);
 2967 
 2968         if (uap->sig && (uap->mode == LIO_NOWAIT)) {
 2969                 error = copyin(uap->sig, &sig32, sizeof(sig32));
 2970                 if (error)
 2971                         return (error);
 2972                 error = convert_sigevent32(&sig32, &sig);
 2973                 if (error)
 2974                         return (error);
 2975                 sigp = &sig;
 2976         } else
 2977                 sigp = NULL;
 2978 
 2979         acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK);
 2980         error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t));
 2981         if (error) {
 2982                 free(acb_list32, M_LIO);
 2983                 return (error);
 2984         }
 2985         acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
 2986         for (i = 0; i < nent; i++)
 2987                 acb_list[i] = PTRIN(acb_list32[i]);
 2988         free(acb_list32, M_LIO);
 2989 
 2990         error = kern_lio_listio(td, uap->mode,
 2991             (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
 2992             &aiocb32_ops);
 2993         free(acb_list, M_LIO);
 2994         return (error);
 2995 }
 2996 
 2997 #endif
