audit_worker.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright (c) 1999-2005 Apple Computer, Inc.
    3  * Copyright (c) 2006 Robert N. M. Watson
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1.  Redistributions of source code must retain the above copyright
   10  *     notice, this list of conditions and the following disclaimer.
   11  * 2.  Redistributions in binary form must reproduce the above copyright
   12  *     notice, this list of conditions and the following disclaimer in the
   13  *     documentation and/or other materials provided with the distribution.
   14  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
   15  *     its contributors may be used to endorse or promote products derived
   16  *     from this software without specific prior written permission.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR
   22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   28  * POSSIBILITY OF SUCH DAMAGE.
   29  *
   30  * $FreeBSD$
   31  */
   32 
   33 #include <sys/param.h>
   34 #include <sys/condvar.h>
   35 #include <sys/conf.h>
   36 #include <sys/file.h>
   37 #include <sys/filedesc.h>
   38 #include <sys/fcntl.h>
   39 #include <sys/ipc.h>
   40 #include <sys/kernel.h>
   41 #include <sys/kthread.h>
   42 #include <sys/malloc.h>
   43 #include <sys/mount.h>
   44 #include <sys/namei.h>
   45 #include <sys/proc.h>
   46 #include <sys/queue.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/protosw.h>
   50 #include <sys/domain.h>
   51 #include <sys/sysproto.h>
   52 #include <sys/sysent.h>
   53 #include <sys/systm.h>
   54 #include <sys/ucred.h>
   55 #include <sys/uio.h>
   56 #include <sys/un.h>
   57 #include <sys/unistd.h>
   58 #include <sys/vnode.h>
   59 
   60 #include <bsm/audit.h>
   61 #include <bsm/audit_internal.h>
   62 #include <bsm/audit_kevents.h>
   63 
   64 #include <netinet/in.h>
   65 #include <netinet/in_pcb.h>
   66 
   67 #include <security/audit/audit.h>
   68 #include <security/audit/audit_private.h>
   69 
   70 #include <vm/uma.h>
   71 
   72 /*
   73  * Worker thread that will schedule disk I/O, etc.
       * The handle is filled in by kthread_create() when audit_worker_init()
       * starts the worker.
   74  */
   75 static struct proc              *audit_thread;
   76 
   77 /*
   78  * When an audit log is rotated, the actual rotation must be performed by the
   79  * audit worker thread, as it may have outstanding writes on the current
   80  * audit log.  audit_replacement_vp holds the vnode replacing the current
   81  * vnode.  We can't let more than one replacement occur at a time, so if more
   82  * than one thread requests a replacement, only one can have the replacement
   83  * "in progress" at any given moment.  If a thread tries to replace the audit
   84  * vnode and discovers a replacement is already in progress (i.e.,
   85  * audit_replacement_flag != 0), then it will sleep on audit_replacement_cv
   86  * waiting its turn to perform a replacement.  When a replacement is
   87  * completed, this cv is signalled by the worker thread so a waiting thread
   88  * can start another replacement.  We also store a credential to perform
   89  * audit log write operations with.
   90  *
   91  * The current credential and vnode are thread-local to audit_worker.
       *
       * audit_replacement_flag, audit_replacement_vp and audit_replacement_cred
       * are written by audit_rotate_vnode() and consumed by
       * audit_worker_rotate(); both do so with audit_mtx held.
   92  */
   93 static struct cv                audit_replacement_cv;
   94 
   95 static int                      audit_replacement_flag;
   96 static struct vnode             *audit_replacement_vp;
   97 static struct ucred             *audit_replacement_cred;
   98 
   99 /*
  100  * Flags related to Kernel->user-space communication.
       *
       * audit_file_rotate_wait is set to 1 by audit_record_write() when it sends
       * AUDIT_TRIGGER_ROTATE_KERNEL, which suppresses further rotation triggers,
       * and is reset to 0 by audit_rotate_vnode() once a replacement has been
       * acknowledged by the worker.
  101  */
  102 static int                      audit_file_rotate_wait;
  103 
  104 /*
  105  * Write an audit record to a file, performed as the last stage after both
  106  * preselection and BSM conversion.  Both space management and write failures
  107  * are handled in this function.
  108  *
  109  * No attempt is made to deal with possible failure to deliver a trigger to
  110  * the audit daemon, since the message is asynchronous anyway.
  111  */
  112 static void
  113 audit_record_write(struct vnode *vp, struct ucred *cred, struct thread *td,
  114     void *data, size_t len)
  115 {
  116         static struct timeval last_lowspace_trigger;
  117         static struct timeval last_fail;
  118         static int cur_lowspace_trigger;
  119         struct statfs *mnt_stat;
  120         int error, vfslocked;
  121         static int cur_fail;
  122         struct vattr vattr;
  123         long temp;
  124 
  125         if (vp == NULL)
  126                 return;
  127 
  128         mnt_stat = &vp->v_mount->mnt_stat;
  129         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  130 
  131         /*
  132          * First, gather statistics on the audit log file and file system so
  133          * that we know how we're doing on space.  Consider failure of these
  134          * operations to indicate a future inability to write to the file.
  135          */
  136         error = VFS_STATFS(vp->v_mount, mnt_stat, td);
  137         if (error)
  138                 goto fail;
  139         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
  140         error = VOP_GETATTR(vp, &vattr, cred, td);
  141         VOP_UNLOCK(vp, 0, td);
  142         if (error)
  143                 goto fail;
  144         audit_fstat.af_currsz = vattr.va_size;
  145 
  146         /*
  147          * We handle four different space-related limits:
  148          *
  149          * - A fixed (hard) limit on the minimum free blocks we require on
  150          *   the file system, and results in record loss, a trigger, and
  151          *   possible fail stop due to violating invariants.
  152          *
  153          * - An administrative (soft) limit, which when fallen below, results
  154          *   in the kernel notifying the audit daemon of low space.
  155          *
  156          * - An audit trail size limit, which when gone above, results in the
  157          *   kernel notifying the audit daemon that rotation is desired.
  158          *
  159          * - The total depth of the kernel audit record exceeding free space,
  160          *   which can lead to possible fail stop (with drain), in order to
  161          *   prevent violating invariants.  Failure here doesn't halt
  162          *   immediately, but prevents new records from being generated.
  163          *
  164          * Possibly, the last of these should be handled differently, always
  165          * allowing a full queue to be lost, rather than trying to prevent
  166          * loss.
  167          *
  168          * First, handle the hard limit, which generates a trigger and may
  169          * fail stop.  This is handled in the same manner as ENOSPC from
  170          * VOP_WRITE, and results in record loss.
  171          */
  172         if (mnt_stat->f_bfree < AUDIT_HARD_LIMIT_FREE_BLOCKS) {
  173                 error = ENOSPC;
  174                 goto fail_enospc;
  175         }
  176 
  177         /*
  178          * Second, handle falling below the soft limit, if defined; we send
  179          * the daemon a trigger and continue processing the record.  Triggers
  180          * are limited to 1/sec.
  181          */
  182         if (audit_qctrl.aq_minfree != 0) {
  183                 /*
  184                  * XXXAUDIT: Check math and block size calculations here.
  185                  */
  186                 temp = mnt_stat->f_blocks / (100 / audit_qctrl.aq_minfree);
  187                 if (mnt_stat->f_bfree < temp) {
  188                         if (ppsratecheck(&last_lowspace_trigger,
  189                             &cur_lowspace_trigger, 1)) {
  190                                 (void)send_trigger(AUDIT_TRIGGER_LOW_SPACE);
  191                                 printf("Warning: audit space low\n");
  192                         }
  193                 }
  194         }
  195 
  196         /*
  197          * If the current file is getting full, generate a rotation trigger
  198          * to the daemon.  This is only approximate, which is fine as more
  199          * records may be generated before the daemon rotates the file.
  200          */
  201         if ((audit_fstat.af_filesz != 0) && (audit_file_rotate_wait == 0) &&
  202             (vattr.va_size >= audit_fstat.af_filesz)) {
  203                 audit_file_rotate_wait = 1;
  204                 (void)send_trigger(AUDIT_TRIGGER_ROTATE_KERNEL);
  205         }
  206 
  207         /*
  208          * If the estimated amount of audit data in the audit event queue
  209          * (plus records allocated but not yet queued) has reached the amount
  210          * of free space on the disk, then we need to go into an audit fail
  211          * stop state, in which we do not permit the allocation/committing of
  212          * any new audit records.  We continue to process records but don't
  213          * allow any activities that might generate new records.  In the
  214          * future, we might want to detect when space is available again and
  215          * allow operation to continue, but this behavior is sufficient to
  216          * meet fail stop requirements in CAPP.
  217          */
  218         if (audit_fail_stop) {
  219                 if ((unsigned long)((audit_q_len + audit_pre_q_len + 1) *
  220                     MAX_AUDIT_RECORD_SIZE) / mnt_stat->f_bsize >=
  221                     (unsigned long)(mnt_stat->f_bfree)) {
  222                         if (ppsratecheck(&last_fail, &cur_fail, 1))
  223                                 printf("audit_record_write: free space "
  224                                     "below size of audit queue, failing "
  225                                     "stop\n");
  226                         audit_in_failure = 1;
  227                 } else if (audit_in_failure) {
  228                         /*
  229                          * Note: if we want to handle recovery, this is the
  230                          * spot to do it: unset audit_in_failure, and issue a
  231                          * wakeup on the cv.
  232                          */
  233                 }
  234         }
  235 
  236         error = vn_rdwr(UIO_WRITE, vp, data, len, (off_t)0, UIO_SYSSPACE,
  237             IO_APPEND|IO_UNIT, cred, NULL, NULL, td);
  238         if (error == ENOSPC)
  239                 goto fail_enospc;
  240         else if (error)
  241                 goto fail;
  242 
  243         /*
  244          * Catch completion of a queue drain here; if we're draining and the
  245          * queue is now empty, fail stop.  That audit_fail_stop is implicitly
  246          * true, since audit_in_failure can only be set of audit_fail_stop is
  247          * set.
  248          *
  249          * Note: if we handle recovery from audit_in_failure, then we need to
  250          * make panic here conditional.
  251          */
  252         if (audit_in_failure) {
  253                 if (audit_q_len == 0 && audit_pre_q_len == 0) {
  254                         VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, td);
  255                         (void)VOP_FSYNC(vp, MNT_WAIT, td);
  256                         VOP_UNLOCK(vp, 0, td);
  257                         panic("Audit store overflow; record queue drained.");
  258                 }
  259         }
  260 
  261         VFS_UNLOCK_GIANT(vfslocked);
  262         return;
  263 
  264 fail_enospc:
  265         /*
  266          * ENOSPC is considered a special case with respect to failures, as
  267          * this can reflect either our preemptive detection of insufficient
  268          * space, or ENOSPC returned by the vnode write call.
  269          */
  270         if (audit_fail_stop) {
  271                 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, td);
  272                 (void)VOP_FSYNC(vp, MNT_WAIT, td);
  273                 VOP_UNLOCK(vp, 0, td);
  274                 panic("Audit log space exhausted and fail-stop set.");
  275         }
  276         (void)send_trigger(AUDIT_TRIGGER_NO_SPACE);
  277         audit_suspended = 1;
  278 
  279         /* FALLTHROUGH */
  280 fail:
  281         /*
  282          * We have failed to write to the file, so the current record is
  283          * lost, which may require an immediate system halt.
  284          */
  285         if (audit_panic_on_write_fail) {
  286                 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, td);
  287                 (void)VOP_FSYNC(vp, MNT_WAIT, td);
  288                 VOP_UNLOCK(vp, 0, td);
  289                 panic("audit_worker: write error %d\n", error);
  290         } else if (ppsratecheck(&last_fail, &cur_fail, 1))
  291                 printf("audit_worker: write error %d\n", error);
  292         VFS_UNLOCK_GIANT(vfslocked);
  293 }
  294 
  295 /*
  296  * If an appropriate signal has been received rotate the audit log based on
  297  * the global replacement variables.  Signal consumers as needed that the
  298  * rotation has taken place.
  299  *
  300  * The global variables and CVs used to signal the audit_worker to perform a
  301  * rotation are essentially a message queue of depth 1.  It would be much
  302  * nicer to actually use a message queue.
  303  */
  304 static void
  305 audit_worker_rotate(struct ucred **audit_credp, struct vnode **audit_vpp,
  306     struct thread *audit_td)
  307 {
  308         int do_replacement_signal, vfslocked;
  309         struct ucred *old_cred;
  310         struct vnode *old_vp;
  311 
  312         mtx_assert(&audit_mtx, MA_OWNED);
  313 
  314         do_replacement_signal = 0;
  315         while (audit_replacement_flag != 0) {
  316                 old_cred = *audit_credp;
  317                 old_vp = *audit_vpp;
  318                 *audit_credp = audit_replacement_cred;
  319                 *audit_vpp = audit_replacement_vp;
  320                 audit_replacement_cred = NULL;
  321                 audit_replacement_vp = NULL;
  322                 audit_replacement_flag = 0;
  323 
  324                 audit_enabled = (*audit_vpp != NULL);
  325 
  326                 if (old_vp != NULL) {
  327                         mtx_unlock(&audit_mtx);
  328                         vfslocked = VFS_LOCK_GIANT(old_vp->v_mount);
  329                         vn_close(old_vp, AUDIT_CLOSE_FLAGS, old_cred,
  330                             audit_td);
  331                         VFS_UNLOCK_GIANT(vfslocked);
  332                         crfree(old_cred);
  333                         mtx_lock(&audit_mtx);
  334                         old_cred = NULL;
  335                         old_vp = NULL;
  336                 }
  337                 do_replacement_signal = 1;
  338         }
  339 
  340         /*
  341          * Signal that replacement have occurred to wake up and start any
  342          * other replacements started in parallel.  We can continue about our
  343          * business in the mean time.  We broadcast so that both new
  344          * replacements can be inserted, but also so that the source(s) of
  345          * replacement can return successfully.
  346          */
  347         if (do_replacement_signal)
  348                 cv_broadcast(&audit_replacement_cv);
  349 }
  350 
  351 /*
  352  * Given a kernel audit record, process as required.  Kernel audit records
  353  * are converted to one, or possibly two, BSM records, depending on whether
  354  * there is a user audit record present also.  Kernel records need be
  355  * converted to BSM before they can be written out.  Both types will be
  356  * written to disk, and audit pipes.
  357  */
  358 static void
  359 audit_worker_process_record(struct vnode *audit_vp, struct ucred *audit_cred,
  360     struct thread *audit_td, struct kaudit_record *ar)
  361 {
  362         struct au_record *bsm;
  363         au_class_t class;
  364         au_event_t event;
  365         au_id_t auid;
  366         int error, sorf;
  367 
  368         /*
  369          * First, handle the user record, if any: commit to the system trail
  370          * and audit pipes as selected.
  371          */
  372         if ((ar->k_ar_commit & AR_COMMIT_USER) &&
  373             (ar->k_ar_commit & AR_PRESELECT_USER_TRAIL))
  374                 audit_record_write(audit_vp, audit_cred, audit_td,
  375                     ar->k_udata, ar->k_ulen);
  376 
  377         if ((ar->k_ar_commit & AR_COMMIT_USER) &&
  378             (ar->k_ar_commit & AR_PRESELECT_USER_PIPE))
  379                 audit_pipe_submit_user(ar->k_udata, ar->k_ulen);
  380 
  381         if (!(ar->k_ar_commit & AR_COMMIT_KERNEL) ||
  382             ((ar->k_ar_commit & AR_PRESELECT_PIPE) == 0 &&
  383             (ar->k_ar_commit & AR_PRESELECT_TRAIL) == 0))
  384                 return;
  385 
  386         auid = ar->k_ar.ar_subj_auid;
  387         event = ar->k_ar.ar_event;
  388         class = au_event_class(event);
  389         if (ar->k_ar.ar_errno == 0)
  390                 sorf = AU_PRS_SUCCESS;
  391         else
  392                 sorf = AU_PRS_FAILURE;
  393 
  394         error = kaudit_to_bsm(ar, &bsm);
  395         switch (error) {
  396         case BSM_NOAUDIT:
  397                 return;
  398 
  399         case BSM_FAILURE:
  400                 printf("audit_worker_process_record: BSM_FAILURE\n");
  401                 return;
  402 
  403         case BSM_SUCCESS:
  404                 break;
  405 
  406         default:
  407                 panic("kaudit_to_bsm returned %d", error);
  408         }
  409 
  410         if (ar->k_ar_commit & AR_PRESELECT_TRAIL)
  411                 audit_record_write(audit_vp, audit_cred, audit_td, bsm->data,
  412                     bsm->len);
  413 
  414         if (ar->k_ar_commit & AR_PRESELECT_PIPE)
  415                 audit_pipe_submit(auid, event, class, sorf,
  416                     ar->k_ar_commit & AR_PRESELECT_TRAIL, bsm->data,
  417                     bsm->len);
  418 
  419         kau_free(bsm);
  420 }
  421 
  422 /*
  423  * The audit_worker thread is responsible for watching the event queue,
  424  * dequeueing records, converting them to BSM format, and committing them to
  425  * disk.  In order to minimize lock thrashing, records are dequeued in sets
  426  * to a thread-local work queue.  In addition, the audit_work performs the
  427  * actual exchange of audit log vnode pointer, as audit_vp is a thread-local
  428  * variable.
  429  */
  430 static void
  431 audit_worker(void *arg)
  432 {
  433         struct kaudit_queue ar_worklist;
  434         struct kaudit_record *ar;
  435         struct ucred *audit_cred;
  436         struct thread *audit_td;
  437         struct vnode *audit_vp;
  438         int lowater_signal;
  439 
  440         /*
  441          * These are thread-local variables requiring no synchronization.
  442          */
  443         TAILQ_INIT(&ar_worklist);
  444         audit_cred = NULL;
  445         audit_td = curthread;
  446         audit_vp = NULL;
  447 
  448         mtx_lock(&audit_mtx);
  449         while (1) {
  450                 mtx_assert(&audit_mtx, MA_OWNED);
  451 
  452                 /*
  453                  * Wait for record or rotation events.
  454                  */
  455                 while (!audit_replacement_flag && TAILQ_EMPTY(&audit_q))
  456                         cv_wait(&audit_worker_cv, &audit_mtx);
  457 
  458                 /*
  459                  * First priority: replace the audit log target if requested.
  460                  */
  461                 audit_worker_rotate(&audit_cred, &audit_vp, audit_td);
  462 
  463                 /*
  464                  * If there are records in the global audit record queue,
  465                  * transfer them to a thread-local queue and process them
  466                  * one by one.  If we cross the low watermark threshold,
  467                  * signal any waiting processes that they may wake up and
  468                  * continue generating records.
  469                  */
  470                 lowater_signal = 0;
  471                 while ((ar = TAILQ_FIRST(&audit_q))) {
  472                         TAILQ_REMOVE(&audit_q, ar, k_q);
  473                         audit_q_len--;
  474                         if (audit_q_len == audit_qctrl.aq_lowater)
  475                                 lowater_signal++;
  476                         TAILQ_INSERT_TAIL(&ar_worklist, ar, k_q);
  477                 }
  478                 if (lowater_signal)
  479                         cv_broadcast(&audit_watermark_cv);
  480 
  481                 mtx_unlock(&audit_mtx);
  482                 while ((ar = TAILQ_FIRST(&ar_worklist))) {
  483                         TAILQ_REMOVE(&ar_worklist, ar, k_q);
  484                         audit_worker_process_record(audit_vp, audit_cred,
  485                             audit_td, ar);
  486                         audit_free(ar);
  487                 }
  488                 mtx_lock(&audit_mtx);
  489         }
  490 }
  491 
  492 /*
  493  * audit_rotate_vnode() is called by a user or kernel thread to configure or
  494  * de-configure auditing on a vnode.  The arguments are the replacement
  495  * credential and vnode to substitute for the current credential and vnode,
  496  * if any.  If either is set to NULL, both should be NULL, and this is used
  497  * to indicate that audit is being disabled.  The real work is done in the
  498  * audit_worker thread, but audit_rotate_vnode() waits synchronously for that
  499  * to complete.
  500  *
  501  * The vnode should be referenced and opened by the caller.  The credential
  502  * should be referenced.  audit_rotate_vnode() will own both references as of
  503  * this call, so the caller should not release either.
  504  *
  505  * XXXAUDIT: Review synchronize communication logic.  Really, this is a
  506  * message queue of depth 1.  We are essentially acquiring ownership of the
  507  * communications queue, inserting our message, and waiting for an
  508  * acknowledgement.
  509  */
  510 void
  511 audit_rotate_vnode(struct ucred *cred, struct vnode *vp)
  512 {
  513 
  514         /*
  515          * If other parallel log replacements have been requested, we wait
  516          * until they've finished before continuing.
  517          */
  518         mtx_lock(&audit_mtx);
  519         while (audit_replacement_flag != 0)
  520                 cv_wait(&audit_replacement_cv, &audit_mtx);
  521         audit_replacement_cred = cred;
  522         audit_replacement_flag = 1;
  523         audit_replacement_vp = vp;
  524 
  525         /*
  526          * Wake up the audit worker to perform the exchange once we release
  527          * the mutex.
  528          */
  529         cv_signal(&audit_worker_cv);
  530 
  531         /*
  532          * Wait for the audit_worker to broadcast that a replacement has
  533          * taken place; we know that once this has happened, our vnode has
  534          * been replaced in, so we can return successfully.
  535          */
  536         cv_wait(&audit_replacement_cv, &audit_mtx);
  537         audit_file_rotate_wait = 0; /* We can now request another rotation */
  538         mtx_unlock(&audit_mtx);
  539 }
  540 
  541 void
  542 audit_worker_init(void)
  543 {
  544         int error;
  545 
  546         cv_init(&audit_replacement_cv, "audit_replacement_cv");
  547         error = kthread_create(audit_worker, NULL, &audit_thread, RFHIGHPID,
  548             0, "audit");
  549         if (error)
  550                 panic("audit_worker_init: kthread_create returned %d", error);
  551 }
Cache object: 0b3e94ba061f31a0917eea63f21e2b2c
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/security/audit/audit_worker.c

FreeBSD/Linux Kernel Cross Reference
sys/security/audit/audit_worker.c