
FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_ktls.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause
    3  *
    4  * Copyright (c) 2014-2019 Netflix Inc.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 
   31 #include "opt_inet.h"
   32 #include "opt_inet6.h"
   33 #include "opt_rss.h"
   34 
   35 #include <sys/param.h>
   36 #include <sys/kernel.h>
   37 #include <sys/domainset.h>
   38 #include <sys/ktls.h>
   39 #include <sys/lock.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/mutex.h>
   42 #include <sys/rmlock.h>
   43 #include <sys/proc.h>
   44 #include <sys/protosw.h>
   45 #include <sys/refcount.h>
   46 #include <sys/smp.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/sysctl.h>
   50 #include <sys/taskqueue.h>
   51 #include <sys/kthread.h>
   52 #include <sys/uio.h>
   53 #include <sys/vmmeter.h>
   54 #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
   55 #include <machine/pcb.h>
   56 #endif
   57 #include <machine/vmparam.h>
   58 #include <net/if.h>
   59 #include <net/if_var.h>
   60 #ifdef RSS
   61 #include <net/netisr.h>
   62 #include <net/rss_config.h>
   63 #endif
   64 #include <net/route.h>
   65 #include <net/route/nhop.h>
   66 #if defined(INET) || defined(INET6)
   67 #include <netinet/in.h>
   68 #include <netinet/in_pcb.h>
   69 #endif
   70 #include <netinet/tcp_var.h>
   71 #ifdef TCP_OFFLOAD
   72 #include <netinet/tcp_offload.h>
   73 #endif
   74 #include <opencrypto/xform.h>
   75 #include <vm/uma_dbg.h>
   76 #include <vm/vm.h>
   77 #include <vm/vm_pageout.h>
   78 #include <vm/vm_page.h>
   79 
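       /*
        * Per-CPU work queue servicing software crypto: m_head holds
        * mbufs queued for encryption (TX), so_head holds sockets
        * queued for decryption (RX), and "running" tracks whether the
        * worker is awake.  Cache-line alignment limits false sharing
        * between CPUs.
        */
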
   80 struct ktls_wq {
   81         struct mtx      mtx;
   82         STAILQ_HEAD(, mbuf) m_head;
   83         STAILQ_HEAD(, socket) so_head;
   84         bool            running;
   85 } __aligned(CACHE_LINE_SIZE);
   86 
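       /*
        * Per-NUMA-domain table of the CPUs that host KTLS worker
        * threads; ktls_get_cpu() consults it when ktls_bind_threads
        * is greater than 1.
        */
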
   87 struct ktls_domain_info {
   88         int count;
   89         int cpu[MAXCPU];
   90 };
   91 
   92 struct ktls_domain_info ktls_domains[MAXMEMDOM];
   93 static struct ktls_wq *ktls_wq;
   94 static struct proc *ktls_proc;
   95 LIST_HEAD(, ktls_crypto_backend) ktls_backends;
   96 static struct rmlock ktls_backends_lock;
   97 static uma_zone_t ktls_session_zone;
   98 static uint16_t ktls_cpuid_lookup[MAXCPU];
   99 
  100 SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  101     "Kernel TLS offload");
  102 SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  103     "Kernel TLS offload stats");
  104 
  105 static int ktls_allow_unload;
  106 SYSCTL_INT(_kern_ipc_tls, OID_AUTO, allow_unload, CTLFLAG_RDTUN,
  107     &ktls_allow_unload, 0, "Allow software crypto modules to unload");
  108 
  109 #ifdef RSS
  110 static int ktls_bind_threads = 1;
  111 #else
  112 static int ktls_bind_threads;
  113 #endif
  114 SYSCTL_INT(_kern_ipc_tls, OID_AUTO, bind_threads, CTLFLAG_RDTUN,
  115     &ktls_bind_threads, 0,
  116     "Bind crypto threads to cores (1) or cores and domains (2) at boot");
  117 
  118 static u_int ktls_maxlen = 16384;
  119 SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, maxlen, CTLFLAG_RWTUN,
  120     &ktls_maxlen, 0, "Maximum TLS record size");
  121 
  122 static int ktls_number_threads;
  123 SYSCTL_INT(_kern_ipc_tls_stats, OID_AUTO, threads, CTLFLAG_RD,
  124     &ktls_number_threads, 0,
  125     "Number of TLS threads in thread-pool");
  126 
  127 static bool ktls_offload_enable;
  128 SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, enable, CTLFLAG_RWTUN,
  129     &ktls_offload_enable, 0,
  130     "Enable support for kernel TLS offload");
  131 
  132 static bool ktls_cbc_enable = true;
  133 SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, cbc_enable, CTLFLAG_RWTUN,
  134     &ktls_cbc_enable, 1,
  135     "Enable Support of AES-CBC crypto for kernel TLS");
  136 
  137 static COUNTER_U64_DEFINE_EARLY(ktls_tasks_active);
  138 SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD,
  139     &ktls_tasks_active, "Number of active tasks");
  140 
  141 static COUNTER_U64_DEFINE_EARLY(ktls_cnt_tx_pending);
  142 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_tx_pending, CTLFLAG_RD,
  143     &ktls_cnt_tx_pending,
  144     "Number of TLS 1.0 records waiting for earlier TLS records");
  145 
  146 static COUNTER_U64_DEFINE_EARLY(ktls_cnt_tx_queued);
  147 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_tx_inqueue, CTLFLAG_RD,
  148     &ktls_cnt_tx_queued,
  149     "Number of TLS records in queue to tasks for SW encryption");
  150 
  151 static COUNTER_U64_DEFINE_EARLY(ktls_cnt_rx_queued);
  152 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_rx_inqueue, CTLFLAG_RD,
  153     &ktls_cnt_rx_queued,
  154     "Number of TLS sockets in queue to tasks for SW decryption");
  155 
  156 static COUNTER_U64_DEFINE_EARLY(ktls_offload_total);
  157 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, offload_total,
  158     CTLFLAG_RD, &ktls_offload_total,
  159     "Total successful TLS setups (parameters set)");
  160 
  161 static COUNTER_U64_DEFINE_EARLY(ktls_offload_enable_calls);
  162 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, enable_calls,
  163     CTLFLAG_RD, &ktls_offload_enable_calls,
  164     "Total number of TLS enable calls made");
  165 
  166 static COUNTER_U64_DEFINE_EARLY(ktls_offload_active);
  167 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, active, CTLFLAG_RD,
  168     &ktls_offload_active, "Total Active TLS sessions");
  169 
  170 static COUNTER_U64_DEFINE_EARLY(ktls_offload_corrupted_records);
  171 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, corrupted_records, CTLFLAG_RD,
  172     &ktls_offload_corrupted_records, "Total corrupted TLS records received");
  173 
  174 static COUNTER_U64_DEFINE_EARLY(ktls_offload_failed_crypto);
  175 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, failed_crypto, CTLFLAG_RD,
  176     &ktls_offload_failed_crypto, "Total TLS crypto failures");
  177 
  178 static COUNTER_U64_DEFINE_EARLY(ktls_switch_to_ifnet);
  179 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_ifnet, CTLFLAG_RD,
  180     &ktls_switch_to_ifnet, "TLS sessions switched from SW to ifnet");
  181 
  182 static COUNTER_U64_DEFINE_EARLY(ktls_switch_to_sw);
  183 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_sw, CTLFLAG_RD,
  184     &ktls_switch_to_sw, "TLS sessions switched from ifnet to SW");
  185 
  186 static COUNTER_U64_DEFINE_EARLY(ktls_switch_failed);
  187 SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_failed, CTLFLAG_RD,
  188     &ktls_switch_failed, "TLS sessions unable to switch between SW and ifnet");
  189 
  190 SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  191     "Software TLS session stats");
  192 SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  193     "Hardware (ifnet) TLS session stats");
  194 #ifdef TCP_OFFLOAD
  195 SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, toe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  196     "TOE TLS session stats");
  197 #endif
  198 
  199 static COUNTER_U64_DEFINE_EARLY(ktls_sw_cbc);
  200 SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, cbc, CTLFLAG_RD, &ktls_sw_cbc,
  201     "Active number of software TLS sessions using AES-CBC");
  202 
  203 static COUNTER_U64_DEFINE_EARLY(ktls_sw_gcm);
  204 SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, gcm, CTLFLAG_RD, &ktls_sw_gcm,
  205     "Active number of software TLS sessions using AES-GCM");
  206 
  207 static COUNTER_U64_DEFINE_EARLY(ktls_sw_chacha20);
  208 SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, chacha20, CTLFLAG_RD,
  209     &ktls_sw_chacha20,
  210     "Active number of software TLS sessions using Chacha20-Poly1305");
  211 
  212 static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_cbc);
  213 SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, cbc, CTLFLAG_RD,
  214     &ktls_ifnet_cbc,
  215     "Active number of ifnet TLS sessions using AES-CBC");
  216 
  217 static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_gcm);
  218 SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, gcm, CTLFLAG_RD,
  219     &ktls_ifnet_gcm,
  220     "Active number of ifnet TLS sessions using AES-GCM");
  221 
  222 static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_chacha20);
  223 SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, chacha20, CTLFLAG_RD,
  224     &ktls_ifnet_chacha20,
  225     "Active number of ifnet TLS sessions using Chacha20-Poly1305");
  226 
  227 static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_reset);
  228 SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset, CTLFLAG_RD,
  229     &ktls_ifnet_reset, "TLS sessions updated to a new ifnet send tag");
  230 
  231 static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_reset_dropped);
  232 SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_dropped, CTLFLAG_RD,
  233     &ktls_ifnet_reset_dropped,
  234     "TLS sessions dropped after failing to update ifnet send tag");
  235 
  236 static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_reset_failed);
  237 SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_failed, CTLFLAG_RD,
  238     &ktls_ifnet_reset_failed,
  239     "TLS sessions that failed to allocate a new ifnet send tag");
  240 
  241 static int ktls_ifnet_permitted;
  242 SYSCTL_UINT(_kern_ipc_tls_ifnet, OID_AUTO, permitted, CTLFLAG_RWTUN,
  243     &ktls_ifnet_permitted, 1,
  244     "Whether to permit hardware (ifnet) TLS sessions");
  245 
  246 #ifdef TCP_OFFLOAD
  247 static COUNTER_U64_DEFINE_EARLY(ktls_toe_cbc);
  248 SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, cbc, CTLFLAG_RD,
  249     &ktls_toe_cbc,
  250     "Active number of TOE TLS sessions using AES-CBC");
  251 
  252 static COUNTER_U64_DEFINE_EARLY(ktls_toe_gcm);
  253 SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, gcm, CTLFLAG_RD,
  254     &ktls_toe_gcm,
  255     "Active number of TOE TLS sessions using AES-GCM");
  256 
  257 static COUNTER_U64_DEFINE_EARLY(ktls_toe_chacha20);
  258 SYSCTL_COUNTER_U64(_kern_ipc_tls_toe, OID_AUTO, chacha20, CTLFLAG_RD,
  259     &ktls_toe_chacha20,
  260     "Active number of TOE TLS sessions using Chacha20-Poly1305");
  261 #endif
  262 
  263 static MALLOC_DEFINE(M_KTLS, "ktls", "Kernel TLS");
  264 
  265 static void ktls_cleanup(struct ktls_session *tls);
  266 #if defined(INET) || defined(INET6)
  267 static void ktls_reset_send_tag(void *context, int pending);
  268 #endif
  269 static void ktls_work_thread(void *ctx);
  270 
  271 int
  272 ktls_crypto_backend_register(struct ktls_crypto_backend *be)
  273 {
  274         struct ktls_crypto_backend *curr_be, *tmp;
  275 
  276         if (be->api_version != KTLS_API_VERSION) {
  277                 printf("KTLS: API version mismatch (%d vs %d) for %s\n",
  278                     be->api_version, KTLS_API_VERSION,
  279                     be->name);
  280                 return (EINVAL);
  281         }
  282 
  283         rm_wlock(&ktls_backends_lock);
  284         printf("KTLS: Registering crypto method %s with prio %d\n",
  285                be->name, be->prio);
  286         if (LIST_EMPTY(&ktls_backends)) {
  287                 LIST_INSERT_HEAD(&ktls_backends, be, next);
  288         } else {
  289                 LIST_FOREACH_SAFE(curr_be, &ktls_backends, next, tmp) {
  290                         if (curr_be->prio < be->prio) {
  291                                 LIST_INSERT_BEFORE(curr_be, be, next);
  292                                 break;
  293                         }
  294                         if (LIST_NEXT(curr_be, next) == NULL) {
  295                                 LIST_INSERT_AFTER(curr_be, be, next);
  296                                 break;
  297                         }
  298                 }
  299         }
  300         rm_wunlock(&ktls_backends_lock);
  301         return (0);
  302 }
  303 
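       /*
        * Illustrative sketch (not part of this file): a software
        * backend typically registers from its module event handler,
        * along these lines (the "sketch" names and the prio value are
        * placeholders):
        *
        *	static struct ktls_crypto_backend sketch_backend = {
        *		.api_version = KTLS_API_VERSION,
        *		.prio = 10,
        *		.name = "sketch",
        *		.try = sketch_try,
        *	};
        *
        *	case MOD_LOAD:
        *		return (ktls_crypto_backend_register(&sketch_backend));
        *	case MOD_UNLOAD:
        *		return (ktls_crypto_backend_deregister(&sketch_backend));
        *
        * Larger prio values sort toward the head of ktls_backends, so
        * higher-priority backends are offered new sessions first.
        */
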
  304 int
  305 ktls_crypto_backend_deregister(struct ktls_crypto_backend *be)
  306 {
  307         struct ktls_crypto_backend *tmp;
  308 
  309         /*
  310          * Don't error if the backend isn't registered.  This permits
  311          * MOD_UNLOAD handlers to use this function unconditionally.
  312          */
  313         rm_wlock(&ktls_backends_lock);
  314         LIST_FOREACH(tmp, &ktls_backends, next) {
  315                 if (tmp == be)
  316                         break;
  317         }
  318         if (tmp == NULL) {
  319                 rm_wunlock(&ktls_backends_lock);
  320                 return (0);
  321         }
  322 
  323         if (!ktls_allow_unload) {
  324                 rm_wunlock(&ktls_backends_lock);
  325                 printf(
  326                     "KTLS: Deregistering crypto method %s is not supported\n",
  327                     be->name);
  328                 return (EBUSY);
  329         }
  330 
  331         if (be->use_count) {
  332                 rm_wunlock(&ktls_backends_lock);
  333                 return (EBUSY);
  334         }
  335 
  336         LIST_REMOVE(be, next);
  337         rm_wunlock(&ktls_backends_lock);
  338         return (0);
  339 }
  340 
  341 #if defined(INET) || defined(INET6)
  342 static u_int
  343 ktls_get_cpu(struct socket *so)
  344 {
  345         struct inpcb *inp;
  346 #ifdef NUMA
  347         struct ktls_domain_info *di;
  348 #endif
  349         u_int cpuid;
  350 
  351         inp = sotoinpcb(so);
  352 #ifdef RSS
  353         cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
  354         if (cpuid != NETISR_CPUID_NONE)
  355                 return (cpuid);
  356 #endif
  357         /*
  358          * Just use the flowid to shard connections in a repeatable
  359          * fashion.  Note that some crypto backends rely on the
  360          * serialization provided by having the same connection use
  361          * the same queue.
  362          */
  363 #ifdef NUMA
  364         if (ktls_bind_threads > 1 && inp->inp_numa_domain != M_NODOM) {
  365                 di = &ktls_domains[inp->inp_numa_domain];
  366                 cpuid = di->cpu[inp->inp_flowid % di->count];
  367         } else
  368 #endif
  369                 cpuid = ktls_cpuid_lookup[inp->inp_flowid % ktls_number_threads];
  370         return (cpuid);
  371 }
  372 #endif
  373 
  374 static void
  375 ktls_init(void *dummy __unused)
  376 {
  377         struct thread *td;
  378         struct pcpu *pc;
  379         cpuset_t mask;
  380         int count, domain, error, i;
  381 
  382         rm_init(&ktls_backends_lock, "ktls backends");
  383         LIST_INIT(&ktls_backends);
  384 
  385         ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS,
  386             M_WAITOK | M_ZERO);
  387 
  388         ktls_session_zone = uma_zcreate("ktls_session",
  389             sizeof(struct ktls_session),
  390             NULL, NULL, NULL, NULL,
  391             UMA_ALIGN_CACHE, 0);
  392 
  393         /*
  394          * Initialize the workqueues to run the TLS work.  We create a
  395          * work queue for each CPU.
  396          */
  397         CPU_FOREACH(i) {
  398                 STAILQ_INIT(&ktls_wq[i].m_head);
  399                 STAILQ_INIT(&ktls_wq[i].so_head);
  400                 mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
  401                 error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
  402                     &ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
  403                 if (error)
  404                         panic("Can't add KTLS thread %d error %d", i, error);
  405 
  406                 /*
  407                  * Bind threads to cores.  If ktls_bind_threads is >
  408                  * 1, then we bind to the NUMA domain.
  409                  */
  410                 if (ktls_bind_threads) {
  411                         if (ktls_bind_threads > 1) {
  412                                 pc = pcpu_find(i);
  413                                 domain = pc->pc_domain;
  414                                 CPU_COPY(&cpuset_domain[domain], &mask);
  415                                 count = ktls_domains[domain].count;
  416                                 ktls_domains[domain].cpu[count] = i;
  417                                 ktls_domains[domain].count++;
  418                         } else {
  419                                 CPU_SETOF(i, &mask);
  420                         }
  421                         error = cpuset_setthread(td->td_tid, &mask);
  422                         if (error)
  423                                 panic(
  424                             "Unable to bind KTLS thread for CPU %d error %d",
  425                                      i, error);
  426                 }
  427                 ktls_cpuid_lookup[ktls_number_threads] = i;
  428                 ktls_number_threads++;
  429         }
  430 
  431         /*
  432          * If we somehow have an empty domain, fall back to choosing
  433          * among all KTLS threads.
  434          */
  435         if (ktls_bind_threads > 1) {
  436                 for (i = 0; i < vm_ndomains; i++) {
  437                         if (ktls_domains[i].count == 0) {
  438                                 ktls_bind_threads = 1;
  439                                 break;
  440                         }
  441                 }
  442         }
  443 
  444         if (bootverbose)
  445                 printf("KTLS: Initialized %d threads\n", ktls_number_threads);
  446 }
  447 SYSINIT(ktls, SI_SUB_SMP + 1, SI_ORDER_ANY, ktls_init, NULL);
  448 
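       /*
        * Illustrative note (not part of this file): the worker pool
        * above always starts at boot, but sessions are only created
        * once the kern.ipc.tls.enable sysctl is switched on:
        *
        *	sysctl kern.ipc.tls.enable=1
        *
        * kern.ipc.tls.bind_threads is a read-only tunable, so thread
        * binding has to be requested from loader.conf.
        */
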
  449 #if defined(INET) || defined(INET6)
  450 static int
  451 ktls_create_session(struct socket *so, struct tls_enable *en,
  452     struct ktls_session **tlsp)
  453 {
  454         struct ktls_session *tls;
  455         int error;
  456 
  457         /* Only TLS 1.0 - 1.3 are supported. */
  458         if (en->tls_vmajor != TLS_MAJOR_VER_ONE)
  459                 return (EINVAL);
  460         if (en->tls_vminor < TLS_MINOR_VER_ZERO ||
  461             en->tls_vminor > TLS_MINOR_VER_THREE)
  462                 return (EINVAL);
  463 
  464         if (en->auth_key_len < 0 || en->auth_key_len > TLS_MAX_PARAM_SIZE)
  465                 return (EINVAL);
  466         if (en->cipher_key_len < 0 || en->cipher_key_len > TLS_MAX_PARAM_SIZE)
  467                 return (EINVAL);
  468         if (en->iv_len < 0 || en->iv_len > sizeof(tls->params.iv))
  469                 return (EINVAL);
  470 
  471         /* All supported algorithms require a cipher key. */
  472         if (en->cipher_key_len == 0)
  473                 return (EINVAL);
  474 
  475         /* No flags are currently supported. */
  476         if (en->flags != 0)
  477                 return (EINVAL);
  478 
  479         /* Common checks for supported algorithms. */
  480         switch (en->cipher_algorithm) {
  481         case CRYPTO_AES_NIST_GCM_16:
  482                 /*
  483                  * auth_algorithm isn't used, but permit GMAC values
  484                  * for compatibility.
  485                  */
  486                 switch (en->auth_algorithm) {
  487                 case 0:
  488 #ifdef COMPAT_FREEBSD12
  489                 /* XXX: Really 13.0-current COMPAT. */
  490                 case CRYPTO_AES_128_NIST_GMAC:
  491                 case CRYPTO_AES_192_NIST_GMAC:
  492                 case CRYPTO_AES_256_NIST_GMAC:
  493 #endif
  494                         break;
  495                 default:
  496                         return (EINVAL);
  497                 }
  498                 if (en->auth_key_len != 0)
  499                         return (EINVAL);
  500                 switch (en->tls_vminor) {
  501                 case TLS_MINOR_VER_TWO:
  502                         if (en->iv_len != TLS_AEAD_GCM_LEN)
  503                                 return (EINVAL);
  504                         break;
  505                 case TLS_MINOR_VER_THREE:
  506                         if (en->iv_len != TLS_1_3_GCM_IV_LEN)
  507                                 return (EINVAL);
  508                         break;
  509                 default:
  510                         return (EINVAL);
  511                 }
  512                 break;
  513         case CRYPTO_AES_CBC:
  514                 switch (en->auth_algorithm) {
  515                 case CRYPTO_SHA1_HMAC:
  516                         break;
  517                 case CRYPTO_SHA2_256_HMAC:
  518                 case CRYPTO_SHA2_384_HMAC:
  519                         if (en->tls_vminor != TLS_MINOR_VER_TWO)
  520                                 return (EINVAL);
  521                         break;
  522                 default:
  523                         return (EINVAL);
  524                 }
  525                 if (en->auth_key_len == 0)
  526                         return (EINVAL);
  527 
  528                 /*
  529                  * TLS 1.0 requires an implicit IV.  TLS 1.1 and 1.2
  530                  * use explicit IVs.
  531                  */
  532                 switch (en->tls_vminor) {
  533                 case TLS_MINOR_VER_ZERO:
  534                         if (en->iv_len != TLS_CBC_IMPLICIT_IV_LEN)
  535                                 return (EINVAL);
  536                         break;
  537                 case TLS_MINOR_VER_ONE:
  538                 case TLS_MINOR_VER_TWO:
  539                         /* Ignore any supplied IV. */
  540                         en->iv_len = 0;
  541                         break;
  542                 default:
  543                         return (EINVAL);
  544                 }
  545                 break;
  546         case CRYPTO_CHACHA20_POLY1305:
  547                 if (en->auth_algorithm != 0 || en->auth_key_len != 0)
  548                         return (EINVAL);
  549                 if (en->tls_vminor != TLS_MINOR_VER_TWO &&
  550                     en->tls_vminor != TLS_MINOR_VER_THREE)
  551                         return (EINVAL);
  552                 if (en->iv_len != TLS_CHACHA20_IV_LEN)
  553                         return (EINVAL);
  554                 break;
  555         default:
  556                 return (EINVAL);
  557         }
  558 
  559         tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO);
  560 
  561         counter_u64_add(ktls_offload_active, 1);
  562 
  563         refcount_init(&tls->refcount, 1);
  564         TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls);
  565 
  566         tls->wq_index = ktls_get_cpu(so);
  567 
  568         tls->params.cipher_algorithm = en->cipher_algorithm;
  569         tls->params.auth_algorithm = en->auth_algorithm;
  570         tls->params.tls_vmajor = en->tls_vmajor;
  571         tls->params.tls_vminor = en->tls_vminor;
  572         tls->params.flags = en->flags;
  573         tls->params.max_frame_len = min(TLS_MAX_MSG_SIZE_V10_2, ktls_maxlen);
  574 
  575         /* Set the header and trailer lengths. */
  576         tls->params.tls_hlen = sizeof(struct tls_record_layer);
  577         switch (en->cipher_algorithm) {
  578         case CRYPTO_AES_NIST_GCM_16:
  579                 /*
  580                  * TLS 1.2 uses a 4 byte implicit IV with an explicit 8 byte
  581                  * nonce.  TLS 1.3 uses a 12 byte implicit IV.
  582                  */
  583                 if (en->tls_vminor < TLS_MINOR_VER_THREE)
  584                         tls->params.tls_hlen += sizeof(uint64_t);
  585                 tls->params.tls_tlen = AES_GMAC_HASH_LEN;
  586                 tls->params.tls_bs = 1;
  587                 break;
  588         case CRYPTO_AES_CBC:
  589                 switch (en->auth_algorithm) {
  590                 case CRYPTO_SHA1_HMAC:
  591                         if (en->tls_vminor == TLS_MINOR_VER_ZERO) {
  592                                 /* Implicit IV, no nonce. */
  593                                 tls->sequential_records = true;
  594                                 tls->next_seqno = be64dec(en->rec_seq);
  595                                 STAILQ_INIT(&tls->pending_records);
  596                         } else {
  597                                 tls->params.tls_hlen += AES_BLOCK_LEN;
  598                         }
  599                         tls->params.tls_tlen = AES_BLOCK_LEN +
  600                             SHA1_HASH_LEN;
  601                         break;
  602                 case CRYPTO_SHA2_256_HMAC:
  603                         tls->params.tls_hlen += AES_BLOCK_LEN;
  604                         tls->params.tls_tlen = AES_BLOCK_LEN +
  605                             SHA2_256_HASH_LEN;
  606                         break;
  607                 case CRYPTO_SHA2_384_HMAC:
  608                         tls->params.tls_hlen += AES_BLOCK_LEN;
  609                         tls->params.tls_tlen = AES_BLOCK_LEN +
  610                             SHA2_384_HASH_LEN;
  611                         break;
  612                 default:
  613                         panic("invalid hmac");
  614                 }
  615                 tls->params.tls_bs = AES_BLOCK_LEN;
  616                 break;
  617         case CRYPTO_CHACHA20_POLY1305:
  618                 /*
  619                  * Chacha20 uses a 12 byte implicit IV.
  620                  */
  621                 tls->params.tls_tlen = POLY1305_HASH_LEN;
  622                 tls->params.tls_bs = 1;
  623                 break;
  624         default:
  625                 panic("invalid cipher");
  626         }
  627 
  628         /*
  629          * TLS 1.3 includes optional padding which we do not support,
  630          * and also puts the "real" record type at the end of the
  631          * encrypted data.
  632          */
  633         if (en->tls_vminor == TLS_MINOR_VER_THREE)
  634                 tls->params.tls_tlen += sizeof(uint8_t);
  635 
  636         KASSERT(tls->params.tls_hlen <= MBUF_PEXT_HDR_LEN,
  637             ("TLS header length too long: %d", tls->params.tls_hlen));
  638         KASSERT(tls->params.tls_tlen <= MBUF_PEXT_TRAIL_LEN,
  639             ("TLS trailer length too long: %d", tls->params.tls_tlen));
  640 
  641         if (en->auth_key_len != 0) {
  642                 tls->params.auth_key_len = en->auth_key_len;
  643                 tls->params.auth_key = malloc(en->auth_key_len, M_KTLS,
  644                     M_WAITOK);
  645                 error = copyin(en->auth_key, tls->params.auth_key,
  646                     en->auth_key_len);
  647                 if (error)
  648                         goto out;
  649         }
  650 
  651         tls->params.cipher_key_len = en->cipher_key_len;
  652         tls->params.cipher_key = malloc(en->cipher_key_len, M_KTLS, M_WAITOK);
  653         error = copyin(en->cipher_key, tls->params.cipher_key,
  654             en->cipher_key_len);
  655         if (error)
  656                 goto out;
  657 
  658         /*
  659          * This holds the implicit portion of the nonce for AEAD
  660          * ciphers and the initial implicit IV for TLS 1.0.  The
  661          * explicit portions of the IV are generated in ktls_frame().
  662          */
  663         if (en->iv_len != 0) {
  664                 tls->params.iv_len = en->iv_len;
  665                 error = copyin(en->iv, tls->params.iv, en->iv_len);
  666                 if (error)
  667                         goto out;
  668 
  669                 /*
  670                  * For TLS 1.2 with GCM, generate an 8-byte nonce as a
  671                  * counter to generate unique explicit IVs.
  672                  *
  673                  * Store this counter in the last 8 bytes of the IV
  674                  * array so that it is 8-byte aligned.
  675                  */
  676                 if (en->cipher_algorithm == CRYPTO_AES_NIST_GCM_16 &&
  677                     en->tls_vminor == TLS_MINOR_VER_TWO)
  678                         arc4rand(tls->params.iv + 8, sizeof(uint64_t), 0);
  679         }
  680 
  681         *tlsp = tls;
  682         return (0);
  683 
  684 out:
  685         ktls_free(tls);
  686         return (error);
  687 }
  688 
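       /*
        * Illustrative sketch (not part of this file): a tls_enable
        * request that passes the validation above for TLS 1.2 with
        * AES-128-GCM looks roughly like this ("key" and "salt" are
        * placeholder buffers):
        *
        *	struct tls_enable en = {
        *		.cipher_algorithm = CRYPTO_AES_NIST_GCM_16,
        *		.cipher_key = key,
        *		.cipher_key_len = 16,
        *		.iv = salt,
        *		.iv_len = TLS_AEAD_GCM_LEN,
        *		.tls_vmajor = TLS_MAJOR_VER_ONE,
        *		.tls_vminor = TLS_MINOR_VER_TWO,
        *	};
        *
        * The 4-byte "salt" is the implicit part of the GCM nonce;
        * auth_algorithm and auth_key_len stay zero for AEAD ciphers,
        * and rec_seq carries the initial record sequence number.
        */
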
  689 static struct ktls_session *
  690 ktls_clone_session(struct ktls_session *tls)
  691 {
  692         struct ktls_session *tls_new;
  693 
  694         tls_new = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO);
  695 
  696         counter_u64_add(ktls_offload_active, 1);
  697 
  698         refcount_init(&tls_new->refcount, 1);
  699         TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_tag, tls_new);
  700 
  701         /* Copy fields from existing session. */
  702         tls_new->params = tls->params;
  703         tls_new->wq_index = tls->wq_index;
  704 
  705         /* Deep copy keys. */
  706         if (tls_new->params.auth_key != NULL) {
  707                 tls_new->params.auth_key = malloc(tls->params.auth_key_len,
  708                     M_KTLS, M_WAITOK);
  709                 memcpy(tls_new->params.auth_key, tls->params.auth_key,
  710                     tls->params.auth_key_len);
  711         }
  712 
  713         tls_new->params.cipher_key = malloc(tls->params.cipher_key_len, M_KTLS,
  714             M_WAITOK);
  715         memcpy(tls_new->params.cipher_key, tls->params.cipher_key,
  716             tls->params.cipher_key_len);
  717 
  718         return (tls_new);
  719 }
  720 #endif
  721 
  722 static void
  723 ktls_cleanup(struct ktls_session *tls)
  724 {
  725 
  726         counter_u64_add(ktls_offload_active, -1);
  727         switch (tls->mode) {
  728         case TCP_TLS_MODE_SW:
  729                 MPASS(tls->be != NULL);
  730                 switch (tls->params.cipher_algorithm) {
  731                 case CRYPTO_AES_CBC:
  732                         counter_u64_add(ktls_sw_cbc, -1);
  733                         break;
  734                 case CRYPTO_AES_NIST_GCM_16:
  735                         counter_u64_add(ktls_sw_gcm, -1);
  736                         break;
  737                 case CRYPTO_CHACHA20_POLY1305:
  738                         counter_u64_add(ktls_sw_chacha20, -1);
  739                         break;
  740                 }
  741                 tls->free(tls);
  742                 break;
  743         case TCP_TLS_MODE_IFNET:
  744                 switch (tls->params.cipher_algorithm) {
  745                 case CRYPTO_AES_CBC:
  746                         counter_u64_add(ktls_ifnet_cbc, -1);
  747                         break;
  748                 case CRYPTO_AES_NIST_GCM_16:
  749                         counter_u64_add(ktls_ifnet_gcm, -1);
  750                         break;
  751                 case CRYPTO_CHACHA20_POLY1305:
  752                         counter_u64_add(ktls_ifnet_chacha20, -1);
  753                         break;
  754                 }
  755                 if (tls->snd_tag != NULL)
  756                         m_snd_tag_rele(tls->snd_tag);
  757                 break;
  758 #ifdef TCP_OFFLOAD
  759         case TCP_TLS_MODE_TOE:
  760                 switch (tls->params.cipher_algorithm) {
  761                 case CRYPTO_AES_CBC:
  762                         counter_u64_add(ktls_toe_cbc, -1);
  763                         break;
  764                 case CRYPTO_AES_NIST_GCM_16:
  765                         counter_u64_add(ktls_toe_gcm, -1);
  766                         break;
  767                 case CRYPTO_CHACHA20_POLY1305:
  768                         counter_u64_add(ktls_toe_chacha20, -1);
  769                         break;
  770                 }
  771                 break;
  772 #endif
  773         }
  774         if (tls->params.auth_key != NULL) {
  775                 zfree(tls->params.auth_key, M_KTLS);
  776                 tls->params.auth_key = NULL;
  777                 tls->params.auth_key_len = 0;
  778         }
  779         if (tls->params.cipher_key != NULL) {
  780                 zfree(tls->params.cipher_key, M_KTLS);
  781                 tls->params.cipher_key = NULL;
  782                 tls->params.cipher_key_len = 0;
  783         }
  784         explicit_bzero(tls->params.iv, sizeof(tls->params.iv));
  785 }
  786 
  787 #if defined(INET) || defined(INET6)
  788 
  789 #ifdef TCP_OFFLOAD
  790 static int
  791 ktls_try_toe(struct socket *so, struct ktls_session *tls, int direction)
  792 {
  793         struct inpcb *inp;
  794         struct tcpcb *tp;
  795         int error;
  796 
  797         inp = so->so_pcb;
  798         INP_WLOCK(inp);
  799         if (inp->inp_flags2 & INP_FREED) {
  800                 INP_WUNLOCK(inp);
  801                 return (ECONNRESET);
  802         }
  803         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  804                 INP_WUNLOCK(inp);
  805                 return (ECONNRESET);
  806         }
  807         if (inp->inp_socket == NULL) {
  808                 INP_WUNLOCK(inp);
  809                 return (ECONNRESET);
  810         }
  811         tp = intotcpcb(inp);
  812         if (!(tp->t_flags & TF_TOE)) {
  813                 INP_WUNLOCK(inp);
  814                 return (EOPNOTSUPP);
  815         }
  816 
  817         error = tcp_offload_alloc_tls_session(tp, tls, direction);
  818         INP_WUNLOCK(inp);
  819         if (error == 0) {
  820                 tls->mode = TCP_TLS_MODE_TOE;
  821                 switch (tls->params.cipher_algorithm) {
  822                 case CRYPTO_AES_CBC:
  823                         counter_u64_add(ktls_toe_cbc, 1);
  824                         break;
  825                 case CRYPTO_AES_NIST_GCM_16:
  826                         counter_u64_add(ktls_toe_gcm, 1);
  827                         break;
  828                 case CRYPTO_CHACHA20_POLY1305:
  829                         counter_u64_add(ktls_toe_chacha20, 1);
  830                         break;
  831                 }
  832         }
  833         return (error);
  834 }
  835 #endif
  836 
  837 /*
  838  * Common code used when first enabling ifnet TLS on a connection or
  839  * when allocating a new ifnet TLS session due to a routing change.
  840  * This function allocates a new TLS send tag on whatever interface
  841  * the connection is currently routed over.
  842  */
  843 static int
  844 ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
  845     struct m_snd_tag **mstp)
  846 {
  847         union if_snd_tag_alloc_params params;
  848         struct ifnet *ifp;
  849         struct nhop_object *nh;
  850         struct tcpcb *tp;
  851         int error;
  852 
  853         INP_RLOCK(inp);
  854         if (inp->inp_flags2 & INP_FREED) {
  855                 INP_RUNLOCK(inp);
  856                 return (ECONNRESET);
  857         }
  858         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  859                 INP_RUNLOCK(inp);
  860                 return (ECONNRESET);
  861         }
  862         if (inp->inp_socket == NULL) {
  863                 INP_RUNLOCK(inp);
  864                 return (ECONNRESET);
  865         }
  866         tp = intotcpcb(inp);
  867 
  868         /*
  869          * Check administrative controls on ifnet TLS to determine if
  870          * ifnet TLS should be denied.
  871          *
  872          * - Always permit 'force' requests.
  873          * - ktls_ifnet_permitted == 0: always deny.
  874          */
  875         if (!force && ktls_ifnet_permitted == 0) {
  876                 INP_RUNLOCK(inp);
  877                 return (ENXIO);
  878         }
  879 
  880         /*
  881          * XXX: Use the cached route in the inpcb to find the
  882          * interface.  This should perhaps instead use
  883          * rtalloc1_fib(dst, 0, 0, fibnum).  Since KTLS is only
  884          * enabled after a connection has completed key negotiation in
  885          * userland, the cached route will be present in practice.
  886          */
  887         nh = inp->inp_route.ro_nh;
  888         if (nh == NULL) {
  889                 INP_RUNLOCK(inp);
  890                 return (ENXIO);
  891         }
  892         ifp = nh->nh_ifp;
  893         if_ref(ifp);
  894 
  895         /*
  896          * Allocate a TLS + ratelimit tag if the connection has an
  897          * existing pacing rate.
  898          */
  899         if (tp->t_pacing_rate != -1 &&
  900             (ifp->if_capenable & IFCAP_TXTLS_RTLMT) != 0) {
  901                 params.hdr.type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT;
  902                 params.tls_rate_limit.inp = inp;
  903                 params.tls_rate_limit.tls = tls;
  904                 params.tls_rate_limit.max_rate = tp->t_pacing_rate;
  905         } else {
  906                 params.hdr.type = IF_SND_TAG_TYPE_TLS;
  907                 params.tls.inp = inp;
  908                 params.tls.tls = tls;
  909         }
  910         params.hdr.flowid = inp->inp_flowid;
  911         params.hdr.flowtype = inp->inp_flowtype;
  912         params.hdr.numa_domain = inp->inp_numa_domain;
  913         INP_RUNLOCK(inp);
  914 
  915         if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
  916                 error = EOPNOTSUPP;
  917                 goto out;
  918         }
  919         if (inp->inp_vflag & INP_IPV6) {
  920                 if ((ifp->if_capenable & IFCAP_TXTLS6) == 0) {
  921                         error = EOPNOTSUPP;
  922                         goto out;
  923                 }
  924         } else {
  925                 if ((ifp->if_capenable & IFCAP_TXTLS4) == 0) {
  926                         error = EOPNOTSUPP;
  927                         goto out;
  928                 }
  929         }
  930         error = m_snd_tag_alloc(ifp, &params, mstp);
  931 out:
  932         if_rele(ifp);
  933         return (error);
  934 }
  935 
  936 static int
  937 ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force)
  938 {
  939         struct m_snd_tag *mst;
  940         int error;
  941 
  942         error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst);
  943         if (error == 0) {
  944                 tls->mode = TCP_TLS_MODE_IFNET;
  945                 tls->snd_tag = mst;
  946                 switch (tls->params.cipher_algorithm) {
  947                 case CRYPTO_AES_CBC:
  948                         counter_u64_add(ktls_ifnet_cbc, 1);
  949                         break;
  950                 case CRYPTO_AES_NIST_GCM_16:
  951                         counter_u64_add(ktls_ifnet_gcm, 1);
  952                         break;
  953                 case CRYPTO_CHACHA20_POLY1305:
  954                         counter_u64_add(ktls_ifnet_chacha20, 1);
  955                         break;
  956                 }
  957         }
  958         return (error);
  959 }
  960 
  961 static int
  962 ktls_try_sw(struct socket *so, struct ktls_session *tls, int direction)
  963 {
  964         struct rm_priotracker prio;
  965         struct ktls_crypto_backend *be;
  966 
  967         /*
  968          * Choose the best software crypto backend.  Backends are
   969          * stored in sorted priority order (largest value == most
  970          * important at the head of the list), so this just stops on
  971          * the first backend that claims the session by returning
  972          * success.
  973          */
  974         if (ktls_allow_unload)
  975                 rm_rlock(&ktls_backends_lock, &prio);
  976         LIST_FOREACH(be, &ktls_backends, next) {
  977                 if (be->try(so, tls, direction) == 0)
  978                         break;
  979                 KASSERT(tls->cipher == NULL,
  980                     ("ktls backend leaked a cipher pointer"));
  981         }
  982         if (be != NULL) {
  983                 if (ktls_allow_unload)
  984                         be->use_count++;
  985                 tls->be = be;
  986         }
  987         if (ktls_allow_unload)
  988                 rm_runlock(&ktls_backends_lock, &prio);
  989         if (be == NULL)
  990                 return (EOPNOTSUPP);
  991         tls->mode = TCP_TLS_MODE_SW;
  992         switch (tls->params.cipher_algorithm) {
  993         case CRYPTO_AES_CBC:
  994                 counter_u64_add(ktls_sw_cbc, 1);
  995                 break;
  996         case CRYPTO_AES_NIST_GCM_16:
  997                 counter_u64_add(ktls_sw_gcm, 1);
  998                 break;
  999         case CRYPTO_CHACHA20_POLY1305:
 1000                 counter_u64_add(ktls_sw_chacha20, 1);
 1001                 break;
 1002         }
 1003         return (0);
 1004 }
 1005 
 1006 /*
 1007  * KTLS RX stores data in the socket buffer as a list of TLS records,
  1008  * where each record is stored as a control message containing the TLS
 1009  * header followed by data mbufs containing the decrypted data.  This
 1010  * is different from KTLS TX which always uses an mb_ext_pgs mbuf for
 1011  * both encrypted and decrypted data.  TLS records decrypted by a NIC
 1012  * should be queued to the socket buffer as records, but encrypted
 1013  * data which needs to be decrypted by software arrives as a stream of
 1014  * regular mbufs which need to be converted.  In addition, there may
 1015  * already be pending encrypted data in the socket buffer when KTLS RX
 1016  * is enabled.
 1017  *
 1018  * To manage not-yet-decrypted data for KTLS RX, the following scheme
 1019  * is used:
 1020  *
 1021  * - A single chain of NOTREADY mbufs is hung off of sb_mtls.
 1022  *
 1023  * - ktls_check_rx checks this chain of mbufs reading the TLS header
 1024  *   from the first mbuf.  Once all of the data for that TLS record is
 1025  *   queued, the socket is queued to a worker thread.
 1026  *
 1027  * - The worker thread calls ktls_decrypt to decrypt TLS records in
 1028  *   the TLS chain.  Each TLS record is detached from the TLS chain,
 1029  *   decrypted, and inserted into the regular socket buffer chain as
  1030  *   a record starting with a control message holding the TLS header
  1031  *   and a chain of mbufs holding the decrypted data.
 1032  */
 1033 
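       /*
        * Illustrative sketch (not part of this file): once decrypted,
        * each record is returned to userland as one record read, with
        * the TLS header in a control message, roughly:
        *
        *	struct tls_get_record tgr;
        *	struct cmsghdr *cmsg;
        *
        *	recvmsg(s, &msg, 0);
        *	cmsg = CMSG_FIRSTHDR(&msg);
        *	if (cmsg != NULL && cmsg->cmsg_level == IPPROTO_TCP &&
        *	    cmsg->cmsg_type == TLS_GET_RECORD)
        *		memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
        *
        * tgr then gives the record's type, TLS version, and length.
        */
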
 1034 static void
 1035 sb_mark_notready(struct sockbuf *sb)
 1036 {
 1037         struct mbuf *m;
 1038 
 1039         m = sb->sb_mb;
 1040         sb->sb_mtls = m;
 1041         sb->sb_mb = NULL;
 1042         sb->sb_mbtail = NULL;
 1043         sb->sb_lastrecord = NULL;
 1044         for (; m != NULL; m = m->m_next) {
 1045                 KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt != NULL",
 1046                     __func__));
 1047                 KASSERT((m->m_flags & M_NOTAVAIL) == 0, ("%s: mbuf not avail",
 1048                     __func__));
 1049                 KASSERT(sb->sb_acc >= m->m_len, ("%s: sb_acc < m->m_len",
 1050                     __func__));
 1051                 m->m_flags |= M_NOTREADY;
 1052                 sb->sb_acc -= m->m_len;
 1053                 sb->sb_tlscc += m->m_len;
 1054                 sb->sb_mtlstail = m;
 1055         }
 1056         KASSERT(sb->sb_acc == 0 && sb->sb_tlscc == sb->sb_ccc,
 1057             ("%s: acc %u tlscc %u ccc %u", __func__, sb->sb_acc, sb->sb_tlscc,
 1058             sb->sb_ccc));
 1059 }
 1060 
 1061 int
 1062 ktls_enable_rx(struct socket *so, struct tls_enable *en)
 1063 {
 1064         struct ktls_session *tls;
 1065         int error;
 1066 
 1067         if (!ktls_offload_enable)
 1068                 return (ENOTSUP);
 1069         if (SOLISTENING(so))
 1070                 return (EINVAL);
 1071 
 1072         counter_u64_add(ktls_offload_enable_calls, 1);
 1073 
 1074         /*
 1075          * This should always be true since only the TCP socket option
 1076          * invokes this function.
 1077          */
 1078         if (so->so_proto->pr_protocol != IPPROTO_TCP)
 1079                 return (EINVAL);
 1080 
 1081         /*
 1082          * XXX: Don't overwrite existing sessions.  We should permit
 1083          * this to support rekeying in the future.
 1084          */
 1085         if (so->so_rcv.sb_tls_info != NULL)
 1086                 return (EALREADY);
 1087 
 1088         if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable)
 1089                 return (ENOTSUP);
 1090 
 1091         error = ktls_create_session(so, en, &tls);
 1092         if (error)
 1093                 return (error);
 1094 
 1095 #ifdef TCP_OFFLOAD
 1096         error = ktls_try_toe(so, tls, KTLS_RX);
 1097         if (error)
 1098 #endif
 1099                 error = ktls_try_sw(so, tls, KTLS_RX);
 1100 
 1101         if (error) {
 1102                 ktls_free(tls);
 1103                 return (error);
 1104         }
 1105 
 1106         /* Mark the socket as using TLS offload. */
 1107         SOCKBUF_LOCK(&so->so_rcv);
 1108         so->so_rcv.sb_tls_seqno = be64dec(en->rec_seq);
 1109         so->so_rcv.sb_tls_info = tls;
 1110         so->so_rcv.sb_flags |= SB_TLS_RX;
 1111 
 1112         /* Mark existing data as not ready until it can be decrypted. */
 1113         if (tls->mode != TCP_TLS_MODE_TOE) {
 1114                 sb_mark_notready(&so->so_rcv);
 1115                 ktls_check_rx(&so->so_rcv);
 1116         }
 1117         SOCKBUF_UNLOCK(&so->so_rcv);
 1118 
 1119         counter_u64_add(ktls_offload_total, 1);
 1120 
 1121         return (0);
 1122 }
 1123 
 1124 int
 1125 ktls_enable_tx(struct socket *so, struct tls_enable *en)
 1126 {
 1127         struct ktls_session *tls;
 1128         struct inpcb *inp;
 1129         int error;
 1130 
 1131         if (!ktls_offload_enable)
 1132                 return (ENOTSUP);
 1133         if (SOLISTENING(so))
 1134                 return (EINVAL);
 1135 
 1136         counter_u64_add(ktls_offload_enable_calls, 1);
 1137 
 1138         /*
 1139          * This should always be true since only the TCP socket option
 1140          * invokes this function.
 1141          */
 1142         if (so->so_proto->pr_protocol != IPPROTO_TCP)
 1143                 return (EINVAL);
 1144 
 1145         /*
 1146          * XXX: Don't overwrite existing sessions.  We should permit
 1147          * this to support rekeying in the future.
 1148          */
 1149         if (so->so_snd.sb_tls_info != NULL)
 1150                 return (EALREADY);
 1151 
 1152         if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable)
 1153                 return (ENOTSUP);
 1154 
 1155         /* TLS requires ext pgs */
 1156         if (mb_use_ext_pgs == 0)
 1157                 return (ENXIO);
 1158 
 1159         error = ktls_create_session(so, en, &tls);
 1160         if (error)
 1161                 return (error);
 1162 
 1163         /* Prefer TOE -> ifnet TLS -> software TLS. */
 1164 #ifdef TCP_OFFLOAD
 1165         error = ktls_try_toe(so, tls, KTLS_TX);
 1166         if (error)
 1167 #endif
 1168                 error = ktls_try_ifnet(so, tls, false);
 1169         if (error)
 1170                 error = ktls_try_sw(so, tls, KTLS_TX);
 1171 
 1172         if (error) {
 1173                 ktls_free(tls);
 1174                 return (error);
 1175         }
 1176 
 1177         error = SOCK_IO_SEND_LOCK(so, SBL_WAIT);
 1178         if (error) {
 1179                 ktls_free(tls);
 1180                 return (error);
 1181         }
 1182 
 1183         /*
 1184          * Write lock the INP when setting sb_tls_info so that
 1185          * routines in tcp_ratelimit.c can read sb_tls_info while
 1186          * holding the INP lock.
 1187          */
 1188         inp = so->so_pcb;
 1189         INP_WLOCK(inp);
 1190         SOCKBUF_LOCK(&so->so_snd);
 1191         so->so_snd.sb_tls_seqno = be64dec(en->rec_seq);
 1192         so->so_snd.sb_tls_info = tls;
 1193         if (tls->mode != TCP_TLS_MODE_SW)
 1194                 so->so_snd.sb_flags |= SB_TLS_IFNET;
 1195         SOCKBUF_UNLOCK(&so->so_snd);
 1196         INP_WUNLOCK(inp);
 1197         SOCK_IO_SEND_UNLOCK(so);
 1198 
 1199         counter_u64_add(ktls_offload_total, 1);
 1200 
 1201         return (0);
 1202 }
 1203 
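       /*
        * Illustrative sketch (not part of this file): after finishing
        * the handshake in userland, a process hands the negotiated
        * keys down with the TCP-level TLS socket options, roughly:
        *
        *	struct tls_enable en;
        *
        *	(fill in keys, IV, and version as negotiated)
        *	setsockopt(s, IPPROTO_TCP, TCP_TXTLS_ENABLE, &en,
        *	    sizeof(en));
        *
        * TCP_RXTLS_ENABLE reaches ktls_enable_rx() the same way.
        */
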
 1204 int
 1205 ktls_get_rx_mode(struct socket *so)
 1206 {
 1207         struct ktls_session *tls;
 1208         struct inpcb *inp;
 1209         int mode;
 1210 
 1211         if (SOLISTENING(so))
 1212                 return (EINVAL);
 1213         inp = so->so_pcb;
 1214         INP_WLOCK_ASSERT(inp);
 1215         SOCKBUF_LOCK(&so->so_rcv);
 1216         tls = so->so_rcv.sb_tls_info;
 1217         if (tls == NULL)
 1218                 mode = TCP_TLS_MODE_NONE;
 1219         else
 1220                 mode = tls->mode;
 1221         SOCKBUF_UNLOCK(&so->so_rcv);
 1222         return (mode);
 1223 }
 1224 
 1225 int
 1226 ktls_get_tx_mode(struct socket *so)
 1227 {
 1228         struct ktls_session *tls;
 1229         struct inpcb *inp;
 1230         int mode;
 1231 
 1232         if (SOLISTENING(so))
 1233                 return (EINVAL);
 1234         inp = so->so_pcb;
 1235         INP_WLOCK_ASSERT(inp);
 1236         SOCKBUF_LOCK(&so->so_snd);
 1237         tls = so->so_snd.sb_tls_info;
 1238         if (tls == NULL)
 1239                 mode = TCP_TLS_MODE_NONE;
 1240         else
 1241                 mode = tls->mode;
 1242         SOCKBUF_UNLOCK(&so->so_snd);
 1243         return (mode);
 1244 }
 1245 
 1246 /*
 1247  * Switch between SW and ifnet TLS sessions as requested.
 1248  */
 1249 int
 1250 ktls_set_tx_mode(struct socket *so, int mode)
 1251 {
 1252         struct ktls_session *tls, *tls_new;
 1253         struct inpcb *inp;
 1254         int error;
 1255 
 1256         if (SOLISTENING(so))
 1257                 return (EINVAL);
 1258         switch (mode) {
 1259         case TCP_TLS_MODE_SW:
 1260         case TCP_TLS_MODE_IFNET:
 1261                 break;
 1262         default:
 1263                 return (EINVAL);
 1264         }
 1265 
 1266         inp = so->so_pcb;
 1267         INP_WLOCK_ASSERT(inp);
 1268         SOCKBUF_LOCK(&so->so_snd);
 1269         tls = so->so_snd.sb_tls_info;
 1270         if (tls == NULL) {
 1271                 SOCKBUF_UNLOCK(&so->so_snd);
 1272                 return (0);
 1273         }
 1274 
 1275         if (tls->mode == mode) {
 1276                 SOCKBUF_UNLOCK(&so->so_snd);
 1277                 return (0);
 1278         }
 1279 
 1280         tls = ktls_hold(tls);
 1281         SOCKBUF_UNLOCK(&so->so_snd);
 1282         INP_WUNLOCK(inp);
 1283 
 1284         tls_new = ktls_clone_session(tls);
 1285 
 1286         if (mode == TCP_TLS_MODE_IFNET)
 1287                 error = ktls_try_ifnet(so, tls_new, true);
 1288         else
 1289                 error = ktls_try_sw(so, tls_new, KTLS_TX);
 1290         if (error) {
 1291                 counter_u64_add(ktls_switch_failed, 1);
 1292                 ktls_free(tls_new);
 1293                 ktls_free(tls);
 1294                 INP_WLOCK(inp);
 1295                 return (error);
 1296         }
 1297 
 1298         error = SOCK_IO_SEND_LOCK(so, SBL_WAIT);
 1299         if (error) {
 1300                 counter_u64_add(ktls_switch_failed, 1);
 1301                 ktls_free(tls_new);
 1302                 ktls_free(tls);
 1303                 INP_WLOCK(inp);
 1304                 return (error);
 1305         }
 1306 
 1307         /*
 1308          * If we raced with another session change, keep the existing
 1309          * session.
 1310          */
 1311         if (tls != so->so_snd.sb_tls_info) {
 1312                 counter_u64_add(ktls_switch_failed, 1);
 1313                 SOCK_IO_SEND_UNLOCK(so);
 1314                 ktls_free(tls_new);
 1315                 ktls_free(tls);
 1316                 INP_WLOCK(inp);
 1317                 return (EBUSY);
 1318         }
 1319 
 1320         INP_WLOCK(inp);
 1321         SOCKBUF_LOCK(&so->so_snd);
 1322         so->so_snd.sb_tls_info = tls_new;
 1323         if (tls_new->mode != TCP_TLS_MODE_SW)
 1324                 so->so_snd.sb_flags |= SB_TLS_IFNET;
 1325         SOCKBUF_UNLOCK(&so->so_snd);
 1326         SOCK_IO_SEND_UNLOCK(so);
 1327 
 1328         /*
 1329          * Drop two references on 'tls'.  The first is for the
 1330          * ktls_hold() above.  The second drops the reference from the
 1331          * socket buffer.
 1332          */
 1333         KASSERT(tls->refcount >= 2, ("too few references on old session"));
 1334         ktls_free(tls);
 1335         ktls_free(tls);
 1336 
 1337         if (mode == TCP_TLS_MODE_IFNET)
 1338                 counter_u64_add(ktls_switch_to_ifnet, 1);
 1339         else
 1340                 counter_u64_add(ktls_switch_to_sw, 1);
 1341 
 1342         return (0);
 1343 }
 1344 
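       /*
        * Illustrative note (not part of this file): the switch above
        * is driven from userland by the TCP_TXTLS_MODE socket option,
        * e.g. to force a session back to software crypto:
        *
        *	int mode = TCP_TLS_MODE_SW;
        *
        *	setsockopt(s, IPPROTO_TCP, TCP_TXTLS_MODE, &mode,
        *	    sizeof(mode));
        */
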
 1345 /*
 1346  * Try to allocate a new TLS send tag.  This task is scheduled when
 1347  * ip_output detects a route change while trying to transmit a packet
 1348  * holding a TLS record.  If a new tag is allocated, replace the tag
 1349  * in the TLS session.  Subsequent packets on the connection will use
 1350  * the new tag.  If a new tag cannot be allocated, drop the
 1351  * connection.
 1352  */
 1353 static void
 1354 ktls_reset_send_tag(void *context, int pending)
 1355 {
 1356         struct epoch_tracker et;
 1357         struct ktls_session *tls;
 1358         struct m_snd_tag *old, *new;
 1359         struct inpcb *inp;
 1360         struct tcpcb *tp;
 1361         int error;
 1362 
 1363         MPASS(pending == 1);
 1364 
 1365         tls = context;
 1366         inp = tls->inp;
 1367 
 1368         /*
 1369          * Free the old tag first before allocating a new one.
 1370          * ip[6]_output_send() will treat a NULL send tag the same as
 1371          * an ifp mismatch and drop packets until a new tag is
 1372          * allocated.
 1373          *
 1374          * Write-lock the INP when changing tls->snd_tag since
 1375          * ip[6]_output_send() holds a read-lock when reading the
 1376          * pointer.
 1377          */
 1378         INP_WLOCK(inp);
 1379         old = tls->snd_tag;
 1380         tls->snd_tag = NULL;
 1381         INP_WUNLOCK(inp);
 1382         if (old != NULL)
 1383                 m_snd_tag_rele(old);
 1384 
 1385         error = ktls_alloc_snd_tag(inp, tls, true, &new);
 1386 
 1387         if (error == 0) {
 1388                 INP_WLOCK(inp);
 1389                 tls->snd_tag = new;
 1390                 mtx_pool_lock(mtxpool_sleep, tls);
 1391                 tls->reset_pending = false;
 1392                 mtx_pool_unlock(mtxpool_sleep, tls);
 1393                 if (!in_pcbrele_wlocked(inp))
 1394                         INP_WUNLOCK(inp);
 1395 
 1396                 counter_u64_add(ktls_ifnet_reset, 1);
 1397 
 1398                 /*
 1399                  * XXX: Should we kick tcp_output explicitly now that
 1400                  * the send tag is fixed or just rely on timers?
 1401                  */
 1402         } else {
 1403                 NET_EPOCH_ENTER(et);
 1404                 INP_WLOCK(inp);
 1405                 if (!in_pcbrele_wlocked(inp)) {
 1406                         if (!(inp->inp_flags & INP_TIMEWAIT) &&
 1407                             !(inp->inp_flags & INP_DROPPED)) {
 1408                                 tp = intotcpcb(inp);
 1409                                 CURVNET_SET(tp->t_vnet);
 1410                                 tp = tcp_drop(tp, ECONNABORTED);
 1411                                 CURVNET_RESTORE();
 1412                                 if (tp != NULL)
 1413                                         INP_WUNLOCK(inp);
 1414                                 counter_u64_add(ktls_ifnet_reset_dropped, 1);
 1415                         } else
 1416                                 INP_WUNLOCK(inp);
 1417                 }
 1418                 NET_EPOCH_EXIT(et);
 1419 
 1420                 counter_u64_add(ktls_ifnet_reset_failed, 1);
 1421 
 1422                 /*
 1423                  * Leave reset_pending true to avoid future tasks while
 1424                  * the socket goes away.
 1425                  */
 1426         }
 1427 
 1428         ktls_free(tls);
 1429 }
 1430 
 1431 int
 1432 ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls)
 1433 {
 1434 
 1435         if (inp == NULL)
 1436                 return (ENOBUFS);
 1437 
 1438         INP_LOCK_ASSERT(inp);
 1439 
 1440         /*
 1441          * See if we should schedule a task to update the send tag for
 1442          * this session.
 1443          */
 1444         mtx_pool_lock(mtxpool_sleep, tls);
 1445         if (!tls->reset_pending) {
 1446                 (void) ktls_hold(tls);
 1447                 in_pcbref(inp);
 1448                 tls->inp = inp;
 1449                 tls->reset_pending = true;
 1450                 taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task);
 1451         }
 1452         mtx_pool_unlock(mtxpool_sleep, tls);
 1453         return (ENOBUFS);
 1454 }
 1455 
 1456 #ifdef RATELIMIT
 1457 int
 1458 ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate)
 1459 {
 1460         union if_snd_tag_modify_params params = {
 1461                 .rate_limit.max_rate = max_pacing_rate,
 1462                 .rate_limit.flags = M_NOWAIT,
 1463         };
 1464         struct m_snd_tag *mst;
 1465         struct ifnet *ifp;
 1466         int error;
 1467 
 1468         /* Can't get to the inp, but it should be locked. */
 1469         /* INP_LOCK_ASSERT(inp); */
 1470 
 1471         MPASS(tls->mode == TCP_TLS_MODE_IFNET);
 1472 
 1473         if (tls->snd_tag == NULL) {
 1474                 /*
 1475                  * Resetting send tag, ignore this change.  The
 1476                  * pending reset may or may not see this updated rate
 1477                  * in the tcpcb.  If it doesn't, we will just lose
 1478                  * this rate change.
 1479                  */
 1480                 return (0);
 1481         }
 1482 
 1483         mst = tls->snd_tag;
 1484 
 1485         MPASS(mst != NULL);
 1486         MPASS(mst->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT);
 1487 
 1488         ifp = mst->ifp;
 1489         return (ifp->if_snd_tag_modify(mst, &params));
 1490 }
 1491 #endif
 1492 #endif
 1493 
 1494 void
 1495 ktls_destroy(struct ktls_session *tls)
 1496 {
 1497         struct rm_priotracker prio;
 1498 
 1499         if (tls->sequential_records) {
 1500                 struct mbuf *m, *n;
 1501                 int page_count;
 1502 
 1503                 STAILQ_FOREACH_SAFE(m, &tls->pending_records, m_epg_stailq, n) {
 1504                         page_count = m->m_epg_enc_cnt;
 1505                         while (page_count > 0) {
 1506                                 KASSERT(page_count >= m->m_epg_nrdy,
 1507                                     ("%s: too few pages", __func__));
 1508                                 page_count -= m->m_epg_nrdy;
 1509                                 m = m_free(m);
 1510                         }
 1511                 }
 1512         }
 1513         ktls_cleanup(tls);
 1514         if (tls->be != NULL && ktls_allow_unload) {
 1515                 rm_rlock(&ktls_backends_lock, &prio);
 1516                 tls->be->use_count--;
 1517                 rm_runlock(&ktls_backends_lock, &prio);
 1518         }
 1519         uma_zfree(ktls_session_zone, tls);
 1520 }
 1521 
 1522 void
 1523 ktls_seq(struct sockbuf *sb, struct mbuf *m)
 1524 {
 1525 
 1526         for (; m != NULL; m = m->m_next) {
 1527                 KASSERT((m->m_flags & M_EXTPG) != 0,
 1528                     ("ktls_seq: mapped mbuf %p", m));
 1529 
 1530                 m->m_epg_seqno = sb->sb_tls_seqno;
 1531                 sb->sb_tls_seqno++;
 1532         }
 1533 }
 1534 
 1535 /*
 1536  * Add TLS framing (headers and trailers) to a chain of mbufs.  Each
 1537  * mbuf in the chain must be an unmapped mbuf.  The payload of the
 1538  * mbuf must be populated with the payload of each TLS record.
 1539  *
 1540  * The record_type argument specifies the TLS record type used when
 1541  * populating the TLS header.
 1542  *
 1543  * The enq_cnt argument on return is set to the number of pages of
 1544  * payload data for this entire chain that need to be encrypted via SW
 1545  * encryption.  The returned value should be passed to ktls_enqueue
 1546  * when scheduling encryption of this chain of mbufs.  To handle the
 1547  * special case of empty fragments for TLS 1.0 sessions, an empty
 1548  * fragment counts as one page.
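       *
       * As an illustrative example (the exact lengths depend on the
       * negotiated cipher suite): for a TLS 1.2 AES-GCM session,
       * tls_hlen covers the 5-byte record header plus an 8-byte
       * explicit nonce (13 bytes) and tls_tlen is the 16-byte
       * authentication tag, so a 4096-byte payload becomes a
       * 4125-byte record whose header tls_length field is 4120.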
 1549  */
 1550 void
 1551 ktls_frame(struct mbuf *top, struct ktls_session *tls, int *enq_cnt,
 1552     uint8_t record_type)
 1553 {
 1554         struct tls_record_layer *tlshdr;
 1555         struct mbuf *m;
 1556         uint64_t *noncep;
 1557         uint16_t tls_len;
 1558         int maxlen;
 1559 
 1560         maxlen = tls->params.max_frame_len;
 1561         *enq_cnt = 0;
 1562         for (m = top; m != NULL; m = m->m_next) {
 1563                 /*
 1564                  * All mbufs in the chain should be TLS records whose
 1565                  * payload does not exceed the maximum frame length.
 1566                  *
 1567                  * Empty TLS 1.0 records are permitted when using CBC.
 1568                  */
 1569                 KASSERT(m->m_len <= maxlen && m->m_len >= 0 &&
 1570                     (m->m_len > 0 || ktls_permit_empty_frames(tls)),
 1571                     ("ktls_frame: m %p len %d", m, m->m_len));
 1572 
 1573                 /*
 1574                  * TLS frames require unmapped mbufs to store session
 1575                  * info.
 1576                  */
 1577                 KASSERT((m->m_flags & M_EXTPG) != 0,
 1578                     ("ktls_frame: mapped mbuf %p (top = %p)", m, top));
 1579 
 1580                 tls_len = m->m_len;
 1581 
 1582                 /* Save a reference to the session. */
 1583                 m->m_epg_tls = ktls_hold(tls);
 1584 
 1585                 m->m_epg_hdrlen = tls->params.tls_hlen;
 1586                 m->m_epg_trllen = tls->params.tls_tlen;
 1587                 if (tls->params.cipher_algorithm == CRYPTO_AES_CBC) {
 1588                         int bs, delta;
 1589 
 1590                         /*
 1591                          * AES-CBC pads messages to a multiple of the
 1592                          * block size.  Note that the padding is
 1593                          * applied after the digest and the encryption
 1594                          * is done on the "plaintext || mac || padding".
 1595                          * At least one byte of padding is always
 1596                          * present.
 1597                          *
 1598                          * Compute the final trailer length assuming
 1599                          * at most one block of padding.
 1600                          * tls->params.tls_tlen is the maximum
 1601                          * possible trailer length (padding + digest).
 1602                          * delta holds the number of excess padding
 1603                          * bytes if the maximum were used.  Those
 1604                          * extra bytes are removed.
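                               *
                               * For example (hypothetical sizes: a
                               * 20-byte SHA-1 digest and a maximum
                               * trailer of 32 bytes): with
                               * tls_len = 100,
                               * delta = (100 + 32) & 15 = 4, so the
                               * trailer shrinks to 28 bytes (20-byte
                               * MAC plus 8 bytes of padding), making
                               * the encrypted payload 100 + 28 = 128
                               * bytes, an exact multiple of the
                               * 16-byte block size.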
 1605                          */
 1606                         bs = tls->params.tls_bs;
 1607                         delta = (tls_len + tls->params.tls_tlen) & (bs - 1);
 1608                         m->m_epg_trllen -= delta;
 1609                 }
 1610                 m->m_len += m->m_epg_hdrlen + m->m_epg_trllen;
 1611 
 1612                 /* Populate the TLS header. */
 1613                 tlshdr = (void *)m->m_epg_hdr;
 1614                 tlshdr->tls_vmajor = tls->params.tls_vmajor;
 1615 
 1616                 /*
 1617                  * TLS 1.3 masquerades as TLS 1.2 with a record type
 1618                  * of TLS_RLTYPE_APP.
 1619                  */
 1620                 if (tls->params.tls_vminor == TLS_MINOR_VER_THREE &&
 1621                     tls->params.tls_vmajor == TLS_MAJOR_VER_ONE) {
 1622                         tlshdr->tls_vminor = TLS_MINOR_VER_TWO;
 1623                         tlshdr->tls_type = TLS_RLTYPE_APP;
 1624                         /* save the real record type for later */
 1625                         m->m_epg_record_type = record_type;
 1626                         m->m_epg_trail[0] = record_type;
 1627                 } else {
 1628                         tlshdr->tls_vminor = tls->params.tls_vminor;
 1629                         tlshdr->tls_type = record_type;
 1630                 }
 1631                 tlshdr->tls_length = htons(m->m_len - sizeof(*tlshdr));
 1632 
 1633                 /*
 1634                  * Store nonces / explicit IVs after the end of the
 1635                  * TLS header.
 1636                  *
 1637                  * For GCM with TLS 1.2, an 8 byte nonce is copied
 1638                  * from the end of the IV.  The nonce is then
 1639                  * incremented for use by the next record.
 1640                  *
 1641                  * For CBC, a random nonce is inserted for TLS 1.1+.
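                 *
                 * As an illustrative aside, in TLS 1.2 AES-GCM the full
                 * 12-byte GCM nonce is the 4-byte implicit salt from the
                 * key block followed by this 8-byte explicit value, which
                 * travels on the wire between the record header and the
                 * ciphertext.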
 1642                  */
 1643                 if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16 &&
 1644                     tls->params.tls_vminor == TLS_MINOR_VER_TWO) {
 1645                         noncep = (uint64_t *)(tls->params.iv + 8);
 1646                         be64enc(tlshdr + 1, *noncep);
 1647                         (*noncep)++;
 1648                 } else if (tls->params.cipher_algorithm == CRYPTO_AES_CBC &&
 1649                     tls->params.tls_vminor >= TLS_MINOR_VER_ONE)
 1650                         arc4rand(tlshdr + 1, AES_BLOCK_LEN, 0);
 1651 
 1652                 /*
 1653                  * When using SW encryption, mark the mbuf not ready.
 1654                  * It will be marked ready via sbready() after the
 1655                  * record has been encrypted.
 1656                  *
 1657                  * When using ifnet TLS, unencrypted TLS records are
 1658                  * sent down the stack to the NIC.
 1659                  */
 1660                 if (tls->mode == TCP_TLS_MODE_SW) {
 1661                         m->m_flags |= M_NOTREADY;
 1662                         if (__predict_false(tls_len == 0)) {
 1663                                 /* TLS 1.0 empty fragment. */
 1664                                 m->m_epg_nrdy = 1;
 1665                         } else
 1666                                 m->m_epg_nrdy = m->m_epg_npgs;
 1667                         *enq_cnt += m->m_epg_nrdy;
 1668                 }
 1669         }
 1670 }
 1671 
 1672 bool
 1673 ktls_permit_empty_frames(struct ktls_session *tls)
 1674 {
 1675         return (tls->params.cipher_algorithm == CRYPTO_AES_CBC &&
 1676             tls->params.tls_vminor == TLS_MINOR_VER_ZERO);
 1677 }
 1678 
 1679 void
 1680 ktls_check_rx(struct sockbuf *sb)
 1681 {
 1682         struct tls_record_layer hdr;
 1683         struct ktls_wq *wq;
 1684         struct socket *so;
 1685         bool running;
 1686 
 1687         SOCKBUF_LOCK_ASSERT(sb);
 1688         KASSERT(sb->sb_flags & SB_TLS_RX, ("%s: sockbuf %p isn't TLS RX",
 1689             __func__, sb));
 1690         so = __containerof(sb, struct socket, so_rcv);
 1691 
 1692         if (sb->sb_flags & SB_TLS_RX_RUNNING)
 1693                 return;
 1694 
 1695         /* Is there enough queued for a TLS header? */
 1696         if (sb->sb_tlscc < sizeof(hdr)) {
 1697                 if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc != 0)
 1698                         so->so_error = EMSGSIZE;
 1699                 return;
 1700         }
 1701 
 1702         m_copydata(sb->sb_mtls, 0, sizeof(hdr), (void *)&hdr);
 1703 
 1704         /* Is the entire record queued? */
 1705         if (sb->sb_tlscc < sizeof(hdr) + ntohs(hdr.tls_length)) {
 1706                 if ((sb->sb_state & SBS_CANTRCVMORE) != 0)
 1707                         so->so_error = EMSGSIZE;
 1708                 return;
 1709         }
 1710 
 1711         sb->sb_flags |= SB_TLS_RX_RUNNING;
 1712 
 1713         soref(so);
 1714         wq = &ktls_wq[so->so_rcv.sb_tls_info->wq_index];
 1715         mtx_lock(&wq->mtx);
 1716         STAILQ_INSERT_TAIL(&wq->so_head, so, so_ktls_rx_list);
 1717         running = wq->running;
 1718         mtx_unlock(&wq->mtx);
 1719         if (!running)
 1720                 wakeup(wq);
 1721         counter_u64_add(ktls_cnt_rx_queued, 1);
 1722 }
 1723 
 1724 static struct mbuf *
 1725 ktls_detach_record(struct sockbuf *sb, int len)
 1726 {
 1727         struct mbuf *m, *n, *top;
 1728         int remain;
 1729 
 1730         SOCKBUF_LOCK_ASSERT(sb);
 1731         MPASS(len <= sb->sb_tlscc);
 1732 
 1733         /*
 1734          * If TLS chain is the exact size of the record,
 1735          * just grab the whole record.
 1736          */
 1737         top = sb->sb_mtls;
 1738         if (sb->sb_tlscc == len) {
 1739                 sb->sb_mtls = NULL;
 1740                 sb->sb_mtlstail = NULL;
 1741                 goto out;
 1742         }
 1743 
 1744         /*
 1745          * While it would be nice to use m_split() here, we need
 1746          * to know exactly what m_split() allocates to update the
 1747          * accounting, so do it inline instead.
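         *
         * As a hypothetical illustration: for a 100-byte record whose
         * chain starts with a 60-byte mbuf followed by an 80-byte
         * mbuf, the loop below stops at the second mbuf with
         * remain = 40; a new mbuf 'n' then takes over its trailing
         * 40 bytes and becomes the new head of the sb_mtls chain.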
 1748          */
 1749         remain = len;
 1750         for (m = top; remain > m->m_len; m = m->m_next)
 1751                 remain -= m->m_len;
 1752 
 1753         /* Easy case: don't have to split 'm'. */
 1754         if (remain == m->m_len) {
 1755                 sb->sb_mtls = m->m_next;
 1756                 if (sb->sb_mtls == NULL)
 1757                         sb->sb_mtlstail = NULL;
 1758                 m->m_next = NULL;
 1759                 goto out;
 1760         }
 1761 
 1762         /*
 1763          * Need to allocate an mbuf to hold the remainder of 'm'.  Try
 1764          * with M_NOWAIT first.
 1765          */
 1766         n = m_get(M_NOWAIT, MT_DATA);
 1767         if (n == NULL) {
 1768                 /*
 1769                  * Use M_WAITOK with socket buffer unlocked.  If
 1770                  * 'sb_mtls' changes while the lock is dropped, return
 1771                  * NULL to force the caller to retry.
 1772                  */
 1773                 SOCKBUF_UNLOCK(sb);
 1774 
 1775                 n = m_get(M_WAITOK, MT_DATA);
 1776 
 1777                 SOCKBUF_LOCK(sb);
 1778                 if (sb->sb_mtls != top) {
 1779                         m_free(n);
 1780                         return (NULL);
 1781                 }
 1782         }
 1783         n->m_flags |= M_NOTREADY;
 1784 
 1785         /* Store remainder in 'n'. */
 1786         n->m_len = m->m_len - remain;
 1787         if (m->m_flags & M_EXT) {
 1788                 n->m_data = m->m_data + remain;
 1789                 mb_dupcl(n, m);
 1790         } else {
 1791                 bcopy(mtod(m, caddr_t) + remain, mtod(n, caddr_t), n->m_len);
 1792         }
 1793 
 1794         /* Trim 'm' and update accounting. */
 1795         m->m_len -= n->m_len;
 1796         sb->sb_tlscc -= n->m_len;
 1797         sb->sb_ccc -= n->m_len;
 1798 
 1799         /* Account for 'n'. */
 1800         sballoc_ktls_rx(sb, n);
 1801 
 1802         /* Insert 'n' into the TLS chain. */
 1803         sb->sb_mtls = n;
 1804         n->m_next = m->m_next;
 1805         if (sb->sb_mtlstail == m)
 1806                 sb->sb_mtlstail = n;
 1807 
 1808         /* Detach the record from the TLS chain. */
 1809         m->m_next = NULL;
 1810 
 1811 out:
 1812         MPASS(m_length(top, NULL) == len);
 1813         for (m = top; m != NULL; m = m->m_next)
 1814                 sbfree_ktls_rx(sb, m);
 1815         sb->sb_tlsdcc = len;
 1816         sb->sb_ccc += len;
 1817         SBCHECK(sb);
 1818         return (top);
 1819 }
 1820 
 1821 /*
 1822  * Determine the length of the trailing zero padding and find the real
 1823  * record type in the byte before the padding.
 1824  *
 1825  * Walking the mbuf chain backwards is clumsy, so another option would
 1826  * be to scan forwards remembering the last non-zero byte before the
 1827  * trailer.  However, it would be expensive to scan the entire record.
 1828  * Instead, find the last non-zero byte of each mbuf in the chain,
 1829  * keeping track of the relative offset of that non-zero byte.
 1830  *
 1831  * On input, *trailer_len is the size of the MAC/tag; on return it
 1832  * is set to the size of the full trailer, including the padding and
 1833  * the record type byte.
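       *
       * As a worked example (sizes are illustrative): with a 5-byte
       * header, a 3-byte payload, the content-type byte, 4 bytes of
       * zero padding, and a 16-byte tag, tls_len is 29 and
       * digest_start is 13.  The scan finds the content-type byte at
       * offset 8, so last_offset becomes 9 and *trailer_len is
       * returned as 29 - 9 + 1 = 21 (tag, padding, and type byte).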
 1834  */
 1835 static int
 1836 tls13_find_record_type(struct ktls_session *tls, struct mbuf *m, int tls_len,
 1837     int *trailer_len, uint8_t *record_typep)
 1838 {
 1839         char *cp;
 1840         u_int digest_start, last_offset, m_len, offset;
 1841         uint8_t record_type;
 1842 
 1843         digest_start = tls_len - *trailer_len;
 1844         last_offset = 0;
 1845         offset = 0;
 1846         for (; m != NULL && offset < digest_start;
 1847              offset += m->m_len, m = m->m_next) {
 1848                 /* Don't look for padding in the tag. */
 1849                 m_len = min(digest_start - offset, m->m_len);
 1850                 cp = mtod(m, char *);
 1851 
 1852                 /* Find last non-zero byte in this mbuf. */
 1853                 while (m_len > 0 && cp[m_len - 1] == 0)
 1854                         m_len--;
 1855                 if (m_len > 0) {
 1856                         record_type = cp[m_len - 1];
 1857                         last_offset = offset + m_len;
 1858                 }
 1859         }
 1860         if (last_offset < tls->params.tls_hlen)
 1861                 return (EBADMSG);
 1862 
 1863         *record_typep = record_type;
 1864         *trailer_len = tls_len - last_offset + 1;
 1865         return (0);
 1866 }
 1867 
 1868 static void
 1869 ktls_drop(struct socket *so, int error)
 1870 {
 1871         struct epoch_tracker et;
 1872         struct inpcb *inp = sotoinpcb(so);
 1873         struct tcpcb *tp;
 1874 
 1875         NET_EPOCH_ENTER(et);
 1876         INP_WLOCK(inp);
 1877         if (!(inp->inp_flags & INP_DROPPED)) {
 1878                 tp = intotcpcb(inp);
 1879                 CURVNET_SET(inp->inp_vnet);
 1880                 tp = tcp_drop(tp, error);
 1881                 CURVNET_RESTORE();
 1882                 if (tp != NULL)
 1883                         INP_WUNLOCK(inp);
 1884         } else {
 1885                 so->so_error = error;
 1886                 SOCKBUF_LOCK(&so->so_rcv);
 1887                 sorwakeup_locked(so);
 1888                 INP_WUNLOCK(inp);
 1889         }
 1890         NET_EPOCH_EXIT(et);
 1891 }
 1892 
 1893 static void
 1894 ktls_decrypt(struct socket *so)
 1895 {
 1896         char tls_header[MBUF_PEXT_HDR_LEN];
 1897         struct ktls_session *tls;
 1898         struct sockbuf *sb;
 1899         struct tls_record_layer *hdr;
 1900         struct tls_get_record tgr;
 1901         struct mbuf *control, *data, *m;
 1902         uint64_t seqno;
 1903         int error, remain, tls_len, trail_len;
 1904         bool tls13;
 1905         uint8_t vminor, record_type;
 1906 
 1907         hdr = (struct tls_record_layer *)tls_header;
 1908         sb = &so->so_rcv;
 1909         SOCKBUF_LOCK(sb);
 1910         KASSERT(sb->sb_flags & SB_TLS_RX_RUNNING,
 1911             ("%s: socket %p not running", __func__, so));
 1912 
 1913         tls = sb->sb_tls_info;
 1914         MPASS(tls != NULL);
 1915 
 1916         tls13 = (tls->params.tls_vminor == TLS_MINOR_VER_THREE);
 1917         if (tls13)
 1918                 vminor = TLS_MINOR_VER_TWO;
 1919         else
 1920                 vminor = tls->params.tls_vminor;
 1921         for (;;) {
 1922                 /* Is there enough queued for a TLS header? */
 1923                 if (sb->sb_tlscc < tls->params.tls_hlen)
 1924                         break;
 1925 
 1926                 m_copydata(sb->sb_mtls, 0, tls->params.tls_hlen, tls_header);
 1927                 tls_len = sizeof(*hdr) + ntohs(hdr->tls_length);
 1928 
 1929                 if (hdr->tls_vmajor != tls->params.tls_vmajor ||
 1930                     hdr->tls_vminor != vminor)
 1931                         error = EINVAL;
 1932                 else if (tls13 && hdr->tls_type != TLS_RLTYPE_APP)
 1933                         error = EINVAL;
 1934                 else if (tls_len < tls->params.tls_hlen || tls_len >
 1935                     tls->params.tls_hlen + TLS_MAX_MSG_SIZE_V10_2 +
 1936                     tls->params.tls_tlen)
 1937                         error = EMSGSIZE;
 1938                 else
 1939                         error = 0;
 1940                 if (__predict_false(error != 0)) {
 1941                         /*
 1942                          * We have a corrupted record and are likely
 1943                          * out of sync.  The connection isn't
 1944                          * recoverable at this point, so abort it.
 1945                          */
 1946                         SOCKBUF_UNLOCK(sb);
 1947                         counter_u64_add(ktls_offload_corrupted_records, 1);
 1948 
 1949                         ktls_drop(so, error);
 1950                         goto deref;
 1951                 }
 1952 
 1953                 /* Is the entire record queued? */
 1954                 if (sb->sb_tlscc < tls_len)
 1955                         break;
 1956 
 1957                 /*
 1958                  * Split out the portion of the mbuf chain containing
 1959                  * this TLS record.
 1960                  */
 1961                 data = ktls_detach_record(sb, tls_len);
 1962                 if (data == NULL)
 1963                         continue;
 1964                 MPASS(sb->sb_tlsdcc == tls_len);
 1965 
 1966                 seqno = sb->sb_tls_seqno;
 1967                 sb->sb_tls_seqno++;
 1968                 SBCHECK(sb);
 1969                 SOCKBUF_UNLOCK(sb);
 1970 
 1971                 error = tls->sw_decrypt(tls, hdr, data, seqno, &trail_len);
 1972                 if (error == 0) {
 1973                         if (tls13)
 1974                                 error = tls13_find_record_type(tls, data,
 1975                                     tls_len, &trail_len, &record_type);
 1976                         else
 1977                                 record_type = hdr->tls_type;
 1978                 }
 1979                 if (error) {
 1980                         counter_u64_add(ktls_offload_failed_crypto, 1);
 1981 
 1982                         SOCKBUF_LOCK(sb);
 1983                         if (sb->sb_tlsdcc == 0) {
 1984                                 /*
 1985                                  * sbcut/drop/flush discarded these
 1986                                  * mbufs.
 1987                                  */
 1988                                 m_freem(data);
 1989                                 break;
 1990                         }
 1991 
 1992                         /*
 1993                          * Drop this TLS record's data, but keep
 1994                          * decrypting subsequent records.
 1995                          */
 1996                         sb->sb_ccc -= tls_len;
 1997                         sb->sb_tlsdcc = 0;
 1998 
 1999                         CURVNET_SET(so->so_vnet);
 2000                         so->so_error = EBADMSG;
 2001                         sorwakeup_locked(so);
 2002                         CURVNET_RESTORE();
 2003 
 2004                         m_freem(data);
 2005 
 2006                         SOCKBUF_LOCK(sb);
 2007                         continue;
 2008                 }
 2009 
 2010                 /* Allocate the control mbuf. */
 2011                 memset(&tgr, 0, sizeof(tgr));
 2012                 tgr.tls_type = record_type;
 2013                 tgr.tls_vmajor = hdr->tls_vmajor;
 2014                 tgr.tls_vminor = hdr->tls_vminor;
 2015                 tgr.tls_length = htobe16(tls_len - tls->params.tls_hlen -
 2016                     trail_len);
 2017                 control = sbcreatecontrol_how(&tgr, sizeof(tgr),
 2018                     TLS_GET_RECORD, IPPROTO_TCP, M_WAITOK);
 2019 
 2020                 SOCKBUF_LOCK(sb);
 2021                 if (sb->sb_tlsdcc == 0) {
 2022                         /* sbcut/drop/flush discarded these mbufs. */
 2023                         MPASS(sb->sb_tlscc == 0);
 2024                         m_freem(data);
 2025                         m_freem(control);
 2026                         break;
 2027                 }
 2028 
 2029                 /*
 2030                  * Clear the 'dcc' accounting in preparation for
 2031                  * adding the decrypted record.
 2032                  */
 2033                 sb->sb_ccc -= tls_len;
 2034                 sb->sb_tlsdcc = 0;
 2035                 SBCHECK(sb);
 2036 
 2037                 /* If there is no payload, drop all of the data. */
 2038                 if (tgr.tls_length == htobe16(0)) {
 2039                         m_freem(data);
 2040                         data = NULL;
 2041                 } else {
 2042                         /* Trim header. */
 2043                         remain = tls->params.tls_hlen;
 2044                         while (remain > 0) {
 2045                                 if (data->m_len > remain) {
 2046                                         data->m_data += remain;
 2047                                         data->m_len -= remain;
 2048                                         break;
 2049                                 }
 2050                                 remain -= data->m_len;
 2051                                 data = m_free(data);
 2052                         }
 2053 
 2054                         /* Trim trailer and clear M_NOTREADY. */
 2055                         remain = be16toh(tgr.tls_length);
 2056                         m = data;
 2057                         for (; remain > m->m_len; m = m->m_next) {
 2058                                 m->m_flags &= ~M_NOTREADY;
 2059                                 remain -= m->m_len;
 2060                         }
 2061                         m->m_len = remain;
 2062                         m_freem(m->m_next);
 2063                         m->m_next = NULL;
 2064                         m->m_flags &= ~M_NOTREADY;
 2065 
 2066                         /* Set EOR on the final mbuf. */
 2067                         m->m_flags |= M_EOR;
 2068                 }
 2069 
 2070                 sbappendcontrol_locked(sb, data, control, 0);
 2071         }
 2072 
 2073         sb->sb_flags &= ~SB_TLS_RX_RUNNING;
 2074 
 2075         if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc > 0)
 2076                 so->so_error = EMSGSIZE;
 2077 
 2078         sorwakeup_locked(so);
 2079 
 2080 deref:
 2081         SOCKBUF_UNLOCK_ASSERT(sb);
 2082 
 2083         CURVNET_SET(so->so_vnet);
 2084         SOCK_LOCK(so);
 2085         sorele(so);
 2086         CURVNET_RESTORE();
 2087 }
 2088 
 2089 void
 2090 ktls_enqueue_to_free(struct mbuf *m)
 2091 {
 2092         struct ktls_wq *wq;
 2093         bool running;
 2094 
 2095         /* Mark it for freeing. */
 2096         m->m_epg_flags |= EPG_FLAG_2FREE;
 2097         wq = &ktls_wq[m->m_epg_tls->wq_index];
 2098         mtx_lock(&wq->mtx);
 2099         STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq);
 2100         running = wq->running;
 2101         mtx_unlock(&wq->mtx);
 2102         if (!running)
 2103                 wakeup(wq);
 2104 }
 2105 
 2106 /* Number of TLS records in a batch passed to ktls_enqueue(). */
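      /*
       * For instance, in a hypothetical two-record batch whose records
       * span 3 and 2 pages, the head mbuf has m_epg_enc_cnt = 5 while
       * the record mbufs carry m_epg_nrdy values of 3 and 2, so the
       * loop below counts 2 records.
       */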
 2107 static u_int
 2108 ktls_batched_records(struct mbuf *m)
 2109 {
 2110         int page_count, records;
 2111 
 2112         records = 0;
 2113         page_count = m->m_epg_enc_cnt;
 2114         while (page_count > 0) {
 2115                 records++;
 2116                 page_count -= m->m_epg_nrdy;
 2117                 m = m->m_next;
 2118         }
 2119         KASSERT(page_count == 0, ("%s: mismatched page count", __func__));
 2120         return (records);
 2121 }
 2122 
 2123 void
 2124 ktls_enqueue(struct mbuf *m, struct socket *so, int page_count)
 2125 {
 2126         struct ktls_session *tls;
 2127         struct ktls_wq *wq;
 2128         int queued;
 2129         bool running;
 2130 
 2131         KASSERT(((m->m_flags & (M_EXTPG | M_NOTREADY)) ==
 2132             (M_EXTPG | M_NOTREADY)),
 2133             ("ktls_enqueue: %p not unready & nomap mbuf\n", m));
 2134         KASSERT(page_count != 0, ("enqueueing TLS mbuf with zero page count"));
 2135 
 2136         KASSERT(m->m_epg_tls->mode == TCP_TLS_MODE_SW, ("!SW TLS mbuf"));
 2137 
 2138         m->m_epg_enc_cnt = page_count;
 2139 
 2140         /*
 2141          * Save a pointer to the socket.  The caller is responsible
 2142          * for taking an additional reference via soref().
 2143          */
 2144         m->m_epg_so = so;
 2145 
 2146         queued = 1;
 2147         tls = m->m_epg_tls;
 2148         wq = &ktls_wq[tls->wq_index];
 2149         mtx_lock(&wq->mtx);
 2150         if (__predict_false(tls->sequential_records)) {
 2151                 /*
 2152                  * For TLS 1.0, records must be encrypted
 2153                  * sequentially.  For a given connection, all records
 2154                  * queued to the associated work queue are processed
 2155                  * sequentially.  However, sendfile(2) might complete
 2156                  * I/O requests spanning multiple TLS records out of
 2157                  * order.  Here we ensure TLS records are enqueued to
 2158                  * the work queue in FIFO order.
 2159                  *
 2160                  * tls->next_seqno holds the sequence number of the
 2161                  * next TLS record that should be enqueued to the work
 2162                  * queue.  If this next record is not tls->next_seqno,
 2163                  * it must be a future record, so insert it, sorted by
 2164                  * TLS sequence number, into tls->pending_records and
 2165                  * return.
 2166                  *
 2167                  * If this TLS record matches tls->next_seqno, place
 2168                  * it in the work queue and then check
 2169                  * tls->pending_records to see if any
 2170                  * previously-queued records are now ready for
 2171                  * encryption.
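                 *
                 * For example (with hypothetical sequence numbers): if
                 * next_seqno is 10 and a single-record mbuf with seqno
                 * 11 arrives first, it is parked on pending_records;
                 * when record 10 arrives it goes onto the work queue,
                 * next_seqno advances to 11, and the parked record is
                 * then moved to the work queue as well.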
 2172                  */
 2173                 if (m->m_epg_seqno != tls->next_seqno) {
 2174                         struct mbuf *n, *p;
 2175 
 2176                         p = NULL;
 2177                         STAILQ_FOREACH(n, &tls->pending_records, m_epg_stailq) {
 2178                                 if (n->m_epg_seqno > m->m_epg_seqno)
 2179                                         break;
 2180                                 p = n;
 2181                         }
 2182                         if (n == NULL)
 2183                                 STAILQ_INSERT_TAIL(&tls->pending_records, m,
 2184                                     m_epg_stailq);
 2185                         else if (p == NULL)
 2186                                 STAILQ_INSERT_HEAD(&tls->pending_records, m,
 2187                                     m_epg_stailq);
 2188                         else
 2189                                 STAILQ_INSERT_AFTER(&tls->pending_records, p, m,
 2190                                     m_epg_stailq);
 2191                         mtx_unlock(&wq->mtx);
 2192                         counter_u64_add(ktls_cnt_tx_pending, 1);
 2193                         return;
 2194                 }
 2195 
 2196                 tls->next_seqno += ktls_batched_records(m);
 2197                 STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq);
 2198 
 2199                 while (!STAILQ_EMPTY(&tls->pending_records)) {
 2200                         struct mbuf *n;
 2201 
 2202                         n = STAILQ_FIRST(&tls->pending_records);
 2203                         if (n->m_epg_seqno != tls->next_seqno)
 2204                                 break;
 2205 
 2206                         queued++;
 2207                         STAILQ_REMOVE_HEAD(&tls->pending_records, m_epg_stailq);
 2208                         tls->next_seqno += ktls_batched_records(n);
 2209                         STAILQ_INSERT_TAIL(&wq->m_head, n, m_epg_stailq);
 2210                 }
 2211                 counter_u64_add(ktls_cnt_tx_pending, -(queued - 1));
 2212         } else
 2213                 STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq);
 2214 
 2215         running = wq->running;
 2216         mtx_unlock(&wq->mtx);
 2217         if (!running)
 2218                 wakeup(wq);
 2219         counter_u64_add(ktls_cnt_tx_queued, queued);
 2220 }
 2221 
 2222 static __noinline void
 2223 ktls_encrypt(struct mbuf *top)
 2224 {
 2225         struct ktls_session *tls;
 2226         struct socket *so;
 2227         struct mbuf *m;
 2228         vm_paddr_t parray[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)];
 2229         struct iovec src_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)];
 2230         struct iovec dst_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)];
 2231         vm_page_t pg;
 2232         int error, i, len, npages, off, total_pages;
 2233         bool is_anon;
 2234 
 2235         so = top->m_epg_so;
 2236         tls = top->m_epg_tls;
 2237         KASSERT(tls != NULL, ("tls = NULL, top = %p\n", top));
 2238         KASSERT(so != NULL, ("so = NULL, top = %p\n", top));
 2239 #ifdef INVARIANTS
 2240         top->m_epg_so = NULL;
 2241 #endif
 2242         total_pages = top->m_epg_enc_cnt;
 2243         npages = 0;
 2244 
 2245         /*
 2246          * Encrypt the TLS records in the chain of mbufs starting with
 2247          * 'top'.  'total_pages' gives us a total count of pages and is
 2248          * used to know when we have finished encrypting the TLS
 2249          * records originally queued with 'top'.
 2250          *
 2251          * NB: These mbufs are queued in the socket buffer and
 2252          * 'm_next' is traversing the mbufs in the socket buffer.  The
 2253          * socket buffer lock is not held while traversing this chain.
 2254          * Since the mbufs are all marked M_NOTREADY their 'm_next'
 2255          * pointers should be stable.  However, the 'm_next' of the
 2256          * last mbuf encrypted is not necessarily NULL.  It can point
 2257          * to other mbufs appended while 'top' was on the TLS work
 2258          * queue.
 2259          *
 2260          * Each mbuf holds an entire TLS record.
 2261          */
 2262         error = 0;
 2263         for (m = top; npages != total_pages; m = m->m_next) {
 2264                 KASSERT(m->m_epg_tls == tls,
 2265                     ("different TLS sessions in a single mbuf chain: %p vs %p",
 2266                     tls, m->m_epg_tls));
 2267                 KASSERT((m->m_flags & (M_EXTPG | M_NOTREADY)) ==
 2268                     (M_EXTPG | M_NOTREADY),
 2269                     ("%p not unready & nomap mbuf (top = %p)\n", m, top));
 2270                 KASSERT(npages + m->m_epg_npgs <= total_pages,
 2271                     ("page count mismatch: top %p, total_pages %d, m %p", top,
 2272                     total_pages, m));
 2273 
 2274                 /*
 2275                  * Generate source and destination iovecs to pass to
 2276                  * the SW encryption backend.  For writable mbufs, the
 2277                  * destination iovec is a copy of the source and
 2278                  * encryption is done in place.  For file-backed mbufs
 2279                  * (from sendfile), anonymous wired pages are
 2280                  * allocated and assigned to the destination iovec.
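                 *
                 * For instance (hypothetical offsets), a record whose
                 * payload begins 512 bytes into its first page would get
                 * src_iov[0].iov_len = PAGE_SIZE - 512 for that page and
                 * full-page iovecs for any middle pages.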
 2281                  */
 2282                 is_anon = (m->m_epg_flags & EPG_FLAG_ANON) != 0;
 2283 
 2284                 off = m->m_epg_1st_off;
 2285                 for (i = 0; i < m->m_epg_npgs; i++, off = 0) {
 2286                         len = m_epg_pagelen(m, i, off);
 2287                         src_iov[i].iov_len = len;
 2288                         src_iov[i].iov_base =
 2289                             (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[i]) +
 2290                                 off;
 2291 
 2292                         if (is_anon) {
 2293                                 dst_iov[i].iov_base = src_iov[i].iov_base;
 2294                                 dst_iov[i].iov_len = src_iov[i].iov_len;
 2295                                 continue;
 2296                         }
 2297 retry_page:
 2298                         pg = vm_page_alloc_noobj(VM_ALLOC_NODUMP |
 2299                             VM_ALLOC_WIRED);
 2300                         if (pg == NULL) {
 2301                                 vm_wait(NULL);
 2302                                 goto retry_page;
 2303                         }
 2304                         parray[i] = VM_PAGE_TO_PHYS(pg);
 2305                         dst_iov[i].iov_base =
 2306                             (char *)(void *)PHYS_TO_DMAP(parray[i]) + off;
 2307                         dst_iov[i].iov_len = len;
 2308                 }
 2309 
 2310                 npages += m->m_epg_nrdy;
 2311 
 2312                 error = (*tls->sw_encrypt)(tls,
 2313                     (const struct tls_record_layer *)m->m_epg_hdr,
 2314                     m->m_epg_trail, src_iov, dst_iov, i, m->m_epg_seqno,
 2315                     m->m_epg_record_type);
 2316                 if (error) {
 2317                         counter_u64_add(ktls_offload_failed_crypto, 1);
 2318                         break;
 2319                 }
 2320 
 2321                 /*
 2322                  * For file-backed mbufs, release the file-backed
 2323                  * pages and replace them in the ext_pgs array with
 2324                  * the anonymous wired pages allocated above.
 2325                  */
 2326                 if (!is_anon) {
 2327                         /* Free the old pages. */
 2328                         m->m_ext.ext_free(m);
 2329 
 2330                         /* Replace them with the new pages. */
 2331                         for (i = 0; i < m->m_epg_npgs; i++)
 2332                                 m->m_epg_pa[i] = parray[i];
 2333 
 2334                         /* Use the basic free routine. */
 2335                         m->m_ext.ext_free = mb_free_mext_pgs;
 2336 
 2337                         /* Pages are now writable. */
 2338                         m->m_epg_flags |= EPG_FLAG_ANON;
 2339                 }
 2340 
 2341                 /*
 2342                  * Drop a reference to the session now that it is no
 2343                  * longer needed.  Existing code depends on encrypted
 2344                  * records having no associated session vs
 2345                  * yet-to-be-encrypted records having an associated
 2346                  * session.
 2347                  */
 2348                 m->m_epg_tls = NULL;
 2349                 ktls_free(tls);
 2350         }
 2351 
 2352         CURVNET_SET(so->so_vnet);
 2353         if (error == 0) {
 2354                 (void)(*so->so_proto->pr_usrreqs->pru_ready)(so, top, npages);
 2355         } else {
 2356                 ktls_drop(so, EIO);
 2357                 mb_free_notready(top, total_pages);
 2358         }
 2359 
 2360         SOCK_LOCK(so);
 2361         sorele(so);
 2362         CURVNET_RESTORE();
 2363 }
 2364 
 2365 static void
 2366 ktls_work_thread(void *ctx)
 2367 {
 2368         struct ktls_wq *wq = ctx;
 2369         struct mbuf *m, *n;
 2370         struct socket *so, *son;
 2371         STAILQ_HEAD(, mbuf) local_m_head;
 2372         STAILQ_HEAD(, socket) local_so_head;
 2373 
 2374         if (ktls_bind_threads > 1) {
 2375                 curthread->td_domain.dr_policy =
 2376                         DOMAINSET_PREF(PCPU_GET(domain));
 2377         }
 2378 #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
 2379         fpu_kern_thread(0);
 2380 #endif
 2381         for (;;) {
 2382                 mtx_lock(&wq->mtx);
 2383                 while (STAILQ_EMPTY(&wq->m_head) &&
 2384                     STAILQ_EMPTY(&wq->so_head)) {
 2385                         wq->running = false;
 2386                         mtx_sleep(wq, &wq->mtx, 0, "-", 0);
 2387                         wq->running = true;
 2388                 }
 2389 
 2390                 STAILQ_INIT(&local_m_head);
 2391                 STAILQ_CONCAT(&local_m_head, &wq->m_head);
 2392                 STAILQ_INIT(&local_so_head);
 2393                 STAILQ_CONCAT(&local_so_head, &wq->so_head);
 2394                 mtx_unlock(&wq->mtx);
 2395 
 2396                 STAILQ_FOREACH_SAFE(m, &local_m_head, m_epg_stailq, n) {
 2397                         if (m->m_epg_flags & EPG_FLAG_2FREE) {
 2398                                 ktls_free(m->m_epg_tls);
 2399                                 m_free_raw(m);
 2400                         } else {
 2401                                 ktls_encrypt(m);
 2402                                 counter_u64_add(ktls_cnt_tx_queued, -1);
 2403                         }
 2404                 }
 2405 
 2406                 STAILQ_FOREACH_SAFE(so, &local_so_head, so_ktls_rx_list, son) {
 2407                         ktls_decrypt(so);
 2408                         counter_u64_add(ktls_cnt_rx_queued, -1);
 2409                 }
 2410         }
 2411 }
