FreeBSD/Linux Kernel Cross Reference
sys/kern/sched_m2.c
1 /* $NetBSD: sched_m2.c,v 1.39 2020/05/23 21:24:41 ad Exp $ */
2
3 /*
4 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * TODO:
31 * - Implementation of fair share queue;
32 * - Support for NUMA;
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: sched_m2.c,v 1.39 2020/05/23 21:24:41 ad Exp $");
37
38 #include <sys/param.h>
39
40 #include <sys/cpu.h>
41 #include <sys/callout.h>
42 #include <sys/errno.h>
43 #include <sys/kernel.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/pool.h>
48 #include <sys/proc.h>
49 #include <sys/pset.h>
50 #include <sys/resource.h>
51 #include <sys/resourcevar.h>
52 #include <sys/sched.h>
53 #include <sys/syscallargs.h>
54 #include <sys/sysctl.h>
55 #include <sys/types.h>
56
57 /*
58 * Priority related definitions.
59 */
60 #define PRI_TS_COUNT (NPRI_USER)
61 #define PRI_RT_COUNT (PRI_COUNT - PRI_TS_COUNT)
62 #define PRI_HTS_RANGE (PRI_TS_COUNT / 10)
63
64 #define PRI_HIGHEST_TS (MAXPRI_USER)
65
66 /*
67 * Time-slices and priorities.
68 */
69 static u_int min_ts; /* Minimal time-slice */
70 static u_int max_ts; /* Maximal time-slice */
71 static u_int ts_map[PRI_COUNT]; /* Map of time-slices */
72 static pri_t high_pri[PRI_COUNT]; /* Map for priority increase */
73 u_int sched_rrticks; /* Real-time time-slice */
74
75 static void sched_precalcts(void);
76
77 /*
78 * Initialization and setup.
79 */
80
/*
 * Initialize the scheduler's global state: sanity-check HZ, set the
 * default time-slice ranges, pre-calculate the per-priority tables,
 * and set up scheduling state for lwp0 (the boot LWP).
 */
void
sched_rqinit(void)
{
	/* Time-slices below are derived via mstohz(); too small an HZ
	 * would collapse them to zero ticks. */
	if (hz < 100) {
		panic("sched_rqinit: value of HZ is too low\n");
	}

	/* Default timing ranges */
	min_ts = mstohz(20);			/* ~20 ms */
	max_ts = mstohz(150);			/* ~150 ms */
	sched_rrticks = mstohz(100);		/* ~100 ms */
	sched_precalcts();

#ifdef notdef
	/* Need to set the name etc. This does not belong here */
	/* Attach the primary CPU here */
	sched_cpuattach(curcpu());
#endif

	/* Give lwp0 its scheduling state and an initial time-slice. */
	sched_lwp_fork(NULL, &lwp0);
	sched_newts(&lwp0);
}
103
104 /* Pre-calculate the time-slices for the priorities */
/* Pre-calculate the time-slices for the priorities */
static void
sched_precalcts(void)
{
	pri_t p;

	/*
	 * Time-sharing range: linearly interpolate the slice from max_ts
	 * (priority 0) down to min_ts (PRI_HIGHEST_TS).  The "* 100 /
	 * ... / 100" scaling deliberately orders the integer divisions;
	 * do not refactor the arithmetic.  high_pri[] maps each TS level
	 * into the top PRI_HTS_RANGE band, used to boost threads that
	 * slept or starved (see sched_wakeup()/sched_pstats_hook()).
	 */
	for (p = 0; p <= PRI_HIGHEST_TS; p++) {
		ts_map[p] = max_ts -
		    (p * 100 / (PRI_TS_COUNT - 1) * (max_ts - min_ts) / 100);
		high_pri[p] = (PRI_HIGHEST_TS - PRI_HTS_RANGE) +
		    ((p * PRI_HTS_RANGE) / (PRI_TS_COUNT - 1));
	}

	/* Real-time range: fixed round-robin quantum, no priority boost. */
	for (p = (PRI_HIGHEST_TS + 1); p < PRI_COUNT; p++) {
		ts_map[p] = sched_rrticks;
		high_pri[p] = p;
	}
}
124
125 /*
126 * Hooks.
127 */
128
129 void
130 sched_proc_fork(struct proc *parent, struct proc *child)
131 {
132 struct lwp *l;
133
134 LIST_FOREACH(l, &child->p_lwps, l_sibling) {
135 lwp_lock(l);
136 sched_newts(l);
137 lwp_unlock(l);
138 }
139 }
140
/*
 * The hooks below are part of the scheduler interface but are
 * intentionally empty in the M2 scheduler: it needs no per-process
 * or per-LWP bookkeeping at these events.
 */

void
sched_proc_exit(struct proc *child, struct proc *parent)
{

}

void
sched_lwp_fork(struct lwp *l1, struct lwp *l2)
{

}

void
sched_lwp_collect(struct lwp *l)
{

}

void
sched_setrunnable(struct lwp *l)
{

}

void
sched_schedclock(struct lwp *l)
{

}
170
171 /*
172 * Priorities and time-slice.
173 */
174
175 void
176 sched_nice(struct proc *p, int prio)
177 {
178 struct lwp *l;
179 int n;
180
181 KASSERT(mutex_owned(p->p_lock));
182
183 p->p_nice = prio;
184 n = (prio - NZERO) >> 2;
185 if (n == 0)
186 return;
187
188 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
189 lwp_lock(l);
190 if (l->l_class == SCHED_OTHER) {
191 pri_t pri = l->l_priority - n;
192 pri = (n < 0) ? uimin(pri, PRI_HIGHEST_TS) : imax(pri, 0);
193 lwp_changepri(l, pri);
194 }
195 lwp_unlock(l);
196 }
197 }
198
/* Recalculate the time-slice */
void
sched_newts(struct lwp *l)
{

	/* Look up the pre-calculated slice for the effective priority. */
	l->l_sched.timeslice = ts_map[lwp_eprio(l)];
}
206
207 void
208 sched_slept(struct lwp *l)
209 {
210
211 /*
212 * If thread is in time-sharing queue and batch flag is not marked,
213 * increase the priority, and run with the lower time-quantum.
214 */
215 if (l->l_priority < PRI_HIGHEST_TS && (l->l_flag & LW_BATCH) == 0) {
216 struct proc *p = l->l_proc;
217
218 KASSERT(l->l_class == SCHED_OTHER);
219 if (__predict_false(p->p_nice < NZERO)) {
220 const int n = uimax((NZERO - p->p_nice) >> 2, 1);
221 l->l_priority = uimin(l->l_priority + n, PRI_HIGHEST_TS);
222 } else {
223 l->l_priority++;
224 }
225 }
226 }
227
228 void
229 sched_wakeup(struct lwp *l)
230 {
231
232 /* If thread was sleeping a second or more - set a high priority */
233 if (l->l_slptime >= 1)
234 l->l_priority = high_pri[l->l_priority];
235 }
236
/*
 * Periodic statistics hook: age the priority of CPU-bound
 * time-sharing threads and boost threads that have been runnable
 * but starved of CPU for a second or more.
 */
void
sched_pstats_hook(struct lwp *l, int batch)
{
	pri_t prio;

	/*
	 * Estimate threads on time-sharing queue only, however,
	 * exclude the highest priority for performance purposes.
	 */
	KASSERT(lwp_locked(l, NULL));
	if (l->l_priority >= PRI_HIGHEST_TS)
		return;
	KASSERT(l->l_class == SCHED_OTHER);

	/* If it is CPU-bound not a first time - decrease the priority */
	prio = l->l_priority;
	if (batch && prio != 0)
		prio--;

	/* If thread was not ran a second or more - set a high priority */
	if (l->l_stat == LSRUN) {
		/* l_rticks == 0 means "never ran yet"; skip the boost then. */
		if (l->l_rticks && (getticks() - l->l_rticks >= hz))
			prio = high_pri[prio];
		/* Re-enqueue the thread if priority has changed */
		if (prio != l->l_priority)
			lwp_changepri(l, prio);
	} else {
		/* In other states, change the priority directly */
		l->l_priority = prio;
	}
}
268
/*
 * Called when an LWP is put on a CPU: arm the per-CPU tick counter
 * with the LWP's time-slice, so sched_tick() fires when it expires.
 */
void
sched_oncpu(lwp_t *l)
{
	struct schedstate_percpu *spc = &l->l_cpu->ci_schedstate;

	/* Update the counters */
	KASSERT(l->l_sched.timeslice >= min_ts);
	KASSERT(l->l_sched.timeslice <= max_ts);
	spc->spc_ticks = l->l_sched.timeslice;
}
279
280 /*
281 * Time-driven events.
282 */
283
/*
 * Called once per time-quantum, with the running LWP lock held (spc_lwplock).
 * Handles time-slice expiry: FIFO threads keep running, time-sharing
 * threads are aged downwards, and a resched is requested when there
 * is competition for the CPU.
 */
void
sched_tick(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct lwp *l = ci->ci_onproc;
	struct proc *p;

	/* The idle LWP has no time-slice to account. */
	if (__predict_false(CURCPU_IDLE_P()))
		return;

	lwp_lock(l);
	KASSERT(l->l_mutex != spc->spc_mutex);
	switch (l->l_class) {
	case SCHED_FIFO:
		/*
		 * Update the time-quantum, and continue running,
		 * if thread runs on FIFO real-time policy.
		 */
		KASSERT(l->l_priority > PRI_HIGHEST_TS);
		spc->spc_ticks = l->l_sched.timeslice;
		lwp_unlock(l);
		return;
	case SCHED_OTHER:
		/*
		 * If thread is in time-sharing queue, decrease the priority,
		 * and run with a higher time-quantum.
		 */
		KASSERT(l->l_priority <= PRI_HIGHEST_TS);
		if (l->l_priority == 0)
			break;

		p = l->l_proc;
		if (__predict_false(p->p_nice > NZERO)) {
			/* Positive nice: age faster, at least one step. */
			const int n = uimax((p->p_nice - NZERO) >> 2, 1);
			l->l_priority = imax(l->l_priority - n, 0);
		} else
			l->l_priority--;
		break;
	}
	/* NOTE: SCHED_RR falls through to the resched check below. */

	/*
	 * If there are higher priority threads or threads in the same queue,
	 * mark that thread should yield, otherwise, continue running.
	 */
	if (lwp_eprio(l) <= spc->spc_maxpriority || l->l_target_cpu) {
		spc->spc_flags |= SPCF_SHOULDYIELD;
		spc_lock(ci);
		sched_resched_cpu(ci, MAXPRI_KTHREAD, true);
		/* spc now unlocked */
	} else
		spc->spc_ticks = l->l_sched.timeslice;
	lwp_unlock(l);
}
340
341 /*
342 * Sysctl nodes and initialization.
343 */
344
345 static int
346 sysctl_sched_rtts(SYSCTLFN_ARGS)
347 {
348 struct sysctlnode node;
349 int rttsms = hztoms(sched_rrticks);
350
351 node = *rnode;
352 node.sysctl_data = &rttsms;
353 return sysctl_lookup(SYSCTLFN_CALL(&node));
354 }
355
/*
 * Sysctl handler for kern.sched.mints: get/set the minimal
 * time-slice in milliseconds.  On write, the new value is validated
 * and the per-priority tables are rebuilt with all CPUs locked.
 */
static int
sysctl_sched_mints(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct cpu_info *ci;
	int error, newsize;
	CPU_INFO_ITERATOR cii;

	node = *rnode;
	node.sysctl_data = &newsize;

	/* Export the current value in milliseconds. */
	newsize = hztoms(min_ts);
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	/* Convert back to ticks; must stay below max_ts. */
	newsize = mstohz(newsize);
	if (newsize < 1 || newsize > hz || newsize >= max_ts)
		return EINVAL;

	/* It is safe to do this in such order */
	for (CPU_INFO_FOREACH(cii, ci))
		spc_lock(ci);

	min_ts = newsize;
	sched_precalcts();

	for (CPU_INFO_FOREACH(cii, ci))
		spc_unlock(ci);

	return 0;
}
388
389 static int
390 sysctl_sched_maxts(SYSCTLFN_ARGS)
391 {
392 struct sysctlnode node;
393 struct cpu_info *ci;
394 int error, newsize;
395 CPU_INFO_ITERATOR cii;
396
397 node = *rnode;
398 node.sysctl_data = &newsize;
399
400 newsize = hztoms(max_ts);
401 error = sysctl_lookup(SYSCTLFN_CALL(&node));
402 if (error || newp == NULL)
403 return error;
404
405 newsize = mstohz(newsize);
406 if (newsize < 10 || newsize > hz || newsize <= min_ts)
407 return EINVAL;
408
409 /* It is safe to do this in such order */
410 for (CPU_INFO_FOREACH(cii, ci))
411 spc_lock(ci);
412
413 max_ts = newsize;
414 sched_precalcts();
415
416 for (CPU_INFO_FOREACH(cii, ci))
417 spc_unlock(ci);
418
419 return 0;
420 }
421
/*
 * Create the kern.sched sysctl subtree: the scheduler name plus the
 * rtts (read-only), maxts and mints (read-write) tunables.
 */
SYSCTL_SETUP(sysctl_sched_m2_setup, "sysctl sched setup")
{
	const struct sysctlnode *node = NULL;

	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "sched",
		SYSCTL_DESCR("Scheduler options"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Without the parent node there is nothing to hang leaves on. */
	if (node == NULL)
		return;

	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_STRING, "name", NULL,
		NULL, 0, __UNCONST("M2"), 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_INT, "rtts",
		SYSCTL_DESCR("Round-robin time quantum (in milliseconds)"),
		sysctl_sched_rtts, 0, NULL, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "maxts",
		SYSCTL_DESCR("Maximal time quantum (in milliseconds)"),
		sysctl_sched_maxts, 0, &max_ts, 0,
		CTL_CREATE, CTL_EOL);
	sysctl_createv(NULL, 0, &node, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "mints",
		SYSCTL_DESCR("Minimal time quantum (in milliseconds)"),
		sysctl_sched_mints, 0, &min_ts, 0,
		CTL_CREATE, CTL_EOL);
}
Cache object: 423fabf4f86c30785e149ce936e0599f
|