source: src/linux/universal/linux-4.4/include/linux/sched.h @ 31885

Last change on this file since 31885 was 31885, checked in by brainslayer, 5 weeks ago

update

File size: 91.4 KB
Line 
1#ifndef _LINUX_SCHED_H
2#define _LINUX_SCHED_H
3
4#include <uapi/linux/sched.h>
5
6#include <linux/sched/prio.h>
7
8
/*
 * Scheduling parameter block holding a single priority value.
 */
struct sched_param {
        int sched_priority;
};
12
13#include <asm/param.h>  /* for HZ */
14
15#include <linux/capability.h>
16#include <linux/threads.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/timex.h>
20#include <linux/jiffies.h>
21#include <linux/plist.h>
22#include <linux/rbtree.h>
23#include <linux/thread_info.h>
24#include <linux/cpumask.h>
25#include <linux/errno.h>
26#include <linux/nodemask.h>
27#include <linux/mm_types.h>
28#include <linux/preempt.h>
29
30#include <asm/page.h>
31#include <asm/ptrace.h>
32#include <linux/cputime.h>
33
34#include <linux/smp.h>
35#include <linux/sem.h>
36#include <linux/shm.h>
37#include <linux/signal.h>
38#include <linux/compiler.h>
39#include <linux/completion.h>
40#include <linux/pid.h>
41#include <linux/percpu.h>
42#include <linux/topology.h>
43#include <linux/proportions.h>
44#include <linux/seccomp.h>
45#include <linux/rcupdate.h>
46#include <linux/rculist.h>
47#include <linux/rtmutex.h>
48
49#include <linux/time.h>
50#include <linux/param.h>
51#include <linux/resource.h>
52#include <linux/timer.h>
53#include <linux/hrtimer.h>
54#include <linux/task_io_accounting.h>
55#include <linux/latencytop.h>
56#include <linux/cred.h>
57#include <linux/llist.h>
58#include <linux/uidgid.h>
59#include <linux/gfp.h>
60#include <linux/magic.h>
61#include <linux/cgroup-defs.h>
62
63#include <asm/processor.h>
64
65#define SCHED_ATTR_SIZE_VER0    48      /* sizeof first published struct */
66
67/*
68 * Extended scheduling parameters data structure.
69 *
70 * This is needed because the original struct sched_param can not be
71 * altered without introducing ABI issues with legacy applications
72 * (e.g., in sched_getparam()).
73 *
74 * However, the possibility of specifying more than just a priority for
75 * the tasks may be useful for a wide variety of application fields, e.g.,
76 * multimedia, streaming, automation and control, and many others.
77 *
78 * This variant (sched_attr) is meant to describe a so-called
79 * sporadic time-constrained task. In such model a task is specified by:
80 *  - the activation period or minimum instance inter-arrival time;
81 *  - the maximum (or average, depending on the actual scheduling
82 *    discipline) computation time of all instances, a.k.a. runtime;
83 *  - the deadline (relative to the actual activation time) of each
84 *    instance.
85 * Very briefly, a periodic (sporadic) task asks for the execution of
86 * some specific computation --which is typically called an instance--
87 * (at most) every period. Moreover, each instance typically lasts no more
88 * than the runtime and must be completed by time instant t equal to
89 * the instance activation time + the deadline.
90 *
91 * This is reflected by the actual fields of the sched_attr structure:
92 *
93 *  @size               size of the structure, for fwd/bwd compat.
94 *
95 *  @sched_policy       task's scheduling policy
96 *  @sched_flags        for customizing the scheduler behaviour
97 *  @sched_nice         task's nice value      (SCHED_NORMAL/BATCH)
98 *  @sched_priority     task's static priority (SCHED_FIFO/RR)
99 *  @sched_deadline     representative of the task's deadline
100 *  @sched_runtime      representative of the task's runtime
101 *  @sched_period       representative of the task's period
102 *
103 * Given this task model, there is a multiplicity of scheduling algorithms
104 * and policies that can be used to ensure all the tasks will meet their
105 * timing constraints.
106 *
107 * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
108 * only user of this new interface. More information about the algorithm is
109 * available in the scheduling class file or in Documentation/.
110 */
111struct sched_attr {
112        u32 size;
113
114        u32 sched_policy;
115        u64 sched_flags;
116
117        /* SCHED_NORMAL, SCHED_BATCH */
118        s32 sched_nice;
119
120        /* SCHED_FIFO, SCHED_RR */
121        u32 sched_priority;
122
123        /* SCHED_DEADLINE */
124        u64 sched_runtime;
125        u64 sched_deadline;
126        u64 sched_period;
127};
128
129struct futex_pi_state;
130struct robust_list_head;
131struct bio_list;
132struct fs_struct;
133struct perf_event_context;
134struct blk_plug;
135struct filename;
136struct nameidata;
137
138#define VMACACHE_BITS 2
139#define VMACACHE_SIZE (1U << VMACACHE_BITS)
140#define VMACACHE_MASK (VMACACHE_SIZE - 1)
141
142/*
143 * These are the constants used to fake the fixed-point load-average
144 * counting. Some notes:
145 *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
146 *    a load-average precision of 10 bits integer + 11 bits fractional
147 *  - if you want to count load-averages more often, you need more
148 *    precision, or rounding will get you. With 2-second counting freq,
149 *    the EXP_n values would be 1981, 2034 and 2043 if still using only
150 *    11 bit fractions.
151 */
152extern unsigned long avenrun[];         /* Load averages */
153extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
154
#define FSHIFT          11              /* nr of bits of precision */
#define FIXED_1         (1<<FSHIFT)     /* 1.0 as fixed-point */
#define LOAD_FREQ       (5*HZ+1)        /* 5 sec intervals */
#define EXP_1           1884            /* 1/exp(5sec/1min) as fixed-point */
#define EXP_5           2014            /* 1/exp(5sec/5min) */
#define EXP_15          2037            /* 1/exp(5sec/15min) */

/*
 * CALC_LOAD - fold one sample into a fixed-point running average.
 * @load: modifiable lvalue holding the average; updated in place
 * @exp:  decay factor in FSHIFT fixed-point (EXP_1, EXP_5 or EXP_15)
 * @n:    new sample, in the same fixed-point scale as @load
 *
 * Computes load = (load * exp + n * (FIXED_1 - exp)) >> FSHIFT.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement (safe under an unbraced if/else), and every argument is
 * parenthesized so compound expressions keep their meaning.  @load is
 * evaluated three times and @exp twice, so neither may have side effects.
 */
#define CALC_LOAD(load, exp, n)                                 \
        do {                                                    \
                (load) *= (exp);                                \
                (load) += (n) * (FIXED_1 - (exp));              \
                (load) >>= FSHIFT;                              \
        } while (0)
166
167extern unsigned long total_forks;
168extern int nr_threads;
169DECLARE_PER_CPU(unsigned long, process_counts);
170extern int nr_processes(void);
171extern unsigned long nr_running(void);
172extern bool single_task_running(void);
173extern unsigned long nr_iowait(void);
174extern unsigned long nr_iowait_cpu(int cpu);
175extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
176
177extern void calc_global_load(unsigned long ticks);
178
179#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
180extern void update_cpu_load_nohz(void);
181#else
182static inline void update_cpu_load_nohz(void) { }
183#endif
184
185extern unsigned long get_parent_ip(unsigned long addr);
186
187extern void dump_cpu_task(int cpu);
188
189struct seq_file;
190struct cfs_rq;
191struct task_group;
192#ifdef CONFIG_SCHED_DEBUG
193extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
194extern void proc_sched_set_task(struct task_struct *p);
195#endif
196
197/*
198 * Task state bitmask. NOTE! These bits are also
199 * encoded in fs/proc/array.c: get_task_state().
200 *
201 * We have two separate sets of flags: task->state
202 * is about runnability, while task->exit_state is
203 * about the task exiting. Confusing, but this way
204 * modifying one set can't modify the other one by
205 * mistake.
206 */
207#define TASK_RUNNING            0
208#define TASK_INTERRUPTIBLE      1
209#define TASK_UNINTERRUPTIBLE    2
210#define __TASK_STOPPED          4
211#define __TASK_TRACED           8
212/* in tsk->exit_state */
213#define EXIT_DEAD               16
214#define EXIT_ZOMBIE             32
215#define EXIT_TRACE              (EXIT_ZOMBIE | EXIT_DEAD)
216/* in tsk->state again */
217#define TASK_DEAD               64
218#define TASK_WAKEKILL           128
219#define TASK_WAKING             256
220#define TASK_PARKED             512
221#define TASK_NOLOAD             1024
222#define TASK_STATE_MAX          2048
223
224#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
225
226extern char ___assert_task_state[1 - 2*!!(
227                sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
228
229/* Convenience macros for the sake of set_task_state */
230#define TASK_KILLABLE           (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
231#define TASK_STOPPED            (TASK_WAKEKILL | __TASK_STOPPED)
232#define TASK_TRACED             (TASK_WAKEKILL | __TASK_TRACED)
233
234#define TASK_IDLE               (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
235
236/* Convenience macros for the sake of wake_up */
237#define TASK_NORMAL             (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
238#define TASK_ALL                (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
239
240/* get_task_state() */
241#define TASK_REPORT             (TASK_RUNNING | TASK_INTERRUPTIBLE | \
242                                 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
243                                 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
244
/*
 * Predicates on a task's ->state (and ->flags) bits.  @task may be any
 * pointer expression: it is parenthesized before '->' is applied, so
 * arguments such as 'p + 1' or '&array[i]' work as expected.  @task is
 * evaluated more than once in task_contributes_to_load(), so it must not
 * have side effects.
 */
#define task_is_traced(task)    (((task)->state & __TASK_TRACED) != 0)
#define task_is_stopped(task)   (((task)->state & __TASK_STOPPED) != 0)
#define task_is_stopped_or_traced(task) \
                        (((task)->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
/* True for an uninterruptible task that is neither frozen nor TASK_NOLOAD. */
#define task_contributes_to_load(task)  \
                                (((task)->state & TASK_UNINTERRUPTIBLE) != 0 && \
                                 ((task)->flags & PF_FROZEN) == 0 && \
                                 ((task)->state & TASK_NOLOAD) == 0)
253
254#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
255
256#define __set_task_state(tsk, state_value)                      \
257        do {                                                    \
258                (tsk)->task_state_change = _THIS_IP_;           \
259                (tsk)->state = (state_value);                   \
260        } while (0)
261#define set_task_state(tsk, state_value)                        \
262        do {                                                    \
263                (tsk)->task_state_change = _THIS_IP_;           \
264                smp_store_mb((tsk)->state, (state_value));              \
265        } while (0)
266
267/*
268 * set_current_state() includes a barrier so that the write of current->state
269 * is correctly serialised wrt the caller's subsequent test of whether to
270 * actually sleep:
271 *
272 *      set_current_state(TASK_UNINTERRUPTIBLE);
273 *      if (do_i_need_to_sleep())
274 *              schedule();
275 *
276 * If the caller does not need such serialisation then use __set_current_state()
277 */
278#define __set_current_state(state_value)                        \
279        do {                                                    \
280                current->task_state_change = _THIS_IP_;         \
281                current->state = (state_value);                 \
282        } while (0)
283#define set_current_state(state_value)                          \
284        do {                                                    \
285                current->task_state_change = _THIS_IP_;         \
286                smp_store_mb(current->state, (state_value));            \
287        } while (0)
288
289#else
290
291#define __set_task_state(tsk, state_value)              \
292        do { (tsk)->state = (state_value); } while (0)
293#define set_task_state(tsk, state_value)                \
294        smp_store_mb((tsk)->state, (state_value))
295
296/*
297 * set_current_state() includes a barrier so that the write of current->state
298 * is correctly serialised wrt the caller's subsequent test of whether to
299 * actually sleep:
300 *
301 *      set_current_state(TASK_UNINTERRUPTIBLE);
302 *      if (do_i_need_to_sleep())
303 *              schedule();
304 *
305 * If the caller does not need such serialisation then use __set_current_state()
306 */
307#define __set_current_state(state_value)                \
308        do { current->state = (state_value); } while (0)
309#define set_current_state(state_value)                  \
310        smp_store_mb(current->state, (state_value))
311
312#endif
313
314/* Task command name length */
315#define TASK_COMM_LEN 16
316
317#include <linux/spinlock.h>
318
319/*
320 * This serializes "schedule()" and also protects
321 * the run-queue from deletions/modifications (but
322 * _adding_ to the beginning of the run-queue has
323 * a separate lock).
324 */
325extern rwlock_t tasklist_lock;
326extern spinlock_t mmlist_lock;
327
328struct task_struct;
329
330#ifdef CONFIG_PROVE_RCU
331extern int lockdep_tasklist_lock_is_held(void);
332#endif /* #ifdef CONFIG_PROVE_RCU */
333
334extern void sched_init(void);
335extern void sched_init_smp(void);
336extern asmlinkage void schedule_tail(struct task_struct *prev);
337extern void init_idle(struct task_struct *idle, int cpu);
338extern void init_idle_bootup_task(struct task_struct *idle);
339
340extern cpumask_var_t cpu_isolated_map;
341
342extern int runqueue_is_locked(int cpu);
343
344#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
345extern void nohz_balance_enter_idle(int cpu);
346extern void set_cpu_sd_state_idle(void);
347extern int get_nohz_timer_target(void);
348#else
349static inline void nohz_balance_enter_idle(int cpu) { }
350static inline void set_cpu_sd_state_idle(void) { }
351#endif
352
353/*
354 * Only dump TASK_* tasks. (0 for all tasks)
355 */
356extern void show_state_filter(unsigned long state_filter);
357
/*
 * show_state - dump all tasks.
 *
 * Calls show_state_filter() with a zero filter, which selects every task.
 */
static inline void show_state(void)
{
        show_state_filter(0);
}
362
363extern void show_regs(struct pt_regs *);
364
365/*
366 * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
367 * task), SP is the stack pointer of the first frame that should be shown in the back
368 * trace (or NULL if the entire call-chain of the task should be shown).
369 */
370extern void show_stack(struct task_struct *task, unsigned long *sp);
371
372extern void cpu_init (void);
373extern void trap_init(void);
374extern void update_process_times(int user);
375extern void scheduler_tick(void);
376
377extern void sched_show_task(struct task_struct *p);
378
379#ifdef CONFIG_LOCKUP_DETECTOR
380extern void touch_softlockup_watchdog(void);
381extern void touch_softlockup_watchdog_sync(void);
382extern void touch_all_softlockup_watchdogs(void);
383extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
384                                  void __user *buffer,
385                                  size_t *lenp, loff_t *ppos);
386extern unsigned int  softlockup_panic;
387extern unsigned int  hardlockup_panic;
388void lockup_detector_init(void);
389#else
/* Empty stand-ins for the lockup detector API on this #else branch. */
static inline void touch_softlockup_watchdog(void) { }
static inline void touch_softlockup_watchdog_sync(void) { }
static inline void touch_all_softlockup_watchdogs(void) { }
static inline void lockup_detector_init(void) { }
402#endif
403
404#ifdef CONFIG_DETECT_HUNG_TASK
405void reset_hung_task_detector(void);
406#else
407static inline void reset_hung_task_detector(void)
408{
409}
410#endif
411
412/* Attach to any functions which should be ignored in wchan output. */
413#define __sched         __attribute__((__section__(".sched.text")))
414
415/* Linker adds these: start and end of __sched functions */
416extern char __sched_text_start[], __sched_text_end[];
417
418/* Is this address in the __sched functions? */
419extern int in_sched_functions(unsigned long addr);
420
421#define MAX_SCHEDULE_TIMEOUT    LONG_MAX
422extern signed long schedule_timeout(signed long timeout);
423extern signed long schedule_timeout_interruptible(signed long timeout);
424extern signed long schedule_timeout_killable(signed long timeout);
425extern signed long schedule_timeout_uninterruptible(signed long timeout);
426asmlinkage void schedule(void);
427extern void schedule_preempt_disabled(void);
428
429extern long io_schedule_timeout(long timeout);
430
431static inline void io_schedule(void)
432{
433        io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
434}
435
436struct nsproxy;
437struct user_namespace;
438
439#ifdef CONFIG_MMU
440extern void arch_pick_mmap_layout(struct mm_struct *mm);
441extern unsigned long
442arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
443                       unsigned long, unsigned long);
444extern unsigned long
445arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
446                          unsigned long len, unsigned long pgoff,
447                          unsigned long flags);
448#else
449static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
450#endif
451
452#define SUID_DUMP_DISABLE       0       /* No setuid dumping */
453#define SUID_DUMP_USER          1       /* Dump as user of process */
454#define SUID_DUMP_ROOT          2       /* Dump as root */
455
456/* mm flags */
457
458/* for SUID_DUMP_* above */
#define MMF_DUMPABLE_BITS 2
#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)

extern void set_dumpable(struct mm_struct *mm, int value);
/*
 * __get_dumpable - extract the suid_dumpable value from an mm flags word.
 * @mm_flags: flags word whose low MMF_DUMPABLE_BITS bits hold the value
 *
 * This returns the actual value of the suid_dumpable flag (one of the
 * SUID_DUMP_* constants). For things that are using this for checking
 * for privilege transitions, it must test against SUID_DUMP_USER rather
 * than treating it as a boolean value.
 */
static inline int __get_dumpable(unsigned long mm_flags)
{
        /* The masked value fits in MMF_DUMPABLE_BITS bits, so the cast is lossless. */
        return (int)(mm_flags & MMF_DUMPABLE_MASK);
}
473
474static inline int get_dumpable(struct mm_struct *mm)
475{
476        return __get_dumpable(mm->flags);
477}
478
479/* coredump filter bits */
480#define MMF_DUMP_ANON_PRIVATE   2
481#define MMF_DUMP_ANON_SHARED    3
482#define MMF_DUMP_MAPPED_PRIVATE 4
483#define MMF_DUMP_MAPPED_SHARED  5
484#define MMF_DUMP_ELF_HEADERS    6
485#define MMF_DUMP_HUGETLB_PRIVATE 7
486#define MMF_DUMP_HUGETLB_SHARED  8
487#define MMF_DUMP_DAX_PRIVATE    9
488#define MMF_DUMP_DAX_SHARED     10
489
490#define MMF_DUMP_FILTER_SHIFT   MMF_DUMPABLE_BITS
491#define MMF_DUMP_FILTER_BITS    9
492#define MMF_DUMP_FILTER_MASK \
493        (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
494#define MMF_DUMP_FILTER_DEFAULT \
495        ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
496         (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
497
498#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
499# define MMF_DUMP_MASK_DEFAULT_ELF      (1 << MMF_DUMP_ELF_HEADERS)
500#else
501# define MMF_DUMP_MASK_DEFAULT_ELF      0
502#endif
503                                        /* leave room for more dump flags */
504#define MMF_VM_MERGEABLE        16      /* KSM may merge identical pages */
505#define MMF_VM_HUGEPAGE         17      /* set when VM_HUGEPAGE is set on vma */
506#define MMF_EXE_FILE_CHANGED    18      /* see prctl_set_mm_exe_file() */
507
508#define MMF_HAS_UPROBES         19      /* has uprobes */
509#define MMF_RECALC_UPROBES      20      /* MMF_HAS_UPROBES can be wrong */
510
511#define MMF_INIT_MASK           (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
512
513struct sighand_struct {
514        atomic_t                count;
515        struct k_sigaction      action[_NSIG];
516        spinlock_t              siglock;
517        wait_queue_head_t       signalfd_wqh;
518};
519
520struct pacct_struct {
521        int                     ac_flag;
522        long                    ac_exitcode;
523        unsigned long           ac_mem;
524        cputime_t               ac_utime, ac_stime;
525        unsigned long           ac_minflt, ac_majflt;
526};
527
528struct cpu_itimer {
529        cputime_t expires;
530        cputime_t incr;
531        u32 error;
532        u32 incr_error;
533};
534
535/**
536 * struct prev_cputime - snaphsot of system and user cputime
537 * @utime: time spent in user mode
538 * @stime: time spent in system mode
539 * @lock: protects the above two fields
540 *
541 * Stores previous user/system time values such that we can guarantee
542 * monotonicity.
543 */
544struct prev_cputime {
545#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
546        cputime_t utime;
547        cputime_t stime;
548        raw_spinlock_t lock;
549#endif
550};
551
552static inline void prev_cputime_init(struct prev_cputime *prev)
553{
554#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
555        prev->utime = prev->stime = 0;
556        raw_spin_lock_init(&prev->lock);
557#endif
558}
559
560/**
561 * struct task_cputime - collected CPU time counts
562 * @utime:              time spent in user mode, in &cputime_t units
563 * @stime:              time spent in kernel mode, in &cputime_t units
564 * @sum_exec_runtime:   total time spent on the CPU, in nanoseconds
565 *
566 * This structure groups together three kinds of CPU time that are tracked for
567 * threads and thread groups.  Most things considering CPU time want to group
568 * these counts together and treat all three of them in parallel.
569 */
570struct task_cputime {
571        cputime_t utime;
572        cputime_t stime;
573        unsigned long long sum_exec_runtime;
574};
575
576/* Alternate field names when used to cache expirations. */
577#define virt_exp        utime
578#define prof_exp        stime
579#define sched_exp       sum_exec_runtime
580
581#define INIT_CPUTIME    \
582        (struct task_cputime) {                                 \
583                .utime = 0,                                     \
584                .stime = 0,                                     \
585                .sum_exec_runtime = 0,                          \
586        }
587
588/*
589 * This is the atomic variant of task_cputime, which can be used for
590 * storing and updating task_cputime statistics without locking.
591 */
592struct task_cputime_atomic {
593        atomic64_t utime;
594        atomic64_t stime;
595        atomic64_t sum_exec_runtime;
596};
597
598#define INIT_CPUTIME_ATOMIC \
599        (struct task_cputime_atomic) {                          \
600                .utime = ATOMIC64_INIT(0),                      \
601                .stime = ATOMIC64_INIT(0),                      \
602                .sum_exec_runtime = ATOMIC64_INIT(0),           \
603        }
604
605#define PREEMPT_DISABLED        (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
606
607/*
608 * Disable preemption until the scheduler is running -- use an unconditional
609 * value so that it also works on !PREEMPT_COUNT kernels.
610 *
611 * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
612 */
613#define INIT_PREEMPT_COUNT      PREEMPT_OFFSET
614
615/*
616 * Initial preempt_count value; reflects the preempt_count schedule invariant
617 * which states that during context switches:
618 *
619 *    preempt_count() == 2*PREEMPT_DISABLE_OFFSET
620 *
621 * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
622 * Note: See finish_task_switch().
623 */
624#define FORK_PREEMPT_COUNT      (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
625
626/**
627 * struct thread_group_cputimer - thread group interval timer counts
628 * @cputime_atomic:     atomic thread group interval timers.
629 * @running:            true when there are timers running and
630 *                      @cputime_atomic receives updates.
631 * @checking_timer:     true when a thread in the group is in the
632 *                      process of checking for thread group timers.
633 *
634 * This structure contains the version of task_cputime, above, that is
635 * used for thread group CPU timer calculations.
636 */
637struct thread_group_cputimer {
638        struct task_cputime_atomic cputime_atomic;
639        bool running;
640        bool checking_timer;
641};
642
643#include <linux/rwsem.h>
644struct autogroup;
645
646/*
647 * NOTE! "signal_struct" does not have its own
648 * locking, because a shared signal_struct always
649 * implies a shared sighand_struct, so locking
650 * sighand_struct is always a proper superset of
651 * the locking of signal_struct.
652 */
653struct signal_struct {
654        atomic_t                sigcnt;
655        atomic_t                live;
656        int                     nr_threads;
657        struct list_head        thread_head;
658
659        wait_queue_head_t       wait_chldexit;  /* for wait4() */
660
661        /* current thread group signal load-balancing target: */
662        struct task_struct      *curr_target;
663
664        /* shared signal handling: */
665        struct sigpending       shared_pending;
666
667        /* thread group exit support */
668        int                     group_exit_code;
669        /* overloaded:
670         * - notify group_exit_task when ->count is equal to notify_count
671         * - everyone except group_exit_task is stopped during signal delivery
672         *   of fatal signals, group_exit_task processes the signal.
673         */
674        int                     notify_count;
675        struct task_struct      *group_exit_task;
676
677        /* thread group stop support, overloads group_exit_code too */
678        int                     group_stop_count;
679        unsigned int            flags; /* see SIGNAL_* flags below */
680
681        /*
682         * PR_SET_CHILD_SUBREAPER marks a process, like a service
683         * manager, to re-parent orphan (double-forking) child processes
684         * to this process instead of 'init'. The service manager is
685         * able to receive SIGCHLD signals and is able to investigate
686         * the process until it calls wait(). All children of this
687         * process will inherit a flag if they should look for a
688         * child_subreaper process at exit.
689         */
690        unsigned int            is_child_subreaper:1;
691        unsigned int            has_child_subreaper:1;
692
693        /* POSIX.1b Interval Timers */
694        int                     posix_timer_id;
695        struct list_head        posix_timers;
696
697        /* ITIMER_REAL timer for the process */
698        struct hrtimer real_timer;
699        struct pid *leader_pid;
700        ktime_t it_real_incr;
701
702        /*
703         * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
704         * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
705         * values are defined to 0 and 1 respectively
706         */
707        struct cpu_itimer it[2];
708
709        /*
710         * Thread group totals for process CPU timers.
711         * See thread_group_cputimer(), et al, for details.
712         */
713        struct thread_group_cputimer cputimer;
714
715        /* Earliest-expiration cache. */
716        struct task_cputime cputime_expires;
717
718        struct list_head cpu_timers[3];
719
720        struct pid *tty_old_pgrp;
721
722        /* boolean value for session group leader */
723        int leader;
724
725        struct tty_struct *tty; /* NULL if no tty */
726
727#ifdef CONFIG_SCHED_AUTOGROUP
728        struct autogroup *autogroup;
729#endif
730        /*
731         * Cumulative resource counters for dead threads in the group,
732         * and for reaped dead child processes forked by this group.
733         * Live threads maintain their own counters and add to these
734         * in __exit_signal, except for the group leader.
735         */
736        seqlock_t stats_lock;
737        cputime_t utime, stime, cutime, cstime;
738        cputime_t gtime;
739        cputime_t cgtime;
740        struct prev_cputime prev_cputime;
741        unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
742        unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
743        unsigned long inblock, oublock, cinblock, coublock;
744        unsigned long maxrss, cmaxrss;
745        struct task_io_accounting ioac;
746
747        /*
748         * Cumulative ns of scheduled CPU time of dead threads in the
749         * group, not including a zombie group leader. (This only differs
750         * from jiffies_to_ns(utime + stime) if sched_clock uses something
751         * other than jiffies.)
752         */
753        unsigned long long sum_sched_runtime;
754
755        /*
756         * We don't bother to synchronize most readers of this at all,
757         * because there is no reader checking a limit that actually needs
758         * to get both rlim_cur and rlim_max atomically, and either one
759         * alone is a single word that can safely be read normally.
760         * getrlimit/setrlimit use task_lock(current->group_leader) to
761         * protect this instead of the siglock, because they really
762         * have no need to disable irqs.
763         */
764        struct rlimit rlim[RLIM_NLIMITS];
765
766#ifdef CONFIG_BSD_PROCESS_ACCT
767        struct pacct_struct pacct;      /* per-process accounting information */
768#endif
769#ifdef CONFIG_TASKSTATS
770        struct taskstats *stats;
771#endif
772#ifdef CONFIG_AUDIT
773        unsigned audit_tty;
774        unsigned audit_tty_log_passwd;
775        struct tty_audit_buf *tty_audit_buf;
776#endif
777
778        oom_flags_t oom_flags;
779        short oom_score_adj;            /* OOM kill score adjustment */
780        short oom_score_adj_min;        /* OOM kill score adjustment min value.
781                                         * Only settable by CAP_SYS_RESOURCE. */
782
783        struct mutex cred_guard_mutex;  /* guard against foreign influences on
784                                         * credential calculations
785                                         * (notably. ptrace) */
786};
787
788/*
789 * Bits in flags field of signal_struct.
790 */
791#define SIGNAL_STOP_STOPPED     0x00000001 /* job control stop in effect */
792#define SIGNAL_STOP_CONTINUED   0x00000002 /* SIGCONT since WCONTINUED reap */
793#define SIGNAL_GROUP_EXIT       0x00000004 /* group exit in progress */
794#define SIGNAL_GROUP_COREDUMP   0x00000008 /* coredump in progress */
795/*
796 * Pending notifications to parent.
797 */
798#define SIGNAL_CLD_STOPPED      0x00000010
799#define SIGNAL_CLD_CONTINUED    0x00000020
800#define SIGNAL_CLD_MASK         (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED)
801
802#define SIGNAL_UNKILLABLE       0x00000040 /* for init: ignore fatal signals */
803
804/* If true, all threads except ->group_exit_task have pending SIGKILL */
805static inline int signal_group_exit(const struct signal_struct *sig)
806{
807        return  (sig->flags & SIGNAL_GROUP_EXIT) ||
808                (sig->group_exit_task != NULL);
809}
810
811/*
812 * Some day this will be a full-fledged user tracking system..
813 */
814struct user_struct {
815        atomic_t __count;       /* reference count */
816        atomic_t processes;     /* How many processes does this user have? */
817        atomic_t sigpending;    /* How many pending signals does this user have? */
818#ifdef CONFIG_INOTIFY_USER
819        atomic_t inotify_watches; /* How many inotify watches does this user have? */
820        atomic_t inotify_devs;  /* How many inotify devs does this user have opened? */
821#endif
822#ifdef CONFIG_FANOTIFY
823        atomic_t fanotify_listeners;
824#endif
825#ifdef CONFIG_EPOLL
826        atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
827#endif
828#ifdef CONFIG_POSIX_MQUEUE
829        /* protected by mq_lock */
830        unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
831#endif
832        unsigned long locked_shm; /* How many pages of mlocked shm ? */
833        unsigned long unix_inflight;    /* How many files in flight in unix sockets */
834        atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
835
836#ifdef CONFIG_KEYS
837        struct key *uid_keyring;        /* UID specific keyring */
838        struct key *session_keyring;    /* UID's default session keyring */
839#endif
840
841        /* Hash table maintenance information */
842        struct hlist_node uidhash_node;
843        kuid_t uid;
844
845#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
846        atomic_long_t locked_vm;
847#endif
848};
849
850extern int uids_sysfs_init(void);
851
852extern struct user_struct *find_user(kuid_t);
853
854extern struct user_struct root_user;
855#define INIT_USER (&root_user)
856
857
858struct backing_dev_info;
859struct reclaim_state;
860
#ifdef CONFIG_SCHED_INFO
/*
 * Scheduling bookkeeping embedded in task_struct as ->sched_info:
 * two cumulative counters followed by two timestamps.
 */
struct sched_info {
        /* Cumulative counters */
        unsigned long pcount;                   /* # of times run on this cpu */
        unsigned long long run_delay;           /* time spent waiting on a runqueue */

        /* Timestamps */
        unsigned long long last_arrival;        /* when we last ran on a cpu */
        unsigned long long last_queued;         /* when we were last queued to run */
};
#endif /* CONFIG_SCHED_INFO */
872
#ifdef CONFIG_TASK_DELAY_ACCT
/*
 * Delay accounting data; task_struct refers to it through ->delays.
 *
 * Convention for each stat XXX, aligned appropriately:
 *
 *      struct timespec XXX_start, XXX_end;
 *      u64 XXX_delay;
 *      u32 XXX_count;
 *
 * XXX_count is incremented on every XXX operation, and the delay
 * associated with the operation is added to XXX_delay, which holds the
 * accumulated delay time in nanoseconds.
 *
 * Atomicity of updates to XXX_delay and XXX_count is protected by the
 * single ->lock below (split into XXX_lock if contention is an issue).
 */
struct task_delay_info {
        spinlock_t      lock;
        unsigned int    flags;          /* Private per-task flags */

        /* Block I/O and swapin */
        u64 blkio_start;                /* Shared by blkio, swapin */
        u64 blkio_delay;                /* wait for sync block io completion */
        u64 swapin_delay;               /* wait for swapin block io completion */
        u32 blkio_count;                /* total count of the number of sync
                                         * block io operations performed */
        u32 swapin_count;               /* total count of the number of swapin
                                         * block io operations performed */

        /* Memory reclaim */
        u64 freepages_start;
        u64 freepages_delay;            /* wait for memory reclaim */
        u32 freepages_count;            /* total count of memory reclaim */
};
#endif  /* CONFIG_TASK_DELAY_ACCT */
906
/*
 * sched_info_on - is scheduler info collection enabled?
 *
 * Returns 1 when CONFIG_SCHEDSTATS is built in; otherwise, with
 * CONFIG_TASK_DELAY_ACCT, the current value of delayacct_on; otherwise 0.
 */
static inline int sched_info_on(void)
{
        int enabled = 0;
#ifdef CONFIG_SCHEDSTATS
        enabled = 1;
#elif defined(CONFIG_TASK_DELAY_ACCT)
        extern int delayacct_on;

        enabled = delayacct_on;
#endif
        return enabled;
}
918
/*
 * CPU idle classification.  CPU_MAX_IDLE_TYPES is the number of real
 * types and is used to size per-type arrays (e.g. the lb_* statistics
 * in struct sched_domain).
 */
enum cpu_idle_type {
        CPU_IDLE = 0,
        CPU_NOT_IDLE,
        CPU_NEWLY_IDLE,
        CPU_MAX_IDLE_TYPES,
};
925
/*
 * cpu_capacity calculations are carried out with extra resolution:
 * SCHED_CAPACITY_SCALE is 1 shifted left by SCHED_CAPACITY_SHIFT bits
 * (1024), as a long.
 */
#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1L << SCHED_CAPACITY_SHIFT)
931
/*
 * Wake-queues
 *
 * A wake-queue is a list of tasks with a pending wakeup, whose callers
 * have already marked the task as woken internally and can thus carry
 * on.  A common use case is deferring the wakeups until the
 * corresponding user lock has been released.
 *
 * A reference is held on each task in the list across the wakeup, which
 * guarantees that the memory is still valid by the time the actual
 * wakeups are performed in wake_up_q().
 *
 * One node per task suffices, because a task never needs to be in two
 * wake queues simultaneously: abandoning a task in a wake queue is
 * forbidden (a call to wake_up_q() _must_ follow), so if a task is
 * already queued its wakeup will happen soon and a second waker can
 * simply skip it.
 *
 * WAKE_Q() declares and initializes a list head.  wake_up_q() does NOT
 * reinitialize the list; it is expected to be called near the end of a
 * function, where it is easy to see by inspection that the queue is not
 * used again.
 *
 * Note that this can cause spurious wakeups.  schedule() callers must
 * make the call inside a loop that confirms the wakeup condition has in
 * fact occurred.
 */

/* Singly linked list link. */
struct wake_q_node {
        struct wake_q_node *next;
};

/* List head: first node plus the address of the last ->next slot. */
struct wake_q_head {
        struct wake_q_node *first;
        struct wake_q_node **lastp;
};

/* Non-NULL sentinel stored in ->first by WAKE_Q(). */
#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)

/* Declare @name as an empty wake queue: first = WAKE_Q_TAIL, lastp = &first. */
#define WAKE_Q(name)                                    \
        struct wake_q_head name = {                     \
                .first = WAKE_Q_TAIL,                   \
                .lastp = &name.first,                   \
        }

extern void wake_q_add(struct wake_q_head *head,
                       struct task_struct *task);
extern void wake_up_q(struct wake_q_head *head);
975
976/*
977 * sched-domains (multiprocessor balancing) declarations:
978 */
979#ifdef CONFIG_SMP
/* SD_* flags for sched_domain->flags, one bit each (bit 6 is not assigned here). */
#define SD_LOAD_BALANCE         (1 << 0)        /* 0x0001: Do load balancing on this domain. */
#define SD_BALANCE_NEWIDLE      (1 << 1)        /* 0x0002: Balance when about to become idle */
#define SD_BALANCE_EXEC         (1 << 2)        /* 0x0004: Balance on exec */
#define SD_BALANCE_FORK         (1 << 3)        /* 0x0008: Balance on fork, clone */
#define SD_BALANCE_WAKE         (1 << 4)        /* 0x0010: Balance on wakeup */
#define SD_WAKE_AFFINE          (1 << 5)        /* 0x0020: Wake task to waking CPU */
#define SD_SHARE_CPUCAPACITY    (1 << 7)        /* 0x0080: Domain members share cpu power */
#define SD_SHARE_POWERDOMAIN    (1 << 8)        /* 0x0100: Domain members share power domain */
#define SD_SHARE_PKG_RESOURCES  (1 << 9)        /* 0x0200: Domain members share cpu pkg resources */
#define SD_SERIALIZE            (1 << 10)       /* 0x0400: Only a single load balancing instance */
#define SD_ASYM_PACKING         (1 << 11)       /* 0x0800: Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING       (1 << 12)       /* 0x1000: Prefer to place tasks in a sibling domain */
#define SD_OVERLAP              (1 << 13)       /* 0x2000: sched_domains of this level overlap */
#define SD_NUMA                 (1 << 14)       /* 0x4000: cross-node balancing */
994
#ifdef CONFIG_SCHED_SMT
/* SD_* flags for the SMT level: SD_SHARE_CPUCAPACITY and SD_SHARE_PKG_RESOURCES. */
static inline int cpu_smt_flags(void)
{
        const int smt_flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;

        return smt_flags;
}
#endif
1001
#ifdef CONFIG_SCHED_MC
/* SD_* flags for the multi-core level: SD_SHARE_PKG_RESOURCES only. */
static inline int cpu_core_flags(void)
{
        const int core_flags = SD_SHARE_PKG_RESOURCES;

        return core_flags;
}
#endif
1008
#ifdef CONFIG_NUMA
/* SD_* flags for the NUMA level: SD_NUMA only. */
static inline int cpu_numa_flags(void)
{
        const int numa_flags = SD_NUMA;

        return numa_flags;
}
#endif
1015
/* Attributes handed to partition_sched_domains() via its dattr_new argument. */
struct sched_domain_attr {
        int relax_domain_level;
};

/* Initializer value for a sched_domain_attr: relax_domain_level = -1. */
#define SD_ATTR_INIT    (struct sched_domain_attr) { .relax_domain_level = -1, }
1023
1024extern int sched_domain_level_max;
1025
1026struct sched_group;
1027
1028struct sched_domain {
1029        /* These fields must be setup */
1030        struct sched_domain *parent;    /* top domain must be null terminated */
1031        struct sched_domain *child;     /* bottom domain must be null terminated */
1032        struct sched_group *groups;     /* the balancing groups of the domain */
1033        unsigned long min_interval;     /* Minimum balance interval ms */
1034        unsigned long max_interval;     /* Maximum balance interval ms */
1035        unsigned int busy_factor;       /* less balancing by factor if busy */
1036        unsigned int imbalance_pct;     /* No balance until over watermark */
1037        unsigned int cache_nice_tries;  /* Leave cache hot tasks for # tries */
1038        unsigned int busy_idx;
1039        unsigned int idle_idx;
1040        unsigned int newidle_idx;
1041        unsigned int wake_idx;
1042        unsigned int forkexec_idx;
1043        unsigned int smt_gain;
1044
1045        int nohz_idle;                  /* NOHZ IDLE status */
1046        int flags;                      /* See SD_* */
1047        int level;
1048
1049        /* Runtime fields. */
1050        unsigned long last_balance;     /* init to jiffies. units in jiffies */
1051        unsigned int balance_interval;  /* initialise to 1. units in ms. */
1052        unsigned int nr_balance_failed; /* initialise to 0 */
1053
1054        /* idle_balance() stats */
1055        u64 max_newidle_lb_cost;
1056        unsigned long next_decay_max_lb_cost;
1057
1058#ifdef CONFIG_SCHEDSTATS
1059        /* load_balance() stats */
1060        unsigned int lb_count[CPU_MAX_IDLE_TYPES];
1061        unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
1062        unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
1063        unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
1064        unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
1065        unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
1066        unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
1067        unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
1068
1069        /* Active load balancing */
1070        unsigned int alb_count;
1071        unsigned int alb_failed;
1072        unsigned int alb_pushed;
1073
1074        /* SD_BALANCE_EXEC stats */
1075        unsigned int sbe_count;
1076        unsigned int sbe_balanced;
1077        unsigned int sbe_pushed;
1078
1079        /* SD_BALANCE_FORK stats */
1080        unsigned int sbf_count;
1081        unsigned int sbf_balanced;
1082        unsigned int sbf_pushed;
1083
1084        /* try_to_wake_up() stats */
1085        unsigned int ttwu_wake_remote;
1086        unsigned int ttwu_move_affine;
1087        unsigned int ttwu_move_balance;
1088#endif
1089#ifdef CONFIG_SCHED_DEBUG
1090        char *name;
1091#endif
1092        union {
1093                void *private;          /* used during construction */
1094                struct rcu_head rcu;    /* used during destruction */
1095        };
1096
1097        unsigned int span_weight;
1098        /*
1099         * Span of all CPUs in this domain.
1100         *
1101         * NOTE: this field is variable length. (Allocated dynamically
1102         * by attaching extra space to the end of the structure,
1103         * depending on how many CPUs the kernel has booted up with)
1104         */
1105        unsigned long span[0];
1106};
1107
1108static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
1109{
1110        return to_cpumask(sd->span);
1111}
1112
1113extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
1114                                    struct sched_domain_attr *dattr_new);
1115
1116/* Allocate an array of sched domains, for partition_sched_domains(). */
1117cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
1118void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
1119
1120bool cpus_share_cache(int this_cpu, int that_cpu);
1121
/* Callback: given a cpu number, yields a pointer to a const cpumask. */
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
/* Callback: takes no arguments, yields an int. */
typedef int (*sched_domain_flags_f)(void);

/* NOTE(review): presumably a bit for sched_domain_topology_level.flags -- confirm. */
#define SDTL_OVERLAP    0x01
1126
/*
 * Per-cpu storage of object pointers: each member is a per-cpu pointer
 * whose per-cpu slot holds a pointer to a sched_domain, sched_group or
 * sched_group_capacity respectively.
 *
 * The __percpu annotation sits on the outer pointer ("T *__percpu *p"):
 * it is the address of the slot that lives in the per-cpu address space,
 * not the object pointer read out of the slot.  The annotation only
 * matters to static checkers; the generated code is unchanged.
 */
struct sd_data {
        struct sched_domain *__percpu *sd;
        struct sched_group *__percpu *sg;
        struct sched_group_capacity *__percpu *sgc;
};
1132
1133struct sched_domain_topology_level {
1134        sched_domain_mask_f mask;
1135        sched_domain_flags_f sd_flags;
1136        int                 flags;
1137        int                 numa_level;
1138        struct sd_data      data;
1139#ifdef CONFIG_SCHED_DEBUG
1140        char                *name;
1141#endif
1142};
1143
1144extern void set_sched_topology(struct sched_domain_topology_level *tl);
1145extern void wake_up_if_idle(int cpu);
1146
/*
 * SD_INIT_NAME(type): designated initializer ".name = "type"" when
 * CONFIG_SCHED_DEBUG is set, otherwise expands to nothing.
 */
#ifndef CONFIG_SCHED_DEBUG
# define SD_INIT_NAME(type)
#else
# define SD_INIT_NAME(type)             .name = #type
#endif
1152
1153#else /* CONFIG_SMP */
1154
1155struct sched_domain_attr;
1156
1157static inline void
1158partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
1159                        struct sched_domain_attr *dattr_new)
1160{
1161}
1162
/* !CONFIG_SMP variant: unconditionally true, whatever the two cpu numbers. */
static inline bool cpus_share_cache(int this_cpu, int that_cpu)
{
        const bool shared = true;

        return shared;
}
1167
1168#endif  /* !CONFIG_SMP */
1169
1170
1171struct io_context;                      /* See blkdev.h */
1172
1173
/*
 * prefetch_stack(): supplied elsewhere when ARCH_HAS_PREFETCH_SWITCH_STACK
 * is defined; otherwise an empty inline that ignores @t.
 */
#ifndef ARCH_HAS_PREFETCH_SWITCH_STACK
static inline void prefetch_stack(struct task_struct *t)
{
}
#else
extern void prefetch_stack(struct task_struct *t);
#endif
1179
1180struct audit_context;           /* See audit.c */
1181struct mempolicy;
1182struct pipe_inode_info;
1183struct uts_namespace;
1184
1185struct load_weight {
1186        unsigned long weight;
1187        u32 inv_weight;
1188};
1189
1190/*
1191 * The load_avg/util_avg accumulates an infinite geometric series.
1192 * 1) load_avg factors frequency scaling into the amount of time that a
1193 * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
1194 * aggregated such weights of all runnable and blocked sched_entities.
1195 * 2) util_avg factors frequency and cpu scaling into the amount of time
1196 * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
1197 * For cfs_rq, it is the aggregated such times of all runnable and
1198 * blocked sched_entities.
1199 * The 64 bit load_sum can:
1200 * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
1201 * the highest weight (=88761) always runnable, we should not overflow
1202 * 2) for entity, support any load.weight always runnable
1203 */
1204struct sched_avg {
1205        u64 last_update_time, load_sum;
1206        u32 util_sum, period_contrib;
1207        unsigned long load_avg, util_avg;
1208};
1209
#ifdef CONFIG_SCHEDSTATS
/*
 * Scheduler statistics, embedded in sched_entity as ->statistics when
 * CONFIG_SCHEDSTATS is enabled.  All members are 64-bit.
 */
struct sched_statistics {
        /* wait_* and iowait_* */
        u64 wait_start;
        u64 wait_max;
        u64 wait_count;
        u64 wait_sum;
        u64 iowait_count;
        u64 iowait_sum;

        /* sleep_* (sum_sleep_runtime is signed) */
        u64 sleep_start;
        u64 sleep_max;
        s64 sum_sleep_runtime;

        /* block_*, exec_max, slice_max */
        u64 block_start;
        u64 block_max;
        u64 exec_max;
        u64 slice_max;

        /* migration counters */
        u64 nr_migrations_cold;
        u64 nr_failed_migrations_affine;
        u64 nr_failed_migrations_running;
        u64 nr_failed_migrations_hot;
        u64 nr_forced_migrations;

        /* wakeup counters */
        u64 nr_wakeups;
        u64 nr_wakeups_sync;
        u64 nr_wakeups_migrate;
        u64 nr_wakeups_local;
        u64 nr_wakeups_remote;
        u64 nr_wakeups_affine;
        u64 nr_wakeups_affine_attempts;
        u64 nr_wakeups_passive;
        u64 nr_wakeups_idle;
};
#endif
1245
1246struct sched_entity {
1247        struct load_weight      load;           /* for load-balancing */
1248        struct rb_node          run_node;
1249        struct list_head        group_node;
1250        unsigned int            on_rq;
1251
1252        u64                     exec_start;
1253        u64                     sum_exec_runtime;
1254        u64                     vruntime;
1255        u64                     prev_sum_exec_runtime;
1256
1257        u64                     nr_migrations;
1258
1259#ifdef CONFIG_SCHEDSTATS
1260        struct sched_statistics statistics;
1261#endif
1262
1263#ifdef CONFIG_FAIR_GROUP_SCHED
1264        int                     depth;
1265        struct sched_entity     *parent;
1266        /* rq on which this entity is (to be) queued: */
1267        struct cfs_rq           *cfs_rq;
1268        /* rq "owned" by this entity/group: */
1269        struct cfs_rq           *my_q;
1270#endif
1271
1272#ifdef CONFIG_SMP
1273        /* Per entity load average tracking */
1274        struct sched_avg        avg;
1275#endif
1276};
1277
1278struct sched_rt_entity {
1279        struct list_head run_list;
1280        unsigned long timeout;
1281        unsigned long watchdog_stamp;
1282        unsigned int time_slice;
1283
1284        struct sched_rt_entity *back;
1285#ifdef CONFIG_RT_GROUP_SCHED
1286        struct sched_rt_entity  *parent;
1287        /* rq on which this entity is (to be) queued: */
1288        struct rt_rq            *rt_rq;
1289        /* rq "owned" by this entity/group: */
1290        struct rt_rq            *my_q;
1291#endif
1292};
1293
1294struct sched_dl_entity {
1295        struct rb_node  rb_node;
1296
1297        /*
1298         * Original scheduling parameters. Copied here from sched_attr
1299         * during sched_setattr(), they will remain the same until
1300         * the next sched_setattr().
1301         */
1302        u64 dl_runtime;         /* maximum runtime for each instance    */
1303        u64 dl_deadline;        /* relative deadline of each instance   */
1304        u64 dl_period;          /* separation of two instances (period) */
1305        u64 dl_bw;              /* dl_runtime / dl_deadline             */
1306
1307        /*
1308         * Actual scheduling parameters. Initialized with the values above,
1309         * they are continously updated during task execution. Note that
1310         * the remaining runtime could be < 0 in case we are in overrun.
1311         */
1312        s64 runtime;            /* remaining runtime for this instance  */
1313        u64 deadline;           /* absolute deadline for this instance  */
1314        unsigned int flags;     /* specifying the scheduler behaviour   */
1315
1316        /*
1317         * Some bool flags:
1318         *
1319         * @dl_throttled tells if we exhausted the runtime. If so, the
1320         * task has to wait for a replenishment to be performed at the
1321         * next firing of dl_timer.
1322         *
1323         * @dl_new tells if a new instance arrived. If so we must
1324         * start executing it with full runtime and reset its absolute
1325         * deadline;
1326         *
1327         * @dl_boosted tells if we are boosted due to DI. If so we are
1328         * outside bandwidth enforcement mechanism (but only until we
1329         * exit the critical section);
1330         *
1331         * @dl_yielded tells if task gave up the cpu before consuming
1332         * all its available runtime during the last job.
1333         */
1334        int dl_throttled, dl_new, dl_boosted, dl_yielded;
1335
1336        /*
1337         * Bandwidth enforcement timer. Each -deadline task has its
1338         * own bandwidth to be enforced, thus we need one timer per task.
1339         */
1340        struct hrtimer dl_timer;
1341};
1342
1343union rcu_special {
1344        struct {
1345                u8 blocked;
1346                u8 need_qs;
1347                u8 exp_need_qs;
1348                u8 pad; /* Otherwise the compiler can store garbage here. */
1349        } b; /* Bits. */
1350        u32 s; /* Set of bits. */
1351};
1352struct rcu_node;
1353
/*
 * Index of a perf task context.  perf_nr_task_contexts is the number of
 * valid indices and sizes task_struct's perf_event_ctxp[] array;
 * perf_invalid_context (-1) is outside that range.
 */
enum perf_event_task_context {
        perf_invalid_context    = -1,
        perf_hw_context         = 0,
        perf_sw_context         = 1,
        perf_nr_task_contexts   = 2,
};
1360
1361/* Track pages that require TLB flushes */
1362struct tlbflush_unmap_batch {
1363        /*
1364         * Each bit set is a CPU that potentially has a TLB entry for one of
1365         * the PFNs being flushed. See set_tlb_ubc_flush_pending().
1366         */
1367        struct cpumask cpumask;
1368
1369        /* True if any bit in cpumask is set */
1370        bool flush_required;
1371
1372        /*
1373         * If true then the PTE was dirty when unmapped. The entry must be
1374         * flushed before IO is initiated or a stale TLB entry potentially
1375         * allows an update without redirtying the page.
1376         */
1377        bool writable;
1378};
1379
1380struct task_struct {
1381        volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
1382        void *stack;
1383        atomic_t usage;
1384        unsigned int flags;     /* per process flags, defined below */
1385        unsigned int ptrace;
1386
1387#ifdef CONFIG_SMP
1388        struct llist_node wake_entry;
1389        int on_cpu;
1390        unsigned int wakee_flips;
1391        unsigned long wakee_flip_decay_ts;
1392        struct task_struct *last_wakee;
1393
1394        int wake_cpu;
1395#endif
1396        int on_rq;
1397
1398        int prio, static_prio, normal_prio;
1399        unsigned int rt_priority;
1400        const struct sched_class *sched_class;
1401        struct sched_entity se;
1402        struct sched_rt_entity rt;
1403#ifdef CONFIG_CGROUP_SCHED
1404        struct task_group *sched_task_group;
1405#endif
1406        struct sched_dl_entity dl;
1407
1408#ifdef CONFIG_PREEMPT_NOTIFIERS
1409        /* list of struct preempt_notifier: */
1410        struct hlist_head preempt_notifiers;
1411#endif
1412
1413#ifdef CONFIG_BLK_DEV_IO_TRACE
1414        unsigned int btrace_seq;
1415#endif
1416
1417        unsigned int policy;
1418        int nr_cpus_allowed;
1419        cpumask_t cpus_allowed;
1420
1421#ifdef CONFIG_PREEMPT_RCU
1422        int rcu_read_lock_nesting;
1423        union rcu_special rcu_read_unlock_special;
1424        struct list_head rcu_node_entry;
1425        struct rcu_node *rcu_blocked_node;
1426#endif /* #ifdef CONFIG_PREEMPT_RCU */
1427#ifdef CONFIG_TASKS_RCU
1428        unsigned long rcu_tasks_nvcsw;
1429        bool rcu_tasks_holdout;
1430        struct list_head rcu_tasks_holdout_list;
1431        int rcu_tasks_idle_cpu;
1432#endif /* #ifdef CONFIG_TASKS_RCU */
1433
1434#ifdef CONFIG_SCHED_INFO
1435        struct sched_info sched_info;
1436#endif
1437
1438        struct list_head tasks;
1439#ifdef CONFIG_SMP
1440        struct plist_node pushable_tasks;
1441        struct rb_node pushable_dl_tasks;
1442#endif
1443
1444        struct mm_struct *mm, *active_mm;
1445        /* per-thread vma caching */
1446        u32 vmacache_seqnum;
1447        struct vm_area_struct *vmacache[VMACACHE_SIZE];
1448#if defined(SPLIT_RSS_COUNTING)
1449        struct task_rss_stat    rss_stat;
1450#endif
1451/* task state */
1452        int exit_state;
1453        int exit_code, exit_signal;
1454        int pdeath_signal;  /*  The signal sent when the parent dies  */
1455        unsigned long jobctl;   /* JOBCTL_*, siglock protected */
1456
1457        /* Used for emulating ABI behavior of previous Linux versions */
1458        unsigned int personality;
1459
1460        /* scheduler bits, serialized by scheduler locks */
1461        unsigned sched_reset_on_fork:1;
1462        unsigned sched_contributes_to_load:1;
1463        unsigned sched_migrated:1;
1464        unsigned :0; /* force alignment to the next boundary */
1465
1466        /* unserialized, strictly 'current' */
1467        unsigned in_execve:1; /* bit to tell LSMs we're in execve */
1468        unsigned in_iowait:1;
1469#ifdef CONFIG_MEMCG
1470        unsigned memcg_may_oom:1;
1471#endif
1472#ifdef CONFIG_MEMCG_KMEM
1473        unsigned memcg_kmem_skip_account:1;
1474#endif
1475#ifdef CONFIG_COMPAT_BRK
1476        unsigned brk_randomized:1;
1477#endif
1478#ifdef CONFIG_CGROUPS
1479        /* disallow userland-initiated cgroup migration */
1480        unsigned no_cgroup_migration:1;
1481#endif
1482
1483        unsigned long atomic_flags; /* Flags needing atomic access. */
1484
1485        struct restart_block restart_block;
1486
1487        pid_t pid;
1488        pid_t tgid;
1489
1490#ifdef CONFIG_CC_STACKPROTECTOR
1491        /* Canary value for the -fstack-protector gcc feature */
1492        unsigned long stack_canary;
1493#endif
1494        /*
1495         * pointers to (original) parent process, youngest child, younger sibling,
1496         * older sibling, respectively.  (p->father can be replaced with
1497         * p->real_parent->pid)
1498         */
1499        struct task_struct __rcu *real_parent; /* real parent process */
1500        struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
1501        /*
1502         * children/sibling forms the list of my natural children
1503         */
1504        struct list_head children;      /* list of my children */
1505        struct list_head sibling;       /* linkage in my parent's children list */
1506        struct task_struct *group_leader;       /* threadgroup leader */
1507
1508        /*
1509         * ptraced is the list of tasks this task is using ptrace on.
1510         * This includes both natural children and PTRACE_ATTACH targets.
1511         * p->ptrace_entry is p's link on the p->parent->ptraced list.
1512         */
1513        struct list_head ptraced;
1514        struct list_head ptrace_entry;
1515
1516        /* PID/PID hash table linkage. */
1517        struct pid_link pids[PIDTYPE_MAX];
1518        struct list_head thread_group;
1519        struct list_head thread_node;
1520
1521        struct completion *vfork_done;          /* for vfork() */
1522        int __user *set_child_tid;              /* CLONE_CHILD_SETTID */
1523        int __user *clear_child_tid;            /* CLONE_CHILD_CLEARTID */
1524
1525        cputime_t utime, stime, utimescaled, stimescaled;
1526        cputime_t gtime;
1527        struct prev_cputime prev_cputime;
1528#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1529        seqlock_t vtime_seqlock;
1530        unsigned long long vtime_snap;
1531        enum {
1532                VTIME_SLEEPING = 0,
1533                VTIME_USER,
1534                VTIME_SYS,
1535        } vtime_snap_whence;
1536#endif
1537        unsigned long nvcsw, nivcsw; /* context switch counts */
1538        u64 start_time;         /* monotonic time in nsec */
1539        u64 real_start_time;    /* boot based time in nsec */
1540/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
1541        unsigned long min_flt, maj_flt;
1542
1543        struct task_cputime cputime_expires;
1544        struct list_head cpu_timers[3];
1545
1546/* process credentials */
1547        const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
1548        const struct cred __rcu *real_cred; /* objective and real subjective task
1549                                         * credentials (COW) */
1550        const struct cred __rcu *cred;  /* effective (overridable) subjective task
1551                                         * credentials (COW) */
1552        char comm[TASK_COMM_LEN]; /* executable name excluding path
1553                                     - access with [gs]et_task_comm (which lock
1554                                       it with task_lock())
1555                                     - initialized normally by setup_new_exec */
1556/* file system info */
1557        struct nameidata *nameidata;
1558#ifdef CONFIG_SYSVIPC
1559/* ipc stuff */
1560        struct sysv_sem sysvsem;
1561        struct sysv_shm sysvshm;
1562#endif
1563#ifdef CONFIG_DETECT_HUNG_TASK
1564/* hung task detection */
1565        unsigned long last_switch_count;
1566#endif
1567/* filesystem information */
1568        struct fs_struct *fs;
1569/* open file information */
1570        struct files_struct *files;
1571/* namespaces */
1572        struct nsproxy *nsproxy;
1573/* signal handlers */
1574        struct signal_struct *signal;
1575        struct sighand_struct *sighand;
1576
1577        sigset_t blocked, real_blocked;
1578        sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
1579        struct sigpending pending;
1580
1581        unsigned long sas_ss_sp;
1582        size_t sas_ss_size;
1583
1584        struct callback_head *task_works;
1585
1586        struct audit_context *audit_context;
1587#ifdef CONFIG_AUDITSYSCALL
1588        kuid_t loginuid;
1589        unsigned int sessionid;
1590#endif
1591        struct seccomp seccomp;
1592
1593/* Thread group tracking */
1594        u32 parent_exec_id;
1595        u32 self_exec_id;
1596/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
1597 * mempolicy */
1598        spinlock_t alloc_lock;
1599
1600        /* Protection of the PI data structures: */
1601        raw_spinlock_t pi_lock;
1602
1603        struct wake_q_node wake_q;
1604
1605#ifdef CONFIG_RT_MUTEXES
1606        /* PI waiters blocked on a rt_mutex held by this task */
1607        struct rb_root pi_waiters;
1608        struct rb_node *pi_waiters_leftmost;
1609        /* Deadlock detection and priority inheritance handling */
1610        struct rt_mutex_waiter *pi_blocked_on;
1611#endif
1612
1613#ifdef CONFIG_DEBUG_MUTEXES
1614        /* mutex deadlock detection */
1615        struct mutex_waiter *blocked_on;
1616#endif
1617#ifdef CONFIG_TRACE_IRQFLAGS
1618        unsigned int irq_events;
1619        unsigned long hardirq_enable_ip;
1620        unsigned long hardirq_disable_ip;
1621        unsigned int hardirq_enable_event;
1622        unsigned int hardirq_disable_event;
1623        int hardirqs_enabled;
1624        int hardirq_context;
1625        unsigned long softirq_disable_ip;
1626        unsigned long softirq_enable_ip;
1627        unsigned int softirq_disable_event;
1628        unsigned int softirq_enable_event;
1629        int softirqs_enabled;
1630        int softirq_context;
1631#endif
1632#ifdef CONFIG_LOCKDEP
1633# define MAX_LOCK_DEPTH 48UL
1634        u64 curr_chain_key;
1635        int lockdep_depth;
1636        unsigned int lockdep_recursion;
1637        struct held_lock held_locks[MAX_LOCK_DEPTH];
1638        gfp_t lockdep_reclaim_gfp;
1639#endif
1640
1641/* journalling filesystem info */
1642        void *journal_info;
1643
1644/* stacked block device info */
1645        struct bio_list *bio_list;
1646
1647#ifdef CONFIG_BLOCK
1648/* stack plugging */
1649        struct blk_plug *plug;
1650#endif
1651
1652/* VM state */
1653        struct reclaim_state *reclaim_state;
1654
1655        struct backing_dev_info *backing_dev_info;
1656
1657        struct io_context *io_context;
1658
1659        unsigned long ptrace_message;
1660        siginfo_t *last_siginfo; /* For ptrace use.  */
1661        struct task_io_accounting ioac;
1662#if defined(CONFIG_TASK_XACCT)
1663        u64 acct_rss_mem1;      /* accumulated rss usage */
1664        u64 acct_vm_mem1;       /* accumulated virtual memory usage */
1665        cputime_t acct_timexpd; /* stime + utime since last update */
1666#endif
1667#ifdef CONFIG_CPUSETS
1668        nodemask_t mems_allowed;        /* Protected by alloc_lock */
1669        seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
1670        int cpuset_mem_spread_rotor;
1671        int cpuset_slab_spread_rotor;
1672#endif
1673#ifdef CONFIG_CGROUPS
1674        /* Control Group info protected by css_set_lock */
1675        struct css_set __rcu *cgroups;
1676        /* cg_list protected by css_set_lock and tsk->alloc_lock */
1677        struct list_head cg_list;
1678#endif
1679#ifdef CONFIG_FUTEX
1680        struct robust_list_head __user *robust_list;
1681#ifdef CONFIG_COMPAT
1682        struct compat_robust_list_head __user *compat_robust_list;
1683#endif
1684        struct list_head pi_state_list;
1685        struct futex_pi_state *pi_state_cache;
1686#endif
1687#ifdef CONFIG_PERF_EVENTS
1688        struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1689        struct mutex perf_event_mutex;
1690        struct list_head perf_event_list;
1691#endif
1692#ifdef CONFIG_DEBUG_PREEMPT
1693        unsigned long preempt_disable_ip;
1694#endif
1695#ifdef CONFIG_NUMA
1696        struct mempolicy *mempolicy;    /* Protected by alloc_lock */
1697        short il_next;
1698        short pref_node_fork;
1699#endif
1700#ifdef CONFIG_NUMA_BALANCING
1701        int numa_scan_seq;
1702        unsigned int numa_scan_period;
1703        unsigned int numa_scan_period_max;
1704        int numa_preferred_nid;
1705        unsigned long numa_migrate_retry;
1706        u64 node_stamp;                 /* migration stamp  */
1707        u64 last_task_numa_placement;
1708        u64 last_sum_exec_runtime;
1709        struct callback_head numa_work;
1710
1711        struct list_head numa_entry;
1712        struct numa_group *numa_group;
1713
1714        /*
1715         * numa_faults is an array split into four regions:
1716         * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
1717         * in this precise order.
1718         *
1719         * faults_memory: Exponential decaying average of faults on a per-node
1720         * basis. Scheduling placement decisions are made based on these
1721         * counts. The values remain static for the duration of a PTE scan.
1722         * faults_cpu: Track the nodes the process was running on when a NUMA
1723         * hinting fault was incurred.
1724         * faults_memory_buffer and faults_cpu_buffer: Record faults per node
1725         * during the current scan window. When the scan completes, the counts
1726         * in faults_memory and faults_cpu decay and these values are copied.
1727         */
1728        unsigned long *numa_faults;
1729        unsigned long total_numa_faults;
1730
1731        /*
1732         * numa_faults_locality tracks if faults recorded during the last
1733         * scan window were remote/local or failed to migrate. The task scan
1734         * period is adapted based on the locality of the faults with different
1735         * weights depending on whether they were shared or private faults
1736         */
1737        unsigned long numa_faults_locality[3];
1738
1739        unsigned long numa_pages_migrated;
1740#endif /* CONFIG_NUMA_BALANCING */
1741
1742#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
1743        struct tlbflush_unmap_batch tlb_ubc;
1744#endif
1745
1746        struct rcu_head rcu;
1747
1748        /*
1749         * cache last used pipe for splice
1750         */
1751        struct pipe_inode_info *splice_pipe;
1752
1753        struct page_frag task_frag;
1754
1755#ifdef  CONFIG_TASK_DELAY_ACCT
1756        struct task_delay_info *delays;
1757#endif
1758#ifdef CONFIG_FAULT_INJECTION
1759        int make_it_fail;
1760#endif
1761        /*
1762         * when (nr_dirtied >= nr_dirtied_pause), it's time to call
1763         * balance_dirty_pages() for some dirty throttling pause
1764         */
1765        int nr_dirtied;
1766        int nr_dirtied_pause;
1767        unsigned long dirty_paused_when; /* start of a write-and-pause period */
1768
1769#ifdef CONFIG_LATENCYTOP
1770        int latency_record_count;
1771        struct latency_record latency_record[LT_SAVECOUNT];
1772#endif
1773        /*
1774         * time slack values; these are used to round up poll() and
1775         * select() etc timeout values. These are in nanoseconds.
1776         */
1777        unsigned long timer_slack_ns;
1778        unsigned long default_timer_slack_ns;
1779
1780#ifdef CONFIG_KASAN
1781        unsigned int kasan_depth;
1782#endif
1783#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1784        /* Index of current stored address in ret_stack */
1785        int curr_ret_stack;
1786        /* Stack of return addresses for return function tracing */
1787        struct ftrace_ret_stack *ret_stack;
1788        /* time stamp for last schedule */
1789        unsigned long long ftrace_timestamp;
1790        /*
1791         * Number of functions that haven't been traced
1792         * because of depth overrun.
1793         */
1794        atomic_t trace_overrun;
1795        /* Pause for the tracing */
1796        atomic_t tracing_graph_pause;
1797#endif
1798#ifdef CONFIG_TRACING
1799        /* state flags for use by tracers */
1800        unsigned long trace;
1801        /* bitmask and counter of trace recursion */
1802        unsigned long trace_recursion;
1803#endif /* CONFIG_TRACING */
1804#ifdef CONFIG_MEMCG
1805        struct mem_cgroup *memcg_in_oom;
1806        gfp_t memcg_oom_gfp_mask;
1807        int memcg_oom_order;
1808
1809        /* number of pages to reclaim on returning to userland */
1810        unsigned int memcg_nr_pages_over_high;
1811#endif
1812#ifdef CONFIG_UPROBES
1813        struct uprobe_task *utask;
1814#endif
1815#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
1816        unsigned int    sequential_io;
1817        unsigned int    sequential_io_avg;
1818#endif
1819#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1820        unsigned long   task_state_change;
1821#endif
1822        int pagefault_disabled;
1823/* CPU-specific state of this task */
1824        struct thread_struct thread;
1825/*
1826 * WARNING: on x86, 'thread_struct' contains a variable-sized
1827 * structure.  It *MUST* be at the end of 'task_struct'.
1828 *
1829 * Do not put anything below here!
1830 */
1831};
1832
/*
 * arch_task_struct_size: size used for a task_struct.
 *
 * With CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT it is an external variable;
 * otherwise it is the compile-time sizeof(struct task_struct).
 */
#ifndef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
# define arch_task_struct_size (sizeof(struct task_struct))
#else
extern int arch_task_struct_size __read_mostly;
#endif
1838
/*
 * Future-safe accessor for struct task_struct's cpus_allowed: evaluates to
 * the address of the cpus_allowed member of the object @task points to.
 */
#define tsk_cpus_allowed(task) (&(task)->cpus_allowed)
1841
/*
 * TNF_* flag bits, one bit each.
 * NOTE(review): presumably OR-ed into the 'flags' argument of
 * task_numa_fault() -- confirm against callers.
 */
#define TNF_MIGRATED		(1 << 0)
#define TNF_NO_GROUP		(1 << 1)
#define TNF_SHARED		(1 << 2)
#define TNF_FAULT_LOCAL		(1 << 3)
#define TNF_MIGRATE_FAIL	(1 << 4)
1847
#ifndef CONFIG_NUMA_BALANCING
/* NUMA balancing is compiled out: the hooks below are empty stubs. */
static inline void task_numa_fault(int last_node, int node, int pages,
				   int flags)
{
}

/* Stub: always reports group id 0. */
static inline pid_t task_numa_group_id(struct task_struct *p)
{
	return 0;
}

static inline void set_numabalancing_state(bool enabled)
{
}

static inline void task_numa_free(struct task_struct *p)
{
}

/* Stub: always answers true. */
static inline bool should_numa_migrate_memory(struct task_struct *p,
				struct page *page, int src_nid, int dst_cpu)
{
	return true;
}
#else /* CONFIG_NUMA_BALANCING */
extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
extern void task_numa_free(struct task_struct *p);
extern bool should_numa_migrate_memory(struct task_struct *p,
				       struct page *page,
				       int src_nid, int dst_cpu);
#endif /* CONFIG_NUMA_BALANCING */
1876
1877static inline struct pid *task_pid(struct task_struct *task)
1878{
1879        return task->pids[PIDTYPE_PID].pid;
1880}
1881
1882static inline struct pid *task_tgid(struct task_struct *task)
1883{
1884        return task->group_leader->pids[PIDTYPE_PID].pid;
1885}
1886
1887/*
1888 * Without tasklist or rcu lock it is not safe to dereference
1889 * the result of task_pgrp/task_session even if task == current,
1890 * we can race with another thread doing sys_setsid/sys_setpgid.
1891 */
1892static inline struct pid *task_pgrp(struct task_struct *task)
1893{
1894        return task->group_leader->pids[PIDTYPE_PGID].pid;
1895}
1896
1897static inline struct pid *task_session(struct task_struct *task)
1898{
1899        return task->group_leader->pids[PIDTYPE_SID].pid;
1900}
1901
1902struct pid_namespace;
1903
1904/*
1905 * the helpers to get the task's different pids as they are seen
1906 * from various namespaces
1907 *
1908 * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
1909 * task_xid_vnr()    : virtual id, i.e. the id seen from the pid namespace of
1910 *                     current.
1911 * task_xid_nr_ns()  : id seen from the ns specified;
1912 *
1913 * set_task_vxid()   : assigns a virtual id to a task;
1914 *
1915 * see also pid_nr() etc in include/linux/pid.h
1916 */
1917pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
1918                        struct pid_namespace *ns);
1919
1920static inline pid_t task_pid_nr(struct task_struct *tsk)
1921{
1922        return tsk->pid;
1923}
1924
1925static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
1926                                        struct pid_namespace *ns)
1927{
1928        return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
1929}
1930
1931static inline pid_t task_pid_vnr(struct task_struct *tsk)
1932{
1933        return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
1934}
1935
1936
1937static inline pid_t task_tgid_nr(struct task_struct *tsk)
1938{
1939        return tsk->tgid;
1940}
1941
1942pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1943
1944static inline pid_t task_tgid_vnr(struct task_struct *tsk)
1945{
1946        return pid_vnr(task_tgid(tsk));
1947}
1948
1949
1950static inline int pid_alive(const struct task_struct *p);
1951static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
1952{
1953        pid_t pid = 0;
1954
1955        rcu_read_lock();
1956        if (pid_alive(tsk))
1957                pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
1958        rcu_read_unlock();
1959
1960        return pid;
1961}
1962
1963static inline pid_t task_ppid_nr(const struct task_struct *tsk)
1964{
1965        return task_ppid_nr_ns(tsk, &init_pid_ns);
1966}
1967
1968static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
1969                                        struct pid_namespace *ns)
1970{
1971        return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
1972}
1973
1974static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
1975{
1976        return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
1977}
1978
1979
1980static inline pid_t task_session_nr_ns(struct task_struct *tsk,
1981                                        struct pid_namespace *ns)
1982{
1983        return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
1984}
1985
1986static inline pid_t task_session_vnr(struct task_struct *tsk)
1987{
1988        return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
1989}
1990
1991/* obsolete, do not use */
1992static inline pid_t task_pgrp_nr(struct task_struct *tsk)
1993{
1994        return task_pgrp_nr_ns(tsk, &init_pid_ns);
1995}
1996
1997/**
1998 * pid_alive - check that a task structure is not stale
1999 * @p: Task structure to be checked.
2000 *
2001 * Test if a process is not yet dead (at most zombie state)
2002 * If pid_alive fails, then pointers within the task structure
2003 * can be stale and must not be dereferenced.
2004 *
2005 * Return: 1 if the process is alive. 0 otherwise.
2006 */
2007static inline int pid_alive(const struct task_struct *p)
2008{
2009        return p->pids[PIDTYPE_PID].pid != NULL;
2010}
2011
2012/**
2013 * is_global_init - check if a task structure is init. Since init
2014 * is free to have sub-threads we need to check tgid.
2015 * @tsk: Task structure to be checked.
2016 *
2017 * Check if a task structure is the first user space task the kernel created.
2018 *
2019 * Return: 1 if the task structure is init. 0 otherwise.
2020 */
2021static inline int is_global_init(struct task_struct *tsk)
2022{
2023        return task_tgid_nr(tsk) == 1;
2024}
2025
extern struct pid *cad_pid;

extern void free_task(struct task_struct *tsk);
extern void __put_task_struct(struct task_struct *t);

/* Take a reference on @task: atomically increments task->usage. */
#define get_task_struct(task)			\
	do {					\
		atomic_inc(&(task)->usage);	\
	} while (0)
2032
2033static inline void put_task_struct(struct task_struct *t)
2034{
2035        if (atomic_dec_and_test(&t->usage))
2036                __put_task_struct(t);
2037}
2038
2039#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
2040extern void task_cputime(struct task_struct *t,
2041                         cputime_t *utime, cputime_t *stime);
2042extern void task_cputime_scaled(struct task_struct *t,
2043                                cputime_t *utimescaled, cputime_t *stimescaled);
2044extern cputime_t task_gtime(struct task_struct *t);
2045#else
2046static inline void task_cputime(struct task_struct *t,
2047                                cputime_t *utime, cputime_t *stime)
2048{
2049        if (utime)
2050                *utime = t->utime;
2051        if (stime)
2052                *stime = t->stime;
2053}
2054
2055static inline void task_cputime_scaled(struct task_struct *t,
2056                                       cputime_t *utimescaled,
2057                                       cputime_t *stimescaled)
2058{
2059        if (utimescaled)
2060                *utimescaled = t->utimescaled;
2061        if (stimescaled)
2062                *stimescaled = t->stimescaled;
2063}
2064
2065static inline cputime_t task_gtime(struct task_struct *t)
2066{
2067        return t->gtime;
2068}
2069#endif
2070extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
2071extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
2072
/*
 * Per process flags (bit values tested against and stored in task->flags).
 *
 * Bits 0x00000001, 0x00000002, 0x01000000, 0x02000000 and 0x10000000 have
 * no name in this list.
 */
#define PF_EXITING		0x00000004	/* getting shut down */
#define PF_EXITPIDONE		0x00000008	/* pi exit done on shut down */
#define PF_VCPU			0x00000010	/* I'm a virtual CPU */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* forked but didn't exec */
#define PF_MCE_PROCESS		0x00000080	/* process policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* dumped core */
#define PF_SIGNALED		0x00000400	/* killed by a signal */
#define PF_MEMALLOC		0x00000800	/* allocating memory */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH		0x00002000	/* if unset the fpu must be initialized before use */
#define PF_USED_ASYNC		0x00004000	/* used async_schedule*(), used by module init */
#define PF_NOFREEZE		0x00008000	/* this thread should not be frozen */
#define PF_FROZEN		0x00010000	/* frozen for system suspend */
#define PF_FSTRANS		0x00020000	/* inside a filesystem transaction */
#define PF_KSWAPD		0x00040000	/* I am kswapd */
#define PF_MEMALLOC_NOIO	0x00080000	/* allocating memory without IO involved */
#define PF_LESS_THROTTLE	0x00100000	/* throttle me less: I clean memory */
#define PF_KTHREAD		0x00200000	/* I am a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* randomize virtual address space */
#define PF_SWAPWRITE		0x00800000	/* allowed to write to swap */
#define PF_NO_SETAFFINITY	0x04000000	/* userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY		0x08000000	/* early kill for mce process policy */
#define PF_MUTEX_TESTER		0x20000000	/* thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP		0x40000000	/* freezer should not count it as freezable */
#define PF_SUSPEND_TASK		0x80000000	/* this thread called freeze_processes and should not be frozen */
2103
/*
 * Only the _current_ task may read and write tsk->flags. Other tasks may
 * look at tsk->flags read-only, e.g. through tsk_used_math (as happens
 * during threaded core dumping).
 *
 * There are two exceptions, ptrace and fork: the ptracer may write the
 * child->flags of its traced child, and a forking parent may write its
 * child's child->flags. In both cases the child is guaranteed not to be
 * running, so it cannot be changing child->flags at the same time.
 *
 * The conditional/copy variants clear PF_USED_MATH first and only then
 * evaluate what to set.
 */
#define clear_stopped_child_used_math(child)		\
	do {						\
		(child)->flags &= ~PF_USED_MATH;	\
	} while (0)

#define set_stopped_child_used_math(child)		\
	do {						\
		(child)->flags |= PF_USED_MATH;		\
	} while (0)

#define clear_used_math()	clear_stopped_child_used_math(current)
#define set_used_math()		set_stopped_child_used_math(current)

#define conditional_stopped_child_used_math(condition, child)		\
	do {								\
		(child)->flags &= ~PF_USED_MATH;			\
		(child)->flags |= (condition) ? PF_USED_MATH : 0;	\
	} while (0)

#define conditional_used_math(condition) \
	conditional_stopped_child_used_math(condition, current)

#define copy_to_stopped_child_used_math(child)				\
	do {								\
		(child)->flags &= ~PF_USED_MATH;			\
		(child)->flags |= current->flags & PF_USED_MATH;	\
	} while (0)

/* NOTE: this evaluates to 0 or PF_USED_MATH, never to 1 */
#define tsk_used_math(p)	((p)->flags & PF_USED_MATH)
#define used_math()		tsk_used_math(current)
2128
2129/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
2130 * __GFP_FS is also cleared as it implies __GFP_IO.
2131 */
2132static inline gfp_t memalloc_noio_flags(gfp_t flags)
2133{
2134        if (unlikely(current->flags & PF_MEMALLOC_NOIO))
2135                flags &= ~(__GFP_IO | __GFP_FS);
2136        return flags;
2137}
2138
2139static inline unsigned int memalloc_noio_save(void)
2140{
2141        unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
2142        current->flags |= PF_MEMALLOC_NOIO;
2143        return flags;
2144}
2145
2146static inline void memalloc_noio_restore(unsigned int flags)
2147{
2148        current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
2149}
2150
/*
 * Per-process atomic flags: bit numbers (not masks) within
 * task->atomic_flags.
 */
#define PFA_NO_NEW_PRIVS	0	/* May not gain new privileges. */
#define PFA_SPREAD_PAGE		1	/* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB		2	/* Spread some slab caches over cpuset */
2155
2156
2157#define TASK_PFA_TEST(name, func)                                       \
2158        static inline bool task_##func(struct task_struct *p)           \
2159        { return test_bit(PFA_##name, &p->atomic_flags); }
2160#define TASK_PFA_SET(name, func)                                        \
2161        static inline void task_set_##func(struct task_struct *p)       \
2162        { set_bit(PFA_##name, &p->atomic_flags); }
2163#define TASK_PFA_CLEAR(name, func)                                      \
2164        static inline void task_clear_##func(struct task_struct *p)     \
2165        { clear_bit(PFA_##name, &p->atomic_flags); }
2166
2167TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
2168TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
2169
2170TASK_PFA_TEST(SPREAD_PAGE, spread_page)
2171TASK_PFA_SET(SPREAD_PAGE, spread_page)
2172TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
2173
2174TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
2175TASK_PFA_SET(SPREAD_SLAB, spread_slab)
2176TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
2177
/*
 * task->jobctl flags
 *
 * The low 16 bits carry a signal number; each flag above them is given as
 * a bit number (*_BIT) together with the matching mask.
 */
#define JOBCTL_STOP_SIGMASK		0xffff	/* signr of the last group stop */

/* stop signal dequeued */
#define JOBCTL_STOP_DEQUEUED_BIT	16
#define JOBCTL_STOP_DEQUEUED		(1UL << JOBCTL_STOP_DEQUEUED_BIT)

/* task should stop for group stop */
#define JOBCTL_STOP_PENDING_BIT		17
#define JOBCTL_STOP_PENDING		(1UL << JOBCTL_STOP_PENDING_BIT)

/* consume group stop count */
#define JOBCTL_STOP_CONSUME_BIT		18
#define JOBCTL_STOP_CONSUME		(1UL << JOBCTL_STOP_CONSUME_BIT)

/* trap for STOP */
#define JOBCTL_TRAP_STOP_BIT		19
#define JOBCTL_TRAP_STOP		(1UL << JOBCTL_TRAP_STOP_BIT)

/* trap for NOTIFY */
#define JOBCTL_TRAP_NOTIFY_BIT		20
#define JOBCTL_TRAP_NOTIFY		(1UL << JOBCTL_TRAP_NOTIFY_BIT)

/* switching to TRACED */
#define JOBCTL_TRAPPING_BIT		21
#define JOBCTL_TRAPPING			(1UL << JOBCTL_TRAPPING_BIT)

/* ptracer is listening for events */
#define JOBCTL_LISTENING_BIT		22
#define JOBCTL_LISTENING		(1UL << JOBCTL_LISTENING_BIT)

/* Composite masks */
#define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
2201
2202extern bool task_set_jobctl_pending(struct task_struct *task,
2203                                    unsigned long mask);
2204extern void task_clear_jobctl_trapping(struct task_struct *task);
2205extern void task_clear_jobctl_pending(struct task_struct *task,
2206                                      unsigned long mask);
2207
/*
 * Reset the RCU-related fields of task @p to their initial values. Which
 * fields exist (and are touched) depends on CONFIG_PREEMPT_RCU and
 * CONFIG_TASKS_RCU; with neither set this function is empty.
 */
static inline void rcu_copy_process(struct task_struct *p)
{
#ifdef CONFIG_PREEMPT_RCU
	INIT_LIST_HEAD(&p->rcu_node_entry);
	p->rcu_blocked_node = NULL;
	p->rcu_read_unlock_special.s = 0;
	p->rcu_read_lock_nesting = 0;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
	p->rcu_tasks_holdout = false;
	p->rcu_tasks_idle_cpu = -1;
#endif /* #ifdef CONFIG_TASKS_RCU */
}
2222
2223static inline void tsk_restore_flags(struct task_struct *task,
2224                                unsigned long orig_flags, unsigned long flags)
2225{
2226        task->flags &= ~flags;
2227        task->flags |= orig_flags & flags;
2228}
2229
/* cpumask checks for cpusets; the definitions are not in this header. */
extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
				     const struct cpumask *trial);
extern int task_can_attach(struct task_struct *p,
			   const struct cpumask *cs_cpus_allowed);
2234#ifdef CONFIG_SMP
2235extern void do_set_cpus_allowed(struct task_struct *p,
2236                               const struct cpumask *new_mask);
2237
2238extern int set_cpus_allowed_ptr(struct task_struct *p,
2239                                const struct cpumask *new_mask);
2240#else
2241static inline void do_set_cpus_allowed(struct task_struct *p,
2242                                      const struct cpumask *new_mask)
2243{
2244}
2245static inline int set_cpus_allowed_ptr(struct task_struct *p,
2246                                       const struct cpumask *new_mask)
2247{
2248        if (!cpumask_test_cpu(0, new_mask))
2249                return -EINVAL;
2250        return 0;
2251}
2252#endif
2253
/*
 * calc_load idle hooks: real functions with CONFIG_NO_HZ_COMMON, empty
 * inline stubs without it.
 */
#ifndef CONFIG_NO_HZ_COMMON
static inline void calc_load_enter_idle(void)
{
}

static inline void calc_load_exit_idle(void)
{
}
#else
void calc_load_enter_idle(void);
void calc_load_exit_idle(void);
#endif /* CONFIG_NO_HZ_COMMON */
2261
2262/*
2263 * Do not use outside of architecture code which knows its limitations.
2264 *
2265 * sched_clock() has no promise of monotonicity or bounded drift between
2266 * CPUs, use (which you should not) requires disabling IRQs.
2267 *
2268 * Please use one of the three interfaces below.
2269 */
2270extern unsigned long long notrace sched_clock(void);
2271/*
2272 * See the comment in kernel/sched/clock.c
2273 */
2274extern u64 cpu_clock(int cpu);
2275extern u64 local_clock(void);
2276extern u64 running_clock(void);
2277extern u64 sched_clock_cpu(int cpu);
2278
2279
2280extern void sched_clock_init(void);
2281
2282#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2283static inline void sched_clock_tick(void)
2284{
2285}
2286
2287static inline void sched_clock_idle_sleep_event(void)
2288{
2289}
2290
2291static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
2292{
2293}
2294#else
2295/*
2296 * Architectures can set this to 1 if they have specified
2297 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
2298 * but then during bootup it turns out that sched_clock()
2299 * is reliable after all:
2300 */
2301extern int sched_clock_stable(void);
2302extern void set_sched_clock_stable(void);
2303extern void clear_sched_clock_stable(void);
2304
2305extern void sched_clock_tick(void);
2306extern void sched_clock_idle_sleep_event(void);
2307extern void sched_clock_idle_wakeup_event(u64 delta_ns);
2308#endif
2309
#ifndef CONFIG_IRQ_TIME_ACCOUNTING
static inline void enable_sched_clock_irqtime(void)
{
}

static inline void disable_sched_clock_irqtime(void)
{
}
#else
/*
 * Interface for opting in, at runtime, to irq time accounting based on
 * sched_clock. The opt-in is explicit so that systems with a slow
 * sched_clock do not pay a performance penalty.
 */
extern void enable_sched_clock_irqtime(void);
extern void disable_sched_clock_irqtime(void);
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
2322
extern unsigned long long task_sched_runtime(struct task_struct *task);
2325
/*
 * sched_exec is called by processes performing an exec.
 *
 * Without CONFIG_SMP the call expands to an empty statement. The
 * do { } while (0) form is used instead of a bare "{}" so that
 * "sched_exec();" is exactly one statement and stays valid as the body of an
 * unbraced if that is followed by an else.
 */
#ifdef CONFIG_SMP
extern void sched_exec(void);
#else
#define sched_exec()	do { } while (0)
#endif
2332
2333extern void sched_clock_idle_sleep_event(void);
2334extern void sched_clock_idle_wakeup_event(u64 delta_ns);
2335
/* idle_task_exit() only does work with CONFIG_HOTPLUG_CPU. */
#ifndef CONFIG_HOTPLUG_CPU
static inline void idle_task_exit(void)
{
}
#else
extern void idle_task_exit(void);
#endif
2341
/* A real wake_up_nohz_cpu() needs both CONFIG_NO_HZ_COMMON and CONFIG_SMP. */
#if !defined(CONFIG_NO_HZ_COMMON) || !defined(CONFIG_SMP)
static inline void wake_up_nohz_cpu(int cpu)
{
}
#else
extern void wake_up_nohz_cpu(int cpu);
#endif
2347
#ifndef CONFIG_NO_HZ_FULL
/* Without CONFIG_NO_HZ_FULL the answer is always false. */
static inline bool sched_can_stop_tick(void)
{
	return false;
}
#else
extern bool sched_can_stop_tick(void);
extern u64 scheduler_tick_max_deferment(void);
#endif
2354
#ifndef CONFIG_SCHED_AUTOGROUP
/* Autogroup support compiled out: all hooks are empty. */
static inline void sched_autogroup_create_attach(struct task_struct *p)
{
}

static inline void sched_autogroup_detach(struct task_struct *p)
{
}

static inline void sched_autogroup_fork(struct signal_struct *sig)
{
}

static inline void sched_autogroup_exit(struct signal_struct *sig)
{
}
#else /* CONFIG_SCHED_AUTOGROUP */
extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
extern void sched_autogroup_exit(struct signal_struct *sig);
# ifdef CONFIG_PROC_FS
extern void proc_sched_autogroup_show_task(struct task_struct *p,
					   struct seq_file *m);
extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
# endif
#endif /* CONFIG_SCHED_AUTOGROUP */
2370
2371extern int yield_to(struct task_struct *p, bool preempt);
2372extern void set_user_nice(struct task_struct *p, long nice);
2373extern int task_prio(const struct task_struct *p);
2374/**
2375 * task_nice - return the nice value of a given task.
2376 * @p: the task in question.
2377 *
2378 * Return: The nice value [ -20 ... 0 ... 19 ].
2379 */
2380static inline int task_nice(const struct task_struct *p)
2381{
2382        return PRIO_TO_NICE((p)->static_prio);
2383}
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *p, int policy,
			      const struct sched_param *param);
extern int sched_setscheduler_nocheck(struct task_struct *p, int policy,
				      const struct sched_param *param);
extern int sched_setattr(struct task_struct *p,
			 const struct sched_attr *attr);
extern struct task_struct *idle_task(int cpu);
2394/**
2395 * is_idle_task - is the specified task an idle task?
2396 * @p: the task in question.
2397 *
2398 * Return: 1 if @p is an idle task. 0 otherwise.
2399 */
2400static inline bool is_idle_task(const struct task_struct *p)
2401{
2402        return p->pid == 0;
2403}
extern struct task_struct *curr_task(int cpu);
extern void set_curr_task(int cpu, struct task_struct *p);

void yield(void);
2408
2409union thread_union {
2410        struct thread_info thread_info;
2411        unsigned long stack[THREAD_SIZE/sizeof(long)];
2412};
2413
2414#ifndef __HAVE_ARCH_KSTACK_END
2415static inline int kstack_end(void *addr)
2416{
2417        /* Reliable end of stack detection:
2418         * Some APM bios versions misalign the stack
2419         */
2420        return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
2421}
2422#endif
2423
/* The statically provided init_* objects. */
extern union thread_union init_thread_union;
extern struct task_struct init_task;
extern struct mm_struct init_mm;
extern struct pid_namespace init_pid_ns;
2430
2431/*
2432 * find a task by one of its numerical ids
2433 *
2434 * find_task_by_pid_ns():
2435 *      finds a task by its pid in the specified namespace
2436 * find_task_by_vpid():
2437 *      finds a task by its virtual pid
2438 *
2439 * see also find_vpid() etc in include/linux/pid.h
2440 */
2441
2442extern struct task_struct *find_task_by_vpid(pid_t nr);
2443extern struct task_struct *find_task_by_pid_ns(pid_t nr,
2444                struct pid_namespace *ns);
2445
2446/* per-UID process charging. */
2447extern struct user_struct * alloc_uid(kuid_t);
2448static inline struct user_struct *get_uid(struct user_struct *u)
2449{
2450        atomic_inc(&u->__count);
2451        return u;
2452}
2453extern void free_uid(struct user_struct *);
2454
2455#include <asm/current.h>
2456
extern void xtime_update(unsigned long ticks);

/* Task wake-up entry points; the definitions are not in this header. */
extern int  wake_up_state(struct task_struct *tsk, unsigned int state);
extern int  wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);
/* kick_process() is an empty stub without CONFIG_SMP. */
#ifndef CONFIG_SMP
static inline void kick_process(struct task_struct *tsk)
{
}
#else
extern void kick_process(struct task_struct *tsk);
#endif
2467extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
2468extern void sched_dead(struct task_struct *p);
2469
2470extern void proc_caches_init(void);
2471extern void flush_signals(struct task_struct *);
2472extern void ignore_signals(struct task_struct *);
2473extern void flush_signal_handlers(struct task_struct *, int force_default);
2474extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
2475
2476static inline int kernel_dequeue_signal(siginfo_t *info)
2477{
2478        struct task_struct *tsk = current;
2479        siginfo_t __info;
2480        int ret;
2481
2482        spin_lock_irq(&tsk->sighand->siglock);
2483        ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
2484        spin_unlock_irq(&tsk->sighand->siglock);
2485
2486        return ret;
2487}
2488
2489static inline void kernel_signal_stop(void)
2490{
2491        spin_lock_irq(&current->sighand->siglock);
2492        if (current->jobctl & JOBCTL_STOP_DEQUEUED)
2493                __set_current_state(TASK_STOPPED);
2494        spin_unlock_irq(&current->sighand->siglock);
2495
2496        schedule();
2497}
2498
2499extern void release_task(struct task_struct * p);
2500extern int send_sig_info(int, struct siginfo *, struct task_struct *);
2501extern int force_sigsegv(int, struct task_struct *);
2502extern int force_sig_info(int, struct siginfo *, struct task_struct *);
2503extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
2504extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
2505extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
2506                                const struct cred *, u32);
2507extern int kill_pgrp(struct pid *pid, int sig, int priv);
2508extern int kill_pid(struct pid *pid, int sig, int priv);
2509extern int kill_proc_info(int, struct siginfo *, pid_t);
2510extern __must_check bool do_notify_parent(struct task_struct *, int);
2511extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
2512extern void force_sig(int, struct task_struct *);
2513extern int send_sig(int, struct task_struct *, int);
2514extern int zap_other_threads(struct task_struct *p);
2515extern struct sigqueue *sigqueue_alloc(void);
2516extern void sigqueue_free(struct sigqueue *);
2517extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
2518extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
2519
2520static inline void restore_saved_sigmask(void)
2521{
2522        if (test_and_clear_restore_sigmask())
2523                __set_current_blocked(&current->saved_sigmask);
2524}
2525
2526static inline sigset_t *sigmask_to_save(void)
2527{
2528        sigset_t *res = &current->blocked;
2529        if (unlikely(test_restore_sigmask()))
2530                res = &current->saved_sigmask;
2531        return res;
2532}
2533
2534static inline int kill_cad_pid(int sig, int priv)
2535{
2536        return kill_pid(cad_pid, sig, priv);
2537}
2538
/* These can be the second arg to send_sig_info/send_group_sig_info.  */
/*
 * Each is a small integer (0, 1, 2) cast to a siginfo pointer: a sentinel
 * to compare against, not the address of a real struct siginfo.
 */
#define SEND_SIG_NOINFO ((struct siginfo *) 0)
#define SEND_SIG_PRIV	((struct siginfo *) 1)
#define SEND_SIG_FORCED ((struct siginfo *) 2)
2543
2544/*
2545 * True if we are on the alternate signal stack.
2546 */
2547static inline int on_sig_stack(unsigned long sp)
2548{
2549#ifdef CONFIG_STACK_GROWSUP
2550        return sp >= current->sas_ss_sp &&
2551                sp - current->sas_ss_sp < current->sas_ss_size;
2552#else
2553        return sp > current->sas_ss_sp &&
2554                sp - current->sas_ss_sp <= current->sas_ss_size;
2555#endif
2556}
2557
2558static inline int sas_ss_flags(unsigned long sp)
2559{
2560        if (!current->sas_ss_size)
2561                return SS_DISABLE;
2562
2563        return on_sig_stack(sp) ? SS_ONSTACK : 0;
2564}
2565
2566static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
2567{
2568        if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
2569#ifdef CONFIG_STACK_GROWSUP
2570                return current->sas_ss_sp;
2571#else
2572                return current->sas_ss_sp + current->sas_ss_size;
2573#endif
2574        return sp;
2575}
2576
2577/*
2578 * Routines for handling mm_structs
2579 */
2580extern struct mm_struct * mm_alloc(void);
2581
2582/* mmdrop drops the mm and the page tables */
2583extern void __mmdrop(struct mm_struct *);
2584static inline void mmdrop(struct mm_struct * mm)
2585{
2586        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
2587                __mmdrop(mm);
2588}
2589
2590/* mmput gets rid of the mappings and all user-space */
2591extern void mmput(struct mm_struct *);
2592/* Grab a reference to a task's mm, if it is not already going away */
2593extern struct mm_struct *get_task_mm(struct task_struct *task);
2594/*
2595 * Grab a reference to a task's mm, if it is not already going away
2596 * and ptrace_may_access with the mode parameter passed to it
2597 * succeeds.
2598 */
2599extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current task's stale references to the old mm_struct */
2601extern void mm_release(struct task_struct *, struct mm_struct *);
2602
2603#ifdef CONFIG_HAVE_COPY_THREAD_TLS
2604extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
2605                        struct task_struct *, unsigned long);
2606#else
2607extern int copy_thread(unsigned long, unsigned long, unsigned long,
2608                        struct task_struct *);
2609
/*
 * Fallback used when CONFIG_HAVE_COPY_THREAD_TLS is not set.
 *
 * Such architectures get the tls argument via pt_regs, so @tls is
 * accepted for interface compatibility and deliberately not passed on
 * to copy_thread().
 */
static inline int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
				  unsigned long arg, struct task_struct *p,
				  unsigned long tls)
{
	return copy_thread(clone_flags, sp, arg, p);
}
2618#endif
2619extern void flush_thread(void);
2620extern void exit_thread(void);
2621
2622extern void exit_files(struct task_struct *);
2623extern void __cleanup_sighand(struct sighand_struct *);
2624
2625extern void exit_itimers(struct signal_struct *);
2626extern void flush_itimer_signals(void);
2627
2628extern void do_group_exit(int);
2629
2630extern int do_execve(struct filename *,
2631                     const char __user * const __user *,
2632                     const char __user * const __user *);
2633extern int do_execveat(int, struct filename *,
2634                       const char __user * const __user *,
2635                       const char __user * const __user *,
2636                       int);
2637extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
2638extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
2639struct task_struct *fork_idle(int);
2640extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
2641
2642extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
2643static inline void set_task_comm(struct task_struct *tsk, const char *from)
2644{
2645        __set_task_comm(tsk, from, false);
2646}
2647extern char *get_task_comm(char *to, struct task_struct *tsk);
2648
2649#ifdef CONFIG_SMP
2650void scheduler_ipi(void);
2651extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
2652#else
/* !CONFIG_SMP: empty stub. */
static inline void scheduler_ipi(void)
{
}

/* !CONFIG_SMP: returns 1 without looking at @p or @match_state. */
static inline unsigned long
wait_task_inactive(struct task_struct *p, long match_state)
{
	return 1;
}
2659#endif
2660
/* True when list_empty() holds for the ->tasks list anchored at init_task. */
#define tasklist_empty() \
	list_empty(&init_task.tasks)

/* The task_struct that follows @p on the ->tasks list (RCU list accessor). */
#define next_task(p) \
	list_entry_rcu((p)->tasks.next, struct task_struct, tasks)

/*
 * Walk the ->tasks list starting after init_task.  The cursor is advanced
 * before it is tested, so init_task itself never reaches the loop body.
 */
#define for_each_process(p) \
	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
2669
2670extern bool current_is_single_threaded(void);
2671
/*
 * Careful: do_each_thread/while_each_thread is a double loop so
 *          'break' will not work as expected - use goto instead.
 */
/* Opens the outer per-process loop and a do-block; close with while_each_thread(). */
#define do_each_thread(g, t) \
	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do

/* Closes the do-block: steps @t with next_thread() until it is back at @g. */
#define while_each_thread(g, t) \
	while ((t = next_thread(t)) != g)

/* Walk the RCU list @signal->thread_head, linked through ->thread_node. */
#define __for_each_thread(signal, t)	\
	list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)

/* Same walk, starting from the signal struct of task @p. */
#define for_each_thread(p, t)		\
	__for_each_thread((p)->signal, t)

/* Careful: this is a double loop, 'break' won't work as expected. */
#define for_each_process_thread(p, t)	\
	for_each_process(p) for_each_thread(p, t)
2691
2692static inline int get_nr_threads(struct task_struct *tsk)
2693{
2694        return tsk->signal->nr_threads;
2695}
2696
2697static inline bool thread_group_leader(struct task_struct *p)
2698{
2699        return p->exit_signal >= 0;
2700}
2701
/* Due to the insanities of de_thread it is possible for a process
 * to have the pid of the thread group leader without actually being
 * the thread group leader.  For iteration through the pids in proc
 * all we care about is that we have a task with the appropriate
 * pid; we don't actually care if we have the right task.
 */
2708static inline bool has_group_leader_pid(struct task_struct *p)
2709{
2710        return task_pid(p) == p->signal->leader_pid;
2711}
2712
2713static inline
2714bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
2715{
2716        return p1->signal == p2->signal;
2717}
2718
/*
 * Return the task that follows @p on its ->thread_group list.
 * The RCU list accessor is applied directly to p->thread_group.next.
 */
static inline struct task_struct *next_thread(const struct task_struct *p)
{
	return list_entry_rcu(p->thread_group.next,
			      struct task_struct, thread_group);
}
2724
2725static inline int thread_group_empty(struct task_struct *p)
2726{
2727        return list_empty(&p->thread_group);
2728}
2729
/* True for a thread group leader whose ->thread_group list is not empty. */
#define delay_group_leader(p) \
		(thread_group_leader(p) && !thread_group_empty(p))
2732
2733/*
2734 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
2735 * subscriptions and synchronises with wait4().  Also used in procfs.  Also
2736 * pins the final release of task.io_context.  Also protects ->cpuset and
2737 * ->cgroup.subsys[]. And ->vfork_done.
2738 *
2739 * Nests both inside and outside of read_lock(&tasklist_lock).
2740 * It must not be nested with write_lock_irq(&tasklist_lock),
2741 * neither inside nor outside.
2742 */
2743static inline void task_lock(struct task_struct *p)
2744{
2745        spin_lock(&p->alloc_lock);
2746}
2747
2748static inline void task_unlock(struct task_struct *p)
2749{
2750        spin_unlock(&p->alloc_lock);
2751}
2752
2753extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
2754                                                        unsigned long *flags);
2755
2756static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
2757                                                       unsigned long *flags)
2758{
2759        struct sighand_struct *ret;
2760
2761        ret = __lock_task_sighand(tsk, flags);
2762        (void)__cond_lock(&tsk->sighand->siglock, ret);
2763        return ret;
2764}
2765
2766static inline void unlock_task_sighand(struct task_struct *tsk,
2767                                                unsigned long *flags)
2768{
2769        spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
2770}
2771
/**
 * threadgroup_change_begin - mark the beginning of changes to a threadgroup
 * @tsk: task causing the changes
 *
 * All operations which modify a threadgroup - a new thread joining the
 * group, death of a member thread (the assertion of PF_EXITING) and
 * exec(2) dethreading the process and replacing the leader - are wrapped
 * by threadgroup_change_{begin|end}().  This is to provide a place which
 * subsystems needing threadgroup stability can hook into for
 * synchronization.
 *
 * Calls might_sleep() before cgroup_threadgroup_change_begin(), so it
 * belongs in contexts where sleeping is allowed.
 */
static inline void threadgroup_change_begin(struct task_struct *tsk)
{
	might_sleep();
	cgroup_threadgroup_change_begin(tsk);
}
2788
/**
 * threadgroup_change_end - mark the end of changes to a threadgroup
 * @tsk: task causing the changes
 *
 * Counterpart of threadgroup_change_begin(); forwards @tsk to
 * cgroup_threadgroup_change_end().
 *
 * See threadgroup_change_begin().
 */
static inline void threadgroup_change_end(struct task_struct *tsk)
{
	cgroup_threadgroup_change_end(tsk);
}
2799
2800#ifndef __HAVE_THREAD_FUNCTIONS
2801
/* Interpret the task's ->stack pointer as its struct thread_info. */
#define task_thread_info(task)	((struct thread_info *)(task)->stack)
/* The task's raw ->stack pointer. */
#define task_stack_page(task)	((task)->stack)
2804
2805static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
2806{
2807        *task_thread_info(p) = *task_thread_info(org);
2808        task_thread_info(p)->task = p;
2809}
2810
2811/*
2812 * Return the address of the last usable long on the stack.
2813 *
2814 * When the stack grows down, this is just above the thread
2815 * info struct. Going any lower will corrupt the threadinfo.
2816 *
2817 * When the stack grows up, this is the highest address.
2818 * Beyond that position, we corrupt data on the next page.
2819 */
2820static inline unsigned long *end_of_stack(struct task_struct *p)
2821{
2822#ifdef CONFIG_STACK_GROWSUP
2823        return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
2824#else
2825        return (unsigned long *)(task_thread_info(p) + 1);
2826#endif
2827}
2828
2829#endif
/* True when the word at end_of_stack(task) differs from STACK_END_MAGIC. */
#define task_stack_end_corrupted(task) \
		(*(end_of_stack(task)) != STACK_END_MAGIC)
2832
2833static inline int object_is_on_stack(void *obj)
2834{
2835        void *stack = task_stack_page(current);
2836
2837        return (obj >= stack) && (obj < (stack + THREAD_SIZE));
2838}
2839
2840extern void thread_info_cache_init(void);
2841
2842#ifdef CONFIG_DEBUG_STACK_USAGE
/*
 * Estimate how many bytes of @p's stack have never been written.
 *
 * Starting at end_of_stack(@p), step one word at a time towards the
 * in-use part of the stack.  The word at end_of_stack() itself (the
 * canary) is skipped, as is every following word that still reads as
 * zero.  The result is the byte distance between end_of_stack() and the
 * first non-zero word found.
 *
 * The scan direction has to follow the stack layout.  When the stack
 * grows up, end_of_stack() is the highest usable word, so the scan must
 * walk downwards; incrementing there would leave the stack area and
 * the subtraction would be reversed.
 *
 * NOTE(review): relies on untouched stack words reading as zero -
 * confirm against the stack allocation path.
 */
static inline unsigned long stack_not_used(struct task_struct *p)
{
	unsigned long *n = end_of_stack(p);

	do {	/* Skip over canary */
#ifdef CONFIG_STACK_GROWSUP
		n--;
#else
		n++;
#endif
	} while (!*n);

#ifdef CONFIG_STACK_GROWSUP
	return (unsigned long)end_of_stack(p) - (unsigned long)n;
#else
	return (unsigned long)n - (unsigned long)end_of_stack(p);
#endif
}
2853#endif
2854extern void set_task_stack_end_magic(struct task_struct *tsk);
2855
/*
 * Thread-flag helpers that act on another task's thread_info.
 * See asm/thread_info.h for the TIF_xxxx flags available.
 */

/* Set @flag in @tsk's thread_info. */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	set_ti_thread_flag(ti, flag);
}
2863
/* Clear @flag in @tsk's thread_info. */
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	clear_ti_thread_flag(ti, flag);
}
2868
/* Set @flag in @tsk's thread_info; returns test_and_set_ti_thread_flag()'s result. */
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	return test_and_set_ti_thread_flag(ti, flag);
}
2873
/* Clear @flag in @tsk's thread_info; returns test_and_clear_ti_thread_flag()'s result. */
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	return test_and_clear_ti_thread_flag(ti, flag);
}
2878
/* Query @flag in @tsk's thread_info without modifying it. */
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	struct thread_info *ti = task_thread_info(tsk);

	return test_ti_thread_flag(ti, flag);
}
2883
2884static inline void set_tsk_need_resched(struct task_struct *tsk)
2885{
2886        set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2887}
2888
2889static inline void clear_tsk_need_resched(struct task_struct *tsk)
2890{
2891        clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2892}
2893
2894static inline int test_tsk_need_resched(struct task_struct *tsk)
2895{
2896        return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2897}
2898
2899static inline int restart_syscall(void)
2900{
2901        set_tsk_thread_flag(current, TIF_SIGPENDING);
2902        return -ERESTARTNOINTR;
2903}
2904
2905static inline int signal_pending(struct task_struct *p)
2906{
2907        return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
2908}
2909
2910static inline int __fatal_signal_pending(struct task_struct *p)
2911{
2912        return unlikely(sigismember(&p->pending.signal, SIGKILL));
2913}
2914
/*
 * Returns 1 when @p has TIF_SIGPENDING set and SIGKILL is in its pending
 * set, 0 otherwise.  The pending set is only inspected when the flag is
 * set.
 */
static inline int fatal_signal_pending(struct task_struct *p)
{
	if (!signal_pending(p))
		return 0;

	return __fatal_signal_pending(p) != 0;
}
2919
2920static inline int signal_pending_state(long state, struct task_struct *p)
2921{
2922        if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
2923                return 0;
2924        if (!signal_pending(p))
2925                return 0;
2926
2927        return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
2928}
2929
2930/*
2931 * cond_resched() and cond_resched_lock(): latency reduction via
2932 * explicit rescheduling in places that are safe. The return
2933 * value indicates whether a reschedule was done in fact.
2934 * cond_resched_lock() will drop the spinlock before scheduling,
2935 * cond_resched_softirq() will enable bhs before scheduling.
2936 */
extern int _cond_resched(void);

/*
 * Runs the ___might_sleep() check with offset 0, then evaluates to the
 * value returned by _cond_resched().
 */
#define cond_resched() ({			\
	___might_sleep(__FILE__, __LINE__, 0);	\
	_cond_resched();			\
})

extern int __cond_resched_lock(spinlock_t *lock);

/*
 * Same pattern with PREEMPT_LOCK_OFFSET; evaluates to the value returned
 * by __cond_resched_lock(lock).
 */
#define cond_resched_lock(lock) ({				\
	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
	__cond_resched_lock(lock);				\
})

extern int __cond_resched_softirq(void);

/*
 * Same pattern with SOFTIRQ_DISABLE_OFFSET; evaluates to the value
 * returned by __cond_resched_softirq().
 */
#define cond_resched_softirq() ({					\
	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
	__cond_resched_softirq();					\
})
2957
/*
 * Leave the RCU read-side critical section, call cond_resched(), and
 * re-enter the critical section.  The body compiles to nothing unless
 * CONFIG_DEBUG_ATOMIC_SLEEP is set or CONFIG_PREEMPT_RCU is unset.
 */
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
#endif
}
2966
2967/*
2968 * Does a critical section need to be broken due to another
2969 * task waiting?: (technically does not depend on CONFIG_PREEMPT,
2970 * but a general need for low latency)
2971 */
2972static inline int spin_needbreak(spinlock_t *lock)
2973{
2974#ifdef CONFIG_PREEMPT
2975        return spin_is_contended(lock);
2976#else
2977        return 0;
2978#endif
2979}
2980
2981/*
2982 * Idle thread specific functions to determine the need_resched
2983 * polling state.
2984 */
2985#ifdef TIF_POLLING_NRFLAG
2986static inline int tsk_is_polling(struct task_struct *p)
2987{
2988        return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
2989}
2990
/* Set TIF_POLLING_NRFLAG via set_thread_flag(); no barrier is issued here. */
static inline void __current_set_polling(void)
{
	set_thread_flag(TIF_POLLING_NRFLAG);
}
2995
2996static inline bool __must_check current_set_polling_and_test(void)
2997{
2998        __current_set_polling();
2999
3000        /*
3001         * Polling state must be visible before we test NEED_RESCHED,
3002         * paired by resched_curr()
3003         */
3004        smp_mb__after_atomic();
3005
3006        return unlikely(tif_need_resched());
3007}
3008
/* Clear TIF_POLLING_NRFLAG via clear_thread_flag(); no barrier is issued here. */
static inline void __current_clr_polling(void)
{
	clear_thread_flag(TIF_POLLING_NRFLAG);
}
3013
3014static inline bool __must_check current_clr_polling_and_test(void)
3015{
3016        __current_clr_polling();
3017
3018        /*
3019         * Polling state must be visible before we test NEED_RESCHED,
3020         * paired by resched_curr()
3021         */
3022        smp_mb__after_atomic();
3023
3024        return unlikely(tif_need_resched());
3025}
3026
3027#else
3028static inline int tsk_is_polling(struct task_struct *p) { return 0; }
3029static inline void __current_set_polling(void) { }
3030static inline void __current_clr_polling(void) { }
3031
3032static inline bool __must_check current_set_polling_and_test(void)
3033{
3034        return unlikely(tif_need_resched());
3035}
3036static inline bool __must_check current_clr_polling_and_test(void)
3037{
3038        return unlikely(tif_need_resched());
3039}
3040#endif
3041
/*
 * Clear the polling flag, issue a full memory barrier, then call
 * preempt_fold_need_resched().  The three steps are order-sensitive.
 */
static inline void current_clr_polling(void)
{
	__current_clr_polling();

	/*
	 * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
	 * Once the bit is cleared, we'll get IPIs with every new
	 * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
	 * fold.
	 */
	smp_mb(); /* paired with resched_curr() */

	preempt_fold_need_resched();
}
3056
3057static __always_inline bool need_resched(void)
3058{
3059        return unlikely(tif_need_resched());
3060}
3061
3062/*
3063 * Thread group CPU time accounting.
3064 */
3065void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
3066void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
3067
/*
 * Reevaluate whether the task has signals pending delivery.
 * Wake the task if so.
 * This is required every time the blocked sigset_t changes.
 * Callers must hold sighand->siglock.
 */
3074extern void recalc_sigpending_and_wake(struct task_struct *t);
3075extern void recalc_sigpending(void);
3076
3077extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
3078
3079static inline void signal_wake_up(struct task_struct *t, bool resume)
3080{
3081        signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
3082}
3083static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
3084{
3085        signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
3086}
3087
3088/*
3089 * Wrappers for p->thread_info->cpu access. No-op on UP.
3090 */
3091#ifdef CONFIG_SMP
3092
3093static inline unsigned int task_cpu(const struct task_struct *p)
3094{
3095        return task_thread_info(p)->cpu;
3096}
3097
/* CONFIG_SMP: map task_cpu(@p) through cpu_to_node(). */
static inline int task_node(const struct task_struct *p)
{
	const unsigned int cpu = task_cpu(p);

	return cpu_to_node(cpu);
}
3102
3103extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
3104
3105#else
3106
/* !CONFIG_SMP: every task reports CPU 0. */
static inline unsigned int task_cpu(const struct task_struct *p)
{
	return 0;
}

/* !CONFIG_SMP: nothing to record, so this is an empty stub. */
static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}
3115
3116#endif /* CONFIG_SMP */
3117
3118extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
3119extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
3120
3121#ifdef CONFIG_CGROUP_SCHED
3122extern struct task_group root_task_group;
3123#endif /* CONFIG_CGROUP_SCHED */
3124
3125extern int task_can_switch_user(struct user_struct *up,
3126                                        struct task_struct *tsk);
3127
3128#ifdef CONFIG_TASK_XACCT
3129static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
3130{
3131        tsk->ioac.rchar += amt;
3132}
3133
3134static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
3135{
3136        tsk->ioac.wchar += amt;
3137}
3138
3139static inline void inc_syscr(struct task_struct *tsk)
3140{
3141        tsk->ioac.syscr++;
3142}
3143
3144static inline void inc_syscw(struct task_struct *tsk)
3145{
3146        tsk->ioac.syscw++;
3147}
3148#else
/*
 * !CONFIG_TASK_XACCT: the four accounting helpers keep their signatures
 * but have empty bodies.
 */
static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
{
}

static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
{
}

static inline void inc_syscr(struct task_struct *tsk)
{
}

static inline void inc_syscw(struct task_struct *tsk)
{
}
3164#endif
3165
/* Fallback when TASK_SIZE_OF is not already defined: ignore @tsk, use TASK_SIZE. */
#ifndef TASK_SIZE_OF
#define TASK_SIZE_OF(tsk)	TASK_SIZE
#endif
3169
/*
 * With CONFIG_MEMCG the function is defined elsewhere; without it the
 * call is an empty inline stub.
 */
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
#endif /* CONFIG_MEMCG */
3177
3178static inline unsigned long task_rlimit(const struct task_struct *tsk,
3179                unsigned int limit)
3180{
3181        return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
3182}
3183
3184static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
3185                unsigned int limit)
3186{
3187        return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
3188}
3189
3190static inline unsigned long rlimit(unsigned int limit)
3191{
3192        return task_rlimit(current, limit);
3193}
3194
3195static inline unsigned long rlimit_max(unsigned int limit)
3196{
3197        return task_rlimit_max(current, limit);
3198}
3199
3200#endif
Note: See TracBrowser for help on using the repository browser.