diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e24a9820e12fa..ba2cf1cec3d9c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -34,6 +34,7 @@ config ARM
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
@@ -72,7 +73,7 @@ config ARM
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
@@ -117,6 +118,8 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
+ select HAVE_PREEMPT_LAZY
select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7f092cb55a417..ffcbf8ebed4bf 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -62,6 +62,7 @@ struct cpu_context_save {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
__u32 cpu; /* cpu */
__u32 cpu_domain; /* cpu domain */
struct cpu_context_save cpu_context; /* cpu context */
@@ -129,6 +130,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_UPROBE 3 /* breakpointed or singlestepping */
#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 5

#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)

/* Checks for any syscall work in entry-common.S */
@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
/*
* Change these and you break ASM code in entry-common.S
*/
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NOTIFY_SIGNAL)

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 38121c59cbc26..c6fafd53d5bea 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain));
DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context));
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index c39303e5c2347..cfb4660e9feab 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -222,11 +222,18 @@ ENDPROC(__dabt_svc)

#ifdef CONFIG_PREEMPTION
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exception
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY
blne svc_preempt
+1:
#endif

svc_exit r5, irq = 1 @ return from exception
@@ -241,8 +248,14 @@ ENDPROC(__irq_svc)
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
- b 1b
+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r0, #0 @ if preempt lazy count != 0
+ beq 1b
+ ret r8 @ go again
+
#endif

__und_fault:
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index e07f359254c3c..b50a3248e79f3 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
*/
trace_hardirqs_off();
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 2418f1efabd87..79ab2138ab0a8 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -436,6 +436,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);

+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;

@@ -506,6 +509,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1023e896d46b8..29fcf54cf68ad 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -95,6 +95,7 @@ config ARM64
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select ARCH_SUPPORTS_RT
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -207,6 +208,7 @@ config ARM64
select HAVE_PERF_USER_STACK_DUMP
select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_PREEMPT_LAZY
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_FUNCTION_ARG_ACCESS_API
select MMU_GATHER_RCU_TABLE_FREE
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
|
|
index 0159b625cc7f0..a5486918e5eeb 100644
|
|
--- a/arch/arm64/include/asm/preempt.h
|
|
+++ b/arch/arm64/include/asm/preempt.h
|
|
@@ -71,13 +71,36 @@ static inline bool __preempt_count_dec_and_test(void)
|
|
* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
|
|
* pair.
|
|
*/
|
|
- return !pc || !READ_ONCE(ti->preempt_count);
|
|
+ if (!pc || !READ_ONCE(ti->preempt_count))
|
|
+ return true;
|
|
+#ifdef CONFIG_PREEMPT_LAZY
|
|
+ if ((pc & ~PREEMPT_NEED_RESCHED))
|
|
+ return false;
|
|
+ if (current_thread_info()->preempt_lazy_count)
|
|
+ return false;
|
|
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
|
|
+#else
|
|
+ return false;
|
|
+#endif
|
|
}
|
|
|
|
static inline bool should_resched(int preempt_offset)
|
|
{
|
|
+#ifdef CONFIG_PREEMPT_LAZY
|
|
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
|
|
+ if (pc == preempt_offset)
|
|
+ return true;
|
|
+
|
|
+ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
|
|
+ return false;
|
|
+
|
|
+ if (current_thread_info()->preempt_lazy_count)
|
|
+ return false;
|
|
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
|
|
+#else
|
|
u64 pc = READ_ONCE(current_thread_info()->preempt_count);
|
|
return pc == preempt_offset;
|
|
+#endif
|
|
}
|
|
|
|
#ifdef CONFIG_PREEMPTION
|
|
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
|
|
index 848739c15de82..4b7148fd5551f 100644
|
|
--- a/arch/arm64/include/asm/thread_info.h
|
|
+++ b/arch/arm64/include/asm/thread_info.h
|
|
@@ -26,6 +26,7 @@ struct thread_info {
|
|
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
|
|
u64 ttbr0; /* saved TTBR0_EL1 */
|
|
#endif
|
|
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
|
|
union {
|
|
u64 preempt_count; /* 0 => preemptible, <0 => bug */
|
|
struct {
|
|
@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst,
|
|
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
|
|
#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
|
|
#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
|
|
+#define TIF_NEED_RESCHED_LAZY 7
|
|
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
|
|
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
|
|
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
|
|
@@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_struct *dst,
|
|
#define _TIF_SVE (1 << TIF_SVE)
|
|
#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
|
|
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
|
|
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
|
|
|
|
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
|
|
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
|
|
+ _TIF_SIGPENDING | \
|
|
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
|
|
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
|
|
_TIF_NOTIFY_SIGNAL)
|
|
@@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_struct *dst,
|
|
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
|
|
_TIF_SYSCALL_EMU)
|
|
|
|
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
|
|
+
|
|
#ifdef CONFIG_SHADOW_CALL_STACK
|
|
#define INIT_SCS \
|
|
.scs_base = init_shadow_call_stack, \
|
|
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
|
|
index ae345b06e9f7e..da7e10ff11a81 100644
|
|
--- a/arch/arm64/kernel/asm-offsets.c
|
|
+++ b/arch/arm64/kernel/asm-offsets.c
|
|
@@ -33,6 +33,7 @@ int main(void)
|
|
DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu));
|
|
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
|
|
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
|
|
+ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count));
|
|
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
|
|
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
|
|
#endif
|
|
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
|
|
index 06a02707f4882..e7f5284d5daf1 100644
|
|
--- a/arch/arm64/kernel/signal.c
|
|
+++ b/arch/arm64/kernel/signal.c
|
|
@@ -1278,7 +1278,7 @@ static void do_signal(struct pt_regs *regs)
|
|
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
|
|
{
|
|
do {
|
|
- if (thread_flags & _TIF_NEED_RESCHED) {
|
|
+ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
|
|
/* Unmask Debug and SError for the next task */
|
|
local_daif_restore(DAIF_PROCCTX_NOIRQ);
|
|
|
|
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
|
|
index a6c4407d3ec83..25f98b854d32f 100644
|
|
--- a/arch/powerpc/Kconfig
|
|
+++ b/arch/powerpc/Kconfig
|
|
@@ -154,6 +154,7 @@ config PPC
|
|
select ARCH_STACKWALK
|
|
select ARCH_SUPPORTS_ATOMIC_RMW
|
|
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
|
|
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
|
|
select ARCH_USE_BUILTIN_BSWAP
|
|
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
|
|
select ARCH_USE_MEMTEST
|
|
@@ -247,8 +248,10 @@ config PPC
|
|
select HAVE_PERF_EVENTS_NMI if PPC64
|
|
select HAVE_PERF_REGS
|
|
select HAVE_PERF_USER_STACK_DUMP
|
|
+ select HAVE_PREEMPT_LAZY
|
|
select HAVE_REGS_AND_STACK_ACCESS_API
|
|
select HAVE_RELIABLE_STACKTRACE
|
|
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
|
|
select HAVE_RSEQ
|
|
select HAVE_SETUP_PER_CPU_AREA if PPC64
|
|
select HAVE_SOFTIRQ_ON_OWN_STACK
|
|
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
|
|
index 283c346478565..4727f40052ddd 100644
|
|
--- a/arch/powerpc/include/asm/stackprotector.h
|
|
+++ b/arch/powerpc/include/asm/stackprotector.h
|
|
@@ -19,8 +19,13 @@
|
|
*/
|
|
static __always_inline void boot_init_stack_canary(void)
|
|
{
|
|
- unsigned long canary = get_random_canary();
|
|
+ unsigned long canary;
|
|
|
|
+#ifndef CONFIG_PREEMPT_RT
|
|
+ canary = get_random_canary();
|
|
+#else
|
|
+ canary = ((unsigned long)&canary) & CANARY_MASK;
|
|
+#endif
|
|
current->stack_canary = canary;
|
|
#ifdef CONFIG_PPC64
|
|
get_paca()->canary = canary;
|
|
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
|
|
index af58f1ed3952e..520864de8bb27 100644
|
|
--- a/arch/powerpc/include/asm/thread_info.h
|
|
+++ b/arch/powerpc/include/asm/thread_info.h
|
|
@@ -53,6 +53,8 @@
|
|
struct thread_info {
|
|
int preempt_count; /* 0 => preemptable,
|
|
<0 => BUG */
|
|
+ int preempt_lazy_count; /* 0 => preemptable,
|
|
+ <0 => BUG */
|
|
#ifdef CONFIG_SMP
|
|
unsigned int cpu;
|
|
#endif
|
|
@@ -77,6 +79,7 @@ struct thread_info {
|
|
#define INIT_THREAD_INFO(tsk) \
|
|
{ \
|
|
.preempt_count = INIT_PREEMPT_COUNT, \
|
|
+ .preempt_lazy_count = 0, \
|
|
.flags = 0, \
|
|
}
|
|
|
|
@@ -102,6 +105,7 @@ void arch_setup_new_exec(void);
|
|
#define TIF_PATCH_PENDING 6 /* pending live patching update */
|
|
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
|
|
#define TIF_SINGLESTEP 8 /* singlestepping active */
|
|
+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
|
|
#define TIF_SECCOMP 10 /* secure computing */
|
|
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
|
|
#define TIF_NOERROR 12 /* Force successful syscall return */
|
|
@@ -117,6 +121,7 @@ void arch_setup_new_exec(void);
|
|
#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
|
|
#define TIF_32BIT 20 /* 32 bit binary */
|
|
|
|
+
|
|
/* as above, but as bit values */
|
|
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
|
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
|
|
@@ -128,6 +133,7 @@ void arch_setup_new_exec(void);
|
|
#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
|
|
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
|
|
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
|
|
+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
|
|
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
|
|
#define _TIF_RESTOREALL (1<<TIF_RESTOREALL)
|
|
#define _TIF_NOERROR (1<<TIF_NOERROR)
|
|
@@ -141,10 +147,12 @@ void arch_setup_new_exec(void);
|
|
_TIF_SYSCALL_EMU)
|
|
|
|
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
|
|
+ _TIF_NEED_RESCHED_LAZY | \
|
|
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
|
|
_TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
|
|
_TIF_NOTIFY_SIGNAL)
|
|
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
|
|
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
|
|
|
|
/* Bits in local_flags */
|
|
/* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
|
|
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
|
|
index 0ec1581619db5..e333feb12c984 100644
|
|
--- a/arch/powerpc/kernel/interrupt.c
|
|
+++ b/arch/powerpc/kernel/interrupt.c
|
|
@@ -186,7 +186,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
|
|
ti_flags = read_thread_flags();
|
|
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
|
|
local_irq_enable();
|
|
- if (ti_flags & _TIF_NEED_RESCHED) {
|
|
+ if (ti_flags & _TIF_NEED_RESCHED_MASK) {
|
|
schedule();
|
|
} else {
|
|
/*
|
|
@@ -398,11 +398,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
|
|
/* Returning to a kernel context with local irqs enabled. */
|
|
WARN_ON_ONCE(!(regs->msr & MSR_EE));
|
|
again:
|
|
- if (IS_ENABLED(CONFIG_PREEMPT)) {
|
|
+ if (IS_ENABLED(CONFIG_PREEMPTION)) {
|
|
/* Return to preemptible kernel context */
|
|
if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
|
|
if (preempt_count() == 0)
|
|
preempt_schedule_irq();
|
|
+ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
|
|
+ if ((preempt_count() == 0) &&
|
|
+ (current_thread_info()->preempt_lazy_count == 0))
|
|
+ preempt_schedule_irq();
|
|
}
|
|
}
|
|
|
|
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
|
|
index 9bdd79aa51cfc..038f8355b29ca 100644
|
|
--- a/arch/powerpc/kernel/traps.c
|
|
+++ b/arch/powerpc/kernel/traps.c
|
|
@@ -261,12 +261,17 @@ static char *get_mmu_str(void)
|
|
|
|
static int __die(const char *str, struct pt_regs *regs, long err)
|
|
{
|
|
+ const char *pr = "";
|
|
+
|
|
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
|
|
|
|
+ if (IS_ENABLED(CONFIG_PREEMPTION))
|
|
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
|
|
+
|
|
printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
|
|
IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
|
|
PAGE_SIZE / 1024, get_mmu_str(),
|
|
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
|
|
+ pr,
|
|
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
|
|
IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
|
|
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
|
|
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
|
|
index a9f57dad6d916..a0b528d4bb7cd 100644
|
|
--- a/arch/powerpc/kvm/Kconfig
|
|
+++ b/arch/powerpc/kvm/Kconfig
|
|
@@ -225,6 +225,7 @@ config KVM_E500MC
|
|
config KVM_MPIC
|
|
bool "KVM in-kernel MPIC emulation"
|
|
depends on KVM && PPC_E500
|
|
+ depends on !PREEMPT_RT
|
|
select HAVE_KVM_IRQCHIP
|
|
select HAVE_KVM_IRQFD
|
|
select HAVE_KVM_IRQ_ROUTING
|
|
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
|
|
index 9d229ef7f86ef..ada817c49b722 100644
|
|
--- a/arch/powerpc/perf/imc-pmu.c
|
|
+++ b/arch/powerpc/perf/imc-pmu.c
|
|
@@ -51,7 +51,7 @@ static int trace_imc_mem_size;
|
|
* core and trace-imc
|
|
*/
|
|
static struct imc_pmu_ref imc_global_refc = {
|
|
- .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock),
|
|
+ .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock),
|
|
.id = 0,
|
|
.refc = 0,
|
|
};
|
|
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
|
|
index 21b22bf16ce66..b506377a16a74 100644
|
|
--- a/arch/powerpc/platforms/pseries/Kconfig
|
|
+++ b/arch/powerpc/platforms/pseries/Kconfig
|
|
@@ -2,6 +2,7 @@
|
|
config PPC_PSERIES
|
|
depends on PPC64 && PPC_BOOK3S
|
|
bool "IBM pSeries & new (POWER5-based) iSeries"
|
|
+ select GENERIC_ALLOCATOR
|
|
select HAVE_PCSPKR_PLATFORM
|
|
select MPIC
|
|
select OF_DYNAMIC
|
|
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
|
|
index c74b71d4733d4..64ba14baabd30 100644
|
|
--- a/arch/powerpc/platforms/pseries/iommu.c
|
|
+++ b/arch/powerpc/platforms/pseries/iommu.c
|
|
@@ -24,6 +24,7 @@
|
|
#include <linux/of.h>
|
|
#include <linux/iommu.h>
|
|
#include <linux/rculist.h>
|
|
+#include <linux/local_lock.h>
|
|
#include <asm/io.h>
|
|
#include <asm/prom.h>
|
|
#include <asm/rtas.h>
|
|
@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
|
|
return ret;
|
|
}
|
|
|
|
-static DEFINE_PER_CPU(__be64 *, tce_page);
|
|
+struct tce_page {
|
|
+ __be64 * page;
|
|
+ local_lock_t lock;
|
|
+};
|
|
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
|
|
+ .lock = INIT_LOCAL_LOCK(lock),
|
|
+};
|
|
|
|
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
|
|
long npages, unsigned long uaddr,
|
|
@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
|
|
direction, attrs);
|
|
}
|
|
|
|
- local_irq_save(flags); /* to protect tcep and the page behind it */
|
|
+ /* to protect tcep and the page behind it */
|
|
+ local_lock_irqsave(&tce_page.lock, flags);
|
|
|
|
- tcep = __this_cpu_read(tce_page);
|
|
+ tcep = __this_cpu_read(tce_page.page);
|
|
|
|
/* This is safe to do since interrupts are off when we're called
|
|
* from iommu_alloc{,_sg}()
|
|
@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
|
|
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
|
|
/* If allocation fails, fall back to the loop implementation */
|
|
if (!tcep) {
|
|
- local_irq_restore(flags);
|
|
+ local_unlock_irqrestore(&tce_page.lock, flags);
|
|
return tce_build_pSeriesLP(tbl->it_index, tcenum,
|
|
tceshift,
|
|
npages, uaddr, direction, attrs);
|
|
}
|
|
- __this_cpu_write(tce_page, tcep);
|
|
+ __this_cpu_write(tce_page.page, tcep);
|
|
}
|
|
|
|
rpn = __pa(uaddr) >> tceshift;
|
|
@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
|
|
tcenum += limit;
|
|
} while (npages > 0 && !rc);
|
|
|
|
- local_irq_restore(flags);
|
|
+ local_unlock_irqrestore(&tce_page.lock, flags);
|
|
|
|
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
|
|
ret = (int)rc;
|
|
@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
|
|
DMA_BIDIRECTIONAL, 0);
|
|
}
|
|
|
|
- local_irq_disable(); /* to protect tcep and the page behind it */
|
|
- tcep = __this_cpu_read(tce_page);
|
|
+ /* to protect tcep and the page behind it */
|
|
+ local_lock_irq(&tce_page.lock);
|
|
+ tcep = __this_cpu_read(tce_page.page);
|
|
|
|
if (!tcep) {
|
|
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
|
|
if (!tcep) {
|
|
- local_irq_enable();
|
|
+ local_unlock_irq(&tce_page.lock);
|
|
return -ENOMEM;
|
|
}
|
|
- __this_cpu_write(tce_page, tcep);
|
|
+ __this_cpu_write(tce_page.page, tcep);
|
|
}
|
|
|
|
proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
|
|
@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
|
|
|
|
/* error cleanup: caller will clear whole range */
|
|
|
|
- local_irq_enable();
|
|
+ local_unlock_irq(&tce_page.lock);
|
|
return rc;
|
|
}
|
|
|
|
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
|
|
index a825bf031f495..dcbf3c08926b1 100644
|
|
--- a/arch/x86/Kconfig
|
|
+++ b/arch/x86/Kconfig
|
|
@@ -114,6 +114,7 @@ config X86
|
|
select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG
|
|
select ARCH_SUPPORTS_LTO_CLANG
|
|
select ARCH_SUPPORTS_LTO_CLANG_THIN
|
|
+ select ARCH_SUPPORTS_RT
|
|
select ARCH_USE_BUILTIN_BSWAP
|
|
select ARCH_USE_MEMTEST
|
|
select ARCH_USE_QUEUED_RWLOCKS
|
|
@@ -252,6 +253,7 @@ config X86
|
|
select HAVE_PCI
|
|
select HAVE_PERF_REGS
|
|
select HAVE_PERF_USER_STACK_DUMP
|
|
+ select HAVE_PREEMPT_LAZY
|
|
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
|
|
select MMU_GATHER_MERGE_VMAS
|
|
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
|
|
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
|
|
index 2d13f25b1bd8f..5b096893f6a21 100644
|
|
--- a/arch/x86/include/asm/preempt.h
|
|
+++ b/arch/x86/include/asm/preempt.h
|
|
@@ -90,18 +90,49 @@ static __always_inline void __preempt_count_sub(int val)
|
|
* a decrement which hits zero means we have no preempt_count and should
|
|
* reschedule.
|
|
*/
|
|
-static __always_inline bool __preempt_count_dec_and_test(void)
|
|
+static __always_inline bool ____preempt_count_dec_and_test(void)
|
|
{
|
|
return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
|
|
__percpu_arg([var]));
|
|
}
|
|
|
|
+static __always_inline bool __preempt_count_dec_and_test(void)
|
|
+{
|
|
+ if (____preempt_count_dec_and_test())
|
|
+ return true;
|
|
+#ifdef CONFIG_PREEMPT_LAZY
|
|
+ if (preempt_count())
|
|
+ return false;
|
|
+ if (current_thread_info()->preempt_lazy_count)
|
|
+ return false;
|
|
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
|
|
+#else
|
|
+ return false;
|
|
+#endif
|
|
+}
|
|
+
|
|
/*
|
|
* Returns true when we need to resched and can (barring IRQ state).
|
|
*/
|
|
static __always_inline bool should_resched(int preempt_offset)
|
|
{
|
|
+#ifdef CONFIG_PREEMPT_LAZY
|
|
+ u32 tmp;
|
|
+ tmp = raw_cpu_read_4(pcpu_hot.preempt_count);
|
|
+ if (tmp == preempt_offset)
|
|
+ return true;
|
|
+
|
|
+ /* preempt count == 0 ? */
|
|
+ tmp &= ~PREEMPT_NEED_RESCHED;
|
|
+ if (tmp != preempt_offset)
|
|
+ return false;
|
|
+ /* XXX PREEMPT_LOCK_OFFSET */
|
|
+ if (current_thread_info()->preempt_lazy_count)
|
|
+ return false;
|
|
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
|
|
+#else
|
|
return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset);
|
|
+#endif
|
|
}
|
|
|
|
#ifdef CONFIG_PREEMPTION
|
|
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
|
|
index f1cccba52eb97..c8697ca0378f4 100644
|
|
--- a/arch/x86/include/asm/thread_info.h
|
|
+++ b/arch/x86/include/asm/thread_info.h
|
|
@@ -57,6 +57,8 @@ struct thread_info {
|
|
unsigned long flags; /* low level flags */
|
|
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
|
|
u32 status; /* thread synchronous flags */
|
|
+ int preempt_lazy_count; /* 0 => lazy preemptable
|
|
+ <0 => BUG */
|
|
#ifdef CONFIG_SMP
|
|
u32 cpu; /* current CPU */
|
|
#endif
|
|
@@ -65,6 +67,7 @@ struct thread_info {
|
|
#define INIT_THREAD_INFO(tsk) \
|
|
{ \
|
|
.flags = 0, \
|
|
+ .preempt_lazy_count = 0, \
|
|
}
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
@@ -92,6 +95,7 @@ struct thread_info {
|
|
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
|
|
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
|
|
#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
|
|
+#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */
|
|
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
|
|
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
|
|
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
|
|
@@ -115,6 +119,7 @@ struct thread_info {
|
|
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
|
|
#define _TIF_NOTSC (1 << TIF_NOTSC)
|
|
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
|
|
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
|
|
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
|
|
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
|
|
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
|
|
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
|
|
index aa490da3cef23..d73b6d32bd827 100644
|
|
--- a/drivers/block/zram/zram_drv.c
|
|
+++ b/drivers/block/zram/zram_drv.c
|
|
@@ -57,6 +57,40 @@ static void zram_free_page(struct zram *zram, size_t index);
|
|
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
|
|
u32 index, int offset, struct bio *bio);
|
|
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
|
|
+{
|
|
+ size_t index;
|
|
+
|
|
+ for (index = 0; index < num_pages; index++)
|
|
+ spin_lock_init(&zram->table[index].lock);
|
|
+}
|
|
+
|
|
+static int zram_slot_trylock(struct zram *zram, u32 index)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ ret = spin_trylock(&zram->table[index].lock);
|
|
+ if (ret)
|
|
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static void zram_slot_lock(struct zram *zram, u32 index)
|
|
+{
|
|
+ spin_lock(&zram->table[index].lock);
|
|
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
|
|
+}
|
|
+
|
|
+static void zram_slot_unlock(struct zram *zram, u32 index)
|
|
+{
|
|
+ __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
|
|
+ spin_unlock(&zram->table[index].lock);
|
|
+}
|
|
+
|
|
+#else
|
|
+
|
|
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
|
|
|
|
static int zram_slot_trylock(struct zram *zram, u32 index)
|
|
{
|
|
@@ -72,6 +106,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index)
|
|
{
|
|
bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
|
|
}
|
|
+#endif
|
|
|
|
static inline bool init_done(struct zram *zram)
|
|
{
|
|
@@ -1311,6 +1346,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
|
|
|
|
if (!huge_class_size)
|
|
huge_class_size = zs_huge_class_size(zram->mem_pool);
|
|
+ zram_meta_init_table_locks(zram, num_pages);
|
|
return true;
|
|
}
|
|
|
|
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
|
|
index c5254626f051f..a6b24dfec95fb 100644
|
|
--- a/drivers/block/zram/zram_drv.h
|
|
+++ b/drivers/block/zram/zram_drv.h
|
|
@@ -69,6 +69,9 @@ struct zram_table_entry {
|
|
unsigned long element;
|
|
};
|
|
unsigned long flags;
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+ spinlock_t lock;
|
|
+#endif
|
|
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
|
|
ktime_t ac_time;
|
|
#endif
|
|
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
|
|
index ed5dabd3c72d6..450d7985ff346 100644
|
|
--- a/drivers/char/tpm/tpm_tis.c
|
|
+++ b/drivers/char/tpm/tpm_tis.c
|
|
@@ -50,6 +50,45 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da
|
|
return container_of(data, struct tpm_tis_tcg_phy, priv);
|
|
}
|
|
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+/*
|
|
+ * Flush previous write operations with a dummy read operation to the
|
|
+ * TPM MMIO base address.
|
|
+ */
|
|
+static inline void tpm_tis_flush(void __iomem *iobase)
|
|
+{
|
|
+ ioread8(iobase + TPM_ACCESS(0));
|
|
+}
|
|
+#else
|
|
+#define tpm_tis_flush(iobase) do { } while (0)
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Write a byte word to the TPM MMIO address, and flush the write queue.
|
|
+ * The flush ensures that the data is sent immediately over the bus and not
|
|
+ * aggregated with further requests and transferred later in a batch. The large
|
|
+ * write requests can lead to unwanted latency spikes by blocking the CPU until
|
|
+ * the complete batch has been transferred.
|
|
+ */
|
|
+static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
|
|
+{
|
|
+ iowrite8(b, iobase + addr);
|
|
+ tpm_tis_flush(iobase);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Write a 32-bit word to the TPM MMIO address, and flush the write queue.
|
|
+ * The flush ensures that the data is sent immediately over the bus and not
|
|
+ * aggregated with further requests and transferred later in a batch. The large
|
|
+ * write requests can lead to unwanted latency spikes by blocking the CPU until
|
|
+ * the complete batch has been transferred.
|
|
+ */
|
|
+static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
|
|
+{
|
|
+ iowrite32(b, iobase + addr);
|
|
+ tpm_tis_flush(iobase);
|
|
+}
|
|
+
|
|
static int interrupts = -1;
|
|
module_param(interrupts, int, 0444);
|
|
MODULE_PARM_DESC(interrupts, "Enable interrupts");
|
|
@@ -186,12 +225,12 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
|
|
switch (io_mode) {
|
|
case TPM_TIS_PHYS_8:
|
|
while (len--)
|
|
- iowrite8(*value++, phy->iobase + addr);
|
|
+ tpm_tis_iowrite8(*value++, phy->iobase, addr);
|
|
break;
|
|
case TPM_TIS_PHYS_16:
|
|
return -EINVAL;
|
|
case TPM_TIS_PHYS_32:
|
|
- iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr);
|
|
+ tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr);
|
|
break;
|
|
}
|
|
|
|
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
|
|
index 98f4e44976e09..d4dba1f89fde5 100644
|
|
--- a/drivers/gpu/drm/i915/Kconfig
|
|
+++ b/drivers/gpu/drm/i915/Kconfig
|
|
@@ -3,7 +3,6 @@ config DRM_I915
|
|
tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
|
|
depends on DRM
|
|
depends on X86 && PCI
|
|
- depends on !PREEMPT_RT
|
|
select INTEL_GTT if X86
|
|
select INTERVAL_TREE
|
|
# we need shmfs for the swappable backing store, and in particular
|
|
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
|
|
index d5b5d40ed817f..710e51d2377fe 100644
|
|
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
|
|
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
|
|
@@ -520,7 +520,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state)
|
|
*/
|
|
intel_psr_wait_for_idle_locked(new_crtc_state);
|
|
|
|
- local_irq_disable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ local_irq_disable();
|
|
|
|
crtc->debug.min_vbl = min;
|
|
crtc->debug.max_vbl = max;
|
|
@@ -545,11 +546,13 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state)
|
|
break;
|
|
}
|
|
|
|
- local_irq_enable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ local_irq_enable();
|
|
|
|
timeout = schedule_timeout(timeout);
|
|
|
|
- local_irq_disable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ local_irq_disable();
|
|
}
|
|
|
|
finish_wait(wq, &wait);
|
|
@@ -582,7 +585,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state)
|
|
return;
|
|
|
|
irq_disable:
|
|
- local_irq_disable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ local_irq_disable();
|
|
}
|
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
|
|
@@ -691,7 +695,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
|
|
if (new_crtc_state->seamless_m_n && intel_crtc_needs_fastset(new_crtc_state))
|
|
intel_crtc_update_active_timings(new_crtc_state);
|
|
|
|
- local_irq_enable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ local_irq_enable();
|
|
|
|
if (intel_vgpu_active(dev_priv))
|
|
return;
|
|
diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c
|
|
index 4c83e2320bcac..2dd4ac8b30266 100644
|
|
--- a/drivers/gpu/drm/i915/display/intel_vblank.c
|
|
+++ b/drivers/gpu/drm/i915/display/intel_vblank.c
|
|
@@ -293,7 +293,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
|
|
*/
|
|
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
|
|
|
|
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
|
|
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ preempt_disable();
|
|
|
|
/* Get optional system timestamp before query. */
|
|
if (stime)
|
|
@@ -358,7 +359,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
|
|
if (etime)
|
|
*etime = ktime_get();
|
|
|
|
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
|
|
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ preempt_enable();
|
|
|
|
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
|
|
|
|
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
|
|
index ecc990ec1b952..8d04b10681f0d 100644
|
|
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
|
|
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
|
|
@@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
|
|
/* Kick the work once more to drain the signalers, and disarm the irq */
|
|
irq_work_sync(&b->irq_work);
|
|
while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
|
|
- local_irq_disable();
|
|
- signal_irq_work(&b->irq_work);
|
|
- local_irq_enable();
|
|
+ irq_work_queue(&b->irq_work);
|
|
cond_resched();
|
|
+ irq_work_sync(&b->irq_work);
|
|
}
|
|
}
|
|
|
|
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
|
|
index 750326434677f..a2658a8ff7353 100644
|
|
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
|
|
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
|
|
@@ -1303,7 +1303,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|
* and context switches) submission.
|
|
*/
|
|
|
|
- spin_lock(&sched_engine->lock);
|
|
+ spin_lock_irq(&sched_engine->lock);
|
|
|
|
/*
|
|
* If the queue is higher priority than the last
|
|
@@ -1403,7 +1403,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|
* Even if ELSP[1] is occupied and not worthy
|
|
* of timeslices, our queue might be.
|
|
*/
|
|
- spin_unlock(&sched_engine->lock);
|
|
+ spin_unlock_irq(&sched_engine->lock);
|
|
return;
|
|
}
|
|
}
|
|
@@ -1429,7 +1429,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|
|
|
if (last && !can_merge_rq(last, rq)) {
|
|
spin_unlock(&ve->base.sched_engine->lock);
|
|
- spin_unlock(&engine->sched_engine->lock);
|
|
+ spin_unlock_irq(&engine->sched_engine->lock);
|
|
return; /* leave this for another sibling */
|
|
}
|
|
|
|
@@ -1591,7 +1591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|
*/
|
|
sched_engine->queue_priority_hint = queue_prio(sched_engine);
|
|
i915_sched_engine_reset_on_empty(sched_engine);
|
|
- spin_unlock(&sched_engine->lock);
|
|
+ spin_unlock_irq(&sched_engine->lock);
|
|
|
|
/*
|
|
* We can skip poking the HW if we ended up with exactly the same set
|
|
@@ -1617,13 +1617,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
|
|
}
|
|
}
|
|
|
|
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
|
|
-{
|
|
- local_irq_disable(); /* Suspend interrupts across request submission */
|
|
- execlists_dequeue(engine);
|
|
- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
|
|
-}
|
|
-
|
|
static void clear_ports(struct i915_request **ports, int count)
|
|
{
|
|
memset_p((void **)ports, NULL, count);
|
|
@@ -2477,7 +2470,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
|
|
}
|
|
|
|
if (!engine->execlists.pending[0]) {
|
|
- execlists_dequeue_irq(engine);
|
|
+ execlists_dequeue(engine);
|
|
start_timeslice(engine);
|
|
}
|
|
|
|
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
|
|
index 7503dcb9043bb..799fb8083470f 100644
|
|
--- a/drivers/gpu/drm/i915/i915_request.c
|
|
+++ b/drivers/gpu/drm/i915/i915_request.c
|
|
@@ -613,7 +613,6 @@ bool __i915_request_submit(struct i915_request *request)
|
|
|
|
RQ_TRACE(request, "\n");
|
|
|
|
- GEM_BUG_ON(!irqs_disabled());
|
|
lockdep_assert_held(&engine->sched_engine->lock);
|
|
|
|
/*
|
|
@@ -722,7 +721,6 @@ void __i915_request_unsubmit(struct i915_request *request)
|
|
*/
|
|
RQ_TRACE(request, "\n");
|
|
|
|
- GEM_BUG_ON(!irqs_disabled());
|
|
lockdep_assert_held(&engine->sched_engine->lock);
|
|
|
|
/*
|
|
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
|
|
index f6f9228a13518..0ff1b60be8382 100644
|
|
--- a/drivers/gpu/drm/i915/i915_trace.h
|
|
+++ b/drivers/gpu/drm/i915/i915_trace.h
|
|
@@ -6,6 +6,10 @@
|
|
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
|
|
#define _I915_TRACE_H_
|
|
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+#define NOTRACE
|
|
+#endif
|
|
+
|
|
#include <linux/stringify.h>
|
|
#include <linux/types.h>
|
|
#include <linux/tracepoint.h>
|
|
@@ -322,7 +326,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
|
|
TP_ARGS(rq)
|
|
);
|
|
|
|
-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
|
|
+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
|
|
DEFINE_EVENT(i915_request, i915_request_guc_submit,
|
|
TP_PROTO(struct i915_request *rq),
|
|
TP_ARGS(rq)
|
|
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
|
|
index 2c430c0c3badd..7ec828637d622 100644
|
|
--- a/drivers/gpu/drm/i915/i915_utils.h
|
|
+++ b/drivers/gpu/drm/i915/i915_utils.h
|
|
@@ -288,7 +288,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
|
|
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
|
|
|
|
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
|
|
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
|
|
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
|
|
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
|
|
#else
|
|
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
|
|
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
|
|
index 1e8fe44a7099f..b336d00d7988e 100644
|
|
--- a/drivers/tty/serial/8250/8250.h
|
|
+++ b/drivers/tty/serial/8250/8250.h
|
|
@@ -177,12 +177,277 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value)
|
|
up->dl_write(up, value);
|
|
}
|
|
|
|
+static inline bool serial8250_is_console(struct uart_port *port)
|
|
+{
|
|
+ return uart_console(port) && !hlist_unhashed_lockless(&port->cons->node);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * serial8250_init_wctxt - Initialize a write context for
|
|
+ * non-console-printing usage
|
|
+ * @wctxt: The write context to initialize
|
|
+ * @cons: The console to assign to the write context
|
|
+ *
|
|
+ * In order to mark an unsafe region, drivers must acquire the console. This
|
|
+ * requires providing an initialized write context (even if that driver will
|
|
+ * not be doing any printing).
|
|
+ *
|
|
+ * This function should not be used for console printing contexts.
|
|
+ */
|
|
+static inline void serial8250_init_wctxt(struct cons_write_context *wctxt,
|
|
+ struct console *cons)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+
|
|
+ memset(wctxt, 0, sizeof(*wctxt));
|
|
+ ctxt->console = cons;
|
|
+ ctxt->prio = CONS_PRIO_NORMAL;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __serial8250_console_acquire - Acquire a console for
|
|
+ * non-console-printing usage
|
|
+ * @wctxt: An uninitialized write context to use for acquiring
|
|
+ * @cons: The console to assign to the write context
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ * The caller is holding the console_srcu_read_lock.
|
|
+ *
|
|
+ * This function should not be used for console printing contexts.
|
|
+ */
|
|
+static inline void __serial8250_console_acquire(struct cons_write_context *wctxt,
|
|
+ struct console *cons)
|
|
+{
|
|
+ for (;;) {
|
|
+ serial8250_init_wctxt(wctxt, cons);
|
|
+ if (console_try_acquire(wctxt))
|
|
+ break;
|
|
+ cpu_relax();
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * serial8250_enter_unsafe - Mark the beginning of an unsafe region for
|
|
+ * non-console-printing usage
|
|
+ * @up: The port that is entering the unsafe state
|
|
+ *
|
|
+ * The caller should ensure @up is a console before calling this function.
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ * This function takes the console_srcu_read_lock and becomes owner of the
|
|
+ * console associated with @up.
|
|
+ *
|
|
+ * This function should not be used for console printing contexts.
|
|
+ */
|
|
+static inline void serial8250_enter_unsafe(struct uart_8250_port *up)
|
|
+{
|
|
+ struct uart_port *port = &up->port;
|
|
+
|
|
+ lockdep_assert_held_once(&port->lock);
|
|
+
|
|
+ for (;;) {
|
|
+ up->cookie = console_srcu_read_lock();
|
|
+
|
|
+ __serial8250_console_acquire(&up->wctxt, port->cons);
|
|
+
|
|
+ if (console_enter_unsafe(&up->wctxt))
|
|
+ break;
|
|
+
|
|
+ console_srcu_read_unlock(up->cookie);
|
|
+ cpu_relax();
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * serial8250_exit_unsafe - Mark the end of an unsafe region for
|
|
+ * non-console-printing usage
|
|
+ * @up: The port that is exiting the unsafe state
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ * This function releases ownership of the console associated with @up and
|
|
+ * releases the console_srcu_read_lock.
|
|
+ *
|
|
+ * This function should not be used for console printing contexts.
|
|
+ */
|
|
+static inline void serial8250_exit_unsafe(struct uart_8250_port *up)
|
|
+{
|
|
+ struct uart_port *port = &up->port;
|
|
+
|
|
+ lockdep_assert_held_once(&port->lock);
|
|
+
|
|
+ if (console_exit_unsafe(&up->wctxt))
|
|
+ console_release(&up->wctxt);
|
|
+
|
|
+ console_srcu_read_unlock(up->cookie);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * serial8250_in_IER - Read the IER register for
|
|
+ * non-console-printing usage
|
|
+ * @up: The port to work on
|
|
+ *
|
|
+ * Returns: The value read from IER
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ *
|
|
+ * This is the top-level function for non-console-printing contexts to
|
|
+ * read the IER register. The caller does not need to care if @up is a
|
|
+ * console before calling this function.
|
|
+ *
|
|
+ * This function should not be used for printing contexts.
|
|
+ */
|
|
+static inline int serial8250_in_IER(struct uart_8250_port *up)
|
|
+{
|
|
+ struct uart_port *port = &up->port;
|
|
+ bool is_console;
|
|
+ int ier;
|
|
+
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
+
|
|
+ ier = serial_in(up, UART_IER);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
+
|
|
+ return ier;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __serial8250_set_IER - Directly write to the IER register
|
|
+ * @up: The port to work on
|
|
+ * @wctxt: The current write context
|
|
+ * @ier: The value to write
|
|
+ *
|
|
+ * Returns: True if IER was written to. False otherwise
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ * The caller is holding the console_srcu_read_unlock.
|
|
+ * The caller is the owner of the console associated with @up.
|
|
+ *
|
|
+ * This function should only be directly called within console printing
|
|
+ * contexts. Other contexts should use serial8250_set_IER().
|
|
+ */
|
|
+static inline bool __serial8250_set_IER(struct uart_8250_port *up,
|
|
+ struct cons_write_context *wctxt,
|
|
+ int ier)
|
|
+{
|
|
+ if (wctxt && !console_can_proceed(wctxt))
|
|
+ return false;
|
|
+ serial_out(up, UART_IER, ier);
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * serial8250_set_IER - Write a new value to the IER register for
|
|
+ * non-console-printing usage
|
|
+ * @up: The port to work on
|
|
+ * @ier: The value to write
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ *
|
|
+ * This is the top-level function for non-console-printing contexts to
|
|
+ * write to the IER register. The caller does not need to care if @up is a
|
|
+ * console before calling this function.
|
|
+ *
|
|
+ * This function should not be used for printing contexts.
|
|
+ */
|
|
+static inline void serial8250_set_IER(struct uart_8250_port *up, int ier)
|
|
+{
|
|
+ struct uart_port *port = &up->port;
|
|
+ bool is_console;
|
|
+
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
+ if (is_console) {
|
|
+ serial8250_enter_unsafe(up);
|
|
+ while (!__serial8250_set_IER(up, &up->wctxt, ier)) {
|
|
+ console_srcu_read_unlock(up->cookie);
|
|
+ console_enter_unsafe(&up->wctxt);
|
|
+ }
|
|
+ serial8250_exit_unsafe(up);
|
|
+ } else {
|
|
+ __serial8250_set_IER(up, NULL, ier);
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __serial8250_clear_IER - Directly clear the IER register
|
|
+ * @up: The port to work on
|
|
+ * @wctxt: The current write context
|
|
+ * @prior: Gets set to the previous value of IER
|
|
+ *
|
|
+ * Returns: True if IER was cleared and @prior points to the previous
|
|
+ * value of IER. False otherwise and @prior is invalid
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ * The caller is holding the console_srcu_read_unlock.
|
|
+ * The caller is the owner of the console associated with @up.
|
|
+ *
|
|
+ * This function should only be directly called within console printing
|
|
+ * contexts. Other contexts should use serial8250_clear_IER().
|
|
+ */
|
|
+static inline bool __serial8250_clear_IER(struct uart_8250_port *up,
|
|
+ struct cons_write_context *wctxt,
|
|
+ int *prior)
|
|
+{
|
|
+ unsigned int clearval = 0;
|
|
+
|
|
+ if (up->capabilities & UART_CAP_UUE)
|
|
+ clearval = UART_IER_UUE;
|
|
+
|
|
+ *prior = serial_in(up, UART_IER);
|
|
+ if (wctxt && !console_can_proceed(wctxt))
|
|
+ return false;
|
|
+ serial_out(up, UART_IER, clearval);
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * serial8250_clear_IER - Clear the IER register for
|
|
+ * non-console-printing usage
|
|
+ * @up: The port to work on
|
|
+ *
|
|
+ * Returns: The previous value of IER
|
|
+ *
|
|
+ * The caller is holding the port->lock.
|
|
+ *
|
|
+ * This is the top-level function for non-console-printing contexts to
|
|
+ * clear the IER register. The caller does not need to care if @up is a
|
|
+ * console before calling this function.
|
|
+ *
|
|
+ * This function should not be used for printing contexts.
|
|
+ */
|
|
+static inline int serial8250_clear_IER(struct uart_8250_port *up)
|
|
+{
|
|
+ struct uart_port *port = &up->port;
|
|
+ bool is_console;
|
|
+ int prior;
|
|
+
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
+ if (is_console) {
|
|
+ serial8250_enter_unsafe(up);
|
|
+ while (!__serial8250_clear_IER(up, &up->wctxt, &prior)) {
|
|
+ console_srcu_read_unlock(up->cookie);
|
|
+ console_enter_unsafe(&up->wctxt);
|
|
+ }
|
|
+ serial8250_exit_unsafe(up);
|
|
+ } else {
|
|
+ __serial8250_clear_IER(up, NULL, &prior);
|
|
+ }
|
|
+
|
|
+ return prior;
|
|
+}
|
|
+
|
|
static inline bool serial8250_set_THRI(struct uart_8250_port *up)
|
|
{
|
|
if (up->ier & UART_IER_THRI)
|
|
return false;
|
|
up->ier |= UART_IER_THRI;
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
return true;
|
|
}
|
|
|
|
@@ -191,7 +456,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
|
|
if (!(up->ier & UART_IER_THRI))
|
|
return false;
|
|
up->ier &= ~UART_IER_THRI;
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
return true;
|
|
}
|
|
|
|
diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c
|
|
index 9d2a7856784f7..7cc6b527c088b 100644
|
|
--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c
|
|
+++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c
|
|
@@ -278,7 +278,7 @@ static void __aspeed_vuart_set_throttle(struct uart_8250_port *up,
|
|
up->ier &= ~irqs;
|
|
if (!throttle)
|
|
up->ier |= irqs;
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
}
|
|
static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle)
|
|
{
|
|
diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c
|
|
index f801b1f5b46c0..a29f5f45d22f2 100644
|
|
--- a/drivers/tty/serial/8250/8250_bcm7271.c
|
|
+++ b/drivers/tty/serial/8250/8250_bcm7271.c
|
|
@@ -606,8 +606,10 @@ static int brcmuart_startup(struct uart_port *port)
|
|
* Disable the Receive Data Interrupt because the DMA engine
|
|
* will handle this.
|
|
*/
|
|
+ spin_lock_irq(&port->lock);
|
|
up->ier &= ~UART_IER_RDI;
|
|
- serial_port_out(port, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
+ spin_unlock_irq(&port->lock);
|
|
|
|
priv->tx_running = false;
|
|
priv->dma.rx_dma = NULL;
|
|
@@ -787,6 +789,12 @@ static int brcmuart_handle_irq(struct uart_port *p)
|
|
spin_lock_irqsave(&p->lock, flags);
|
|
status = serial_port_in(p, UART_LSR);
|
|
if ((status & UART_LSR_DR) == 0) {
|
|
+ bool is_console;
|
|
+
|
|
+ is_console = serial8250_is_console(p);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
|
|
ier = serial_port_in(p, UART_IER);
|
|
/*
|
|
@@ -807,6 +815,9 @@ static int brcmuart_handle_irq(struct uart_port *p)
|
|
serial_port_in(p, UART_RX);
|
|
}
|
|
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
+
|
|
handled = 1;
|
|
}
|
|
spin_unlock_irqrestore(&p->lock, flags);
|
|
@@ -844,12 +855,22 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t)
|
|
/* re-enable receive unless upper layer has disabled it */
|
|
if ((up->ier & (UART_IER_RLSI | UART_IER_RDI)) ==
|
|
(UART_IER_RLSI | UART_IER_RDI)) {
|
|
+ bool is_console;
|
|
+
|
|
+ is_console = serial8250_is_console(p);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
+
|
|
status = serial_port_in(p, UART_IER);
|
|
status |= (UART_IER_RLSI | UART_IER_RDI);
|
|
serial_port_out(p, UART_IER, status);
|
|
status = serial_port_in(p, UART_MCR);
|
|
status |= UART_MCR_RTS;
|
|
serial_port_out(p, UART_MCR, status);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
}
|
|
spin_unlock_irqrestore(&p->lock, flags);
|
|
return HRTIMER_NORESTART;
|
|
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
|
|
index ab63c308be0a2..8e89bffa1f121 100644
|
|
--- a/drivers/tty/serial/8250/8250_core.c
|
|
+++ b/drivers/tty/serial/8250/8250_core.c
|
|
@@ -256,6 +256,7 @@ static void serial8250_timeout(struct timer_list *t)
|
|
static void serial8250_backup_timeout(struct timer_list *t)
|
|
{
|
|
struct uart_8250_port *up = from_timer(up, t, timer);
|
|
+ struct uart_port *port = &up->port;
|
|
unsigned int iir, ier = 0, lsr;
|
|
unsigned long flags;
|
|
|
|
@@ -266,8 +267,23 @@ static void serial8250_backup_timeout(struct timer_list *t)
|
|
* based handler.
|
|
*/
|
|
if (up->port.irq) {
|
|
+ bool is_console;
|
|
+
|
|
+ /*
|
|
+ * Do not use serial8250_clear_IER() because this code
|
|
+ * ignores capabilities.
|
|
+ */
|
|
+
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
+
|
|
ier = serial_in(up, UART_IER);
|
|
serial_out(up, UART_IER, 0);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
}
|
|
|
|
iir = serial_in(up, UART_IIR);
|
|
@@ -290,7 +306,7 @@ static void serial8250_backup_timeout(struct timer_list *t)
|
|
serial8250_tx_chars(up);
|
|
|
|
if (up->port.irq)
|
|
- serial_out(up, UART_IER, ier);
|
|
+ serial8250_set_IER(up, ier);
|
|
|
|
spin_unlock_irqrestore(&up->port.lock, flags);
|
|
|
|
@@ -576,12 +592,30 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
|
|
|
|
#ifdef CONFIG_SERIAL_8250_CONSOLE
|
|
|
|
-static void univ8250_console_write(struct console *co, const char *s,
|
|
- unsigned int count)
|
|
+static void univ8250_console_port_lock(struct console *con, bool do_lock, unsigned long *flags)
|
|
+{
|
|
+ struct uart_8250_port *up = &serial8250_ports[con->index];
|
|
+
|
|
+ if (do_lock)
|
|
+ spin_lock_irqsave(&up->port.lock, *flags);
|
|
+ else
|
|
+ spin_unlock_irqrestore(&up->port.lock, *flags);
|
|
+}
|
|
+
|
|
+static bool univ8250_console_write_atomic(struct console *co,
|
|
+ struct cons_write_context *wctxt)
|
|
{
|
|
struct uart_8250_port *up = &serial8250_ports[co->index];
|
|
|
|
- serial8250_console_write(up, s, count);
|
|
+ return serial8250_console_write_atomic(up, wctxt);
|
|
+}
|
|
+
|
|
+static bool univ8250_console_write_thread(struct console *co,
|
|
+ struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct uart_8250_port *up = &serial8250_ports[co->index];
|
|
+
|
|
+ return serial8250_console_write_thread(up, wctxt);
|
|
}
|
|
|
|
static int univ8250_console_setup(struct console *co, char *options)
|
|
@@ -669,12 +703,14 @@ static int univ8250_console_match(struct console *co, char *name, int idx,
|
|
|
|
static struct console univ8250_console = {
|
|
.name = "ttyS",
|
|
- .write = univ8250_console_write,
|
|
+ .write_atomic = univ8250_console_write_atomic,
|
|
+ .write_thread = univ8250_console_write_thread,
|
|
+ .port_lock = univ8250_console_port_lock,
|
|
.device = uart_console_device,
|
|
.setup = univ8250_console_setup,
|
|
.exit = univ8250_console_exit,
|
|
.match = univ8250_console_match,
|
|
- .flags = CON_PRINTBUFFER | CON_ANYTIME,
|
|
+ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NO_BKL,
|
|
.index = -1,
|
|
.data = &serial8250_reg,
|
|
};
|
|
@@ -962,7 +998,7 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work)
|
|
spin_lock_irqsave(&port->lock, flags);
|
|
up->ier |= UART_IER_RLSI | UART_IER_RDI;
|
|
up->port.read_status_mask |= UART_LSR_DR;
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
spin_unlock_irqrestore(&port->lock, flags);
|
|
}
|
|
|
|
diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c
|
|
index 64770c62bbec5..ccb70b20b1f4f 100644
|
|
--- a/drivers/tty/serial/8250/8250_exar.c
|
|
+++ b/drivers/tty/serial/8250/8250_exar.c
|
|
@@ -185,6 +185,10 @@ static void xr17v35x_set_divisor(struct uart_port *p, unsigned int baud,
|
|
|
|
static int xr17v35x_startup(struct uart_port *port)
|
|
{
|
|
+ struct uart_8250_port *up = up_to_u8250p(port);
|
|
+
|
|
+ spin_lock_irq(&port->lock);
|
|
+
|
|
/*
|
|
* First enable access to IER [7:5], ISR [5:4], FCR [5:4],
|
|
* MCR [7:5] and MSR [7:0]
|
|
@@ -195,7 +199,9 @@ static int xr17v35x_startup(struct uart_port *port)
|
|
* Make sure all interrups are masked until initialization is
|
|
* complete and the FIFOs are cleared
|
|
*/
|
|
- serial_port_out(port, UART_IER, 0);
|
|
+ serial8250_set_IER(up, 0);
|
|
+
|
|
+ spin_unlock_irq(&port->lock);
|
|
|
|
return serial8250_do_startup(port);
|
|
}
|
|
diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
index 8adfaa183f778..eaf148245a10d 100644
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -58,7 +58,8 @@ int fsl8250_handle_irq(struct uart_port *port)
if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
unsigned long delay;

- up->ier = port->serial_in(port, UART_IER);
+ up->ier = serial8250_in_IER(up);
+
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
port->ops->stop_rx(port);
} else {
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index fb1d5ec0940e6..bf7ab55c8923f 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -222,12 +222,38 @@ static void mtk8250_shutdown(struct uart_port *port)

static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
+ struct uart_port *port = &up->port;
+ bool is_console;
+ int ier;
+
+ is_console = serial8250_is_console(port);
+
+ if (is_console)
+ serial8250_enter_unsafe(up);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier & (~mask));
+
+ if (is_console)
+ serial8250_exit_unsafe(up);
}

static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
+ struct uart_port *port = &up->port;
+ bool is_console;
+ int ier;
+
+ is_console = serial8250_is_console(port);
+
+ if (is_console)
+ serial8250_enter_unsafe(up);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier | mask);
+
+ if (is_console)
+ serial8250_exit_unsafe(up);
}

static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
|
|
index 734f092ef839a..bfa50a26349dd 100644
|
|
--- a/drivers/tty/serial/8250/8250_omap.c
|
|
+++ b/drivers/tty/serial/8250/8250_omap.c
|
|
@@ -334,8 +334,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up)
|
|
|
|
/* drop TCR + TLR access, we setup XON/XOFF later */
|
|
serial8250_out_MCR(up, mcr);
|
|
-
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
|
|
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
|
|
serial_dl_write(up, priv->quot);
|
|
@@ -523,16 +522,21 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state,
|
|
u8 efr;
|
|
|
|
pm_runtime_get_sync(port->dev);
|
|
+
|
|
+ spin_lock_irq(&port->lock);
|
|
+
|
|
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
|
|
efr = serial_in(up, UART_EFR);
|
|
serial_out(up, UART_EFR, efr | UART_EFR_ECB);
|
|
serial_out(up, UART_LCR, 0);
|
|
|
|
- serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0);
|
|
+ serial8250_set_IER(up, (state != 0) ? UART_IERX_SLEEP : 0);
|
|
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
|
|
serial_out(up, UART_EFR, efr);
|
|
serial_out(up, UART_LCR, 0);
|
|
|
|
+ spin_unlock_irq(&port->lock);
|
|
+
|
|
pm_runtime_mark_last_busy(port->dev);
|
|
pm_runtime_put_autosuspend(port->dev);
|
|
}
|
|
@@ -649,7 +653,8 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
|
|
if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) {
|
|
unsigned long delay;
|
|
|
|
- up->ier = port->serial_in(port, UART_IER);
|
|
+ spin_lock(&port->lock);
|
|
+ up->ier = serial8250_in_IER(up);
|
|
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
|
|
port->ops->stop_rx(port);
|
|
} else {
|
|
@@ -658,6 +663,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
|
|
*/
|
|
cancel_delayed_work(&up->overrun_backoff);
|
|
}
|
|
+ spin_unlock(&port->lock);
|
|
|
|
delay = msecs_to_jiffies(up->overrun_backoff_time_ms);
|
|
schedule_delayed_work(&up->overrun_backoff, delay);
|
|
@@ -707,8 +713,10 @@ static int omap_8250_startup(struct uart_port *port)
|
|
if (ret < 0)
|
|
goto err;
|
|
|
|
+ spin_lock_irq(&port->lock);
|
|
up->ier = UART_IER_RLSI | UART_IER_RDI;
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
+ spin_unlock_irq(&port->lock);
|
|
|
|
#ifdef CONFIG_PM
|
|
up->capabilities |= UART_CAP_RPM;
|
|
@@ -748,8 +756,10 @@ static void omap_8250_shutdown(struct uart_port *port)
|
|
if (priv->habit & UART_HAS_EFR2)
|
|
serial_out(up, UART_OMAP_EFR2, 0x0);
|
|
|
|
+ spin_lock_irq(&port->lock);
|
|
up->ier = 0;
|
|
- serial_out(up, UART_IER, 0);
|
|
+ serial8250_set_IER(up, 0);
|
|
+ spin_unlock_irq(&port->lock);
|
|
|
|
if (up->dma)
|
|
serial8250_release_dma(up);
|
|
@@ -797,7 +807,7 @@ static void omap_8250_unthrottle(struct uart_port *port)
|
|
up->dma->rx_dma(up);
|
|
up->ier |= UART_IER_RLSI | UART_IER_RDI;
|
|
port->read_status_mask |= UART_LSR_DR;
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
spin_unlock_irqrestore(&port->lock, flags);
|
|
|
|
pm_runtime_mark_last_busy(port->dev);
|
|
@@ -956,7 +966,7 @@ static void __dma_rx_complete(void *param)
|
|
__dma_rx_do_complete(p);
|
|
if (!priv->throttled) {
|
|
p->ier |= UART_IER_RLSI | UART_IER_RDI;
|
|
- serial_out(p, UART_IER, p->ier);
|
|
+ serial8250_set_IER(p, p->ier);
|
|
if (!(priv->habit & UART_HAS_EFR2))
|
|
omap_8250_rx_dma(p);
|
|
}
|
|
@@ -1013,7 +1023,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p)
|
|
* callback to run.
|
|
*/
|
|
p->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
|
|
- serial_out(p, UART_IER, p->ier);
|
|
+ serial8250_set_IER(p, p->ier);
|
|
}
|
|
goto out;
|
|
}
|
|
@@ -1226,12 +1236,12 @@ static void am654_8250_handle_rx_dma(struct uart_8250_port *up, u8 iir,
|
|
* periodic timeouts, re-enable interrupts.
|
|
*/
|
|
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
omap_8250_rx_dma_flush(up);
|
|
serial_in(up, UART_IIR);
|
|
serial_out(up, UART_OMAP_EFR2, 0x0);
|
|
up->ier |= UART_IER_RLSI | UART_IER_RDI;
|
|
- serial_out(up, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
}
|
|
}
|
|
|
|
@@ -1717,12 +1727,16 @@ static int omap8250_runtime_resume(struct device *dev)
|
|
|
|
up = serial8250_get_port(priv->line);
|
|
|
|
+ spin_lock_irq(&up->port.lock);
|
|
+
|
|
if (omap8250_lost_context(up))
|
|
omap8250_restore_regs(up);
|
|
|
|
if (up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2))
|
|
omap_8250_rx_dma(up);
|
|
|
|
+ spin_unlock_irq(&up->port.lock);
|
|
+
|
|
priv->latency = priv->calc_latency;
|
|
schedule_work(&priv->qos_work);
|
|
return 0;
|
|
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
|
|
index fe8d79c4ae95e..68f01f8bdf64b 100644
|
|
--- a/drivers/tty/serial/8250/8250_port.c
|
|
+++ b/drivers/tty/serial/8250/8250_port.c
|
|
@@ -745,6 +745,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
|
|
serial8250_rpm_get(p);
|
|
|
|
if (p->capabilities & UART_CAP_SLEEP) {
|
|
+ spin_lock_irq(&p->port.lock);
|
|
if (p->capabilities & UART_CAP_EFR) {
|
|
lcr = serial_in(p, UART_LCR);
|
|
efr = serial_in(p, UART_EFR);
|
|
@@ -752,25 +753,18 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
|
|
serial_out(p, UART_EFR, UART_EFR_ECB);
|
|
serial_out(p, UART_LCR, 0);
|
|
}
|
|
- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
|
|
+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
|
|
if (p->capabilities & UART_CAP_EFR) {
|
|
serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
|
|
serial_out(p, UART_EFR, efr);
|
|
serial_out(p, UART_LCR, lcr);
|
|
}
|
|
+ spin_unlock_irq(&p->port.lock);
|
|
}
|
|
|
|
serial8250_rpm_put(p);
|
|
}
|
|
|
|
-static void serial8250_clear_IER(struct uart_8250_port *up)
|
|
-{
|
|
- if (up->capabilities & UART_CAP_UUE)
|
|
- serial_out(up, UART_IER, UART_IER_UUE);
|
|
- else
|
|
- serial_out(up, UART_IER, 0);
|
|
-}
|
|
-
|
|
#ifdef CONFIG_SERIAL_8250_RSA
|
|
/*
|
|
* Attempts to turn on the RSA FIFO. Returns zero on failure.
|
|
@@ -1034,8 +1028,10 @@ static int broken_efr(struct uart_8250_port *up)
|
|
*/
|
|
static void autoconfig_16550a(struct uart_8250_port *up)
|
|
{
|
|
+ struct uart_port *port = &up->port;
|
|
unsigned char status1, status2;
|
|
unsigned int iersave;
|
|
+ bool is_console;
|
|
|
|
up->port.type = PORT_16550A;
|
|
up->capabilities |= UART_CAP_FIFO;
|
|
@@ -1151,6 +1147,11 @@ static void autoconfig_16550a(struct uart_8250_port *up)
|
|
return;
|
|
}
|
|
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
+
|
|
/*
|
|
* Try writing and reading the UART_IER_UUE bit (b6).
|
|
* If it works, this is probably one of the Xscale platform's
|
|
@@ -1186,6 +1187,9 @@ static void autoconfig_16550a(struct uart_8250_port *up)
|
|
}
|
|
serial_out(up, UART_IER, iersave);
|
|
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
+
|
|
/*
|
|
* We distinguish between 16550A and U6 16550A by counting
|
|
* how many bytes are in the FIFO.
|
|
@@ -1227,6 +1231,13 @@ static void autoconfig(struct uart_8250_port *up)
|
|
up->bugs = 0;
|
|
|
|
if (!(port->flags & UPF_BUGGY_UART)) {
|
|
+ bool is_console;
|
|
+
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
+
|
|
/*
|
|
* Do a simple existence test first; if we fail this,
|
|
* there's no point trying anything else.
|
|
@@ -1256,6 +1267,10 @@ static void autoconfig(struct uart_8250_port *up)
|
|
#endif
|
|
scratch3 = serial_in(up, UART_IER) & UART_IER_ALL_INTR;
|
|
serial_out(up, UART_IER, scratch);
|
|
+
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
+
|
|
if (scratch2 != 0 || scratch3 != UART_IER_ALL_INTR) {
|
|
/*
|
|
* We failed; there's nothing here
|
|
@@ -1377,6 +1392,7 @@ static void autoconfig_irq(struct uart_8250_port *up)
|
|
unsigned char save_ICP = 0;
|
|
unsigned int ICP = 0;
|
|
unsigned long irqs;
|
|
+ bool is_console;
|
|
int irq;
|
|
|
|
if (port->flags & UPF_FOURPORT) {
|
|
@@ -1386,8 +1402,12 @@ static void autoconfig_irq(struct uart_8250_port *up)
|
|
inb_p(ICP);
|
|
}
|
|
|
|
- if (uart_console(port))
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
+ if (is_console) {
|
|
console_lock();
|
|
+ serial8250_enter_unsafe(up);
|
|
+ }
|
|
|
|
/* forget possible initially masked and pending IRQ */
|
|
probe_irq_off(probe_irq_on());
|
|
@@ -1419,8 +1439,10 @@ static void autoconfig_irq(struct uart_8250_port *up)
|
|
if (port->flags & UPF_FOURPORT)
|
|
outb_p(save_ICP, ICP);
|
|
|
|
- if (uart_console(port))
|
|
+ if (is_console) {
|
|
+ serial8250_exit_unsafe(up);
|
|
console_unlock();
|
|
+ }
|
|
|
|
port->irq = (irq > 0) ? irq : 0;
|
|
}
|
|
@@ -1433,7 +1455,7 @@ static void serial8250_stop_rx(struct uart_port *port)
|
|
|
|
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
|
|
up->port.read_status_mask &= ~UART_LSR_DR;
|
|
- serial_port_out(port, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
|
|
serial8250_rpm_put(up);
|
|
}
|
|
@@ -1463,7 +1485,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p)
|
|
serial8250_clear_and_reinit_fifos(p);
|
|
|
|
p->ier |= UART_IER_RLSI | UART_IER_RDI;
|
|
- serial_port_out(&p->port, UART_IER, p->ier);
|
|
+ serial8250_set_IER(p, p->ier);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
|
|
@@ -1710,7 +1732,7 @@ static void serial8250_disable_ms(struct uart_port *port)
|
|
mctrl_gpio_disable_ms(up->gpios);
|
|
|
|
up->ier &= ~UART_IER_MSI;
|
|
- serial_port_out(port, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
}
|
|
|
|
static void serial8250_enable_ms(struct uart_port *port)
|
|
@@ -1726,7 +1748,7 @@ static void serial8250_enable_ms(struct uart_port *port)
|
|
up->ier |= UART_IER_MSI;
|
|
|
|
serial8250_rpm_get(up);
|
|
- serial_port_out(port, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
serial8250_rpm_put(up);
|
|
}
|
|
|
|
@@ -2176,9 +2198,10 @@ static void serial8250_put_poll_char(struct uart_port *port,
|
|
serial8250_rpm_get(up);
|
|
/*
|
|
* First save the IER then disable the interrupts
|
|
+ *
|
|
+ * Best-effort IER access because other CPUs are quiesced.
|
|
*/
|
|
- ier = serial_port_in(port, UART_IER);
|
|
- serial8250_clear_IER(up);
|
|
+ __serial8250_clear_IER(up, NULL, &ier);
|
|
|
|
wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
|
|
/*
|
|
@@ -2191,7 +2214,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
|
|
* and restore the IER
|
|
*/
|
|
wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
|
|
- serial_port_out(port, UART_IER, ier);
|
|
+ __serial8250_set_IER(up, NULL, ier);
|
|
serial8250_rpm_put(up);
|
|
}
|
|
|
|
@@ -2202,6 +2225,7 @@ int serial8250_do_startup(struct uart_port *port)
|
|
struct uart_8250_port *up = up_to_u8250p(port);
|
|
unsigned long flags;
|
|
unsigned char iir;
|
|
+ bool is_console;
|
|
int retval;
|
|
u16 lsr;
|
|
|
|
@@ -2219,21 +2243,25 @@ int serial8250_do_startup(struct uart_port *port)
|
|
serial8250_rpm_get(up);
|
|
if (port->type == PORT_16C950) {
|
|
/* Wake up and initialize UART */
|
|
+ spin_lock_irqsave(&port->lock, flags);
|
|
up->acr = 0;
|
|
serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B);
|
|
serial_port_out(port, UART_EFR, UART_EFR_ECB);
|
|
- serial_port_out(port, UART_IER, 0);
|
|
+ serial8250_set_IER(up, 0);
|
|
serial_port_out(port, UART_LCR, 0);
|
|
serial_icr_write(up, UART_CSR, 0); /* Reset the UART */
|
|
serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B);
|
|
serial_port_out(port, UART_EFR, UART_EFR_ECB);
|
|
serial_port_out(port, UART_LCR, 0);
|
|
+ spin_unlock_irqrestore(&port->lock, flags);
|
|
}
|
|
|
|
if (port->type == PORT_DA830) {
|
|
/* Reset the port */
|
|
- serial_port_out(port, UART_IER, 0);
|
|
+ spin_lock_irqsave(&port->lock, flags);
|
|
+ serial8250_set_IER(up, 0);
|
|
serial_port_out(port, UART_DA830_PWREMU_MGMT, 0);
|
|
+ spin_unlock_irqrestore(&port->lock, flags);
|
|
mdelay(10);
|
|
|
|
/* Enable Tx, Rx and free run mode */
|
|
@@ -2331,6 +2359,8 @@ int serial8250_do_startup(struct uart_port *port)
|
|
if (retval)
|
|
goto out;
|
|
|
|
+ is_console = serial8250_is_console(port);
|
|
+
|
|
if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) {
|
|
unsigned char iir1;
|
|
|
|
@@ -2347,6 +2377,9 @@ int serial8250_do_startup(struct uart_port *port)
|
|
*/
|
|
spin_lock_irqsave(&port->lock, flags);
|
|
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
+
|
|
wait_for_xmitr(up, UART_LSR_THRE);
|
|
serial_port_out_sync(port, UART_IER, UART_IER_THRI);
|
|
udelay(1); /* allow THRE to set */
|
|
@@ -2357,6 +2390,9 @@ int serial8250_do_startup(struct uart_port *port)
|
|
iir = serial_port_in(port, UART_IIR);
|
|
serial_port_out(port, UART_IER, 0);
|
|
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
+
|
|
spin_unlock_irqrestore(&port->lock, flags);
|
|
|
|
if (port->irqflags & IRQF_SHARED)
|
|
@@ -2411,10 +2447,14 @@ int serial8250_do_startup(struct uart_port *port)
|
|
* Do a quick test to see if we receive an interrupt when we enable
|
|
* the TX irq.
|
|
*/
|
|
+ if (is_console)
|
|
+ serial8250_enter_unsafe(up);
|
|
serial_port_out(port, UART_IER, UART_IER_THRI);
|
|
lsr = serial_port_in(port, UART_LSR);
|
|
iir = serial_port_in(port, UART_IIR);
|
|
serial_port_out(port, UART_IER, 0);
|
|
+ if (is_console)
|
|
+ serial8250_exit_unsafe(up);
|
|
|
|
if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) {
|
|
if (!(up->bugs & UART_BUG_TXEN)) {
|
|
@@ -2446,7 +2486,7 @@ int serial8250_do_startup(struct uart_port *port)
|
|
if (up->dma) {
|
|
const char *msg = NULL;
|
|
|
|
- if (uart_console(port))
|
|
+ if (is_console)
|
|
msg = "forbid DMA for kernel console";
|
|
else if (serial8250_request_dma(up))
|
|
msg = "failed to request DMA";
|
|
@@ -2497,7 +2537,7 @@ void serial8250_do_shutdown(struct uart_port *port)
|
|
*/
|
|
spin_lock_irqsave(&port->lock, flags);
|
|
up->ier = 0;
|
|
- serial_port_out(port, UART_IER, 0);
|
|
+ serial8250_set_IER(up, 0);
|
|
spin_unlock_irqrestore(&port->lock, flags);
|
|
|
|
synchronize_irq(port->irq);
|
|
@@ -2863,7 +2903,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
|
|
if (up->capabilities & UART_CAP_RTOIE)
|
|
up->ier |= UART_IER_RTOIE;
|
|
|
|
- serial_port_out(port, UART_IER, up->ier);
|
|
+ serial8250_set_IER(up, up->ier);
|
|
|
|
if (up->capabilities & UART_CAP_EFR) {
|
|
unsigned char efr = 0;
|
|
@@ -3328,12 +3368,21 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults);
|
|
|
|
#ifdef CONFIG_SERIAL_8250_CONSOLE
|
|
|
|
-static void serial8250_console_putchar(struct uart_port *port, unsigned char ch)
|
|
+static bool serial8250_console_putchar(struct uart_port *port, unsigned char ch,
|
|
+ struct cons_write_context *wctxt)
|
|
{
|
|
struct uart_8250_port *up = up_to_u8250p(port);
|
|
|
|
wait_for_xmitr(up, UART_LSR_THRE);
|
|
+ if (!console_can_proceed(wctxt))
|
|
+ return false;
|
|
serial_port_out(port, UART_TX, ch);
|
|
+ if (ch == '\n')
|
|
+ up->console_newline_needed = false;
|
|
+ else
|
|
+ up->console_newline_needed = true;
|
|
+
|
|
+ return true;
|
|
}
|
|
|
|
/*
|
|
@@ -3362,33 +3411,119 @@ static void serial8250_console_restore(struct uart_8250_port *up)
|
|
serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
|
|
}
|
|
|
|
-/*
|
|
- * Print a string to the serial port using the device FIFO
|
|
- *
|
|
- * It sends fifosize bytes and then waits for the fifo
|
|
- * to get empty.
|
|
- */
|
|
-static void serial8250_console_fifo_write(struct uart_8250_port *up,
|
|
- const char *s, unsigned int count)
|
|
+static bool __serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt,
|
|
+ const char *s, unsigned int count,
|
|
+ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *))
|
|
{
|
|
- int i;
|
|
- const char *end = s + count;
|
|
- unsigned int fifosize = up->tx_loadsz;
|
|
- bool cr_sent = false;
|
|
+ bool finished = false;
|
|
+ unsigned int i;
|
|
|
|
- while (s != end) {
|
|
- wait_for_lsr(up, UART_LSR_THRE);
|
|
-
|
|
- for (i = 0; i < fifosize && s != end; ++i) {
|
|
- if (*s == '\n' && !cr_sent) {
|
|
- serial_out(up, UART_TX, '\r');
|
|
- cr_sent = true;
|
|
- } else {
|
|
- serial_out(up, UART_TX, *s++);
|
|
- cr_sent = false;
|
|
- }
|
|
+ for (i = 0; i < count; i++, s++) {
|
|
+ if (*s == '\n') {
|
|
+ if (!putchar(port, '\r', wctxt))
|
|
+ goto out;
|
|
}
|
|
+ if (!putchar(port, *s, wctxt))
|
|
+ goto out;
|
|
}
|
|
+ finished = true;
|
|
+out:
|
|
+ return finished;
|
|
+}
|
|
+
|
|
+static bool serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt,
|
|
+ const char *s, unsigned int count,
|
|
+ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *))
|
|
+{
|
|
+ return __serial8250_console_write(port, wctxt, s, count, putchar);
|
|
+}
|
|
+
|
|
+static bool atomic_print_line(struct uart_8250_port *up,
|
|
+ struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct uart_port *port = &up->port;
|
|
+
|
|
+ if (up->console_newline_needed &&
|
|
+ !__serial8250_console_write(port, wctxt, "\n", 1, serial8250_console_putchar)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return __serial8250_console_write(port, wctxt, wctxt->outbuf, wctxt->len,
|
|
+ serial8250_console_putchar);
|
|
+}
|
|
+
|
|
+static void atomic_console_reacquire(struct cons_write_context *wctxt,
|
|
+ struct cons_write_context *wctxt_init)
|
|
+{
|
|
+ memcpy(wctxt, wctxt_init, sizeof(*wctxt));
|
|
+ while (!console_try_acquire(wctxt)) {
|
|
+ cpu_relax();
|
|
+ memcpy(wctxt, wctxt_init, sizeof(*wctxt));
|
|
+ }
|
|
+}
|
|
+
|
|
+bool serial8250_console_write_atomic(struct uart_8250_port *up,
|
|
+ struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct cons_write_context wctxt_init = { };
|
|
+ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt);
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+ bool finished = false;
|
|
+ unsigned int ier;
|
|
+
|
|
+ touch_nmi_watchdog();
|
|
+
|
|
+ /* With write_atomic, another context may hold the port->lock. */
|
|
+
|
|
+ ctxt_init->console = ctxt->console;
|
|
+ ctxt_init->prio = ctxt->prio;
|
|
+ ctxt_init->thread = ctxt->thread;
|
|
+
|
|
+ /*
|
|
+ * Enter unsafe in order to disable interrupts. If the console is
|
|
+ * lost before the interrupts are disabled, bail out because another
|
|
+ * context took over the printing. If the console is lost after the
|
|
+ * interrupts are disabled, the console must be reacquired in order
|
|
+ * to re-enable the interrupts. However in that case no printing is
|
|
+ * allowed because another context took over the printing.
|
|
+ */
|
|
+
|
|
+ if (!console_enter_unsafe(wctxt))
|
|
+ return false;
|
|
+
|
|
+ if (!__serial8250_clear_IER(up, wctxt, &ier))
|
|
+ return false;
|
|
+
|
|
+ if (!console_exit_unsafe(wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
+
|
|
+ if (!atomic_print_line(up, wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
+
|
|
+ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
|
|
+ finished = true;
|
|
+enable_irq:
|
|
+ /*
|
|
+ * Enter unsafe in order to enable interrupts. If the console is
|
|
+ * lost before the interrupts are enabled, the console must be
|
|
+ * reacquired in order to re-enable the interrupts.
|
|
+ */
|
|
+ for (;;) {
|
|
+ if (console_enter_unsafe(wctxt) &&
|
|
+ __serial8250_set_IER(up, wctxt, ier)) {
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* HW-IRQs still disabled. Reacquire to enable them. */
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ }
|
|
+ console_exit_unsafe(wctxt);
|
|
+
|
|
+ return finished;
|
|
}
|
|
|
|
/*
|
|
@@ -3400,78 +3535,116 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up,
|
|
* Doing runtime PM is really a bad idea for the kernel console.
|
|
* Thus, we assume the function is called when device is powered up.
|
|
*/
|
|
-void serial8250_console_write(struct uart_8250_port *up, const char *s,
|
|
- unsigned int count)
|
|
+bool serial8250_console_write_thread(struct uart_8250_port *up,
|
|
+ struct cons_write_context *wctxt)
|
|
{
|
|
+ struct cons_write_context wctxt_init = { };
|
|
+ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt);
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
struct uart_8250_em485 *em485 = up->em485;
|
|
struct uart_port *port = &up->port;
|
|
- unsigned long flags;
|
|
- unsigned int ier, use_fifo;
|
|
- int locked = 1;
|
|
+ unsigned int count = wctxt->len;
|
|
+ const char *s = wctxt->outbuf;
|
|
+ bool rs485_started = false;
|
|
+ bool finished = false;
|
|
+ unsigned int ier;
|
|
|
|
- touch_nmi_watchdog();
|
|
-
|
|
- if (oops_in_progress)
|
|
- locked = spin_trylock_irqsave(&port->lock, flags);
|
|
- else
|
|
- spin_lock_irqsave(&port->lock, flags);
|
|
+ ctxt_init->console = ctxt->console;
|
|
+ ctxt_init->prio = ctxt->prio;
|
|
+ ctxt_init->thread = ctxt->thread;
|
|
|
|
/*
|
|
- * First save the IER then disable the interrupts
|
|
+ * Enter unsafe in order to disable interrupts. If the console is
|
|
+ * lost before the interrupts are disabled, bail out because another
|
|
+ * context took over the printing. If the console is lost after the
|
|
+ * interrupts are disabled, the console must be reacquired in order
|
|
+ * to re-enable the interrupts. However in that case no printing is
|
|
+ * allowed because another context took over the printing.
|
|
*/
|
|
- ier = serial_port_in(port, UART_IER);
|
|
- serial8250_clear_IER(up);
|
|
+
|
|
+ if (!console_enter_unsafe(wctxt))
|
|
+ return false;
|
|
+
|
|
+ if (!__serial8250_clear_IER(up, wctxt, &ier))
|
|
+ return false;
|
|
+
|
|
+ if (!console_exit_unsafe(wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
|
|
/* check scratch reg to see if port powered off during system sleep */
|
|
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
|
|
+ if (!console_enter_unsafe(wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
serial8250_console_restore(up);
|
|
+ if (!console_exit_unsafe(wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
up->canary = 0;
|
|
}
|
|
|
|
if (em485) {
|
|
- if (em485->tx_stopped)
|
|
+ if (em485->tx_stopped) {
|
|
+ if (!console_enter_unsafe(wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
up->rs485_start_tx(up);
|
|
- mdelay(port->rs485.delay_rts_before_send);
|
|
+ rs485_started = true;
|
|
+ if (!console_exit_unsafe(wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
+ }
|
|
+ if (port->rs485.delay_rts_before_send) {
|
|
+ mdelay(port->rs485.delay_rts_before_send);
|
|
+ if (!console_can_proceed(wctxt)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
+ }
|
|
}
|
|
|
|
- use_fifo = (up->capabilities & UART_CAP_FIFO) &&
|
|
- /*
|
|
- * BCM283x requires to check the fifo
|
|
- * after each byte.
|
|
- */
|
|
- !(up->capabilities & UART_CAP_MINI) &&
|
|
- /*
|
|
- * tx_loadsz contains the transmit fifo size
|
|
- */
|
|
- up->tx_loadsz > 1 &&
|
|
- (up->fcr & UART_FCR_ENABLE_FIFO) &&
|
|
- port->state &&
|
|
- test_bit(TTY_PORT_INITIALIZED, &port->state->port.iflags) &&
|
|
- /*
|
|
- * After we put a data in the fifo, the controller will send
|
|
- * it regardless of the CTS state. Therefore, only use fifo
|
|
- * if we don't use control flow.
|
|
- */
|
|
- !(up->port.flags & UPF_CONS_FLOW);
|
|
+ if (!serial8250_console_write(port, wctxt, s, count, serial8250_console_putchar)) {
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ goto enable_irq;
|
|
+ }
|
|
|
|
- if (likely(use_fifo))
|
|
- serial8250_console_fifo_write(up, s, count);
|
|
- else
|
|
- uart_console_write(port, s, count, serial8250_console_putchar);
|
|
-
|
|
- /*
|
|
- * Finally, wait for transmitter to become empty
|
|
- * and restore the IER
|
|
- */
|
|
wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
|
|
-
|
|
+ finished = true;
|
|
+enable_irq:
|
|
+ /*
|
|
+ * Enter unsafe in order to stop rs485_tx. If the console is
|
|
+ * lost before the rs485_tx is stopped, the console must be
|
|
+ * reacquired in order to stop rs485_tx.
|
|
+ */
|
|
if (em485) {
|
|
mdelay(port->rs485.delay_rts_after_send);
|
|
- if (em485->tx_stopped)
|
|
+ if (em485->tx_stopped && rs485_started) {
|
|
+ while (!console_enter_unsafe(wctxt))
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
up->rs485_stop_tx(up);
|
|
+ if (!console_exit_unsafe(wctxt))
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ }
|
|
}
|
|
|
|
- serial_port_out(port, UART_IER, ier);
|
|
+ /*
|
|
+ * Enter unsafe in order to enable interrupts. If the console is
|
|
+ * lost before the interrupts are enabled, the console must be
|
|
+ * reacquired in order to re-enable the interrupts.
|
|
+ */
|
|
+ for (;;) {
|
|
+ if (console_enter_unsafe(wctxt) &&
|
|
+ __serial8250_set_IER(up, wctxt, ier)) {
|
|
+ break;
|
|
+ }
|
|
+ atomic_console_reacquire(wctxt, &wctxt_init);
|
|
+ }
|
|
|
|
/*
|
|
* The receive handling will happen properly because the
|
|
@@ -3483,8 +3656,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
|
|
if (up->msr_saved_flags)
|
|
serial8250_modem_status(up);
|
|
|
|
- if (locked)
|
|
- spin_unlock_irqrestore(&port->lock, flags);
|
|
+ console_exit_unsafe(wctxt);
|
|
+
|
|
+ return finished;
|
|
}
|
|
|
|
static unsigned int probe_baud(struct uart_port *port)
|
|
@@ -3504,6 +3678,7 @@ static unsigned int probe_baud(struct uart_port *port)
|
|
|
|
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
|
|
{
|
|
+ struct uart_8250_port *up = up_to_u8250p(port);
|
|
int baud = 9600;
|
|
int bits = 8;
|
|
int parity = 'n';
|
|
@@ -3513,6 +3688,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
|
|
if (!port->iobase && !port->membase)
|
|
return -ENODEV;
|
|
|
|
+ up->console_newline_needed = false;
|
|
+
|
|
if (options)
|
|
uart_parse_options(options, &baud, &parity, &bits, &flow);
|
|
else if (probe)
|
|
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index 5313aa31930f4..16715f01bdb5a 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -9,6 +9,7 @@ config SERIAL_8250
depends on !S390
select SERIAL_CORE
select SERIAL_MCTRL_GPIO if GPIOLIB
+ select HAVE_ATOMIC_CONSOLE
help
This selects whether you want to include the driver for the standard
serial ports. The standard answer is Y. People who might say N
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
|
|
index d8c2f3455eeba..a4e142ac6ec46 100644
|
|
--- a/drivers/tty/serial/amba-pl011.c
|
|
+++ b/drivers/tty/serial/amba-pl011.c
|
|
@@ -2319,18 +2319,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
|
|
{
|
|
struct uart_amba_port *uap = amba_ports[co->index];
|
|
unsigned int old_cr = 0, new_cr;
|
|
- unsigned long flags;
|
|
+ unsigned long flags = 0;
|
|
int locked = 1;
|
|
|
|
clk_enable(uap->clk);
|
|
|
|
- local_irq_save(flags);
|
|
+ /*
|
|
+ * local_irq_save(flags);
|
|
+ *
|
|
+ * This local_irq_save() is nonsense. If we come in via sysrq
|
|
+ * handling then interrupts are already disabled. Aside of
|
|
+ * that the port.sysrq check is racy on SMP regardless.
|
|
+ */
|
|
if (uap->port.sysrq)
|
|
locked = 0;
|
|
else if (oops_in_progress)
|
|
- locked = spin_trylock(&uap->port.lock);
|
|
+ locked = spin_trylock_irqsave(&uap->port.lock, flags);
|
|
else
|
|
- spin_lock(&uap->port.lock);
|
|
+ spin_lock_irqsave(&uap->port.lock, flags);
|
|
|
|
/*
|
|
* First save the CR then disable the interrupts
|
|
@@ -2356,8 +2362,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
|
|
pl011_write(old_cr, uap, REG_CR);
|
|
|
|
if (locked)
|
|
- spin_unlock(&uap->port.lock);
|
|
- local_irq_restore(flags);
|
|
+ spin_unlock_irqrestore(&uap->port.lock, flags);
|
|
|
|
clk_disable(uap->clk);
|
|
}
|
|
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
|
|
index 82d35dbbfa6cb..511cf17d87958 100644
|
|
--- a/drivers/tty/serial/omap-serial.c
|
|
+++ b/drivers/tty/serial/omap-serial.c
|
|
@@ -1219,13 +1219,10 @@ serial_omap_console_write(struct console *co, const char *s,
|
|
unsigned int ier;
|
|
int locked = 1;
|
|
|
|
- local_irq_save(flags);
|
|
- if (up->port.sysrq)
|
|
- locked = 0;
|
|
- else if (oops_in_progress)
|
|
- locked = spin_trylock(&up->port.lock);
|
|
+ if (up->port.sysrq || oops_in_progress)
|
|
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
|
|
else
|
|
- spin_lock(&up->port.lock);
|
|
+ spin_lock_irqsave(&up->port.lock, flags);
|
|
|
|
/*
|
|
* First save the IER then disable the interrupts
|
|
@@ -1252,8 +1249,7 @@ serial_omap_console_write(struct console *co, const char *s,
|
|
check_modem_status(up);
|
|
|
|
if (locked)
|
|
- spin_unlock(&up->port.lock);
|
|
- local_irq_restore(flags);
|
|
+ spin_unlock_irqrestore(&up->port.lock, flags);
|
|
}
|
|
|
|
static int __init
|
|
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
|
|
index 728cb72be0666..409892b777d16 100644
|
|
--- a/drivers/tty/serial/serial_core.c
|
|
+++ b/drivers/tty/serial/serial_core.c
|
|
@@ -2336,8 +2336,11 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
|
|
* able to Re-start_rx later.
|
|
*/
|
|
if (!console_suspend_enabled && uart_console(uport)) {
|
|
- if (uport->ops->start_rx)
|
|
+ if (uport->ops->start_rx) {
|
|
+ spin_lock_irq(&uport->lock);
|
|
uport->ops->stop_rx(uport);
|
|
+ spin_unlock_irq(&uport->lock);
|
|
+ }
|
|
goto unlock;
|
|
}
|
|
|
|
@@ -2430,8 +2433,11 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
|
|
if (console_suspend_enabled)
|
|
uart_change_pm(state, UART_PM_STATE_ON);
|
|
uport->ops->set_termios(uport, &termios, NULL);
|
|
- if (!console_suspend_enabled && uport->ops->start_rx)
|
|
+ if (!console_suspend_enabled && uport->ops->start_rx) {
|
|
+ spin_lock_irq(&uport->lock);
|
|
uport->ops->start_rx(uport);
|
|
+ spin_unlock_irq(&uport->lock);
|
|
+ }
|
|
if (console_suspend_enabled)
|
|
console_start(uport->cons);
|
|
}
|
|
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
|
|
index 8e3de07f103da..af933c7a61d98 100644
|
|
--- a/drivers/tty/tty_io.c
|
|
+++ b/drivers/tty/tty_io.c
|
|
@@ -3543,8 +3543,13 @@ static ssize_t show_cons_active(struct device *dev,
|
|
for_each_console(c) {
|
|
if (!c->device)
|
|
continue;
|
|
- if (!c->write)
|
|
- continue;
|
|
+ if (c->flags & CON_NO_BKL) {
|
|
+ if (!(c->write_thread || c->write_atomic))
|
|
+ continue;
|
|
+ } else {
|
|
+ if (!c->write)
|
|
+ continue;
|
|
+ }
|
|
if ((c->flags & CON_ENABLED) == 0)
|
|
continue;
|
|
cs[i++] = c;
|
|
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
|
|
index e0758fe7936dc..ab9f42d478c8c 100644
|
|
--- a/fs/proc/consoles.c
|
|
+++ b/fs/proc/consoles.c
|
|
@@ -21,12 +21,14 @@ static int show_console_dev(struct seq_file *m, void *v)
|
|
{ CON_ENABLED, 'E' },
|
|
{ CON_CONSDEV, 'C' },
|
|
{ CON_BOOT, 'B' },
|
|
+ { CON_NO_BKL, 'N' },
|
|
{ CON_PRINTBUFFER, 'p' },
|
|
{ CON_BRL, 'b' },
|
|
{ CON_ANYTIME, 'a' },
|
|
};
|
|
char flags[ARRAY_SIZE(con_flags) + 1];
|
|
struct console *con = v;
|
|
+ char con_write = '-';
|
|
unsigned int a;
|
|
dev_t dev = 0;
|
|
|
|
@@ -57,9 +59,15 @@ static int show_console_dev(struct seq_file *m, void *v)
|
|
seq_setwidth(m, 21 - 1);
|
|
seq_printf(m, "%s%d", con->name, con->index);
|
|
seq_pad(m, ' ');
|
|
- seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-',
|
|
- con->write ? 'W' : '-', con->unblank ? 'U' : '-',
|
|
- flags);
|
|
+ if (con->flags & CON_NO_BKL) {
|
|
+ if (con->write_thread || con->write_atomic)
|
|
+ con_write = 'W';
|
|
+ } else {
|
|
+ if (con->write)
|
|
+ con_write = 'W';
|
|
+ }
|
|
+ seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write,
|
|
+ con->unblank ? 'U' : '-', flags);
|
|
if (dev)
|
|
seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
|
|
|
|
diff --git a/include/linux/console.h b/include/linux/console.h
|
|
index d3195664baa5a..1e9d5bc8fa76e 100644
|
|
--- a/include/linux/console.h
|
|
+++ b/include/linux/console.h
|
|
@@ -16,7 +16,9 @@
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/bits.h>
|
|
+#include <linux/irq_work.h>
|
|
#include <linux/rculist.h>
|
|
+#include <linux/rcuwait.h>
|
|
#include <linux/types.h>
|
|
|
|
struct vc_data;
|
|
@@ -154,6 +156,10 @@ static inline int con_debug_leave(void)
|
|
* receiving the printk spam for obvious reasons.
|
|
* @CON_EXTENDED: The console supports the extended output format of
|
|
* /dev/kmesg which requires a larger output buffer.
|
|
+ * @CON_SUSPENDED: Indicates if a console is suspended. If true, the
|
|
+ * printing callbacks must not be called.
|
|
+ * @CON_NO_BKL: Console can operate outside of the BKL style console_lock
|
|
+ * constraints.
|
|
*/
|
|
enum cons_flags {
|
|
CON_PRINTBUFFER = BIT(0),
|
|
@@ -163,8 +169,133 @@ enum cons_flags {
|
|
CON_ANYTIME = BIT(4),
|
|
CON_BRL = BIT(5),
|
|
CON_EXTENDED = BIT(6),
|
|
+ CON_SUSPENDED = BIT(7),
|
|
+ CON_NO_BKL = BIT(8),
|
|
};
|
|
|
|
+/**
|
|
+ * struct cons_state - console state for NOBKL consoles
|
|
+ * @atom: Compound of the state fields for atomic operations
|
|
+ * @seq: Sequence for record tracking (64bit only)
|
|
+ * @bits: Compound of the state bits below
|
|
+ *
|
|
+ * @locked: Console is locked by a writer
|
|
+ * @unsafe: Console is busy in a non takeover region
|
|
+ * @thread: Current owner is the printk thread
|
|
+ * @cur_prio: The priority of the current output
|
|
+ * @req_prio: The priority of a handover request
|
|
+ * @cpu: The CPU on which the writer runs
|
|
+ *
|
|
+ * To be used for state read and preparation of atomic_long_cmpxchg()
|
|
+ * operations.
|
|
+ *
|
|
+ * The @req_prio field is particularly important to allow spin-waiting to
|
|
+ * timeout and give up without the risk of it being assigned the lock
|
|
+ * after giving up. The @req_prio field has a nice side-effect that it
|
|
+ * also makes it possible for a single read+cmpxchg in the common case of
|
|
+ * acquire and release.
|
|
+ */
|
|
+struct cons_state {
|
|
+ union {
|
|
+ unsigned long atom;
|
|
+ struct {
|
|
+#ifdef CONFIG_64BIT
|
|
+ u32 seq;
|
|
+#endif
|
|
+ union {
|
|
+ u32 bits;
|
|
+ struct {
|
|
+ u32 locked : 1;
|
|
+ u32 unsafe : 1;
|
|
+ u32 thread : 1;
|
|
+ u32 cur_prio : 2;
|
|
+ u32 req_prio : 2;
|
|
+ u32 cpu : 18;
|
|
+ };
|
|
+ };
|
|
+ };
|
|
+ };
|
|
+};
|
|
+
|
|
+/**
|
|
+ * cons_prio - console writer priority for NOBKL consoles
|
|
+ * @CONS_PRIO_NONE: Unused
|
|
+ * @CONS_PRIO_NORMAL: Regular printk
|
|
+ * @CONS_PRIO_EMERGENCY: Emergency output (WARN/OOPS...)
|
|
+ * @CONS_PRIO_PANIC: Panic output
|
|
+ * @CONS_PRIO_MAX: The number of priority levels
|
|
+ *
|
|
+ * Emergency output can carefully takeover the console even without consent
|
|
+ * of the owner, ideally only when @cons_state::unsafe is not set. Panic
|
|
+ * output can ignore the unsafe flag as a last resort. If panic output is
|
|
+ * active no takeover is possible until the panic output releases the
|
|
+ * console.
|
|
+ */
|
|
+enum cons_prio {
|
|
+ CONS_PRIO_NONE = 0,
|
|
+ CONS_PRIO_NORMAL,
|
|
+ CONS_PRIO_EMERGENCY,
|
|
+ CONS_PRIO_PANIC,
|
|
+ CONS_PRIO_MAX,
|
|
+};
|
|
+
|
|
+struct console;
|
|
+struct printk_buffers;
|
|
+
|
|
+/**
|
|
+ * struct cons_context - Context for console acquire/release
|
|
+ * @console: The associated console
|
|
+ * @state: The state at acquire time
|
|
+ * @old_state: The old state when try_acquire() failed for analysis
|
|
+ * by the caller
|
|
+ * @hov_state: The handover state for spin and cleanup
|
|
+ * @req_state: The request state for spin and cleanup
|
|
+ * @spinwait_max_us: Limit for spinwait acquire
|
|
+ * @oldseq: The sequence number at acquire()
|
|
+ * @newseq: The sequence number for progress
|
|
+ * @prio: Priority of the context
|
|
+ * @pbufs: Pointer to the text buffer for this context
|
|
+ * @dropped: Dropped counter for the current context
|
|
+ * @thread: The acquire is printk thread context
|
|
+ * @hostile: Hostile takeover requested. Cleared on normal
|
|
+ * acquire or friendly handover
|
|
+ * @spinwait: Spinwait on acquire if possible
|
|
+ * @backlog: Ringbuffer has pending records
|
|
+ */
|
|
+struct cons_context {
|
|
+ struct console *console;
|
|
+ struct cons_state state;
|
|
+ struct cons_state old_state;
|
|
+ struct cons_state hov_state;
|
|
+ struct cons_state req_state;
|
|
+ u64 oldseq;
|
|
+ u64 newseq;
|
|
+ unsigned int spinwait_max_us;
|
|
+ enum cons_prio prio;
|
|
+ struct printk_buffers *pbufs;
|
|
+ unsigned long dropped;
|
|
+ unsigned int thread : 1;
|
|
+ unsigned int hostile : 1;
|
|
+ unsigned int spinwait : 1;
|
|
+ unsigned int backlog : 1;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct cons_write_context - Context handed to the write callbacks
|
|
+ * @ctxt: The core console context
|
|
+ * @outbuf: Pointer to the text buffer for output
|
|
+ * @len: Length to write
|
|
+ * @unsafe: Invoked in unsafe state due to force takeover
|
|
+ */
|
|
+struct cons_write_context {
|
|
+ struct cons_context __private ctxt;
|
|
+ char *outbuf;
|
|
+ unsigned int len;
|
|
+ bool unsafe;
|
|
+};
|
|
+
|
|
+struct cons_context_data;
|
|
+
|
|
/**
|
|
* struct console - The console descriptor structure
|
|
* @name: The name of the console driver
|
|
@@ -184,6 +315,18 @@ enum cons_flags {
|
|
* @dropped: Number of unreported dropped ringbuffer records
|
|
* @data: Driver private data
|
|
* @node: hlist node for the console list
|
|
+ *
|
|
+ * @atomic_state: State array for NOBKL consoles; real and handover
|
|
+ * @atomic_seq: Sequence for record tracking (32bit only)
|
|
+ * @thread_pbufs: Pointer to thread private buffer
|
|
+ * @kthread: Pointer to kernel thread
|
|
+ * @rcuwait: RCU wait for the kernel thread
|
|
+ * @irq_work: IRQ work for thread wakeup
|
|
+ * @kthread_waiting: Indicator whether the kthread is waiting to be woken
|
|
+ * @write_atomic: Write callback for atomic context
|
|
+ * @write_thread: Write callback for printk threaded printing
|
|
+ * @port_lock: Callback to lock/unlock the port lock
|
|
+ * @pcpu_data: Pointer to percpu context data
|
|
*/
|
|
struct console {
|
|
char name[16];
|
|
@@ -203,6 +346,23 @@ struct console {
|
|
unsigned long dropped;
|
|
void *data;
|
|
struct hlist_node node;
|
|
+
|
|
+ /* NOBKL console specific members */
|
|
+ atomic_long_t __private atomic_state[2];
|
|
+#ifndef CONFIG_64BIT
|
|
+ atomic_t __private atomic_seq;
|
|
+#endif
|
|
+ struct printk_buffers *thread_pbufs;
|
|
+ struct task_struct *kthread;
|
|
+ struct rcuwait rcuwait;
|
|
+ struct irq_work irq_work;
|
|
+ atomic_t kthread_waiting;
|
|
+
|
|
+ bool (*write_atomic)(struct console *con, struct cons_write_context *wctxt);
|
|
+ bool (*write_thread)(struct console *con, struct cons_write_context *wctxt);
|
|
+ void (*port_lock)(struct console *con, bool do_lock, unsigned long *flags);
|
|
+
|
|
+ struct cons_context_data __percpu *pcpu_data;
|
|
};
|
|
|
|
#ifdef CONFIG_LOCKDEP
|
|
@@ -329,6 +489,24 @@ static inline bool console_is_registered(const struct console *con)
|
|
lockdep_assert_console_list_lock_held(); \
|
|
hlist_for_each_entry(con, &console_list, node)
|
|
|
|
+#ifdef CONFIG_PRINTK
|
|
+extern enum cons_prio cons_atomic_enter(enum cons_prio prio);
|
|
+extern void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio);
|
|
+extern bool console_can_proceed(struct cons_write_context *wctxt);
|
|
+extern bool console_enter_unsafe(struct cons_write_context *wctxt);
|
|
+extern bool console_exit_unsafe(struct cons_write_context *wctxt);
|
|
+extern bool console_try_acquire(struct cons_write_context *wctxt);
|
|
+extern bool console_release(struct cons_write_context *wctxt);
|
|
+#else
|
|
+static inline enum cons_prio cons_atomic_enter(enum cons_prio prio) { return CONS_PRIO_NONE; }
|
|
+static inline void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio) { }
|
|
+static inline bool console_can_proceed(struct cons_write_context *wctxt) { return false; }
|
|
+static inline bool console_enter_unsafe(struct cons_write_context *wctxt) { return false; }
|
|
+static inline bool console_exit_unsafe(struct cons_write_context *wctxt) { return false; }
|
|
+static inline bool console_try_acquire(struct cons_write_context *wctxt) { return false; }
|
|
+static inline bool console_release(struct cons_write_context *wctxt) { return false; }
|
|
+#endif
|
|
+
|
|
extern int console_set_on_cmdline;
|
|
extern struct console *early_console;
|
|
|
|
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
|
|
index d95ab85f96ba5..3dc3704a3cdbb 100644
|
|
--- a/include/linux/entry-common.h
|
|
+++ b/include/linux/entry-common.h
|
|
@@ -57,9 +57,15 @@
|
|
# define ARCH_EXIT_TO_USER_MODE_WORK (0)
|
|
#endif
|
|
|
|
+#ifdef CONFIG_PREEMPT_LAZY
|
|
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
|
|
+#else
|
|
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED)
|
|
+#endif
|
|
+
|
|
#define EXIT_TO_USER_MODE_WORK \
|
|
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
|
|
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
|
|
+ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
|
|
ARCH_EXIT_TO_USER_MODE_WORK)
|
|
|
|
/**
|
|
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
|
|
index a92bce40b04b3..bf82980f569df 100644
|
|
--- a/include/linux/interrupt.h
|
|
+++ b/include/linux/interrupt.h
|
|
@@ -605,6 +605,35 @@ extern void __raise_softirq_irqoff(unsigned int nr);
|
|
extern void raise_softirq_irqoff(unsigned int nr);
|
|
extern void raise_softirq(unsigned int nr);
|
|
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+DECLARE_PER_CPU(struct task_struct *, timersd);
|
|
+DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
|
|
+
|
|
+extern void raise_timer_softirq(void);
|
|
+extern void raise_hrtimer_softirq(void);
|
|
+
|
|
+static inline unsigned int local_pending_timers(void)
|
|
+{
|
|
+ return __this_cpu_read(pending_timer_softirq);
|
|
+}
|
|
+
|
|
+#else
|
|
+static inline void raise_timer_softirq(void)
|
|
+{
|
|
+ raise_softirq(TIMER_SOFTIRQ);
|
|
+}
|
|
+
|
|
+static inline void raise_hrtimer_softirq(void)
|
|
+{
|
|
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
|
|
+}
|
|
+
|
|
+static inline unsigned int local_pending_timers(void)
|
|
+{
|
|
+ return local_softirq_pending();
|
|
+}
|
|
+#endif
|
|
+
|
|
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
|
|
|
|
static inline struct task_struct *this_cpu_ksoftirqd(void)
|
|
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
|
|
index 09d4f17c8d3b6..7376c1df9c901 100644
|
|
--- a/include/linux/io-mapping.h
|
|
+++ b/include/linux/io-mapping.h
|
|
@@ -69,7 +69,10 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping,
|
|
|
|
BUG_ON(offset >= mapping->size);
|
|
phys_addr = mapping->base + offset;
|
|
- preempt_disable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ preempt_disable();
|
|
+ else
|
|
+ migrate_disable();
|
|
pagefault_disable();
|
|
return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot);
|
|
}
|
|
@@ -79,7 +82,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr)
|
|
{
|
|
kunmap_local_indexed((void __force *)vaddr);
|
|
pagefault_enable();
|
|
- preempt_enable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ preempt_enable();
|
|
+ else
|
|
+ migrate_enable();
|
|
}
|
|
|
|
static inline void __iomem *
|
|
@@ -162,7 +168,10 @@ static inline void __iomem *
|
|
io_mapping_map_atomic_wc(struct io_mapping *mapping,
|
|
unsigned long offset)
|
|
{
|
|
- preempt_disable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ preempt_disable();
|
|
+ else
|
|
+ migrate_disable();
|
|
pagefault_disable();
|
|
return io_mapping_map_wc(mapping, offset, PAGE_SIZE);
|
|
}
|
|
@@ -172,7 +181,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr)
|
|
{
|
|
io_mapping_unmap(vaddr);
|
|
pagefault_enable();
|
|
- preempt_enable();
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ preempt_enable();
|
|
+ else
|
|
+ migrate_enable();
|
|
}
|
|
|
|
static inline void __iomem *
|
|
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
|
|
index c35f04f636f15..0c014424b1312 100644
|
|
--- a/include/linux/netdevice.h
|
|
+++ b/include/linux/netdevice.h
|
|
@@ -3202,7 +3202,11 @@ struct softnet_data {
|
|
int defer_count;
|
|
int defer_ipi_scheduled;
|
|
struct sk_buff *defer_list;
|
|
+#ifndef CONFIG_PREEMPT_RT
|
|
call_single_data_t defer_csd;
|
|
+#else
|
|
+ struct work_struct defer_work;
|
|
+#endif
|
|
};
|
|
|
|
static inline void input_queue_head_incr(struct softnet_data *sd)
|
|
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
|
|
index 0df425bf9bd75..05338f00a5907 100644
|
|
--- a/include/linux/preempt.h
|
|
+++ b/include/linux/preempt.h
|
|
@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val);
|
|
#define preempt_count_inc() preempt_count_add(1)
|
|
#define preempt_count_dec() preempt_count_sub(1)
|
|
|
|
+#ifdef CONFIG_PREEMPT_LAZY
|
|
+#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
|
|
+#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
|
|
+#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
|
|
+#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
|
|
+#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
|
|
+#else
|
|
+#define add_preempt_lazy_count(val) do { } while (0)
|
|
+#define sub_preempt_lazy_count(val) do { } while (0)
|
|
+#define inc_preempt_lazy_count() do { } while (0)
|
|
+#define dec_preempt_lazy_count() do { } while (0)
|
|
+#define preempt_lazy_count() (0)
|
|
+#endif
|
|
+
|
|
#ifdef CONFIG_PREEMPT_COUNT
|
|
|
|
#define preempt_disable() \
|
|
@@ -204,6 +218,12 @@ do { \
|
|
barrier(); \
|
|
} while (0)
|
|
|
|
+#define preempt_lazy_disable() \
|
|
+do { \
|
|
+ inc_preempt_lazy_count(); \
|
|
+ barrier(); \
|
|
+} while (0)
|
|
+
|
|
#define sched_preempt_enable_no_resched() \
|
|
do { \
|
|
barrier(); \
|
|
@@ -218,15 +238,21 @@ do { \
|
|
#define preempt_enable() \
|
|
do { \
|
|
barrier(); \
|
|
- if (unlikely(preempt_count_dec_and_test())) \
|
|
+ if (unlikely(preempt_count_dec_and_test())) { \
|
|
+ instrumentation_begin(); \
|
|
__preempt_schedule(); \
|
|
+ instrumentation_end(); \
|
|
+ } \
|
|
} while (0)
|
|
|
|
#define preempt_enable_notrace() \
|
|
do { \
|
|
barrier(); \
|
|
- if (unlikely(__preempt_count_dec_and_test())) \
|
|
+ if (unlikely(__preempt_count_dec_and_test())) { \
|
|
+ instrumentation_begin(); \
|
|
__preempt_schedule_notrace(); \
|
|
+ instrumentation_end(); \
|
|
+ } \
|
|
} while (0)
|
|
|
|
#define preempt_check_resched() \
|
|
@@ -235,6 +261,18 @@ do { \
|
|
__preempt_schedule(); \
|
|
} while (0)
|
|
|
|
+/*
|
|
+ * open code preempt_check_resched() because it is not exported to modules and
|
|
+ * used by local_unlock() or bpf_enable_instrumentation().
|
|
+ */
|
|
+#define preempt_lazy_enable() \
|
|
+do { \
|
|
+ dec_preempt_lazy_count(); \
|
|
+ barrier(); \
|
|
+ if (should_resched(0)) \
|
|
+ __preempt_schedule(); \
|
|
+} while (0)
|
|
+
|
|
#else /* !CONFIG_PREEMPTION */
|
|
#define preempt_enable() \
|
|
do { \
|
|
@@ -242,6 +280,12 @@ do { \
|
|
preempt_count_dec(); \
|
|
} while (0)
|
|
|
|
+#define preempt_lazy_enable() \
|
|
+do { \
|
|
+ dec_preempt_lazy_count(); \
|
|
+ barrier(); \
|
|
+} while (0)
|
|
+
|
|
#define preempt_enable_notrace() \
|
|
do { \
|
|
barrier(); \
|
|
@@ -282,6 +326,9 @@ do { \
|
|
#define preempt_enable_notrace() barrier()
|
|
#define preemptible() 0
|
|
|
|
+#define preempt_lazy_disable() barrier()
|
|
+#define preempt_lazy_enable() barrier()
|
|
+
|
|
#endif /* CONFIG_PREEMPT_COUNT */
|
|
|
|
#ifdef MODULE
|
|
@@ -300,7 +347,7 @@ do { \
|
|
} while (0)
|
|
#define preempt_fold_need_resched() \
|
|
do { \
|
|
- if (tif_need_resched()) \
|
|
+ if (tif_need_resched_now()) \
|
|
set_preempt_need_resched(); \
|
|
} while (0)
|
|
|
|
@@ -416,8 +463,15 @@ extern void migrate_enable(void);
|
|
|
|
#else
|
|
|
|
-static inline void migrate_disable(void) { }
|
|
-static inline void migrate_enable(void) { }
|
|
+static inline void migrate_disable(void)
|
|
+{
|
|
+ preempt_lazy_disable();
|
|
+}
|
|
+
|
|
+static inline void migrate_enable(void)
|
|
+{
|
|
+ preempt_lazy_enable();
|
|
+}
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
diff --git a/include/linux/printk.h b/include/linux/printk.h
|
|
index 8ef499ab3c1ed..b55662624ff87 100644
|
|
--- a/include/linux/printk.h
|
|
+++ b/include/linux/printk.h
|
|
@@ -139,6 +139,7 @@ void early_printk(const char *s, ...) { }
|
|
#endif
|
|
|
|
struct dev_printk_info;
|
|
+struct cons_write_context;
|
|
|
|
#ifdef CONFIG_PRINTK
|
|
asmlinkage __printf(4, 0)
|
|
@@ -157,15 +158,17 @@ int _printk(const char *fmt, ...);
|
|
*/
|
|
__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
|
|
|
|
-extern void __printk_safe_enter(void);
|
|
-extern void __printk_safe_exit(void);
|
|
+extern void __printk_safe_enter(unsigned long *flags);
|
|
+extern void __printk_safe_exit(unsigned long *flags);
|
|
+extern void __printk_deferred_enter(void);
|
|
+extern void __printk_deferred_exit(void);
|
|
/*
|
|
* The printk_deferred_enter/exit macros are available only as a hack for
|
|
* some code paths that need to defer all printk console printing. Interrupts
|
|
* must be disabled for the deferred duration.
|
|
*/
|
|
-#define printk_deferred_enter __printk_safe_enter
|
|
-#define printk_deferred_exit __printk_safe_exit
|
|
+#define printk_deferred_enter() __printk_deferred_enter()
|
|
+#define printk_deferred_exit() __printk_deferred_exit()
|
|
|
|
/*
|
|
* Please don't use printk_ratelimit(), because it shares ratelimiting state
|
|
@@ -192,6 +195,8 @@ void show_regs_print_info(const char *log_lvl);
|
|
extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
|
|
extern asmlinkage void dump_stack(void) __cold;
|
|
void printk_trigger_flush(void);
|
|
+extern void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt,
|
|
+ bool skip_unsafe);
|
|
#else
|
|
static inline __printf(1, 0)
|
|
int vprintk(const char *s, va_list args)
|
|
@@ -271,6 +276,12 @@ static inline void dump_stack(void)
|
|
static inline void printk_trigger_flush(void)
|
|
{
|
|
}
|
|
+
|
|
+static inline void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt,
|
|
+ bool skip_unsafe)
|
|
+{
|
|
+}
|
|
+
|
|
#endif
|
|
|
|
#ifdef CONFIG_SMP
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
|
index 63d242164b1a9..593d7e7c2e4c7 100644
|
|
--- a/include/linux/sched.h
|
|
+++ b/include/linux/sched.h
|
|
@@ -303,6 +303,11 @@ extern long schedule_timeout_idle(long timeout);
|
|
asmlinkage void schedule(void);
|
|
extern void schedule_preempt_disabled(void);
|
|
asmlinkage void preempt_schedule_irq(void);
|
|
+
|
|
+extern void sched_submit_work(void);
|
|
+extern void sched_resume_work(void);
|
|
+extern void schedule_rtmutex(void);
|
|
+
|
|
#ifdef CONFIG_PREEMPT_RT
|
|
extern void schedule_rtlock(void);
|
|
#endif
|
|
@@ -2059,6 +2064,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
|
|
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
|
|
}
|
|
|
|
+#ifdef CONFIG_PREEMPT_LAZY
|
|
+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
|
|
+{
|
|
+ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
|
|
+}
|
|
+
|
|
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
|
|
+{
|
|
+ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
|
|
+}
|
|
+
|
|
+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
|
|
+{
|
|
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
|
|
+}
|
|
+
|
|
+static inline int need_resched_lazy(void)
|
|
+{
|
|
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
|
|
+}
|
|
+
|
|
+static inline int need_resched_now(void)
|
|
+{
|
|
+ return test_thread_flag(TIF_NEED_RESCHED);
|
|
+}
|
|
+
|
|
+#else
|
|
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
|
|
+static inline int need_resched_lazy(void) { return 0; }
|
|
+
|
|
+static inline int need_resched_now(void)
|
|
+{
|
|
+ return test_thread_flag(TIF_NEED_RESCHED);
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
/*
|
|
* cond_resched() and cond_resched_lock(): latency reduction via
|
|
* explicit rescheduling in places that are safe. The return
|
|
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
|
|
index 20099268fa257..b38ce53576000 100644
|
|
--- a/include/linux/sched/signal.h
|
|
+++ b/include/linux/sched/signal.h
|
|
@@ -22,6 +22,7 @@ struct sighand_struct {
|
|
refcount_t count;
|
|
wait_queue_head_t signalfd_wqh;
|
|
struct k_sigaction action[_NSIG];
|
|
+ struct sigqueue *sigqueue_cache;
|
|
};
|
|
|
|
/*
|
|
@@ -135,7 +136,7 @@ struct signal_struct {
|
|
#ifdef CONFIG_POSIX_TIMERS
|
|
|
|
/* POSIX.1b Interval Timers */
|
|
- int posix_timer_id;
|
|
+ unsigned int next_posix_timer_id;
|
|
struct list_head posix_timers;
|
|
|
|
/* ITIMER_REAL timer for the process */
|
|
@@ -349,6 +350,7 @@ extern int send_sig(int, struct task_struct *, int);
|
|
extern int zap_other_threads(struct task_struct *p);
|
|
extern struct sigqueue *sigqueue_alloc(void);
|
|
extern void sigqueue_free(struct sigqueue *);
|
|
+extern void sigqueue_free_cached_entry(struct sigqueue *q);
|
|
extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
|
|
extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
|
|
|
|
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 19376bee96676..9055a22992edc 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -125,6 +125,8 @@ struct uart_8250_port {
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
unsigned char msr_saved_flags;

+ bool console_newline_needed;
+
struct uart_8250_dma *dma;
const struct uart_8250_ops *ops;

@@ -139,6 +141,9 @@ struct uart_8250_port {
/* Serial port overrun backoff */
struct delayed_work overrun_backoff;
u32 overrun_backoff_time_ms;
+
+ struct cons_write_context wctxt;
+ int cookie;
};

static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up)
@@ -178,8 +183,10 @@ void serial8250_tx_chars(struct uart_8250_port *up);
unsigned int serial8250_modem_status(struct uart_8250_port *up);
void serial8250_init_port(struct uart_8250_port *up);
void serial8250_set_defaults(struct uart_8250_port *up);
-void serial8250_console_write(struct uart_8250_port *up, const char *s,
- unsigned int count);
+bool serial8250_console_write_atomic(struct uart_8250_port *up,
+ struct cons_write_context *wctxt);
+bool serial8250_console_write_thread(struct uart_8250_port *up,
+ struct cons_write_context *wctxt);
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
int serial8250_console_exit(struct uart_port *port);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c02646884fa83..c1acb81b81948 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -178,6 +178,26 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti
#endif /* !CONFIG_GENERIC_ENTRY */

#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
+# ifdef CONFIG_PREEMPT_LAZY
+
+static __always_inline bool tif_need_resched(void)
+{
+ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY);
+}
+
+static __always_inline bool tif_need_resched_now(void)
+{
+ return arch_test_bit(TIF_NEED_RESCHED,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+static __always_inline bool tif_need_resched_lazy(void)
+{
+ return arch_test_bit(TIF_NEED_RESCHED_LAZY,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+# else /* !CONFIG_PREEMPT_LAZY */

static __always_inline bool tif_need_resched(void)
{
@@ -185,7 +205,38 @@ static __always_inline bool tif_need_resched(void)
(unsigned long *)(&current_thread_info()->flags));
}

-#else
+static __always_inline bool tif_need_resched_now(void)
+{
+ return tif_need_resched();
+}
+
+static __always_inline bool tif_need_resched_lazy(void)
+{
+ return false;
+}
+
+# endif /* CONFIG_PREEMPT_LAZY */
+#else /* !_ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
+# ifdef CONFIG_PREEMPT_LAZY
+
+static __always_inline bool tif_need_resched(void)
+{
+ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY);
+}
+
+static __always_inline bool tif_need_resched_now(void)
+{
+ return test_bit(TIF_NEED_RESCHED,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+static __always_inline bool tif_need_resched_lazy(void)
+{
+ return test_bit(TIF_NEED_RESCHED_LAZY,
+ (unsigned long *)(&current_thread_info()->flags));
+}
+
+# else /* !CONFIG_PREEMPT_LAZY */

static __always_inline bool tif_need_resched(void)
{
@@ -193,6 +244,17 @@ static __always_inline bool tif_need_resched(void)
(unsigned long *)(&current_thread_info()->flags));
}

+static __always_inline bool tif_need_resched_now(void)
+{
+ return tif_need_resched();
+}
+
+static __always_inline bool tif_need_resched_lazy(void)
+{
+ return false;
+}
+
+# endif /* !CONFIG_PREEMPT_LAZY */
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */

#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 0e373222a6df8..47017fcf5481f 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -70,6 +70,7 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
+ unsigned char preempt_lazy_count;
};

#define TRACE_EVENT_TYPE_MAX \
@@ -158,9 +159,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry,
unsigned int trace_ctx)
{
entry->preempt_count = trace_ctx & 0xff;
+ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff;
entry->pid = current->pid;
entry->type = type;
- entry->flags = trace_ctx >> 16;
+ entry->flags = trace_ctx >> 24;
}

unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
@@ -171,7 +173,13 @@ enum trace_flag_type {
TRACE_FLAG_NEED_RESCHED = 0x04,
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
+#ifdef CONFIG_PREEMPT_LAZY
+ TRACE_FLAG_PREEMPT_RESCHED = 0x00,
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20,
+#else
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x00,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
+#endif
TRACE_FLAG_NMI = 0x40,
TRACE_FLAG_BH_OFF = 0x80,
};
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 3e8619c72f774..b4bc2828fa09f 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -158,7 +158,11 @@ DEFINE_EVENT(timer_class, timer_cancel,
{ HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \
{ HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \
{ HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \
- { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" })
+ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \
+ { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \
+ { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \
+ { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \
+ { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" })

/**
* hrtimer_init - called when the hrtimer is initialized
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c2f1fd95a8214..260c08efeb486 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,5 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only

+config HAVE_PREEMPT_LAZY
+ bool
+
+config PREEMPT_LAZY
+ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
+
config PREEMPT_NONE_BUILD
bool
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 5c7e9ba7cd6b2..e9139dfc1f0a8 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -576,6 +576,8 @@ static void kdb_msg_write(const char *msg, int msg_len)
continue;
if (c == dbg_io_ops->cons)
continue;
+ if (!c->write)
+ continue;
/*
* Set oops_in_progress to encourage the console drivers to
* disregard their internal spin locks: in the current calling
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index be61332c66b54..c6301e520d47b 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -155,7 +155,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,

local_irq_enable_exit_to_user(ti_work);

- if (ti_work & _TIF_NEED_RESCHED)
+ if (ti_work & _TIF_NEED_RESCHED_MASK)
schedule();

if (ti_work & _TIF_UPROBE)
@@ -386,7 +386,7 @@ void raw_irqentry_exit_cond_resched(void)
rcu_irq_exit_check_preempt();
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
WARN_ON_ONCE(!on_thread_stack());
- if (need_resched())
+ if (should_resched(0))
preempt_schedule_irq();
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index ea332319dffea..d78954a3834cc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1665,6 +1665,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
RCU_INIT_POINTER(tsk->sighand, sig);
if (!sig)
return -ENOMEM;
+ sig->sigqueue_cache = NULL;

refcount_set(&sig->count, 1);
spin_lock_irq(&current->sighand->siglock);
@@ -1681,7 +1682,17 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
void __cleanup_sighand(struct sighand_struct *sighand)
{
if (refcount_dec_and_test(&sighand->count)) {
+ struct sigqueue *sigqueue = NULL;
+
signalfd_cleanup(sighand);
+ spin_lock_irq(&sighand->siglock);
+ if (sighand->sigqueue_cache) {
+ sigqueue = sighand->sigqueue_cache;
+ sighand->sigqueue_cache = NULL;
+ }
+ spin_unlock_irq(&sighand->siglock);
+
+ sigqueue_free_cached_entry(sigqueue);
/*
* sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
* without an RCU grace period, see __lock_task_sighand().
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 0408aab80941b..ee063d28f94f8 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -167,6 +167,15 @@ KERNEL_ATTR_RO(vmcoreinfo);

#endif /* CONFIG_CRASH_CORE */

+#if defined(CONFIG_PREEMPT_RT)
+static ssize_t realtime_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", 1);
+}
+KERNEL_ATTR_RO(realtime);
+#endif
+
/* whether file capabilities are enabled */
static ssize_t fscaps_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -259,6 +268,9 @@ static struct attribute * kernel_attrs[] = {
#ifndef CONFIG_TINY_RCU
&rcu_expedited_attr.attr,
&rcu_normal_attr.attr,
+#endif
+#ifdef CONFIG_PREEMPT_RT
+ &realtime_attr.attr,
#endif
NULL
};
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 728f434de2bbf..479a9487edcc2 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
return try_cmpxchg_acquire(&lock->owner, &old, new);
}

+static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
+{
+ return rt_mutex_cmpxchg_acquire(lock, NULL, current);
+}
+
static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
struct task_struct *old,
struct task_struct *new)
@@ -297,6 +302,24 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,

}

+static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock);
+
+static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
+{
+ /*
+ * With debug enabled rt_mutex_cmpxchg trylock() will always fail,
+ * which will unconditionally invoke sched_submit/resume_work() in
+ * the slow path of __rt_mutex_lock() and __ww_rt_mutex_lock() even
+ * in the non-contended case.
+ *
+ * Avoid that by using rt_mutex_slow_trylock() which is covered by
+ * the debug code and can acquire a non-contended rtmutex. On
+ * success the callsite avoids the sched_submit/resume_work()
+ * dance.
+ */
+ return rt_mutex_slowtrylock(lock);
+}
+
static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
struct task_struct *old,
struct task_struct *new)
@@ -1555,7 +1578,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
raw_spin_unlock_irq(&lock->wait_lock);

if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
- schedule();
+ schedule_rtmutex();

raw_spin_lock_irq(&lock->wait_lock);
set_current_state(state);
@@ -1584,7 +1607,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
WARN(1, "rtmutex deadlock detected\n");
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
- schedule();
+ schedule_rtmutex();
}
}

@@ -1679,6 +1702,12 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
unsigned long flags;
int ret;

+ /*
+ * The task is about to sleep. Invoke sched_submit_work() before
+ * blocking as that might take locks and corrupt tsk::pi_blocked_on.
+ */
+ sched_submit_work();
+
/*
* Technically we could use raw_spin_[un]lock_irq() here, but this can
* be called in early boot if the cmpxchg() fast path is disabled
@@ -1691,13 +1720,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

+ sched_resume_work();
return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
unsigned int state)
{
- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
+ lockdep_assert(!current->pi_blocked_on);
+
+ if (likely(rt_mutex_try_acquire(lock)))
return 0;

return rt_mutex_slowlock(lock, NULL, state);
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index c201aadb93017..5be92ca5afabc 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -72,15 +72,6 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
int ret;

raw_spin_lock_irq(&rtm->wait_lock);
- /*
- * Allow readers, as long as the writer has not completely
- * acquired the semaphore for write.
- */
- if (atomic_read(&rwb->readers) != WRITER_BIAS) {
- atomic_inc(&rwb->readers);
- raw_spin_unlock_irq(&rtm->wait_lock);
- return 0;
- }

/*
* Call into the slow lock path with the rtmutex->wait_lock
@@ -140,10 +131,23 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
unsigned int state)
{
+ int ret;
+
+ lockdep_assert(!current->pi_blocked_on);
+
if (rwbase_read_trylock(rwb))
return 0;

- return __rwbase_read_lock(rwb, state);
+ /*
+ * The task is about to sleep. For rwsems this submits work as that
+ * might take locks and corrupt tsk::pi_blocked_on. Must be
+ * explicit here because __rwbase_read_lock() cannot invoke
+ * rt_mutex_slowlock(). NOP for rwlocks.
+ */
+ rwbase_sched_submit_work();
+ ret = __rwbase_read_lock(rwb, state);
+ rwbase_sched_resume_work();
+ return ret;
}

static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
@@ -239,7 +243,10 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
struct rt_mutex_base *rtm = &rwb->rtmutex;
unsigned long flags;

- /* Take the rtmutex as a first step */
+ /*
+ * Take the rtmutex as a first step. For rwsem this will also
+ * invoke sched_submit_work() to flush IO and workers.
+ */
if (rwbase_rtmutex_lock_state(rtm, state))
return -EINTR;
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 9eabd585ce7af..e304db9ebfd95 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1415,6 +1415,12 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
#define rwbase_rtmutex_lock_state(rtm, state) \
__rt_mutex_lock(rtm, state)

+#define rwbase_sched_submit_work() \
+ sched_submit_work()
+
+#define rwbase_sched_resume_work() \
+ sched_resume_work()
+
#define rwbase_rtmutex_slowlock_locked(rtm, state) \
__rt_mutex_slowlock_locked(rtm, NULL, state)
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
index 48a19ed8486d8..9fe282cd145d9 100644
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -37,6 +37,8 @@

static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
{
+ lockdep_assert(!current->pi_blocked_on);
+
if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
rtlock_slowlock(rtm);
}
@@ -159,6 +161,9 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state)
return 0;
}

+static __always_inline void rwbase_sched_submit_work(void) { }
+static __always_inline void rwbase_sched_resume_work(void) { }
+
static __always_inline int
rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state)
{
diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c
index d1473c624105c..c7196de838edc 100644
--- a/kernel/locking/ww_rt_mutex.c
+++ b/kernel/locking/ww_rt_mutex.c
@@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx,
}
mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip);

- if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) {
+ if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) {
if (ww_ctx)
ww_mutex_set_context_fastpath(lock, ww_ctx);
return 0;
diff --git a/kernel/panic.c b/kernel/panic.c
index 5cfea8302d23a..190f7f2bc6cfd 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -275,6 +275,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec)
*/
void panic(const char *fmt, ...)
{
+ enum cons_prio prev_prio;
static char buf[1024];
va_list args;
long i, i_next = 0, len;
@@ -322,6 +323,8 @@ void panic(const char *fmt, ...)
if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
panic_smp_self_stop();

+ prev_prio = cons_atomic_enter(CONS_PRIO_PANIC);
+
console_verbose();
bust_spinlocks(1);
va_start(args, fmt);
@@ -382,6 +385,8 @@ void panic(const char *fmt, ...)
if (_crash_kexec_post_notifiers)
__crash_kexec(NULL);

+ cons_atomic_flush(NULL, true);
+
console_unblank();

/*
@@ -406,6 +411,7 @@ void panic(const char *fmt, ...)
* We can't use the "normal" timers since we just panicked.
*/
pr_emerg("Rebooting in %d seconds..\n", panic_timeout);
+ cons_atomic_flush(NULL, true);

for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
touch_nmi_watchdog();
@@ -424,6 +430,7 @@ void panic(const char *fmt, ...)
*/
if (panic_reboot_mode != REBOOT_UNDEFINED)
reboot_mode = panic_reboot_mode;
+ cons_atomic_flush(NULL, true);
emergency_restart();
}
#ifdef __sparc__
@@ -436,12 +443,16 @@ void panic(const char *fmt, ...)
}
#endif
#if defined(CONFIG_S390)
+ cons_atomic_flush(NULL, true);
disabled_wait();
#endif
pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf);

/* Do not scroll important messages printed above */
suppress_printk = 1;
+
+ cons_atomic_exit(CONS_PRIO_PANIC, prev_prio);
+
local_irq_enable();
for (i = 0; ; i += PANIC_TIMER_STEP) {
touch_softlockup_watchdog();
@@ -652,6 +663,10 @@ struct warn_args {
void __warn(const char *file, int line, void *caller, unsigned taint,
struct pt_regs *regs, struct warn_args *args)
{
+ enum cons_prio prev_prio;
+
+ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY);
+
disable_trace_on_warning();

if (file)
@@ -682,6 +697,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,

/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
+
+ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio);
}

#ifndef __WARN_FLAGS
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index f5b388e810b9f..b36683bd2f821 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y = printk.o
-obj-$(CONFIG_PRINTK) += printk_safe.o
+obj-$(CONFIG_PRINTK) += printk_safe.o printk_nobkl.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
obj-$(CONFIG_PRINTK_INDEX) += index.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
|
|
index 2a17704136f1d..6631fd70542f9 100644
|
|
--- a/kernel/printk/internal.h
|
|
+++ b/kernel/printk/internal.h
|
|
@@ -3,6 +3,8 @@
|
|
* internal.h - printk internal definitions
|
|
*/
|
|
#include <linux/percpu.h>
|
|
+#include <linux/console.h>
|
|
+#include "printk_ringbuffer.h"
|
|
|
|
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
|
|
void __init printk_sysctl_init(void);
|
|
@@ -12,8 +14,13 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
|
|
#define printk_sysctl_init() do { } while (0)
|
|
#endif
|
|
|
|
-#ifdef CONFIG_PRINTK
|
|
+#define con_printk(lvl, con, fmt, ...) \
|
|
+ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \
|
|
+ (con->flags & CON_NO_BKL) ? "" : "legacy ", \
|
|
+ (con->flags & CON_BOOT) ? "boot" : "", \
|
|
+ con->name, con->index, ##__VA_ARGS__)
|
|
|
|
+#ifdef CONFIG_PRINTK
|
|
#ifdef CONFIG_PRINTK_CALLER
|
|
#define PRINTK_PREFIX_MAX 48
|
|
#else
|
|
@@ -35,6 +42,12 @@ enum printk_info_flags {
|
|
LOG_CONT = 8, /* text is a fragment of a continuation line */
|
|
};
|
|
|
|
+extern struct printk_ringbuffer *prb;
|
|
+extern bool have_bkl_console;
|
|
+extern bool printk_threads_enabled;
|
|
+
|
|
+extern bool have_boot_console;
|
|
+
|
|
__printf(4, 0)
|
|
int vprintk_store(int facility, int level,
|
|
const struct dev_printk_info *dev_info,
|
|
@@ -45,28 +58,98 @@ __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
|
|
|
|
bool printk_percpu_data_ready(void);
|
|
|
|
+/*
|
|
+ * The printk_safe_enter()/_exit() macros mark code blocks using locks that
|
|
+ * would lead to deadlock if an interrupting context were to call printk()
|
|
+ * while the interrupted context was within such code blocks.
|
|
+ *
|
|
+ * When a CPU is in such a code block, an interrupting context calling
|
|
+ * printk() will only log the new message to the lockless ringbuffer and
|
|
+ * then trigger console printing using irqwork.
|
|
+ */
|
|
+
|
|
#define printk_safe_enter_irqsave(flags) \
|
|
do { \
|
|
- local_irq_save(flags); \
|
|
- __printk_safe_enter(); \
|
|
+ __printk_safe_enter(&flags); \
|
|
} while (0)
|
|
|
|
#define printk_safe_exit_irqrestore(flags) \
|
|
do { \
|
|
- __printk_safe_exit(); \
|
|
- local_irq_restore(flags); \
|
|
+ __printk_safe_exit(&flags); \
|
|
} while (0)
|
|
|
|
void defer_console_output(void);
|
|
|
|
u16 printk_parse_prefix(const char *text, int *level,
|
|
enum printk_info_flags *flags);
|
|
+
|
|
+u64 cons_read_seq(struct console *con);
|
|
+void cons_nobkl_cleanup(struct console *con);
|
|
+bool cons_nobkl_init(struct console *con);
|
|
+bool cons_alloc_percpu_data(struct console *con);
|
|
+void cons_kthread_create(struct console *con);
|
|
+void cons_wake_threads(void);
|
|
+void cons_force_seq(struct console *con, u64 seq);
|
|
+void console_bkl_kthread_create(void);
|
|
+
|
|
+/*
|
|
+ * Check if the given console is currently capable and allowed to print
|
|
+ * records. If the caller only works with certain types of consoles, the
|
|
+ * caller is responsible for checking the console type before calling
|
|
+ * this function.
|
|
+ */
|
|
+static inline bool console_is_usable(struct console *con, short flags)
|
|
+{
|
|
+ if (!(flags & CON_ENABLED))
|
|
+ return false;
|
|
+
|
|
+ if ((flags & CON_SUSPENDED))
|
|
+ return false;
|
|
+
|
|
+ /*
|
|
+ * The usability of a console varies depending on whether
|
|
+ * it is a NOBKL console or not.
|
|
+ */
|
|
+
|
|
+ if (flags & CON_NO_BKL) {
|
|
+ if (have_boot_console)
|
|
+ return false;
|
|
+
|
|
+ } else {
|
|
+ if (!con->write)
|
|
+ return false;
|
|
+ /*
|
|
+ * Console drivers may assume that per-cpu resources have
|
|
+ * been allocated. So unless they're explicitly marked as
|
|
+ * being able to cope (CON_ANYTIME) don't call them until
|
|
+ * this CPU is officially up.
|
|
+ */
|
|
+ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_kthread_wake - Wake up a printk thread
|
|
+ * @con: Console to operate on
|
|
+ */
|
|
+static inline void cons_kthread_wake(struct console *con)
|
|
+{
|
|
+ rcuwait_wake_up(&con->rcuwait);
|
|
+}
|
|
+
|
|
#else
|
|
|
|
#define PRINTK_PREFIX_MAX 0
|
|
#define PRINTK_MESSAGE_MAX 0
|
|
#define PRINTKRB_RECORD_MAX 0
|
|
|
|
+static inline void cons_kthread_wake(struct console *con) { }
|
|
+static inline void cons_kthread_create(struct console *con) { }
|
|
+#define printk_threads_enabled (false)
|
|
+
|
|
/*
|
|
* In !PRINTK builds we still export console_sem
|
|
* semaphore and some of console functions (console_unlock()/etc.), so
|
|
@@ -76,8 +159,15 @@ u16 printk_parse_prefix(const char *text, int *level,
|
|
#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
|
|
|
|
static inline bool printk_percpu_data_ready(void) { return false; }
|
|
+static inline bool cons_nobkl_init(struct console *con) { return true; }
|
|
+static inline void cons_nobkl_cleanup(struct console *con) { }
|
|
+static inline bool console_is_usable(struct console *con, short flags) { return false; }
|
|
+static inline void cons_force_seq(struct console *con, u64 seq) { }
|
|
+
|
|
#endif /* CONFIG_PRINTK */
|
|
|
|
+extern bool have_boot_console;
|
|
+
|
|
/**
|
|
* struct printk_buffers - Buffers to read/format/output printk messages.
|
|
* @outbuf: After formatting, contains text to output.
|
|
@@ -103,3 +193,28 @@ struct printk_message {
|
|
u64 seq;
|
|
unsigned long dropped;
|
|
};
|
|
+
|
|
+/**
|
|
+ * struct cons_context_data - console context data
|
|
+ * @wctxt: Write context per priority level
|
|
+ * @pbufs: Buffer for storing the text
|
|
+ *
|
|
+ * Used for early boot and for per CPU data.
|
|
+ *
|
|
+ * The write contexts are allocated to avoid having them on stack, e.g. in
|
|
+ * warn() or panic().
|
|
+ */
|
|
+struct cons_context_data {
|
|
+ struct cons_write_context wctxt[CONS_PRIO_MAX];
|
|
+ struct printk_buffers pbufs;
|
|
+};
|
|
+
|
|
+bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
|
|
+ bool is_extended, bool may_supress);
|
|
+
|
|
+#ifdef CONFIG_PRINTK
|
|
+
|
|
+void console_prepend_dropped(struct printk_message *pmsg,
|
|
+ unsigned long dropped);
|
|
+
|
|
+#endif
|
|
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
|
|
index fd0c9f913940a..e2466366d4f84 100644
|
|
--- a/kernel/printk/printk.c
|
|
+++ b/kernel/printk/printk.c
|
|
@@ -442,6 +442,21 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
|
|
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
|
|
static DEFINE_MUTEX(syslog_lock);
|
|
|
|
+/*
|
|
+ * Specifies if a BKL console was ever registered. Used to determine if the
|
|
+ * console lock/unlock dance is needed for console printing.
|
|
+ */
|
|
+bool have_bkl_console;
|
|
+
|
|
+/*
|
|
+ * Specifies if a boot console is registered. Used to determine if NOBKL
|
|
+ * consoles may be used since NOBKL consoles cannot synchronize with boot
|
|
+ * consoles.
|
|
+ */
|
|
+bool have_boot_console;
|
|
+
|
|
+static int unregister_console_locked(struct console *console);
|
|
+
|
|
#ifdef CONFIG_PRINTK
|
|
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
|
/* All 3 protected by @syslog_lock. */
|
|
@@ -492,7 +507,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
|
|
|
|
static struct printk_ringbuffer printk_rb_dynamic;
|
|
|
|
-static struct printk_ringbuffer *prb = &printk_rb_static;
|
|
+struct printk_ringbuffer *prb = &printk_rb_static;
|
|
|
|
/*
|
|
* We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
|
|
@@ -696,9 +711,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
|
|
return len;
|
|
}
|
|
|
|
-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
|
|
- bool is_extended, bool may_supress);
|
|
-
|
|
/* /dev/kmsg - userspace message inject/listen interface */
|
|
struct devkmsg_user {
|
|
atomic64_t seq;
|
|
@@ -1100,7 +1112,19 @@ static inline void log_buf_add_cpu(void) {}
|
|
|
|
static void __init set_percpu_data_ready(void)
|
|
{
|
|
+ struct hlist_node *tmp;
|
|
+ struct console *con;
|
|
+
|
|
+ console_list_lock();
|
|
+
|
|
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
|
|
+ if (!cons_alloc_percpu_data(con))
|
|
+ unregister_console_locked(con);
|
|
+ }
|
|
+
|
|
__printk_percpu_data_ready = true;
|
|
+
|
|
+ console_list_unlock();
|
|
}
|
|
|
|
static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
|
|
@@ -2276,6 +2300,7 @@ asmlinkage int vprintk_emit(int facility, int level,
|
|
const struct dev_printk_info *dev_info,
|
|
const char *fmt, va_list args)
|
|
{
|
|
+ struct cons_write_context wctxt = { };
|
|
int printed_len;
|
|
bool in_sched = false;
|
|
|
|
@@ -2296,16 +2321,25 @@ asmlinkage int vprintk_emit(int facility, int level,
|
|
|
|
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
|
|
|
|
+ /*
|
|
+ * The caller may be holding system-critical or
|
|
+ * timing-sensitive locks. Disable preemption during
|
|
+ * printing of all remaining records to all consoles so that
|
|
+ * this context can return as soon as possible. Hopefully
|
|
+ * another printk() caller will take over the printing.
|
|
+ */
|
|
+ preempt_disable();
|
|
+
|
|
+ /*
|
|
+ * Flush the non-BKL consoles. This only leads to direct atomic
|
|
+ * printing for non-BKL consoles that do not have a printer
|
|
+ * thread available. Otherwise the printer thread will perform
|
|
+ * the printing.
|
|
+ */
|
|
+ cons_atomic_flush(&wctxt, true);
|
|
+
|
|
/* If called from the scheduler, we can not call up(). */
|
|
- if (!in_sched) {
|
|
- /*
|
|
- * The caller may be holding system-critical or
|
|
- * timing-sensitive locks. Disable preemption during
|
|
- * printing of all remaining records to all consoles so that
|
|
- * this context can return as soon as possible. Hopefully
|
|
- * another printk() caller will take over the printing.
|
|
- */
|
|
- preempt_disable();
|
|
+ if (!in_sched && have_bkl_console && !IS_ENABLED(CONFIG_PREEMPT_RT)) {
|
|
/*
|
|
* Try to acquire and then immediately release the console
|
|
* semaphore. The release will print out buffers. With the
|
|
@@ -2314,10 +2348,15 @@ asmlinkage int vprintk_emit(int facility, int level,
|
|
*/
|
|
if (console_trylock_spinning())
|
|
console_unlock();
|
|
- preempt_enable();
|
|
}
|
|
|
|
- wake_up_klogd();
|
|
+ preempt_enable();
|
|
+
|
|
+ cons_wake_threads();
|
|
+ if (in_sched)
|
|
+ defer_console_output();
|
|
+ else
|
|
+ wake_up_klogd();
|
|
return printed_len;
|
|
}
|
|
EXPORT_SYMBOL(vprintk_emit);
|
|
@@ -2556,10 +2595,26 @@ MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to hig
|
|
*/
|
|
void suspend_console(void)
|
|
{
|
|
+ struct console *con;
|
|
+
|
|
if (!console_suspend_enabled)
|
|
return;
|
|
pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
|
|
pr_flush(1000, true);
|
|
+
|
|
+ console_list_lock();
|
|
+ for_each_console(con)
|
|
+ console_srcu_write_flags(con, con->flags | CON_SUSPENDED);
|
|
+ console_list_unlock();
|
|
+
|
|
+ /*
|
|
+ * Ensure that all SRCU list walks have completed. All printing
|
|
+ * contexts must be able to see that they are suspended so that it
|
|
+ * is guaranteed that all printing has stopped when this function
|
|
+ * completes.
|
|
+ */
|
|
+ synchronize_srcu(&console_srcu);
|
|
+
|
|
console_lock();
|
|
console_suspended = 1;
|
|
up_console_sem();
|
|
@@ -2567,11 +2622,39 @@ void suspend_console(void)
|
|
|
|
void resume_console(void)
|
|
{
|
|
+ struct console *con;
|
|
+ short flags;
|
|
+ int cookie;
|
|
+
|
|
if (!console_suspend_enabled)
|
|
return;
|
|
down_console_sem();
|
|
console_suspended = 0;
|
|
console_unlock();
|
|
+
|
|
+ console_list_lock();
|
|
+ for_each_console(con)
|
|
+ console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED);
|
|
+ console_list_unlock();
|
|
+
|
|
+ /*
|
|
+ * Ensure that all SRCU list walks have completed. All printing
|
|
+ * contexts must be able to see they are no longer suspended so
|
|
+ * that they are guaranteed to wake up and resume printing.
|
|
+ */
|
|
+ synchronize_srcu(&console_srcu);
|
|
+
|
|
+ cookie = console_srcu_read_lock();
|
|
+ for_each_console_srcu(con) {
|
|
+ flags = console_srcu_read_flags(con);
|
|
+ if (flags & CON_NO_BKL)
|
|
+ cons_kthread_wake(con);
|
|
+ }
|
|
+ console_srcu_read_unlock(cookie);
|
|
+
|
|
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && have_bkl_console)
|
|
+ wake_up_interruptible(&log_wait);
|
|
+
|
|
pr_flush(1000, true);
|
|
}
|
|
|
|
@@ -2586,7 +2669,7 @@ void resume_console(void)
|
|
*/
|
|
static int console_cpu_notify(unsigned int cpu)
|
|
{
|
|
- if (!cpuhp_tasks_frozen) {
|
|
+ if (!cpuhp_tasks_frozen && have_bkl_console) {
|
|
/* If trylock fails, someone else is doing the printing */
|
|
if (console_trylock())
|
|
console_unlock();
|
|
@@ -2661,33 +2744,6 @@ static bool abandon_console_lock_in_panic(void)
|
|
return atomic_read(&panic_cpu) != raw_smp_processor_id();
|
|
}
|
|
|
|
-/*
|
|
- * Check if the given console is currently capable and allowed to print
|
|
- * records.
|
|
- *
|
|
- * Requires the console_srcu_read_lock.
|
|
- */
|
|
-static inline bool console_is_usable(struct console *con)
|
|
-{
|
|
- short flags = console_srcu_read_flags(con);
|
|
-
|
|
- if (!(flags & CON_ENABLED))
|
|
- return false;
|
|
-
|
|
- if (!con->write)
|
|
- return false;
|
|
-
|
|
- /*
|
|
- * Console drivers may assume that per-cpu resources have been
|
|
- * allocated. So unless they're explicitly marked as being able to
|
|
- * cope (CON_ANYTIME) don't call them until this CPU is officially up.
|
|
- */
|
|
- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
|
|
- return false;
|
|
-
|
|
- return true;
|
|
-}
|
|
-
|
|
static void __console_unlock(void)
|
|
{
|
|
console_locked = 0;
|
|
@@ -2709,7 +2765,7 @@ static void __console_unlock(void)
|
|
* If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
|
|
*/
|
|
#ifdef CONFIG_PRINTK
|
|
-static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
|
|
+void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
|
|
{
|
|
struct printk_buffers *pbufs = pmsg->pbufs;
|
|
const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
|
|
@@ -2741,7 +2797,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d
|
|
pmsg->outbuf_len += len;
|
|
}
|
|
#else
|
|
-#define console_prepend_dropped(pmsg, dropped)
|
|
+static inline void console_prepend_dropped(struct printk_message *pmsg,
|
|
+ unsigned long dropped) { }
|
|
#endif /* CONFIG_PRINTK */
|
|
|
|
/*
|
|
@@ -2763,8 +2820,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d
|
|
* of @pmsg are valid. (See the documentation of struct printk_message
|
|
* for information about the @pmsg fields.)
|
|
*/
|
|
-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
|
|
- bool is_extended, bool may_suppress)
|
|
+bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
|
|
+ bool is_extended, bool may_suppress)
|
|
{
|
|
static int panic_console_dropped;
|
|
|
|
@@ -2933,9 +2990,14 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
|
|
|
|
cookie = console_srcu_read_lock();
|
|
for_each_console_srcu(con) {
|
|
+ short flags = console_srcu_read_flags(con);
|
|
bool progress;
|
|
|
|
- if (!console_is_usable(con))
|
|
+ /* console_flush_all() is only for legacy consoles. */
|
|
+ if (flags & CON_NO_BKL)
|
|
+ continue;
|
|
+
|
|
+ if (!console_is_usable(con, flags))
|
|
continue;
|
|
any_usable = true;
|
|
|
|
@@ -2973,30 +3035,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
|
|
return false;
|
|
}
|
|
|
|
-/**
|
|
- * console_unlock - unblock the console subsystem from printing
|
|
- *
|
|
- * Releases the console_lock which the caller holds to block printing of
|
|
- * the console subsystem.
|
|
- *
|
|
- * While the console_lock was held, console output may have been buffered
|
|
- * by printk(). If this is the case, console_unlock(); emits
|
|
- * the output prior to releasing the lock.
|
|
- *
|
|
- * console_unlock(); may be called from any context.
|
|
- */
|
|
-void console_unlock(void)
|
|
+static u64 console_flush_and_unlock(void)
|
|
{
|
|
bool do_cond_resched;
|
|
bool handover;
|
|
bool flushed;
|
|
u64 next_seq;
|
|
|
|
- if (console_suspended) {
|
|
- up_console_sem();
|
|
- return;
|
|
- }
|
|
-
|
|
/*
|
|
* Console drivers are called with interrupts disabled, so
|
|
* @console_may_schedule should be cleared before; however, we may
|
|
@@ -3033,6 +3078,39 @@ void console_unlock(void)
|
|
* fails, another context is already handling the printing.
|
|
*/
|
|
} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
|
|
+
|
|
+ return next_seq;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * console_unlock - unblock the console subsystem from printing
|
|
+ *
|
|
+ * Releases the console_lock which the caller holds to block printing of
|
|
+ * the console subsystem.
|
|
+ *
|
|
+ * While the console_lock was held, console output may have been buffered
|
|
+ * by printk(). If this is the case, console_unlock(); emits
|
|
+ * the output prior to releasing the lock.
|
|
+ *
|
|
+ * console_unlock(); may be called from any context.
|
|
+ */
|
|
+void console_unlock(void)
|
|
+{
|
|
+ if (console_suspended) {
|
|
+ up_console_sem();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * PREEMPT_RT relies on kthread and atomic consoles for printing.
|
|
+ * It never attempts to print from console_unlock().
|
|
+ */
|
|
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
|
|
+ __console_unlock();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ console_flush_and_unlock();
|
|
}
|
|
EXPORT_SYMBOL(console_unlock);
|
|
|
|
@@ -3057,6 +3135,9 @@ void console_unblank(void)
|
|
struct console *c;
|
|
int cookie;
|
|
|
|
+ if (!have_bkl_console)
|
|
+ return;
|
|
+
|
|
/*
|
|
* Stop console printing because the unblank() callback may
|
|
* assume the console is not within its write() callback.
|
|
@@ -3065,6 +3146,10 @@ void console_unblank(void)
|
|
* In that case, attempt a trylock as best-effort.
|
|
*/
|
|
if (oops_in_progress) {
|
|
+ /* Semaphores are not NMI-safe. */
|
|
+ if (in_nmi())
|
|
+ return;
|
|
+
|
|
if (down_trylock_console_sem() != 0)
|
|
return;
|
|
} else
|
|
@@ -3094,23 +3179,46 @@ void console_unblank(void)
|
|
*/
|
|
void console_flush_on_panic(enum con_flush_mode mode)
|
|
{
|
|
+ struct console *c;
|
|
+ short flags;
|
|
+ int cookie;
|
|
+ u64 seq;
|
|
+
|
|
+ seq = prb_first_valid_seq(prb);
|
|
+
|
|
+ /*
|
|
+ * Safely flush the atomic consoles before trying to flush any
|
|
+ * BKL/legacy consoles.
|
|
+ */
|
|
+ if (mode == CONSOLE_REPLAY_ALL) {
|
|
+ cookie = console_srcu_read_lock();
|
|
+ for_each_console_srcu(c) {
|
|
+ flags = console_srcu_read_flags(c);
|
|
+ if (flags & CON_NO_BKL)
|
|
+ cons_force_seq(c, seq);
|
|
+ }
|
|
+ console_srcu_read_unlock(cookie);
|
|
+ }
|
|
+ cons_atomic_flush(NULL, true);
|
|
+
|
|
+ if (!have_bkl_console)
|
|
+ return;
|
|
+
|
|
/*
|
|
* If someone else is holding the console lock, trylock will fail
|
|
* and may_schedule may be set. Ignore and proceed to unlock so
|
|
* that messages are flushed out. As this can be called from any
|
|
* context and we don't want to get preempted while flushing,
|
|
* ensure may_schedule is cleared.
|
|
+ *
|
|
+ * Since semaphores are not NMI-safe, the console lock must be
|
|
+ * ignored if the panic is in NMI context.
|
|
*/
|
|
- console_trylock();
|
|
+ if (!in_nmi())
|
|
+ console_trylock();
|
|
console_may_schedule = 0;
|
|
|
|
if (mode == CONSOLE_REPLAY_ALL) {
|
|
- struct console *c;
|
|
- int cookie;
|
|
- u64 seq;
|
|
-
|
|
- seq = prb_first_valid_seq(prb);
|
|
-
|
|
cookie = console_srcu_read_lock();
|
|
for_each_console_srcu(c) {
|
|
/*
|
|
@@ -3122,7 +3230,8 @@ void console_flush_on_panic(enum con_flush_mode mode)
|
|
}
|
|
console_srcu_read_unlock(cookie);
|
|
}
|
|
- console_unlock();
|
|
+ if (!in_nmi())
|
|
+ console_unlock();
|
|
}
|
|
|
|
/*
|
|
@@ -3179,13 +3288,118 @@ EXPORT_SYMBOL(console_stop);
|
|
|
|
void console_start(struct console *console)
|
|
{
|
|
+ short flags;
|
|
+
|
|
console_list_lock();
|
|
console_srcu_write_flags(console, console->flags | CON_ENABLED);
|
|
+ flags = console->flags;
|
|
console_list_unlock();
|
|
+
|
|
+ /*
|
|
+ * Ensure that all SRCU list walks have completed. The related
|
|
+ * printing context must be able to see it is enabled so that
|
|
+ * it is guaranteed to wake up and resume printing.
|
|
+ */
|
|
+ synchronize_srcu(&console_srcu);
|
|
+
|
|
+ if (flags & CON_NO_BKL)
|
|
+ cons_kthread_wake(console);
|
|
+ else if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ wake_up_interruptible(&log_wait);
|
|
+
|
|
__pr_flush(console, 1000, true);
|
|
}
|
|
EXPORT_SYMBOL(console_start);
|
|
|
|
+static struct task_struct *console_bkl_kthread;
|
|
+
|
|
+static bool printer_should_wake(u64 seq)
|
|
+{
|
|
+ bool available = false;
|
|
+ struct console *con;
|
|
+ int cookie;
|
|
+
|
|
+ if (kthread_should_stop())
|
|
+ return true;
|
|
+
|
|
+ cookie = console_srcu_read_lock();
|
|
+ for_each_console_srcu(con) {
|
|
+ short flags = console_srcu_read_flags(con);
|
|
+
|
|
+ if (flags & CON_NO_BKL)
|
|
+ continue;
|
|
+ if (!console_is_usable(con, flags))
|
|
+ continue;
|
|
+ /*
|
|
+ * It is safe to read @seq because only this
|
|
+ * thread context updates @seq.
|
|
+ */
|
|
+ if (prb_read_valid(prb, con->seq, NULL)) {
|
|
+ available = true;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ console_srcu_read_unlock(cookie);
|
|
+
|
|
+ return available;
|
|
+}
|
|
+
|
|
+static int console_bkl_kthread_func(void *unused)
|
|
+{
|
|
+ u64 seq = 0;
|
|
+ int error;
|
|
+
|
|
+ for (;;) {
|
|
+ error = wait_event_interruptible(log_wait, printer_should_wake(seq));
|
|
+
|
|
+ if (kthread_should_stop())
|
|
+ break;
|
|
+
|
|
+ if (error)
|
|
+ continue;
|
|
+
|
|
+ console_lock();
|
|
+ if (console_suspended)
|
|
+ up_console_sem();
|
|
+ else
|
|
+ seq = console_flush_and_unlock();
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void console_bkl_kthread_create(void)
|
|
+{
|
|
+ struct task_struct *kt;
|
|
+ struct console *c;
|
|
+
|
|
+ lockdep_assert_held(&console_mutex);
|
|
+
|
|
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
|
|
+ return;
|
|
+
|
|
+ if (!printk_threads_enabled || console_bkl_kthread)
|
|
+ return;
|
|
+
|
|
+ for_each_console(c) {
|
|
+ if (c->flags & CON_BOOT)
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ kt = kthread_run(console_bkl_kthread_func, NULL, "pr/bkl");
|
|
+ if (IS_ERR(kt)) {
|
|
+ pr_err("unable to start BKL printing thread\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ console_bkl_kthread = kt;
|
|
+
|
|
+ /*
|
|
+ * It is important that console printing threads are scheduled
|
|
+ * shortly after a printk call and with generous runtime budgets.
|
|
+ */
|
|
+ sched_set_normal(console_bkl_kthread, -20);
|
|
+}
|
|
+
|
|
static int __read_mostly keep_bootcon;
|
|
|
|
static int __init keep_bootcon_setup(char *str)
|
|
@@ -3269,11 +3483,6 @@ static void try_enable_default_console(struct console *newcon)
|
|
newcon->flags |= CON_CONSDEV;
|
|
}
|
|
|
|
-#define con_printk(lvl, con, fmt, ...) \
|
|
- printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \
|
|
- (con->flags & CON_BOOT) ? "boot" : "", \
|
|
- con->name, con->index, ##__VA_ARGS__)
|
|
-
|
|
static void console_init_seq(struct console *newcon, bool bootcon_registered)
|
|
{
|
|
struct console *con;
|
|
@@ -3338,8 +3547,6 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered)
|
|
#define console_first() \
|
|
hlist_entry(console_list.first, struct console, node)
|
|
|
|
-static int unregister_console_locked(struct console *console);
|
|
-
|
|
/*
|
|
* The console driver calls this routine during kernel initialization
|
|
* to register the console printing procedure with printk() and to
|
|
@@ -3431,6 +3638,16 @@ void register_console(struct console *newcon)
|
|
newcon->dropped = 0;
|
|
console_init_seq(newcon, bootcon_registered);
|
|
|
|
+ if (!(newcon->flags & CON_NO_BKL)) {
|
|
+ have_bkl_console = true;
|
|
+ console_bkl_kthread_create();
|
|
+ } else if (!cons_nobkl_init(newcon)) {
|
|
+ goto unlock;
|
|
+ }
|
|
+
|
|
+ if (newcon->flags & CON_BOOT)
|
|
+ have_boot_console = true;
|
|
+
|
|
/*
|
|
* Put this console in the list - keep the
|
|
* preferred driver at the head of the list.
|
|
@@ -3474,6 +3691,9 @@ void register_console(struct console *newcon)
|
|
if (con->flags & CON_BOOT)
|
|
unregister_console_locked(con);
|
|
}
|
|
+
|
|
+ /* All boot consoles have been unregistered. */
|
|
+ have_boot_console = false;
|
|
}
|
|
unlock:
|
|
console_list_unlock();
|
|
@@ -3483,11 +3703,13 @@ EXPORT_SYMBOL(register_console);
|
|
/* Must be called under console_list_lock(). */
|
|
static int unregister_console_locked(struct console *console)
|
|
{
|
|
+ struct console *c;
|
|
+ bool is_boot_con;
|
|
int res;
|
|
|
|
lockdep_assert_console_list_lock_held();
|
|
|
|
- con_printk(KERN_INFO, console, "disabled\n");
|
|
+ is_boot_con = console->flags & CON_BOOT;
|
|
|
|
res = _braille_unregister_console(console);
|
|
if (res < 0)
|
|
@@ -3495,12 +3717,13 @@ static int unregister_console_locked(struct console *console)
|
|
if (res > 0)
|
|
return 0;
|
|
|
|
- /* Disable it unconditionally */
|
|
- console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
|
|
-
|
|
if (!console_is_registered_locked(console))
|
|
return -ENODEV;
|
|
|
|
+ console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
|
|
+
|
|
+ con_printk(KERN_INFO, console, "disabled\n");
|
|
+
|
|
hlist_del_init_rcu(&console->node);
|
|
|
|
/*
|
|
@@ -3522,11 +3745,23 @@ static int unregister_console_locked(struct console *console)
|
|
*/
|
|
synchronize_srcu(&console_srcu);
|
|
|
|
+ if (console->flags & CON_NO_BKL)
|
|
+ cons_nobkl_cleanup(console);
|
|
+
|
|
console_sysfs_notify();
|
|
|
|
if (console->exit)
|
|
res = console->exit(console);
|
|
|
|
+ /*
|
|
+ * Each time a boot console unregisters, try to start up the printing
|
|
+ * threads. They will only start if this was the last boot console.
|
|
+ */
|
|
+ if (is_boot_con) {
|
|
+ for_each_console(c)
|
|
+ cons_kthread_create(c);
|
|
+ }
|
|
+
|
|
return res;
|
|
}
|
|
|
|
@@ -3688,31 +3923,36 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
|
|
|
|
/*
|
|
* Hold the console_lock to guarantee safe access to
|
|
- * console->seq and to prevent changes to @console_suspended
|
|
- * until all consoles have been processed.
|
|
+ * console->seq.
|
|
*/
|
|
console_lock();
|
|
|
|
cookie = console_srcu_read_lock();
|
|
for_each_console_srcu(c) {
|
|
+ short flags;
|
|
+
|
|
if (con && con != c)
|
|
continue;
|
|
- if (!console_is_usable(c))
|
|
+
|
|
+ flags = console_srcu_read_flags(c);
|
|
+
|
|
+ if (!console_is_usable(c, flags))
|
|
continue;
|
|
+
|
|
+ /*
|
|
+ * Since the console is locked, use this opportunity
|
|
+ * to update console->seq for NOBKL consoles.
|
|
+ */
|
|
+ if (flags & CON_NO_BKL)
|
|
+ c->seq = cons_read_seq(c);
|
|
+
|
|
printk_seq = c->seq;
|
|
if (printk_seq < seq)
|
|
diff += seq - printk_seq;
|
|
}
|
|
console_srcu_read_unlock(cookie);
|
|
|
|
- /*
|
|
- * If consoles are suspended, it cannot be expected that they
|
|
- * make forward progress, so timeout immediately. @diff is
|
|
- * still used to return a valid flush status.
|
|
- */
|
|
- if (console_suspended)
|
|
- remaining = 0;
|
|
- else if (diff != last_diff && reset_on_progress)
|
|
+ if (diff != last_diff && reset_on_progress)
|
|
remaining = timeout_ms;
|
|
|
|
console_unlock();
|
|
@@ -3770,9 +4010,17 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
|
|
int pending = this_cpu_xchg(printk_pending, 0);
|
|
|
|
if (pending & PRINTK_PENDING_OUTPUT) {
|
|
- /* If trylock fails, someone else is doing the printing */
|
|
- if (console_trylock())
|
|
- console_unlock();
|
|
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
|
|
+ /* The BKL thread waits on @log_wait. */
|
|
+ pending |= PRINTK_PENDING_WAKEUP;
|
|
+ } else {
|
|
+ /*
|
|
+ * If trylock fails, some other context
|
|
+ * will do the printing.
|
|
+ */
|
|
+ if (console_trylock())
|
|
+ console_unlock();
|
|
+ }
|
|
}
|
|
|
|
if (pending & PRINTK_PENDING_WAKEUP)
|
|
@@ -3807,33 +4055,58 @@ static void __wake_up_klogd(int val)
|
|
preempt_enable();
|
|
}
|
|
|
|
+/**
|
|
+ * wake_up_klogd - Wake kernel logging daemon
|
|
+ *
|
|
+ * Use this function when new records have been added to the ringbuffer
|
|
+ * and the console printing for those records is handled elsewhere. In
|
|
+ * this case only the logging daemon needs to be woken.
|
|
+ *
|
|
+ * Context: Any context.
|
|
+ */
|
|
void wake_up_klogd(void)
|
|
{
|
|
__wake_up_klogd(PRINTK_PENDING_WAKEUP);
|
|
}
|
|
|
|
+/**
|
|
+ * defer_console_output - Wake kernel logging daemon and trigger
|
|
+ * console printing in a deferred context
|
|
+ *
|
|
+ * Use this function when new records have been added to the ringbuffer
|
|
+ * but the current context is unable to perform the console printing.
|
|
+ * This function also wakes the logging daemon.
|
|
+ *
|
|
+ * Context: Any context.
|
|
+ */
|
|
void defer_console_output(void)
|
|
{
|
|
+ int val = PRINTK_PENDING_WAKEUP;
|
|
+
|
|
/*
|
|
* New messages may have been added directly to the ringbuffer
|
|
* using vprintk_store(), so wake any waiters as well.
|
|
*/
|
|
- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
|
|
+ if (have_bkl_console)
|
|
+ val |= PRINTK_PENDING_OUTPUT;
|
|
+ __wake_up_klogd(val);
|
|
}
|
|
|
|
void printk_trigger_flush(void)
|
|
{
|
|
+ struct cons_write_context wctxt = { };
|
|
+
|
|
+ preempt_disable();
|
|
+ cons_atomic_flush(&wctxt, true);
|
|
+ preempt_enable();
|
|
+
|
|
+ cons_wake_threads();
|
|
defer_console_output();
|
|
}
|
|
|
|
int vprintk_deferred(const char *fmt, va_list args)
|
|
{
|
|
- int r;
|
|
-
|
|
- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
|
|
- defer_console_output();
|
|
-
|
|
- return r;
|
|
+ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
|
|
}
|
|
|
|
int _printk_deferred(const char *fmt, ...)
|
|
diff --git a/kernel/printk/printk_nobkl.c b/kernel/printk/printk_nobkl.c
|
|
new file mode 100644
|
|
index 0000000000000..e0b818a4f8b38
|
|
--- /dev/null
|
|
+++ b/kernel/printk/printk_nobkl.c
|
|
@@ -0,0 +1,1825 @@
|
|
+// SPDX-License-Identifier: GPL-2.0-only
|
|
+// Copyright (C) 2022 Linutronix GmbH, John Ogness
|
|
+// Copyright (C) 2022 Intel, Thomas Gleixner
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/console.h>
|
|
+#include <linux/delay.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/syscore_ops.h>
|
|
+#include "printk_ringbuffer.h"
|
|
+#include "internal.h"
|
|
+/*
|
|
+ * Printk implementation for consoles that do not depend on the BKL style
|
|
+ * console_lock mechanism.
|
|
+ *
|
|
+ * Console is locked on a CPU when state::locked is set and state:cpu ==
|
|
+ * current CPU. This is valid for the current execution context.
|
|
+ *
|
|
+ * Nesting execution contexts on the same CPU can carefully take over
|
|
+ * if the driver allows reentrancy via state::unsafe = false. When the
|
|
+ * interrupted context resumes it checks the state before entering
|
|
+ * an unsafe region and aborts the operation if it detects a takeover.
|
|
+ *
|
|
+ * In case of panic or emergency the nesting context can take over the
|
|
+ * console forcefully. The write callback is then invoked with the unsafe
|
|
+ * flag set in the write context data, which allows the driver side to avoid
|
|
+ * locks and to evaluate the driver state so it can use an emergency path
|
|
+ * or repair the state instead of blindly assuming that it works.
|
|
+ *
|
|
+ * If the interrupted context touches the assigned record buffer after
|
|
+ * takeover, it does not cause harm because at the same execution level
|
|
+ * there is no concurrency on the same CPU. A threaded printer always has
|
|
+ * its own record buffer so it can never interfere with any of the per CPU
|
|
+ * record buffers.
|
|
+ *
|
|
+ * A concurrent writer on a different CPU can request to take over the
|
|
+ * console by:
|
|
+ *
|
|
+ * 1) Carefully writing the desired state into state[REQ]
|
|
+ * if there is no same or higher priority request pending.
|
|
+ * This locks state[REQ] except for higher priority
|
|
+ * waiters.
|
|
+ *
|
|
+ * 2) Setting state[CUR].req_prio unless a same or higher
|
|
+ * priority waiter won the race.
|
|
+ *
|
|
+ * 3) Carefully spin on state[CUR] until that is locked with the
|
|
+ * expected state. When the state is not the expected one then it
|
|
+ * has to verify that state[REQ] is still the same and that
|
|
+ * state[CUR] has not been taken over or unlocked.
|
|
+ *
|
|
+ * The unlocker hands over to state[REQ], but only if state[CUR]
|
|
+ * matches.
|
|
+ *
|
|
+ * In case that the owner does not react on the request and does not make
|
|
+ * observable progress, the waiter will timeout and can then decide to do
|
|
+ * a hostile takeover.
|
|
+ */
|
|
+
|
|
+#define copy_full_state(_dst, _src) do { _dst = _src; } while (0)
|
|
+#define copy_bit_state(_dst, _src) do { _dst.bits = _src.bits; } while (0)
|
|
+
|
|
+#ifdef CONFIG_64BIT
|
|
+#define copy_seq_state64(_dst, _src) do { _dst.seq = _src.seq; } while (0)
|
|
+#else
|
|
+#define copy_seq_state64(_dst, _src) do { } while (0)
|
|
+#endif
|
|
+
|
|
+enum state_selector {
|
|
+ CON_STATE_CUR,
|
|
+ CON_STATE_REQ,
|
|
+};
|
|
+
|
|
+/**
|
|
+ * cons_state_set - Helper function to set the console state
|
|
+ * @con: Console to update
|
|
+ * @which: Selects real state or handover state
|
|
+ * @new: The new state to write
|
|
+ *
|
|
+ * Only to be used when the console is not yet or no longer visible in the
|
|
+ * system. Otherwise use cons_state_try_cmpxchg().
|
|
+ */
|
|
+static inline void cons_state_set(struct console *con, enum state_selector which,
|
|
+ struct cons_state *new)
|
|
+{
|
|
+ atomic_long_set(&ACCESS_PRIVATE(con, atomic_state[which]), new->atom);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_state_read - Helper function to read the console state
|
|
+ * @con: Console to update
|
|
+ * @which: Selects real state or handover state
|
|
+ * @state: The state to store the result
|
|
+ */
|
|
+static inline void cons_state_read(struct console *con, enum state_selector which,
|
|
+ struct cons_state *state)
|
|
+{
|
|
+ state->atom = atomic_long_read(&ACCESS_PRIVATE(con, atomic_state[which]));
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_state_try_cmpxchg() - Helper function for atomic_long_try_cmpxchg() on console state
|
|
+ * @con: Console to update
|
|
+ * @which: Selects real state or handover state
|
|
+ * @old: Old/expected state
|
|
+ * @new: New state
|
|
+ *
|
|
+ * Returns: True on success, false on fail
|
|
+ */
|
|
+static inline bool cons_state_try_cmpxchg(struct console *con,
|
|
+ enum state_selector which,
|
|
+ struct cons_state *old,
|
|
+ struct cons_state *new)
|
|
+{
|
|
+ return atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_state[which]),
|
|
+ &old->atom, new->atom);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_state_full_match - Check whether the full state matches
|
|
+ * @cur: The state to check
|
|
+ * @prev: The previous state
|
|
+ *
|
|
+ * Returns: True if matching, false otherwise.
|
|
+ *
|
|
+ * Check the full state including state::seq on 64bit. For take over
|
|
+ * detection.
|
|
+ */
|
|
+static inline bool cons_state_full_match(struct cons_state cur,
|
|
+ struct cons_state prev)
|
|
+{
|
|
+ /*
|
|
+ * req_prio can be set by a concurrent writer for friendly
|
|
+ * handover. Ignore it in the comparison.
|
|
+ */
|
|
+ cur.req_prio = prev.req_prio;
|
|
+ return cur.atom == prev.atom;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_state_bits_match - Check for matching state bits
|
|
+ * @cur: The state to check
|
|
+ * @prev: The previous state
|
|
+ *
|
|
+ * Returns: True if state matches, false otherwise.
|
|
+ *
|
|
+ * Contrary to cons_state_full_match this checks only the bits and ignores
|
|
+ * a sequence change on 64bits. On 32bit the two functions are identical.
|
|
+ */
|
|
+static inline bool cons_state_bits_match(struct cons_state cur, struct cons_state prev)
|
|
+{
|
|
+ /*
|
|
+ * req_prio can be set by a concurrent writer for friendly
|
|
+ * handover. Ignore it in the comparison.
|
|
+ */
|
|
+ cur.req_prio = prev.req_prio;
|
|
+ return cur.bits == prev.bits;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_check_panic - Check whether a remote CPU is in panic
|
|
+ *
|
|
+ * Returns: True if a remote CPU is in panic, false otherwise.
|
|
+ */
|
|
+static inline bool cons_check_panic(void)
|
|
+{
|
|
+ unsigned int pcpu = atomic_read(&panic_cpu);
|
|
+
|
|
+ return pcpu != PANIC_CPU_INVALID && pcpu != smp_processor_id();
|
|
+}
|
|
+
|
|
+static struct cons_context_data early_cons_ctxt_data __initdata;
|
|
+
|
|
+/**
+ * cons_context_set_pbufs - Set the output text buffer for the current context
+ * @ctxt:	Pointer to the acquire context
+ *
+ * Buffer selection:
+ * 1) Early boot uses the global (initdata) buffer
+ * 2) Printer threads use the dynamically allocated per-console buffers
+ * 3) All other contexts use the per CPU buffers
+ *
+ * This guarantees that there is never concurrency on the output records.
+ * Early boot and per CPU nesting are not a problem. The takeover logic
+ * tells the interrupted context that the buffer has been overwritten.
+ *
+ * There are two critical regions that matter:
+ *
+ * 1) Context is filling the buffer with a record. After interruption
+ *    it continues to sprintf() the record and before it goes to
+ *    write it out, it checks the state, notices the takeover, discards
+ *    the content and backs out.
+ *
+ * 2) Context is in an unsafe critical region in the driver. After
+ *    interruption it might read overwritten data from the output
+ *    buffer. When it leaves the critical region it notices and backs
+ *    out. Hostile takeovers in driver critical regions are best effort
+ *    and there is not much that can be done about that.
+ */
+static __ref void cons_context_set_pbufs(struct cons_context *ctxt)
|
|
+{
|
|
+ struct console *con = ctxt->console;
|
|
+
|
|
+ /* Thread context or early boot? */
|
|
+ if (ctxt->thread)
|
|
+ ctxt->pbufs = con->thread_pbufs;
|
|
+ else if (!con->pcpu_data)
|
|
+ ctxt->pbufs = &early_cons_ctxt_data.pbufs;
|
|
+ else
|
|
+ ctxt->pbufs = &(this_cpu_ptr(con->pcpu_data)->pbufs);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_seq_init - Helper function to initialize the console sequence
|
|
+ * @con: Console to work on
|
|
+ *
|
|
+ * Set @con->atomic_seq to the starting record, or if that record no
|
|
+ * longer exists, the oldest available record. For init only. Do not
|
|
+ * use for runtime updates.
|
|
+ */
|
|
+static void cons_seq_init(struct console *con)
|
|
+{
|
|
+ u32 seq = (u32)max_t(u64, con->seq, prb_first_valid_seq(prb));
|
|
+#ifdef CONFIG_64BIT
|
|
+ struct cons_state state;
|
|
+
|
|
+ cons_state_read(con, CON_STATE_CUR, &state);
|
|
+ state.seq = seq;
|
|
+ cons_state_set(con, CON_STATE_CUR, &state);
|
|
+#else
|
|
+ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq);
|
|
+#endif
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_force_seq - Force a specified sequence number for a console
|
|
+ * @con: Console to work on
|
|
+ * @seq: Sequence number to force
|
|
+ *
|
|
+ * This function is only intended to be used in emergency situations. In
|
|
+ * particular: console_flush_on_panic(CONSOLE_REPLAY_ALL)
|
|
+ */
|
|
+void cons_force_seq(struct console *con, u64 seq)
|
|
+{
|
|
+#ifdef CONFIG_64BIT
|
|
+ struct cons_state old;
|
|
+ struct cons_state new;
|
|
+
|
|
+ do {
|
|
+ cons_state_read(con, CON_STATE_CUR, &old);
|
|
+ copy_bit_state(new, old);
|
|
+ new.seq = seq;
|
|
+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new));
|
|
+#else
|
|
+ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq);
|
|
+#endif
|
|
+}
|
|
+
|
|
+static inline u64 cons_expand_seq(u64 seq)
|
|
+{
|
|
+ u64 rbseq;
|
|
+
|
|
+ /*
|
|
+ * The provided sequence is only the lower 32bits of the ringbuffer
|
|
+ * sequence. It needs to be expanded to 64bit. Get the next sequence
|
|
+ * number from the ringbuffer and fold it.
|
|
+ */
|
|
+ rbseq = prb_next_seq(prb);
|
|
+ seq = rbseq - ((u32)rbseq - (u32)seq);
|
|
+
|
|
+ return seq;
|
|
+}
|
|
+
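/*
 * Illustrative sketch, not part of the patch: the folding performed by
 * cons_expand_seq() above, written with stdint types. Given the 64bit
 * sequence of the next ringbuffer record and a stored 32bit console
 * sequence that is at most 2^31 records behind it, the full 64bit value
 * is recovered by computing the distance in 32bit space and subtracting
 * it in 64bit space.
 */
#include <stdint.h>

static uint64_t demo_expand_seq(uint64_t rb_next_seq, uint32_t stored_seq)
{
	return rb_next_seq - (uint32_t)((uint32_t)rb_next_seq - stored_seq);
}

/*
 * Example: rb_next_seq = 0x100000010, stored_seq = 0xfffffff0.
 * The 32bit distance is 0x20, so the expanded sequence is 0xfffffff0
 * as a 64bit value, i.e. 0x20 records behind the ringbuffer head.
 */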
|
|
+/**
|
|
+ * cons_read_seq - Read the current console sequence
|
|
+ * @con: Console to read the sequence of
|
|
+ *
|
|
+ * Returns: Sequence number of the next record to print on @con.
|
|
+ */
|
|
+u64 cons_read_seq(struct console *con)
|
|
+{
|
|
+ u64 seq;
|
|
+#ifdef CONFIG_64BIT
|
|
+ struct cons_state state;
|
|
+
|
|
+ cons_state_read(con, CON_STATE_CUR, &state);
|
|
+ seq = state.seq;
|
|
+#else
|
|
+ seq = atomic_read(&ACCESS_PRIVATE(con, atomic_seq));
|
|
+#endif
|
|
+ return cons_expand_seq(seq);
|
|
+}
|
|
+
|
|
+/**
+ * cons_context_set_seq - Setup the context with the next sequence to print
+ * @ctxt:	Pointer to an acquire context that contains
+ *		all information about the acquire mode
+ *
+ * On return the retrieved sequence number is stored in ctxt->oldseq.
+ *
+ * The sequence number is safe in forceful takeover situations.
+ *
+ * Either the writer succeeded in updating the sequence before it was
+ * interrupted, or it failed. In the latter case the takeover will print
+ * the same line again.
+ *
+ * The sequence is only the lower 32bits of the ringbuffer sequence. The
+ * ringbuffer must be 2^31 records ahead to get out of sync. This needs
+ * some care when starting a console, i.e. setting the sequence to 0 is
+ * wrong. It has to be set to the oldest valid sequence in the ringbuffer
+ * as that cannot be more than 2^31 records away.
+ *
+ * On 64bit the 32bit sequence is part of console::state, which is saved
+ * in @ctxt->state. This prevents the 32bit update race.
+ */
+static void cons_context_set_seq(struct cons_context *ctxt)
|
|
+{
|
|
+#ifdef CONFIG_64BIT
|
|
+ ctxt->oldseq = ctxt->state.seq;
|
|
+#else
|
|
+ ctxt->oldseq = atomic_read(&ACCESS_PRIVATE(ctxt->console, atomic_seq));
|
|
+#endif
|
|
+ ctxt->oldseq = cons_expand_seq(ctxt->oldseq);
|
|
+ ctxt->newseq = ctxt->oldseq;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_seq_try_update - Try to update the console sequence number
|
|
+ * @ctxt: Pointer to an acquire context that contains
|
|
+ * all information about the acquire mode
|
|
+ *
|
|
+ * Returns: True if the console sequence was updated, false otherwise.
|
|
+ *
|
|
+ * Internal helper as the logic is different on 32bit and 64bit.
|
|
+ *
|
|
+ * On 32 bit the sequence is separate from state and therefore
|
|
+ * subject to a subtle race in the case of hostile takeovers.
|
|
+ *
|
|
+ * On 64 bit the sequence is part of the state and therefore safe
|
|
+ * vs. hostile takeovers.
|
|
+ *
|
|
+ * In case of fail the console has been taken over and @ctxt is
|
|
+ * invalid. Caller has to reacquire the console.
|
|
+ */
|
|
+#ifdef CONFIG_64BIT
|
|
+static bool cons_seq_try_update(struct cons_context *ctxt)
|
|
+{
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state old;
|
|
+ struct cons_state new;
|
|
+
|
|
+ cons_state_read(con, CON_STATE_CUR, &old);
|
|
+ do {
|
|
+ /* Make sure this context is still the owner. */
|
|
+ if (!cons_state_bits_match(old, ctxt->state))
|
|
+ return false;
|
|
+
|
|
+ /* Preserve bit state */
|
|
+ copy_bit_state(new, old);
|
|
+ new.seq = ctxt->newseq;
|
|
+
|
|
+ /*
|
|
+ * Can race with hostile takeover or with a handover
|
|
+ * request.
|
|
+ */
|
|
+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new));
|
|
+
|
|
+ copy_full_state(ctxt->state, new);
|
|
+ ctxt->oldseq = ctxt->newseq;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+#else
|
|
+static bool cons_release(struct cons_context *ctxt);
|
|
+static bool cons_seq_try_update(struct cons_context *ctxt)
|
|
+{
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state state;
|
|
+ int pcpu;
|
|
+ u32 old;
|
|
+ u32 new;
|
|
+
|
|
+	/*
+	 * There is a corner case that needs to be considered here:
+	 *
+	 *	CPU0			CPU1
+	 *	printk()
+	 *	 acquire()		-> emergency
+	 *	 write()		   acquire()
+	 *	 update_seq()
+	 *	  state == OK
+	 *	--> NMI
+	 *				   takeover()
+	 *	<---			   write()
+	 *	 cmpxchg() succeeds	   update_seq()
+	 *				   cmpxchg() fails
+	 *
+	 * There is nothing that can be done about this other than having
+	 * yet another state bit that needs to be tracked and analyzed,
+	 * and even that would not cover the problem completely.
+	 *
+	 * No other scenarios expose such a problem. On same CPU takeovers
+	 * the cmpxchg() always fails on the interrupted context after the
+	 * interrupting context finished printing, but that's fine as it
+	 * does not own the console anymore. The state check after the
+	 * failed cmpxchg prevents that.
+	 */
+ cons_state_read(con, CON_STATE_CUR, &state);
|
|
+ /* Make sure this context is still the owner. */
|
|
+ if (!cons_state_bits_match(state, ctxt->state))
|
|
+ return false;
|
|
+
|
|
+ /*
|
|
+ * Get the original sequence number that was retrieved
|
|
+ * from @con->atomic_seq. @con->atomic_seq should be still
|
|
+ * the same. 32bit truncates. See cons_context_set_seq().
|
|
+ */
|
|
+ old = (u32)ctxt->oldseq;
|
|
+ new = (u32)ctxt->newseq;
|
|
+ if (atomic_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_seq), &old, new)) {
|
|
+ ctxt->oldseq = ctxt->newseq;
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Reread the state. If this context does not own the console anymore
|
|
+ * then it cannot touch the sequence again.
|
|
+ */
|
|
+ cons_state_read(con, CON_STATE_CUR, &state);
|
|
+ if (!cons_state_bits_match(state, ctxt->state))
|
|
+ return false;
|
|
+
|
|
+ pcpu = atomic_read(&panic_cpu);
|
|
+ if (pcpu == smp_processor_id()) {
|
|
+ /*
|
|
+ * This is the panic CPU. Emitting a warning here does not
|
|
+ * help at all. The callchain is clear and the priority is
|
|
+ * to get the messages out. In the worst case duplicated
|
|
+ * ones. That's a job for postprocessing.
|
|
+ */
|
|
+ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), new);
|
|
+ ctxt->oldseq = ctxt->newseq;
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+	/*
+	 * Only emit a warning when this happens outside of a panic
+	 * situation. In a panic it is neither useful nor does it help
+	 * the panic CPU to get the important output written.
+	 */
+ WARN_ON_ONCE(pcpu == PANIC_CPU_INVALID);
|
|
+
|
|
+ cons_release(ctxt);
|
|
+ return false;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/**
|
|
+ * cons_cleanup_handover - Cleanup a handover request
|
|
+ * @ctxt: Pointer to acquire context
|
|
+ *
|
|
+ * @ctxt->hov_state contains the state to clean up
|
|
+ */
|
|
+static void cons_cleanup_handover(struct cons_context *ctxt)
|
|
+{
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state new;
|
|
+
|
|
+ /*
|
|
+ * No loop required. Either hov_state is still the same or
|
|
+ * not.
|
|
+ */
|
|
+ new.atom = 0;
|
|
+ cons_state_try_cmpxchg(con, CON_STATE_REQ, &ctxt->hov_state, &new);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_setup_handover - Setup a handover request
|
|
+ * @ctxt: Pointer to acquire context
|
|
+ *
|
|
+ * Returns: True if a handover request was setup, false otherwise.
|
|
+ *
|
|
+ * On success @ctxt->hov_state contains the requested handover state
|
|
+ *
|
|
+ * On failure this context is not allowed to request a handover from the
|
|
+ * current owner. Reasons would be priority too low or a remote CPU in panic.
|
|
+ * In both cases this context should give up trying to acquire the console.
|
|
+ */
|
|
+static bool cons_setup_handover(struct cons_context *ctxt)
|
|
+{
|
|
+ unsigned int cpu = smp_processor_id();
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state old;
|
|
+ struct cons_state hstate = {
|
|
+ .locked = 1,
|
|
+ .cur_prio = ctxt->prio,
|
|
+ .cpu = cpu,
|
|
+ };
|
|
+
|
|
+ /*
|
|
+ * Try to store hstate in @con->atomic_state[REQ]. This might
|
|
+ * race with a higher priority waiter.
|
|
+ */
|
|
+ cons_state_read(con, CON_STATE_REQ, &old);
|
|
+ do {
|
|
+ if (cons_check_panic())
|
|
+ return false;
|
|
+
|
|
+ /* Same or higher priority waiter exists? */
|
|
+ if (old.cur_prio >= ctxt->prio)
|
|
+ return false;
|
|
+
|
|
+ } while (!cons_state_try_cmpxchg(con, CON_STATE_REQ, &old, &hstate));
|
|
+
|
|
+ /* Save that state for comparison in spinwait */
|
|
+ copy_full_state(ctxt->hov_state, hstate);
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_setup_request - Setup a handover request in state[CUR]
|
|
+ * @ctxt: Pointer to acquire context
|
|
+ * @old: The state that was used to make the decision to spin wait
|
|
+ *
|
|
+ * Returns: True if a handover request was setup in state[CUR], false
|
|
+ * otherwise.
|
|
+ *
|
|
+ * On success @ctxt->req_state contains the request state that was set in
|
|
+ * state[CUR]
|
|
+ *
|
|
+ * On failure this context encountered unexpected state values. This
|
|
+ * context should retry the full handover request setup process (the
|
|
+ * handover request setup by cons_setup_handover() is now invalidated
|
|
+ * and must be performed again).
|
|
+ */
|
|
+static bool cons_setup_request(struct cons_context *ctxt, struct cons_state old)
|
|
+{
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state cur;
|
|
+ struct cons_state new;
|
|
+
|
|
+ /* Now set the request in state[CUR] */
|
|
+ cons_state_read(con, CON_STATE_CUR, &cur);
|
|
+ do {
|
|
+ if (cons_check_panic())
|
|
+ goto cleanup;
|
|
+
|
|
+ /* Bit state changed vs. the decision to spinwait? */
|
|
+ if (!cons_state_bits_match(cur, old))
|
|
+ goto cleanup;
|
|
+
|
|
+ /*
|
|
+ * A higher or equal priority context already setup a
|
|
+ * request?
|
|
+ */
|
|
+ if (cur.req_prio >= ctxt->prio)
|
|
+ goto cleanup;
|
|
+
|
|
+ /* Setup a request for handover. */
|
|
+ copy_full_state(new, cur);
|
|
+ new.req_prio = ctxt->prio;
|
|
+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new));
|
|
+
|
|
+ /* Save that state for comparison in spinwait */
|
|
+ copy_bit_state(ctxt->req_state, new);
|
|
+ return true;
|
|
+
|
|
+cleanup:
|
|
+ cons_cleanup_handover(ctxt);
|
|
+ return false;
|
|
+}
|
|
+
|
|
+/**
+ * cons_try_acquire_spin - Complete the spinwait attempt
+ * @ctxt:	Pointer to an acquire context that contains
+ *		all information about the acquire mode
+ *
+ * @ctxt->hov_state contains the handover state that was set in
+ * state[REQ]
+ * @ctxt->req_state contains the request state that was set in
+ * state[CUR]
+ *
+ * Returns: 0 if successfully locked. -EBUSY on timeout. -EAGAIN on
+ * unexpected state values.
+ *
+ * On success @ctxt->state contains the new state that was set in
+ * state[CUR]
+ *
+ * On -EBUSY failure this context timed out. This context should either
+ * give up or attempt a hostile takeover.
+ *
+ * On -EAGAIN failure this context encountered unexpected state values.
+ * This context should retry the full handover request setup process (the
+ * handover request setup by cons_setup_handover() is now invalidated and
+ * must be performed again).
+ */
+static int cons_try_acquire_spin(struct cons_context *ctxt)
|
|
+{
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state cur;
|
|
+ struct cons_state new;
|
|
+ int err = -EAGAIN;
|
|
+ int timeout;
|
|
+
|
|
+ /* Now wait for the other side to hand over */
|
|
+ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) {
|
|
+ /* Timeout immediately if a remote panic is detected. */
|
|
+ if (cons_check_panic())
|
|
+ break;
|
|
+
|
|
+ cons_state_read(con, CON_STATE_CUR, &cur);
|
|
+
|
|
+ /*
|
|
+ * If the real state of the console matches the handover state
|
|
+ * that this context setup, then the handover was a success
|
|
+ * and this context is now the owner.
|
|
+ *
|
|
+ * Note that this might have raced with a new higher priority
|
|
+ * requester coming in after the lock was handed over.
|
|
+ * However, that requester will see that the owner changes and
|
|
+ * setup a new request for the current owner (this context).
|
|
+ */
|
|
+ if (cons_state_bits_match(cur, ctxt->hov_state))
|
|
+ goto success;
|
|
+
|
|
+ /*
|
|
+ * If state changed since the request was made, give up as
|
|
+ * it is no longer consistent. This must include
|
|
+ * state::req_prio since there could be a higher priority
|
|
+ * request available.
|
|
+ */
|
|
+ if (cur.bits != ctxt->req_state.bits)
|
|
+ goto cleanup;
|
|
+
|
|
+ /*
|
|
+ * Finally check whether the handover state is still
|
|
+ * the same.
|
|
+ */
|
|
+ cons_state_read(con, CON_STATE_REQ, &cur);
|
|
+ if (cur.atom != ctxt->hov_state.atom)
|
|
+ goto cleanup;
|
|
+
|
|
+ /* Account time */
|
|
+ if (timeout > 0)
|
|
+ udelay(1);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Timeout. Cleanup the handover state and carefully try to reset
|
|
+ * req_prio in the real state. The reset is important to ensure
|
|
+ * that the owner does not hand over the lock after this context
|
|
+ * has given up waiting.
|
|
+ */
|
|
+ cons_cleanup_handover(ctxt);
|
|
+
|
|
+ cons_state_read(con, CON_STATE_CUR, &cur);
|
|
+ do {
|
|
+ /*
|
|
+ * The timeout might have raced with the owner coming late
|
|
+ * and handing it over gracefully.
|
|
+ */
|
|
+ if (cons_state_bits_match(cur, ctxt->hov_state))
|
|
+ goto success;
|
|
+
|
|
+ /*
|
|
+ * Validate that the state matches with the state at request
|
|
+ * time. If this check fails, there is already a higher
|
|
+ * priority context waiting or the owner has changed (either
|
|
+ * by higher priority or by hostile takeover). In all fail
|
|
+ * cases this context is no longer in line for a handover to
|
|
+ * take place, so no reset is necessary.
|
|
+ */
|
|
+ if (cur.bits != ctxt->req_state.bits)
|
|
+ goto cleanup;
|
|
+
|
|
+ copy_full_state(new, cur);
|
|
+ new.req_prio = 0;
|
|
+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new));
|
|
+ /* Reset worked. Report timeout. */
|
|
+ return -EBUSY;
|
|
+
|
|
+success:
|
|
+ /* Store the real state */
|
|
+ copy_full_state(ctxt->state, cur);
|
|
+ ctxt->hostile = false;
|
|
+ err = 0;
|
|
+
|
|
+cleanup:
|
|
+ cons_cleanup_handover(ctxt);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __cons_try_acquire - Try to acquire the console for printk output
|
|
+ * @ctxt: Pointer to an acquire context that contains
|
|
+ * all information about the acquire mode
|
|
+ *
|
|
+ * Returns: True if the acquire was successful. False otherwise.
|
|
+ *
|
|
+ * In case of success @ctxt->state contains the acquisition
|
|
+ * state.
|
|
+ *
|
|
+ * In case of fail @ctxt->old_state contains the state
|
|
+ * that was read from @con->state for analysis by the caller.
|
|
+ */
|
|
+static bool __cons_try_acquire(struct cons_context *ctxt)
|
|
+{
|
|
+ unsigned int cpu = smp_processor_id();
|
|
+ struct console *con = ctxt->console;
|
|
+ short flags = console_srcu_read_flags(con);
|
|
+ struct cons_state old;
|
|
+ struct cons_state new;
|
|
+ int err;
|
|
+
|
|
+ if (WARN_ON_ONCE(!(flags & CON_NO_BKL)))
|
|
+ return false;
|
|
+again:
|
|
+ cons_state_read(con, CON_STATE_CUR, &old);
|
|
+
|
|
+ /* Preserve it for the caller and for spinwait */
|
|
+ copy_full_state(ctxt->old_state, old);
|
|
+
|
|
+ if (cons_check_panic())
|
|
+ return false;
|
|
+
|
|
+ /* Set up the new state for takeover */
|
|
+ copy_full_state(new, old);
|
|
+ new.locked = 1;
|
|
+ new.thread = ctxt->thread;
|
|
+ new.cur_prio = ctxt->prio;
|
|
+ new.req_prio = CONS_PRIO_NONE;
|
|
+ new.cpu = cpu;
|
|
+
|
|
+ /* Attempt to acquire it directly if unlocked */
|
|
+ if (!old.locked) {
|
|
+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new))
|
|
+ goto again;
|
|
+
|
|
+ ctxt->hostile = false;
|
|
+ copy_full_state(ctxt->state, new);
|
|
+ goto success;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * A threaded printer context will never spin or perform a
|
|
+ * hostile takeover. The atomic writer will wake the thread
|
|
+ * when it is done with the important output.
|
|
+ */
|
|
+ if (ctxt->thread)
|
|
+ return false;
|
|
+
|
|
+ /*
|
|
+ * If the active context is on the same CPU then there is
|
|
+ * obviously no handshake possible.
|
|
+ */
|
|
+ if (old.cpu == cpu)
|
|
+ goto check_hostile;
|
|
+
|
|
+ /*
|
|
+ * If a handover request with same or higher priority is already
|
|
+ * pending then this context cannot setup a handover request.
|
|
+ */
|
|
+ if (old.req_prio >= ctxt->prio)
|
|
+ goto check_hostile;
|
|
+
|
|
+ /*
|
|
+ * If the caller did not request spin-waiting then performing a
|
|
+ * handover is not an option.
|
|
+ */
|
|
+ if (!ctxt->spinwait)
|
|
+ goto check_hostile;
|
|
+
|
|
+ /*
|
|
+ * Setup the request in state[REQ]. If this fails then this
|
|
+ * context is not allowed to setup a handover request.
|
|
+ */
|
|
+ if (!cons_setup_handover(ctxt))
|
|
+ goto check_hostile;
|
|
+
|
|
+ /*
|
|
+ * Setup the request in state[CUR]. Hand in the state that was
|
|
+ * used to make the decision to spinwait above, for comparison. If
|
|
+ * this fails then unexpected state values were encountered and the
|
|
+ * full request setup process is retried.
|
|
+ */
|
|
+ if (!cons_setup_request(ctxt, old))
|
|
+ goto again;
|
|
+
|
|
+ /*
|
|
+ * Spin-wait to acquire the console. If this fails then unexpected
|
|
+ * state values were encountered (for example, a hostile takeover by
|
|
+ * another context) and the full request setup process is retried.
|
|
+ */
|
|
+ err = cons_try_acquire_spin(ctxt);
|
|
+ if (err) {
|
|
+ if (err == -EAGAIN)
|
|
+ goto again;
|
|
+ goto check_hostile;
|
|
+ }
|
|
+success:
|
|
+ /* Common updates on success */
|
|
+ cons_context_set_seq(ctxt);
|
|
+ cons_context_set_pbufs(ctxt);
|
|
+ return true;
|
|
+
|
|
+check_hostile:
|
|
+ if (!ctxt->hostile)
|
|
+ return false;
|
|
+
|
|
+ if (cons_check_panic())
|
|
+ return false;
|
|
+
|
|
+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new))
|
|
+ goto again;
|
|
+
|
|
+ copy_full_state(ctxt->state, new);
|
|
+ goto success;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_try_acquire - Try to acquire the console for printk output
|
|
+ * @ctxt: Pointer to an acquire context that contains
|
|
+ * all information about the acquire mode
|
|
+ *
|
|
+ * Returns: True if the acquire was successful. False otherwise.
|
|
+ *
|
|
+ * In case of success @ctxt->state contains the acquisition
|
|
+ * state.
|
|
+ *
|
|
+ * In case of fail @ctxt->old_state contains the state
|
|
+ * that was read from @con->state for analysis by the caller.
|
|
+ */
|
|
+static bool cons_try_acquire(struct cons_context *ctxt)
|
|
+{
|
|
+ if (__cons_try_acquire(ctxt))
|
|
+ return true;
|
|
+
|
|
+ ctxt->state.atom = 0;
|
|
+ return false;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __cons_release - Release the console after output is done
|
|
+ * @ctxt: The acquire context that contains the state
|
|
+ * at cons_try_acquire()
|
|
+ *
|
|
+ * Returns: True if the release was regular
|
|
+ *
|
|
+ * False if the console is in an unusable state, was handed over
+ * with handshake, or was hostilely taken over without handshake.
|
|
+ *
|
|
+ * The return value tells the caller whether it needs to evaluate further
|
|
+ * printing.
|
|
+ */
|
|
+static bool __cons_release(struct cons_context *ctxt)
|
|
+{
|
|
+ struct console *con = ctxt->console;
|
|
+ short flags = console_srcu_read_flags(con);
|
|
+ struct cons_state hstate;
|
|
+ struct cons_state old;
|
|
+ struct cons_state new;
|
|
+
|
|
+ if (WARN_ON_ONCE(!(flags & CON_NO_BKL)))
|
|
+ return false;
|
|
+
|
|
+ cons_state_read(con, CON_STATE_CUR, &old);
|
|
+again:
|
|
+ if (!cons_state_bits_match(old, ctxt->state))
|
|
+ return false;
|
|
+
|
|
+ /* Release it directly when no handover request is pending. */
|
|
+ if (!old.req_prio)
|
|
+ goto unlock;
|
|
+
|
|
+ /* Read the handover target state */
|
|
+ cons_state_read(con, CON_STATE_REQ, &hstate);
|
|
+
|
|
+ /* If the waiter gave up hstate is 0 */
|
|
+ if (!hstate.atom)
|
|
+ goto unlock;
|
|
+
|
|
+ /*
|
|
+ * If a higher priority waiter raced against a lower priority
|
|
+ * waiter then unlock instead of handing over to either. The
|
|
+ * higher priority waiter will notice the updated state and
|
|
+ * retry.
|
|
+ */
|
|
+ if (hstate.cur_prio != old.req_prio)
|
|
+ goto unlock;
|
|
+
|
|
+ /* Switch the state and preserve the sequence on 64bit */
|
|
+ copy_bit_state(new, hstate);
|
|
+ copy_seq_state64(new, old);
|
|
+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new))
|
|
+ goto again;
|
|
+
|
|
+ return true;
|
|
+
|
|
+unlock:
|
|
+ /* Clear the state and preserve the sequence on 64bit */
|
|
+ new.atom = 0;
|
|
+ copy_seq_state64(new, old);
|
|
+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new))
|
|
+ goto again;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+bool printk_threads_enabled __ro_after_init;
|
|
+static bool printk_force_atomic __initdata;
|
|
+
|
|
+/**
|
|
+ * cons_release - Release the console after output is done
|
|
+ * @ctxt: The acquire context that contains the state
|
|
+ * at cons_try_acquire()
|
|
+ *
|
|
+ * Returns: True if the release was regular
|
|
+ *
|
|
+ * False if the console is in an unusable state, was handed over
+ * with handshake, or was hostilely taken over without handshake.
|
|
+ *
|
|
+ * The return value tells the caller whether it needs to evaluate further
|
|
+ * printing.
|
|
+ */
|
|
+static bool cons_release(struct cons_context *ctxt)
|
|
+{
|
|
+ bool ret = __cons_release(ctxt);
|
|
+
|
|
+ /* Invalidate the buffer pointer. It is no longer valid. */
|
|
+ ctxt->pbufs = NULL;
|
|
+
|
|
+ ctxt->state.atom = 0;
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+bool console_try_acquire(struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+
|
|
+ return cons_try_acquire(ctxt);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(console_try_acquire);
|
|
+
|
|
+bool console_release(struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+
|
|
+ return cons_release(ctxt);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(console_release);
|
|
+
|
|
+/**
|
|
+ * cons_alloc_percpu_data - Allocate percpu data for a console
|
|
+ * @con: Console to allocate for
|
|
+ *
|
|
+ * Returns: True on success. False otherwise and the console cannot be used.
|
|
+ *
|
|
+ * If it is not yet possible to allocate per CPU data, success is returned.
|
|
+ * When per CPU data becomes possible, set_percpu_data_ready() will call
|
|
+ * this function again for all registered consoles.
|
|
+ */
|
|
+bool cons_alloc_percpu_data(struct console *con)
|
|
+{
|
|
+ if (!printk_percpu_data_ready())
|
|
+ return true;
|
|
+
|
|
+ con->pcpu_data = alloc_percpu(typeof(*con->pcpu_data));
|
|
+ if (con->pcpu_data)
|
|
+ return true;
|
|
+
|
|
+ con_printk(KERN_WARNING, con, "failed to allocate percpu buffers\n");
|
|
+ return false;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_free_percpu_data - Free percpu data of a console on unregister
|
|
+ * @con: Console to clean up
|
|
+ */
|
|
+static void cons_free_percpu_data(struct console *con)
|
|
+{
|
|
+ if (!con->pcpu_data)
|
|
+ return;
|
|
+
|
|
+ free_percpu(con->pcpu_data);
|
|
+ con->pcpu_data = NULL;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * console_can_proceed - Check whether printing can proceed
|
|
+ * @wctxt: The write context that was handed to the write function
|
|
+ *
|
|
+ * Returns: True if the state is correct. False if a handover
|
|
+ * has been requested or if the console was taken
|
|
+ * over.
|
|
+ *
|
|
+ * Must be invoked after the record was dumped into the assigned record
|
|
+ * buffer and at appropriate safe places in the driver. For unsafe driver
|
|
+ * sections see console_enter_unsafe().
|
|
+ *
|
|
+ * When this function returns false then the calling context is not allowed
|
|
+ * to go forward and has to back out immediately and carefully. The buffer
|
|
+ * content is no longer trusted either and the console lock is no longer
|
|
+ * held.
|
|
+ */
|
|
+bool console_can_proceed(struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state state;
|
|
+
|
|
+ cons_state_read(con, CON_STATE_CUR, &state);
|
|
+ /* Store it for analysis or reuse */
|
|
+ copy_full_state(ctxt->old_state, state);
|
|
+
|
|
+ /* Make sure this context is still the owner. */
|
|
+ if (!cons_state_full_match(state, ctxt->state))
|
|
+ return false;
|
|
+
|
|
+	/*
+	 * Having a safe point for takeover, and possibly a few duplicated
+	 * characters or a full line, is way better than a hostile
+	 * takeover. Post processing can take care of the garbage.
+	 * Continue if the requested priority is not sufficient.
+	 */
+ if (state.req_prio <= state.cur_prio)
|
|
+ return true;
|
|
+
|
|
+ /*
|
|
+ * A console printer within an unsafe region is allowed to continue.
|
|
+ * It can perform the handover when exiting the safe region. Otherwise
|
|
+ * a hostile takeover will be necessary.
|
|
+ */
|
|
+ if (state.unsafe)
|
|
+ return true;
|
|
+
|
|
+ /* Release and hand over */
|
|
+ cons_release(ctxt);
|
|
+ /*
|
|
+ * This does not check whether the handover succeeded. The
|
|
+ * outermost callsite has to make the final decision whether printing
|
|
+ * should continue or not (via reacquire, possibly hostile). The
|
|
+ * console is unlocked already so go back all the way instead of
|
|
+ * trying to implement heuristics in tons of places.
|
|
+ */
|
|
+ return false;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(console_can_proceed);
|
|
+
|
|
+/**
|
|
+ * __console_update_unsafe - Update the unsafe bit in @con->atomic_state
|
|
+ * @wctxt: The write context that was handed to the write function
|
|
+ *
|
|
+ * Returns: True if the state is correct. False if a handover
|
|
+ * has been requested or if the console was taken
|
|
+ * over.
|
|
+ *
|
|
+ * Must be invoked before an unsafe driver section is entered.
|
|
+ *
|
|
+ * When this function returns false then the calling context is not allowed
|
|
+ * to go forward and has to back out immediately and carefully. The buffer
|
|
+ * content is no longer trusted either and the console lock is no longer
|
|
+ * held.
|
|
+ *
|
|
+ * Internal helper to avoid duplicated code
|
|
+ */
|
|
+static bool __console_update_unsafe(struct cons_write_context *wctxt, bool unsafe)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+ struct console *con = ctxt->console;
|
|
+ struct cons_state new;
|
|
+
|
|
+ do {
|
|
+ if (!console_can_proceed(wctxt))
|
|
+ return false;
|
|
+ /*
|
|
+ * console_can_proceed() saved the real state in
|
|
+ * ctxt->old_state
|
|
+ */
|
|
+ copy_full_state(new, ctxt->old_state);
|
|
+ new.unsafe = unsafe;
|
|
+
|
|
+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &ctxt->old_state, &new));
|
|
+
|
|
+ copy_full_state(ctxt->state, new);
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * console_enter_unsafe - Enter an unsafe region in the driver
|
|
+ * @wctxt: The write context that was handed to the write function
|
|
+ *
|
|
+ * Returns: True if the state is correct. False if a handover
|
|
+ * has been requested or if the console was taken
|
|
+ * over.
|
|
+ *
|
|
+ * Must be invoked before an unsafe driver section is entered.
|
|
+ *
|
|
+ * When this function returns false then the calling context is not allowed
|
|
+ * to go forward and has to back out immediately and carefully. The buffer
|
|
+ * content is no longer trusted either and the console lock is no longer
|
|
+ * held.
|
|
+ */
|
|
+bool console_enter_unsafe(struct cons_write_context *wctxt)
|
|
+{
|
|
+ return __console_update_unsafe(wctxt, true);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(console_enter_unsafe);
|
|
+
|
|
+/**
|
|
+ * console_exit_unsafe - Exit an unsafe region in the driver
|
|
+ * @wctxt: The write context that was handed to the write function
|
|
+ *
|
|
+ * Returns: True if the state is correct. False if a handover
|
|
+ * has been requested or if the console was taken
|
|
+ * over.
|
|
+ *
|
|
+ * Must be invoked before an unsafe driver section is exited.
|
|
+ *
|
|
+ * When this function returns false then the calling context is not allowed
|
|
+ * to go forward and has to back out immediately and carefully. The buffer
|
|
+ * content is no longer trusted either and the console lock is no longer
|
|
+ * held.
|
|
+ */
|
|
+bool console_exit_unsafe(struct cons_write_context *wctxt)
|
|
+{
|
|
+ return __console_update_unsafe(wctxt, false);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(console_exit_unsafe);
|
|
+
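/*
 * Illustrative sketch, not part of the patch: a minimal console write
 * callback built on the ownership API above (console_can_proceed(),
 * console_enter_unsafe(), console_exit_unsafe()). The demo_uart type,
 * demo_uart_putchar() and reaching the driver state via con->data are
 * placeholders and assumptions, not a real driver.
 */
struct demo_uart {
	void __iomem *txreg;
};

static void demo_uart_putchar(struct demo_uart *uart, char c)
{
	/* Poke the hardware here; omitted in this sketch. */
}

static bool demo_write_atomic(struct console *con, struct cons_write_context *wctxt)
{
	struct demo_uart *uart = con->data;
	unsigned int i;

	/* The record has already been rendered into wctxt->outbuf. */
	if (!console_can_proceed(wctxt))
		return false;

	/* Touching device registers is an unsafe region for takeovers. */
	if (!console_enter_unsafe(wctxt))
		return false;

	for (i = 0; i < wctxt->len; i++)
		demo_uart_putchar(uart, wctxt->outbuf[i]);

	/* Leaving the unsafe region may hand the console over. */
	return console_exit_unsafe(wctxt);
}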
|
|
+/**
|
|
+ * cons_get_record - Fill the buffer with the next pending ringbuffer record
|
|
+ * @wctxt: The write context which will be handed to the write function
|
|
+ *
|
|
+ * Returns: True if there are records available. If the next record should
|
|
+ * be printed, the output buffer is filled and @wctxt->outbuf
|
|
+ * points to the text to print. If @wctxt->outbuf is NULL after
|
|
+ * the call, the record should not be printed but the caller must
|
|
+ * still update the console sequence number.
|
|
+ *
|
|
+ * False means that there are no pending records anymore and the
|
|
+ * printing can stop.
|
|
+ */
|
|
+static bool cons_get_record(struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+ struct console *con = ctxt->console;
|
|
+ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
|
|
+ struct printk_message pmsg = {
|
|
+ .pbufs = ctxt->pbufs,
|
|
+ };
|
|
+
|
|
+ if (!printk_get_next_message(&pmsg, ctxt->newseq, is_extended, true))
|
|
+ return false;
|
|
+
|
|
+ ctxt->newseq = pmsg.seq;
|
|
+ ctxt->dropped += pmsg.dropped;
|
|
+
|
|
+ if (pmsg.outbuf_len == 0) {
|
|
+ wctxt->outbuf = NULL;
|
|
+ } else {
|
|
+ if (ctxt->dropped && !is_extended)
|
|
+ console_prepend_dropped(&pmsg, ctxt->dropped);
|
|
+ wctxt->outbuf = &pmsg.pbufs->outbuf[0];
|
|
+ }
|
|
+
|
|
+ wctxt->len = pmsg.outbuf_len;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_emit_record - Emit record in the acquired context
|
|
+ * @wctxt: The write context that will be handed to the write function
|
|
+ *
|
|
+ * Returns: False if the operation was aborted (takeover or handover).
+ *	    True otherwise.
+ *
+ * When false is returned, the caller is not allowed to touch console state.
+ * The console is owned by someone else. If the caller wants to print more
+ * it has to reacquire the console first.
+ *
+ * When true is returned, @wctxt->ctxt.backlog indicates whether there are
+ * still records pending in the ringbuffer.
+ */
+static bool cons_emit_record(struct cons_write_context *wctxt)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+ struct console *con = ctxt->console;
|
|
+ bool done = false;
|
|
+
|
|
+ /*
|
|
+ * @con->dropped is not protected in case of hostile takeovers so
|
|
+ * the update below is racy. Annotate it accordingly.
|
|
+ */
|
|
+ ctxt->dropped = data_race(READ_ONCE(con->dropped));
|
|
+
|
|
+ /* Fill the output buffer with the next record */
|
|
+ ctxt->backlog = cons_get_record(wctxt);
|
|
+ if (!ctxt->backlog)
|
|
+ return true;
|
|
+
|
|
+ /* Safety point. Don't touch state in case of takeover */
|
|
+ if (!console_can_proceed(wctxt))
|
|
+ return false;
|
|
+
|
|
+ /* Counterpart to the read above */
|
|
+ WRITE_ONCE(con->dropped, ctxt->dropped);
|
|
+
|
|
+	/*
+	 * In case of skipped records, update the sequence state in @con.
+	 */
+ if (!wctxt->outbuf)
|
|
+ goto update;
|
|
+
|
|
+ /* Tell the driver about potential unsafe state */
|
|
+ wctxt->unsafe = ctxt->state.unsafe;
|
|
+
|
|
+ if (!ctxt->thread && con->write_atomic) {
|
|
+ done = con->write_atomic(con, wctxt);
|
|
+ } else if (ctxt->thread && con->write_thread) {
|
|
+ done = con->write_thread(con, wctxt);
|
|
+ } else {
|
|
+ cons_release(ctxt);
|
|
+ WARN_ON_ONCE(1);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ /* If not done, the write was aborted due to takeover */
|
|
+ if (!done)
|
|
+ return false;
|
|
+
|
|
+ /* If there was a dropped message, it has now been output. */
|
|
+ if (ctxt->dropped) {
|
|
+ ctxt->dropped = 0;
|
|
+ /* Counterpart to the read above */
|
|
+ WRITE_ONCE(con->dropped, ctxt->dropped);
|
|
+ }
|
|
+update:
|
|
+ ctxt->newseq++;
|
|
+ /*
|
|
+ * The sequence update attempt is not part of console_release()
|
|
+ * because in panic situations the console is not released by
|
|
+ * the panic CPU until all records are written. On 32bit the
|
|
+ * sequence is separate from state anyway.
|
|
+ */
|
|
+ return cons_seq_try_update(ctxt);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_kthread_should_wakeup - Check whether the printk thread should wakeup
|
|
+ * @con: Console to operate on
|
|
+ * @ctxt: The acquire context that contains the state
|
|
+ * at console_acquire()
|
|
+ *
|
|
+ * Returns: True if the thread should shutdown or if the console is allowed to
|
|
+ * print and a record is available. False otherwise
|
|
+ *
|
|
+ * After the thread wakes up, it must first check if it should shutdown before
|
|
+ * attempting any printing.
|
|
+ */
|
|
+static bool cons_kthread_should_wakeup(struct console *con, struct cons_context *ctxt)
|
|
+{
|
|
+ bool is_usable;
|
|
+ short flags;
|
|
+ int cookie;
|
|
+
|
|
+ if (kthread_should_stop())
|
|
+ return true;
|
|
+
|
|
+ cookie = console_srcu_read_lock();
|
|
+ flags = console_srcu_read_flags(con);
|
|
+ is_usable = console_is_usable(con, flags);
|
|
+ console_srcu_read_unlock(cookie);
|
|
+
|
|
+ if (!is_usable)
|
|
+ return false;
|
|
+
|
|
+ /* This reads state and sequence on 64bit. On 32bit only state */
|
|
+ cons_state_read(con, CON_STATE_CUR, &ctxt->state);
|
|
+
|
|
+ /*
|
|
+ * Atomic printing is running on some other CPU. The owner
|
|
+ * will wake the console thread on unlock if necessary.
|
|
+ */
|
|
+ if (ctxt->state.locked)
|
|
+ return false;
|
|
+
|
|
+ /* Bring the sequence in @ctxt up to date */
|
|
+ cons_context_set_seq(ctxt);
|
|
+
|
|
+ return prb_read_valid(prb, ctxt->oldseq, NULL);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_kthread_func - The printk thread function
|
|
+ * @__console: Console to operate on
|
|
+ */
|
|
+static int cons_kthread_func(void *__console)
|
|
+{
|
|
+ struct console *con = __console;
|
|
+ struct cons_write_context wctxt = {
|
|
+ .ctxt.console = con,
|
|
+ .ctxt.prio = CONS_PRIO_NORMAL,
|
|
+ .ctxt.thread = 1,
|
|
+ };
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
|
|
+ unsigned long flags;
|
|
+ short con_flags;
|
|
+ bool backlog;
|
|
+ int cookie;
|
|
+ int ret;
|
|
+
|
|
+ for (;;) {
|
|
+ atomic_inc(&con->kthread_waiting);
|
|
+
|
|
+ /*
|
|
+ * Provides a full memory barrier vs. cons_kthread_wake().
|
|
+ */
|
|
+ ret = rcuwait_wait_event(&con->rcuwait,
|
|
+ cons_kthread_should_wakeup(con, ctxt),
|
|
+ TASK_INTERRUPTIBLE);
|
|
+
|
|
+ atomic_dec(&con->kthread_waiting);
|
|
+
|
|
+ if (kthread_should_stop())
|
|
+ break;
|
|
+
|
|
+ /* Wait was interrupted by a spurious signal, go back to sleep */
|
|
+ if (ret)
|
|
+ continue;
|
|
+
|
|
+ for (;;) {
|
|
+ cookie = console_srcu_read_lock();
|
|
+
|
|
+ /*
|
|
+ * Ensure this stays on the CPU to make handover and
|
|
+ * takeover possible.
|
|
+ */
|
|
+ if (con->port_lock)
|
|
+ con->port_lock(con, true, &flags);
|
|
+ else
|
|
+ migrate_disable();
|
|
+
|
|
+ /*
|
|
+ * Try to acquire the console without attempting to
|
|
+ * take over. If an atomic printer wants to hand
|
|
+ * back to the thread it simply wakes it up.
|
|
+ */
|
|
+ if (!cons_try_acquire(ctxt))
|
|
+ break;
|
|
+
|
|
+ con_flags = console_srcu_read_flags(con);
|
|
+
|
|
+ if (console_is_usable(con, con_flags)) {
|
|
+ /*
|
|
+ * If the emit fails, this context is no
|
|
+ * longer the owner. Abort the processing and
|
|
+ * wait for new records to print.
|
|
+ */
|
|
+ if (!cons_emit_record(&wctxt))
|
|
+ break;
|
|
+ backlog = ctxt->backlog;
|
|
+ } else {
|
|
+ backlog = false;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * If the release fails, this context was not the
|
|
+ * owner. Abort the processing and wait for new
|
|
+ * records to print.
|
|
+ */
|
|
+ if (!cons_release(ctxt))
|
|
+ break;
|
|
+
|
|
+ /* Backlog done? */
|
|
+ if (!backlog)
|
|
+ break;
|
|
+
|
|
+ if (con->port_lock)
|
|
+ con->port_lock(con, false, &flags);
|
|
+ else
|
|
+ migrate_enable();
|
|
+
|
|
+ console_srcu_read_unlock(cookie);
|
|
+
|
|
+ cond_resched();
|
|
+ }
|
|
+ if (con->port_lock)
|
|
+ con->port_lock(con, false, &flags);
|
|
+ else
|
|
+ migrate_enable();
|
|
+
|
|
+ console_srcu_read_unlock(cookie);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_irq_work - irq work to wake printk thread
|
|
+ * @irq_work: The irq work to operate on
|
|
+ */
|
|
+static void cons_irq_work(struct irq_work *irq_work)
|
|
+{
|
|
+ struct console *con = container_of(irq_work, struct console, irq_work);
|
|
+
|
|
+ cons_kthread_wake(con);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_wake_threads - Wake up printing threads
|
|
+ *
|
|
+ * A printing thread is only woken if it is within the @kthread_waiting
|
|
+ * block. If it is not within the block (or enters the block later), it
|
|
+ * will see any new records and continue printing on its own.
|
|
+ */
|
|
+void cons_wake_threads(void)
|
|
+{
|
|
+ struct console *con;
|
|
+ int cookie;
|
|
+
|
|
+ cookie = console_srcu_read_lock();
|
|
+ for_each_console_srcu(con) {
|
|
+ if (con->kthread && atomic_read(&con->kthread_waiting))
|
|
+ irq_work_queue(&con->irq_work);
|
|
+ }
|
|
+ console_srcu_read_unlock(cookie);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * struct cons_cpu_state - Per CPU printk context state
|
|
+ * @prio: The current context priority level
|
|
+ * @nesting: Per priority nest counter
|
|
+ */
|
|
+struct cons_cpu_state {
|
|
+ enum cons_prio prio;
|
|
+ int nesting[CONS_PRIO_MAX];
|
|
+};
|
|
+
|
|
+static DEFINE_PER_CPU(struct cons_cpu_state, cons_pcpu_state);
|
|
+static struct cons_cpu_state early_cons_pcpu_state __initdata;
|
|
+
|
|
+/**
|
|
+ * cons_get_cpu_state - Get the per CPU console state pointer
|
|
+ *
|
|
+ * Returns either a pointer to the per CPU state of the current CPU or to
|
|
+ * the init data state during early boot.
|
|
+ */
|
|
+static __ref struct cons_cpu_state *cons_get_cpu_state(void)
|
|
+{
|
|
+ if (!printk_percpu_data_ready())
|
|
+ return &early_cons_pcpu_state;
|
|
+
|
|
+ return this_cpu_ptr(&cons_pcpu_state);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_get_wctxt - Get the write context for atomic printing
|
|
+ * @con: Console to operate on
|
|
+ * @prio: Priority of the context
|
|
+ *
|
|
+ * Returns either the per CPU context or the builtin context for
|
|
+ * early boot.
|
|
+ */
|
|
+static __ref struct cons_write_context *cons_get_wctxt(struct console *con,
|
|
+ enum cons_prio prio)
|
|
+{
|
|
+ if (!con->pcpu_data)
|
|
+ return &early_cons_ctxt_data.wctxt[prio];
|
|
+
|
|
+ return &this_cpu_ptr(con->pcpu_data)->wctxt[prio];
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_atomic_try_acquire - Try to acquire the console for atomic printing
|
|
+ * @con: The console to acquire
|
|
+ * @ctxt: The console context instance to work on
|
|
+ * @prio: The priority of the current context
|
|
+ */
|
|
+static bool cons_atomic_try_acquire(struct console *con, struct cons_context *ctxt,
|
|
+ enum cons_prio prio, bool skip_unsafe)
|
|
+{
|
|
+ memset(ctxt, 0, sizeof(*ctxt));
|
|
+ ctxt->console = con;
|
|
+ ctxt->spinwait_max_us = 2000;
|
|
+ ctxt->prio = prio;
|
|
+ ctxt->spinwait = 1;
|
|
+
|
|
+ /* Try to acquire it directly or via a friendly handover */
|
|
+ if (cons_try_acquire(ctxt))
|
|
+ return true;
|
|
+
|
|
+ /* Investigate whether a hostile takeover is due */
|
|
+ if (ctxt->old_state.cur_prio >= prio)
|
|
+ return false;
|
|
+
|
|
+ if (!ctxt->old_state.unsafe || !skip_unsafe)
|
|
+ ctxt->hostile = 1;
|
|
+ return cons_try_acquire(ctxt);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_atomic_flush_con - Flush one console in atomic mode
|
|
+ * @wctxt: The write context struct to use for this context
|
|
+ * @con: The console to flush
|
|
+ * @prio: The priority of the current context
|
|
+ * @skip_unsafe: True, to avoid unsafe hostile takeovers
|
|
+ */
|
|
+static void cons_atomic_flush_con(struct cons_write_context *wctxt, struct console *con,
|
|
+ enum cons_prio prio, bool skip_unsafe)
|
|
+{
|
|
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
|
|
+ bool wake_thread = false;
|
|
+ short flags;
|
|
+
|
|
+ if (!cons_atomic_try_acquire(con, ctxt, prio, skip_unsafe))
|
|
+ return;
|
|
+
|
|
+ do {
|
|
+ flags = console_srcu_read_flags(con);
|
|
+
|
|
+ if (!console_is_usable(con, flags))
|
|
+ break;
|
|
+
|
|
+ /*
|
|
+ * For normal prio messages let the printer thread handle
|
|
+ * the printing if it is available.
|
|
+ */
|
|
+ if (prio <= CONS_PRIO_NORMAL && con->kthread) {
|
|
+ wake_thread = true;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * cons_emit_record() returns false when the console was
|
|
+ * handed over or taken over. In both cases the context is
|
|
+ * no longer valid.
|
|
+ */
|
|
+ if (!cons_emit_record(wctxt))
|
|
+ return;
|
|
+ } while (ctxt->backlog);
|
|
+
|
|
+ cons_release(ctxt);
|
|
+
|
|
+ if (wake_thread && atomic_read(&con->kthread_waiting))
|
|
+ irq_work_queue(&con->irq_work);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_atomic_flush - Flush consoles in atomic mode if required
|
|
+ * @printk_caller_wctxt: The write context struct to use for this
|
|
+ * context (for printk() context only)
|
|
+ * @skip_unsafe: True, to avoid unsafe hostile takeovers
|
|
+ */
|
|
+void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, bool skip_unsafe)
|
|
+{
|
|
+ struct cons_write_context *wctxt;
|
|
+ struct cons_cpu_state *cpu_state;
|
|
+ struct console *con;
|
|
+ short flags;
|
|
+ int cookie;
|
|
+
|
|
+ cpu_state = cons_get_cpu_state();
|
|
+
|
|
+ /*
|
|
+ * When in an elevated priority, the printk() calls are not
|
|
+ * individually flushed. This is to allow the full output to
|
|
+ * be dumped to the ringbuffer before starting with printing
|
|
+ * the backlog.
|
|
+ */
|
|
+ if (cpu_state->prio > CONS_PRIO_NORMAL && printk_caller_wctxt)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * Let the outermost write of this priority print. This avoids
|
|
+ * nasty hackery for nested WARN() where the printing itself
|
|
+ * generates one.
|
|
+ *
|
|
+ * cpu_state->prio <= CONS_PRIO_NORMAL is not subject to nesting
|
|
+ * and can proceed in order to allow atomic printing when consoles
|
|
+ * do not have a printer thread.
|
|
+ */
|
|
+ if (cpu_state->prio > CONS_PRIO_NORMAL &&
|
|
+ cpu_state->nesting[cpu_state->prio] != 1)
|
|
+ return;
|
|
+
|
|
+ cookie = console_srcu_read_lock();
|
|
+ for_each_console_srcu(con) {
|
|
+ if (!con->write_atomic)
|
|
+ continue;
|
|
+
|
|
+ flags = console_srcu_read_flags(con);
|
|
+
|
|
+ if (!console_is_usable(con, flags))
|
|
+ continue;
|
|
+
|
|
+ if (cpu_state->prio > CONS_PRIO_NORMAL || !con->kthread) {
|
|
+ if (printk_caller_wctxt)
|
|
+ wctxt = printk_caller_wctxt;
|
|
+ else
|
|
+ wctxt = cons_get_wctxt(con, cpu_state->prio);
|
|
+ cons_atomic_flush_con(wctxt, con, cpu_state->prio, skip_unsafe);
|
|
+ }
|
|
+ }
|
|
+ console_srcu_read_unlock(cookie);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_atomic_enter - Enter a context that enforces atomic printing
|
|
+ * @prio: Priority of the context
|
|
+ *
|
|
+ * Returns: The previous priority that needs to be fed into
|
|
+ * the corresponding cons_atomic_exit()
|
|
+ */
|
|
+enum cons_prio cons_atomic_enter(enum cons_prio prio)
|
|
+{
|
|
+ struct cons_cpu_state *cpu_state;
|
|
+ enum cons_prio prev_prio;
|
|
+
|
|
+ migrate_disable();
|
|
+ cpu_state = cons_get_cpu_state();
|
|
+
|
|
+ prev_prio = cpu_state->prio;
|
|
+ if (prev_prio < prio)
|
|
+ cpu_state->prio = prio;
|
|
+
|
|
+ /*
|
|
+ * Increment the nesting on @cpu_state->prio so a WARN()
|
|
+ * nested into a panic printout does not attempt to
|
|
+ * scribble state.
|
|
+ */
|
|
+ cpu_state->nesting[cpu_state->prio]++;
|
|
+
|
|
+ return prev_prio;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_atomic_exit - Exit a context that enforces atomic printing
|
|
+ * @prio: Priority of the context to leave
|
|
+ * @prev_prio: Priority of the previous context for restore
|
|
+ *
|
|
+ * @prev_prio is the priority returned by the corresponding cons_atomic_enter().
|
|
+ */
|
|
+void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio)
|
|
+{
|
|
+ struct cons_cpu_state *cpu_state;
|
|
+
|
|
+ cons_atomic_flush(NULL, true);
|
|
+
|
|
+ cpu_state = cons_get_cpu_state();
|
|
+
|
|
+ if (cpu_state->prio == CONS_PRIO_PANIC)
|
|
+ cons_atomic_flush(NULL, false);
|
|
+
|
|
+ /*
|
|
+ * Undo the nesting of cons_atomic_enter() at the CPU state
|
|
+ * priority.
|
|
+ */
|
|
+ cpu_state->nesting[cpu_state->prio]--;
|
|
+
|
|
+ /*
|
|
+ * Restore the previous priority, which was returned by
|
|
+ * cons_atomic_enter().
|
|
+ */
|
|
+ cpu_state->prio = prev_prio;
|
|
+
|
|
+ migrate_enable();
|
|
+}
|
|
+
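/*
 * Illustrative sketch, not part of the patch: wrapping an emergency dump
 * in an atomic printing section, as the RCU stall-warning hunk further
 * below does. All printk() output of the section is stored into the
 * ringbuffer first and then flushed in one go on cons_atomic_exit().
 */
static void demo_emergency_dump(void)
{
	enum cons_prio prev_prio;

	prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY);

	pr_emerg("dumping emergency diagnostics\n");
	/* ... more output ... */

	cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio);
}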
|
|
+/**
|
|
+ * cons_kthread_stop - Stop a printk thread
|
|
+ * @con: Console to operate on
|
|
+ */
|
|
+static void cons_kthread_stop(struct console *con)
|
|
+{
|
|
+ lockdep_assert_console_list_lock_held();
|
|
+
|
|
+ if (!con->kthread)
|
|
+ return;
|
|
+
|
|
+ kthread_stop(con->kthread);
|
|
+ con->kthread = NULL;
|
|
+
|
|
+ kfree(con->thread_pbufs);
|
|
+ con->thread_pbufs = NULL;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_kthread_create - Create a printk thread
|
|
+ * @con: Console to operate on
|
|
+ *
|
|
+ * If it fails, let the console proceed. The atomic part might
|
|
+ * be usable and useful.
|
|
+ */
|
|
+void cons_kthread_create(struct console *con)
|
|
+{
|
|
+ struct task_struct *kt;
|
|
+ struct console *c;
|
|
+
|
|
+ lockdep_assert_console_list_lock_held();
|
|
+
|
|
+ if (!(con->flags & CON_NO_BKL) || !con->write_thread)
|
|
+ return;
|
|
+
|
|
+ if (!printk_threads_enabled || con->kthread)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * Printer threads cannot be started as long as any boot console is
|
|
+ * registered because there is no way to synchronize the hardware
|
|
+ * registers between boot console code and regular console code.
|
|
+ */
|
|
+ for_each_console(c) {
|
|
+ if (c->flags & CON_BOOT)
|
|
+ return;
|
|
+ }
|
|
+ have_boot_console = false;
|
|
+
|
|
+ con->thread_pbufs = kmalloc(sizeof(*con->thread_pbufs), GFP_KERNEL);
|
|
+ if (!con->thread_pbufs) {
|
|
+ con_printk(KERN_ERR, con, "failed to allocate printing thread buffers\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ kt = kthread_run(cons_kthread_func, con, "pr/%s%d", con->name, con->index);
|
|
+ if (IS_ERR(kt)) {
|
|
+ con_printk(KERN_ERR, con, "failed to start printing thread\n");
|
|
+ kfree(con->thread_pbufs);
|
|
+ con->thread_pbufs = NULL;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ con->kthread = kt;
|
|
+
|
|
+ /*
|
|
+ * It is important that console printing threads are scheduled
|
|
+ * shortly after a printk call and with generous runtime budgets.
|
|
+ */
|
|
+ sched_set_normal(con->kthread, -20);
|
|
+}
|
|
+
|
|
+static int __init printk_setup_threads(void)
|
|
+{
|
|
+ struct console *con;
|
|
+
|
|
+ if (printk_force_atomic)
|
|
+ return 0;
|
|
+
|
|
+ console_list_lock();
|
|
+ printk_threads_enabled = true;
|
|
+ for_each_console(con)
|
|
+ cons_kthread_create(con);
|
|
+ if (have_bkl_console)
|
|
+ console_bkl_kthread_create();
|
|
+ console_list_unlock();
|
|
+ return 0;
|
|
+}
|
|
+early_initcall(printk_setup_threads);
|
|
+
|
|
+/**
|
|
+ * cons_nobkl_init - Initialize the NOBKL console specific data
|
|
+ * @con: Console to initialize
|
|
+ *
|
|
+ * Returns: True on success. False otherwise and the console cannot be used.
|
|
+ */
|
|
+bool cons_nobkl_init(struct console *con)
|
|
+{
|
|
+ struct cons_state state = { };
|
|
+
|
|
+ if (!cons_alloc_percpu_data(con))
|
|
+ return false;
|
|
+
|
|
+ rcuwait_init(&con->rcuwait);
|
|
+ atomic_set(&con->kthread_waiting, 0);
|
|
+ init_irq_work(&con->irq_work, cons_irq_work);
|
|
+ cons_state_set(con, CON_STATE_CUR, &state);
|
|
+ cons_state_set(con, CON_STATE_REQ, &state);
|
|
+ cons_seq_init(con);
|
|
+ cons_kthread_create(con);
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * cons_nobkl_cleanup - Cleanup the NOBKL console specific data
|
|
+ * @con: Console to cleanup
|
|
+ */
|
|
+void cons_nobkl_cleanup(struct console *con)
|
|
+{
|
|
+ struct cons_state state = { };
|
|
+
|
|
+ cons_kthread_stop(con);
|
|
+ cons_state_set(con, CON_STATE_CUR, &state);
|
|
+ cons_state_set(con, CON_STATE_REQ, &state);
|
|
+ cons_free_percpu_data(con);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * printk_kthread_shutdown - shutdown all threaded printers
|
|
+ *
|
|
+ * On system shutdown all threaded printers are stopped. This allows printk
|
|
+ * to transition back to atomic printing, thus providing a robust mechanism
|
|
+ * for the final shutdown/reboot messages to be output.
|
|
+ */
|
|
+static void printk_kthread_shutdown(void)
|
|
+{
|
|
+ struct console *con;
|
|
+
|
|
+ console_list_lock();
|
|
+ for_each_console(con) {
|
|
+ if (con->flags & CON_NO_BKL)
|
|
+ cons_kthread_stop(con);
|
|
+ }
|
|
+ console_list_unlock();
|
|
+}
|
|
+
|
|
+static struct syscore_ops printk_syscore_ops = {
|
|
+ .shutdown = printk_kthread_shutdown,
|
|
+};
|
|
+
|
|
+static int __init printk_init_ops(void)
|
|
+{
|
|
+ register_syscore_ops(&printk_syscore_ops);
|
|
+ return 0;
|
|
+}
|
|
+device_initcall(printk_init_ops);
|
|
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
|
|
index ef0f9a2044da1..5c1470bd60bcb 100644
|
|
--- a/kernel/printk/printk_safe.c
|
|
+++ b/kernel/printk/printk_safe.c
|
|
@@ -12,18 +12,41 @@
|
|
|
|
#include "internal.h"
|
|
|
|
-static DEFINE_PER_CPU(int, printk_context);
|
|
+struct printk_context {
|
|
+ local_lock_t cpu;
|
|
+ int recursion;
|
|
+};
|
|
+
|
|
+static DEFINE_PER_CPU(struct printk_context, printk_context) = {
|
|
+ .cpu = INIT_LOCAL_LOCK(cpu),
|
|
+};
|
|
|
|
/* Can be preempted by NMI. */
|
|
-void __printk_safe_enter(void)
|
|
+void __printk_safe_enter(unsigned long *flags)
|
|
{
|
|
- this_cpu_inc(printk_context);
|
|
+ WARN_ON_ONCE(in_nmi());
|
|
+ local_lock_irqsave(&printk_context.cpu, *flags);
|
|
+ this_cpu_inc(printk_context.recursion);
|
|
}
|
|
|
|
/* Can be preempted by NMI. */
|
|
-void __printk_safe_exit(void)
|
|
+void __printk_safe_exit(unsigned long *flags)
|
|
{
|
|
- this_cpu_dec(printk_context);
|
|
+ WARN_ON_ONCE(in_nmi());
|
|
+ this_cpu_dec(printk_context.recursion);
|
|
+ local_unlock_irqrestore(&printk_context.cpu, *flags);
|
|
+}
|
|
+
|
|
+void __printk_deferred_enter(void)
|
|
+{
|
|
+ WARN_ON_ONCE(!in_atomic());
|
|
+ this_cpu_inc(printk_context.recursion);
|
|
+}
|
|
+
|
|
+void __printk_deferred_exit(void)
|
|
+{
|
|
+ WARN_ON_ONCE(!in_atomic());
|
|
+ this_cpu_dec(printk_context.recursion);
|
|
}
|
|
|
|
asmlinkage int vprintk(const char *fmt, va_list args)
|
|
@@ -38,13 +61,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
|
* Use the main logbuf even in NMI. But avoid calling console
|
|
* drivers that might have their own locks.
|
|
*/
|
|
- if (this_cpu_read(printk_context) || in_nmi()) {
|
|
- int len;
|
|
-
|
|
- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
|
|
- defer_console_output();
|
|
- return len;
|
|
- }
|
|
+ if (this_cpu_read(printk_context.recursion) || in_nmi())
|
|
+ return vprintk_deferred(fmt, args);
|
|
|
|
/* No obstacles. */
|
|
return vprintk_default(fmt, args);
|
|
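/*
 * Illustrative sketch, not part of the patch: the deferred-printk helpers
 * above are meant for sections that hold locks which console drivers may
 * also take. It is an assumption here that the existing
 * printk_deferred_enter()/printk_deferred_exit() wrappers map onto
 * __printk_deferred_enter()/__printk_deferred_exit().
 */
static DEFINE_RAW_SPINLOCK(demo_lock);

static void demo_report_under_lock(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	printk_deferred_enter();

	/* Output is only stored; console drivers are not called here. */
	pr_warn("state changed while holding demo_lock\n");

	printk_deferred_exit();
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}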
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
|
|
index 8e6c023212cb3..34f8adf7c0a39 100644
|
|
--- a/kernel/rcu/rcutorture.c
|
|
+++ b/kernel/rcu/rcutorture.c
|
|
@@ -2407,6 +2407,12 @@ static int rcutorture_booster_init(unsigned int cpu)
|
|
WARN_ON_ONCE(!t);
|
|
sp.sched_priority = 2;
|
|
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+ t = per_cpu(timersd, cpu);
|
|
+ WARN_ON_ONCE(!t);
|
|
+ sp.sched_priority = 2;
|
|
+ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
|
|
+#endif
|
|
}
|
|
|
|
/* Don't allow time recalculation while creating a new task. */
|
|
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
|
|
index b10b8349bb2a4..804306204d0d0 100644
|
|
--- a/kernel/rcu/tree_stall.h
|
|
+++ b/kernel/rcu/tree_stall.h
|
|
@@ -8,6 +8,7 @@
|
|
*/
|
|
|
|
#include <linux/kvm_para.h>
|
|
+#include <linux/console.h>
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
@@ -582,6 +583,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void)
|
|
|
|
static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
|
|
{
|
|
+ enum cons_prio prev_prio;
|
|
int cpu;
|
|
unsigned long flags;
|
|
unsigned long gpa;
|
|
@@ -597,6 +599,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
|
|
if (rcu_stall_is_suppressed())
|
|
return;
|
|
|
|
+ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY);
|
|
+
|
|
/*
|
|
* OK, time to rat on our buddy...
|
|
* See Documentation/RCU/stallwarn.rst for info on how to debug
|
|
@@ -651,6 +655,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
|
|
panic_on_rcu_stall();
|
|
|
|
rcu_force_quiescent_state(); /* Kick them all. */
|
|
+
|
|
+ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio);
|
|
}
|
|
|
|
static void print_cpu_stall(unsigned long gps)
|
|
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0d18c3969f904..a57a1a3beeba1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1042,6 +1042,46 @@ void resched_curr(struct rq *rq)
trace_sched_wake_idle_without_ipi(cpu);
}

+#ifdef CONFIG_PREEMPT_LAZY
+
+static int tsk_is_polling(struct task_struct *p)
+{
+#ifdef TIF_POLLING_NRFLAG
+ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+#else
+ return 0;
+#endif
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ int cpu;
+
+ if (!sched_feat(PREEMPT_LAZY)) {
+ resched_curr(rq);
+ return;
+ }
+
+ if (test_tsk_need_resched(curr))
+ return;
+
+ if (test_tsk_need_resched_lazy(curr))
+ return;
+
+ set_tsk_need_resched_lazy(curr);
+
+ cpu = cpu_of(rq);
+ if (cpu == smp_processor_id())
+ return;
+
+ /* NEED_RESCHED_LAZY must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(curr))
+ smp_send_reschedule(cpu);
+}
+#endif
+
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -2230,6 +2270,7 @@ void migrate_disable(void)
preempt_disable();
this_rq()->nr_pinned++;
p->migration_disabled = 1;
+ preempt_lazy_disable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_disable);
@@ -2265,6 +2306,7 @@ void migrate_enable(void)
barrier();
p->migration_disabled = 0;
this_rq()->nr_pinned--;
+ preempt_lazy_enable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_enable);
@@ -3318,6 +3360,76 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
}
#endif /* CONFIG_NUMA_BALANCING */

+#ifdef CONFIG_PREEMPT_RT
+
+/*
+ * Consider:
+ *
+ * set_special_state(X);
+ *
+ * do_things()
+ * // Somewhere in there is an rtlock that can be contended:
+ * current_save_and_set_rtlock_wait_state();
+ * [...]
+ * schedule_rtlock(); (A)
+ * [...]
+ * current_restore_rtlock_saved_state();
+ *
+ * schedule(); (B)
+ *
+ * If p->saved_state is anything other than TASK_RUNNING, then p blocked on an
+ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its
+ * state to X. For things like ptrace (X=TASK_TRACED), the task could have more
+ * work to do upon acquiring the lock in do_things() before whoever called
+ * wait_task_inactive() should return. IOW, we have to wait for:
+ *
+ * p.saved_state = TASK_RUNNING
+ * p.__state = X
+ *
+ * which implies the task isn't blocked on an RT lock and got to schedule() (B).
+ *
+ * Also see comments in ttwu_state_match().
+ */
+
+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
+{
+ unsigned long flags;
+ bool mismatch;
+
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
+ if (READ_ONCE(p->__state) & match_state)
+ mismatch = false;
+ else if (READ_ONCE(p->saved_state) & match_state)
+ mismatch = false;
+ else
+ mismatch = true;
+
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ return mismatch;
+}
+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
+ bool *wait)
+{
+ if (READ_ONCE(p->__state) & match_state)
+ return true;
+ if (READ_ONCE(p->saved_state) & match_state) {
+ *wait = true;
+ return true;
+ }
+ return false;
+}
+#else
+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
+{
+ return !(READ_ONCE(p->__state) & match_state);
+}
+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
+ bool *wait)
+{
+ return (READ_ONCE(p->__state) & match_state);
+}
+#endif
+
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -3336,7 +3448,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
*/
unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
{
- int running, queued;
+ bool running, wait;
struct rq_flags rf;
unsigned long ncsw;
struct rq *rq;
@@ -3362,7 +3474,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
* is actually now running somewhere else!
*/
while (task_on_cpu(rq, p)) {
- if (!(READ_ONCE(p->__state) & match_state))
+ if (state_mismatch(p, match_state))
return 0;
cpu_relax();
}
@@ -3375,9 +3487,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
rq = task_rq_lock(p, &rf);
trace_sched_wait_task(p);
running = task_on_cpu(rq, p);
- queued = task_on_rq_queued(p);
+ wait = task_on_rq_queued(p);
ncsw = 0;
- if (READ_ONCE(p->__state) & match_state)
+
+ if (state_match(p, match_state, &wait))
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, p, &rf);

@@ -3407,7 +3520,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
* running right now), it's preempted, and we should
* yield - it could be a while.
*/
- if (unlikely(queued)) {
+ if (unlikely(wait)) {
ktime_t to = NSEC_PER_SEC / HZ;

set_current_state(TASK_UNINTERRUPTIBLE);
@@ -4712,6 +4825,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->on_cpu = 0;
#endif
init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(p)->preempt_lazy_count = 0;
+#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -6588,6 +6704,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)

next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
+ clear_tsk_need_resched_lazy(prev);
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
rq->last_seen_need_resched_ns = 0;
@@ -6648,14 +6765,11 @@ void __noreturn do_task_dead(void)
cpu_relax();
}

-static inline void sched_submit_work(struct task_struct *tsk)
+void sched_submit_work(void)
{
- unsigned int task_flags;
+ struct task_struct *tsk = current;
+ unsigned int task_flags = tsk->flags;

- if (task_is_running(tsk))
- return;
-
- task_flags = tsk->flags;
/*
* If a worker goes to sleep, notify and ask workqueue whether it
* wants to wake up a task to maintain concurrency.
@@ -6681,8 +6795,10 @@ static inline void sched_submit_work(struct task_struct *tsk)
blk_flush_plug(tsk->plug, true);
}

-static void sched_update_worker(struct task_struct *tsk)
+void sched_resume_work(void)
{
+ struct task_struct *tsk = current;
+
if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
if (tsk->flags & PF_WQ_WORKER)
wq_worker_running(tsk);
@@ -6691,20 +6807,29 @@ static void sched_update_worker(struct task_struct *tsk)
}
}

-asmlinkage __visible void __sched schedule(void)
+static void schedule_loop(unsigned int sched_mode)
{
- struct task_struct *tsk = current;
-
- sched_submit_work(tsk);
do {
preempt_disable();
- __schedule(SM_NONE);
+ __schedule(sched_mode);
sched_preempt_enable_no_resched();
} while (need_resched());
- sched_update_worker(tsk);
+}
+
+asmlinkage __visible void __sched schedule(void)
+{
+ if (!task_is_running(current))
+ sched_submit_work();
+ schedule_loop(SM_NONE);
+ sched_resume_work();
}
EXPORT_SYMBOL(schedule);

+void schedule_rtmutex(void)
+{
+ schedule_loop(SM_NONE);
+}
+
/*
* synchronize_rcu_tasks() makes sure that no task is stuck in preempted
* state (have scheduled out non-voluntarily) by making sure that all
@@ -6764,11 +6889,7 @@ void __sched schedule_preempt_disabled(void)
#ifdef CONFIG_PREEMPT_RT
void __sched notrace schedule_rtlock(void)
{
- do {
- preempt_disable();
- __schedule(SM_RTLOCK_WAIT);
- sched_preempt_enable_no_resched();
- } while (need_resched());
+ schedule_loop(SM_RTLOCK_WAIT);
}
NOKPROBE_SYMBOL(schedule_rtlock);
#endif
@@ -6802,6 +6923,30 @@ static void __sched notrace preempt_schedule_common(void)
} while (need_resched());
}

+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is set then we allow to be scheduled away since this is
+ * set by an RT task. Otherwise we try to avoid being scheduled out as long as
+ * the preempt_lazy_count counter is > 0.
+ */
+static __always_inline int preemptible_lazy(void)
+{
+ if (test_thread_flag(TIF_NEED_RESCHED))
+ return 1;
+ if (current_thread_info()->preempt_lazy_count)
+ return 0;
+ return 1;
+}
+
+#else
+
+static inline int preemptible_lazy(void)
+{
+ return 1;
+}
+
+#endif
+
#ifdef CONFIG_PREEMPTION
/*
* This is the entry point to schedule() from in-kernel preemption
@@ -6815,6 +6960,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
*/
if (likely(!preemptible()))
return;
+ if (!preemptible_lazy())
+ return;
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
@@ -6862,6 +7009,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
if (likely(!preemptible()))
return;

+ if (!preemptible_lazy())
+ return;
+
do {
/*
* Because the function tracer can trace preempt_count_sub()
@@ -9167,7 +9317,9 @@ void __init init_idle(struct task_struct *idle, int cpu)

/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
/*
* The idle tasks have their own, simple scheduling class:
*/
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ed89be0aa6503..46ffbbfde97b0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4948,7 +4948,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)

delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
@@ -4972,7 +4972,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return;

if (delta > ideal_runtime)
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}

static void
@@ -5118,7 +5118,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
* validating it and just reschedule.
*/
if (queued) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
return;
}
/*
@@ -5267,7 +5267,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}

static __always_inline
@@ -6142,7 +6142,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)

if (delta < 0) {
if (task_current(rq, p))
- resched_curr(rq);
+ resched_curr_lazy(rq);
return;
}
hrtick_start(rq, delta);
@@ -7871,7 +7871,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;

preempt:
- resched_curr(rq);
+ resched_curr_lazy(rq);
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
@@ -12036,7 +12036,7 @@ static void task_fork_fair(struct task_struct *p)
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
- resched_curr(rq);
+ resched_curr_lazy(rq);
}

se->vruntime -= cfs_rq->min_vruntime;
@@ -12063,7 +12063,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
*/
if (task_current(rq, p)) {
if (p->prio > oldprio)
- resched_curr(rq);
+ resched_curr_lazy(rq);
} else
check_preempt_curr(rq, p, 0);
}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ee7f23c76bd33..e13090e33f3c4 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true)

#ifdef CONFIG_PREEMPT_RT
SCHED_FEAT(TTWU_QUEUE, false)
+# ifdef CONFIG_PREEMPT_LAZY
+SCHED_FEAT(PREEMPT_LAZY, true)
+# endif
#else

/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3e8df6d31c1e3..6f272ef973675 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2370,6 +2370,15 @@ extern void reweight_task(struct task_struct *p, int prio);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);

+#ifdef CONFIG_PREEMPT_LAZY
+extern void resched_curr_lazy(struct rq *rq);
+#else
+static inline void resched_curr_lazy(struct rq *rq)
+{
+ resched_curr(rq);
+}
+#endif
+
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
diff --git a/kernel/signal.c b/kernel/signal.c
index 8cb28f1df2941..138d68cfc204d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -432,7 +432,18 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
return NULL;

if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
- q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
+
+ if (!sigqueue_flags) {
+ struct sighand_struct *sighand = t->sighand;
+
+ lockdep_assert_held(&sighand->siglock);
+ if (sighand->sigqueue_cache) {
+ q = sighand->sigqueue_cache;
+ sighand->sigqueue_cache = NULL;
+ }
+ }
+ if (!q)
+ q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
} else {
print_dropped_signal(sig);
}
@@ -447,14 +458,43 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
return q;
}

-static void __sigqueue_free(struct sigqueue *q)
+static bool sigqueue_cleanup_accounting(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
- return;
+ return false;
if (q->ucounts) {
dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
q->ucounts = NULL;
}
+ return true;
+}
+
+static void __sigqueue_free(struct sigqueue *q)
+{
+ if (!sigqueue_cleanup_accounting(q))
+ return;
+ kmem_cache_free(sigqueue_cachep, q);
+}
+
+void sigqueue_free_cached_entry(struct sigqueue *q)
+{
+ if (!q)
+ return;
+ kmem_cache_free(sigqueue_cachep, q);
+}
+
+static void sigqueue_cache_or_free(struct sigqueue *q)
+{
+ struct sighand_struct *sighand = current->sighand;
+
+ if (!sigqueue_cleanup_accounting(q))
+ return;
+
+ lockdep_assert_held(&sighand->siglock);
+ if (!sighand->sigqueue_cache) {
+ sighand->sigqueue_cache = q;
+ return;
+ }
kmem_cache_free(sigqueue_cachep, q);
}

@@ -594,7 +634,7 @@ static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *i
(info->si_code == SI_TIMER) &&
(info->si_sys_private);

- __sigqueue_free(first);
+ sigqueue_cache_or_free(first);
} else {
/*
* Ok, it wasn't in the queue. This must be
@@ -2296,15 +2336,31 @@ static int ptrace_stop(int exit_code, int why, unsigned long message,
do_notify_parent_cldstop(current, false, why);

/*
- * Don't want to allow preemption here, because
- * sys_ptrace() needs this task to be inactive.
+ * The previous do_notify_parent_cldstop() invocation woke the ptracer.
+ * On a PREEMPTION kernel this can result in a preemption requirement
+ * which will be fulfilled after read_unlock() and the ptracer will be
+ * put on the CPU.
+ * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for
+ * this task to wait in schedule(). If this task gets preempted then it
+ * remains enqueued on the runqueue. The ptracer will observe this and
+ * then sleep for a delay of one HZ tick. In the meantime this task
+ * gets scheduled, enters schedule() and will wait for the ptracer.
*
- * XXX: implement read_unlock_no_resched().
+ * This preemption point is not bad from a correctness point of view but
+ * extends the runtime by one HZ tick due to the ptracer's sleep.
+ * The preempt-disable section ensures that there will be no preemption
+ * between unlock and schedule() and so improves performance since
+ * the ptracer has no reason to sleep.
+ *
+ * This optimisation is not doable on PREEMPT_RT due to the spinlock_t
+ * within the preempt-disable section.
*/
- preempt_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
read_unlock(&tasklist_lock);
cgroup_enter_frozen();
- preempt_enable_no_resched();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable_no_resched();
schedule();
cgroup_leave_frozen(true);

diff --git a/kernel/softirq.c b/kernel/softirq.c
index c8a6913c067d9..af9e879bbbf75 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -80,21 +80,6 @@ static void wakeup_softirqd(void)
wake_up_process(tsk);
}

-/*
- * If ksoftirqd is scheduled, we do not want to process pending softirqs
- * right now. Let ksoftirqd handle this at its own rate, to get fairness,
- * unless we're doing some of the synchronous softirqs.
- */
-#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
-static bool ksoftirqd_running(unsigned long pending)
-{
- struct task_struct *tsk = __this_cpu_read(ksoftirqd);
-
- if (pending & SOFTIRQ_NOW_MASK)
- return false;
- return tsk && task_is_running(tsk) && !__kthread_should_park(tsk);
-}
-
#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
@@ -236,7 +221,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
goto out;

pending = local_softirq_pending();
- if (!pending || ksoftirqd_running(pending))
+ if (!pending)
goto out;

/*
@@ -432,9 +417,6 @@ static inline bool should_wake_ksoftirqd(void)

static inline void invoke_softirq(void)
{
- if (ksoftirqd_running(local_softirq_pending()))
- return;
-
if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
@@ -468,7 +450,7 @@ asmlinkage __visible void do_softirq(void)

pending = local_softirq_pending();

- if (pending && !ksoftirqd_running(pending))
+ if (pending)
do_softirq_own_stack();

local_irq_restore(flags);
@@ -637,6 +619,24 @@ static inline void tick_irq_exit(void)
#endif
}

+#ifdef CONFIG_PREEMPT_RT
+DEFINE_PER_CPU(struct task_struct *, timersd);
+DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
+
+static void wake_timersd(void)
+{
+ struct task_struct *tsk = __this_cpu_read(timersd);
+
+ if (tsk)
+ wake_up_process(tsk);
+}
+
+#else
+
+static inline void wake_timersd(void) { }
+
+#endif
+
static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -649,6 +649,10 @@ static inline void __irq_exit_rcu(void)
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();

+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() &&
+ !(in_nmi() | in_hardirq()))
+ wake_timersd();
+
tick_irq_exit();
}

@@ -976,12 +980,70 @@ static struct smp_hotplug_thread softirq_threads = {
.thread_comm = "ksoftirqd/%u",
};

+#ifdef CONFIG_PREEMPT_RT
+static void timersd_setup(unsigned int cpu)
+{
+ sched_set_fifo_low(current);
+}
+
+static int timersd_should_run(unsigned int cpu)
+{
+ return local_pending_timers();
+}
+
+static void run_timersd(unsigned int cpu)
+{
+ unsigned int timer_si;
+
+ ksoftirqd_run_begin();
+
+ timer_si = local_pending_timers();
+ __this_cpu_write(pending_timer_softirq, 0);
+ or_softirq_pending(timer_si);
+
+ __do_softirq();
+
+ ksoftirqd_run_end();
+}
+
+static void raise_ktimers_thread(unsigned int nr)
+{
+ trace_softirq_raise(nr);
+ __this_cpu_or(pending_timer_softirq, 1 << nr);
+}
+
+void raise_hrtimer_softirq(void)
+{
+ raise_ktimers_thread(HRTIMER_SOFTIRQ);
+}
+
+void raise_timer_softirq(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ raise_ktimers_thread(TIMER_SOFTIRQ);
+ wake_timersd();
+ local_irq_restore(flags);
+}
+
+static struct smp_hotplug_thread timer_threads = {
+ .store = &timersd,
+ .setup = timersd_setup,
+ .thread_should_run = timersd_should_run,
+ .thread_fn = run_timersd,
+ .thread_comm = "ktimers/%u",
+};
+#endif
+
static __init int spawn_ksoftirqd(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
+#ifdef CONFIG_PREEMPT_RT
+ BUG_ON(smpboot_register_percpu_thread(&timer_threads));
+#endif
return 0;
}
early_initcall(spawn_ksoftirqd);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e8c08292defcb..10c1246cdba76 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}

__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void)
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}

__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 808a247205a9a..c0a32aab8f276 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -140,25 +140,29 @@ static struct k_itimer *posix_timer_by_id(timer_t id)
static int posix_timer_add(struct k_itimer *timer)
{
struct signal_struct *sig = current->signal;
- int first_free_id = sig->posix_timer_id;
struct hlist_head *head;
- int ret = -ENOENT;
+ unsigned int start, id;

- do {
+ /* Can be written by a different task concurrently in the loop below */
+ start = READ_ONCE(sig->next_posix_timer_id);
+
+ for (id = ~start; start != id; id++) {
spin_lock(&hash_lock);
- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+ id = sig->next_posix_timer_id;
+
+ /* Write the next ID back. Clamp it to the positive space */
+ WRITE_ONCE(sig->next_posix_timer_id, (id + 1) & INT_MAX);
+
+ head = &posix_timers_hashtable[hash(sig, id)];
+ if (!__posix_timers_find(head, sig, id)) {
hlist_add_head_rcu(&timer->t_hash, head);
- ret = sig->posix_timer_id;
+ spin_unlock(&hash_lock);
+ return id;
}
- if (++sig->posix_timer_id < 0)
- sig->posix_timer_id = 0;
- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
- /* Loop over all possible ids completed */
- ret = -EAGAIN;
spin_unlock(&hash_lock);
- } while (ret == -ENOENT);
- return ret;
+ }
+ /* POSIX return code when no timer ID could be allocated */
+ return -EAGAIN;
}

static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
@@ -1037,27 +1041,59 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
}

/*
- * return timer owned by the process, used by exit_itimers
+ * Delete a timer if it is armed, remove it from the hash and schedule it
+ * for RCU freeing.
*/
static void itimer_delete(struct k_itimer *timer)
{
-retry_delete:
- spin_lock_irq(&timer->it_lock);
+ unsigned long flags;

+retry_delete:
+ /*
+ * irqsave is required to make timer_wait_running() work.
+ */
+ spin_lock_irqsave(&timer->it_lock, flags);
+
+ /*
+ * Even if the timer is no longer accessible from other tasks
+ * it still might be armed and queued in the underlying timer
+ * mechanism. Worse, that timer mechanism might run the expiry
+ * function concurrently.
+ */
if (timer_delete_hook(timer) == TIMER_RETRY) {
- spin_unlock_irq(&timer->it_lock);
+ /*
+ * Timer is expired concurrently, prevent livelocks
+ * and pointless spinning on RT.
+ *
+ * The CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y case is
+ * irrelevant here because obviously the exiting task
+ * cannot be expiring a timer in task work concurrently.
+ * Ditto for CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n as the
+ * tick interrupt cannot run on this CPU because the above
+ * spin_lock disabled interrupts.
+ *
+ * timer_wait_running() drops timer::it_lock, which opens
+ * the possibility for another task to delete the timer.
+ *
+ * That's not possible here because this is invoked from
+ * do_exit() only for the last thread of the thread group.
+ * So no other task can access that timer.
+ */
+ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer))
+ return;
+
goto retry_delete;
}
list_del(&timer->list);

- spin_unlock_irq(&timer->it_lock);
+ spin_unlock_irqrestore(&timer->it_lock, flags);
release_posix_timer(timer, IT_ID_SET);
}

/*
- * This is called by do_exit or de_thread, only when nobody else can
- * modify the signal->posix_timers list. Yet we need sighand->siglock
- * to prevent the race with /proc/pid/timers.
+ * Invoked from do_exit() when the last thread of a thread group exits.
+ * At that point no other task can access the timers of the dying
+ * task anymore.
*/
void exit_itimers(struct task_struct *tsk)
{
@@ -1067,10 +1103,12 @@ void exit_itimers(struct task_struct *tsk)
if (list_empty(&tsk->signal->posix_timers))
return;

+ /* Protect against concurrent read via /proc/$PID/timers */
spin_lock_irq(&tsk->sighand->siglock);
list_replace_init(&tsk->signal->posix_timers, &timers);
spin_unlock_irq(&tsk->sighand->siglock);

+ /* The timers are no longer accessible via tsk::signal */
while (!list_empty(&timers)) {
tmr = list_first_entry(&timers, struct k_itimer, list);
itimer_delete(tmr);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a46506f7ec6d0..1ae9e4e8a0715 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -789,7 +789,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)

static inline bool local_timer_softirq_pending(void)
{
- return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+ return local_pending_timers() & BIT(TIMER_SOFTIRQ);
}

static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 63a8ce7177dd4..7cad6fe3c035c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2054,7 +2054,7 @@ static void run_local_timers(void)
if (time_before(jiffies, base->next_expiry))
return;
}
- raise_softirq(TIMER_SOFTIRQ);
+ raise_timer_softirq();
}

/*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5909aaf2f4c08..2867def70f441 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2694,11 +2694,19 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
trace_flags |= TRACE_FLAG_BH_OFF;

- if (tif_need_resched())
+ if (tif_need_resched_now())
trace_flags |= TRACE_FLAG_NEED_RESCHED;
+#ifdef CONFIG_PREEMPT_LAZY
+ /* Run out of bits. Share the LAZY and PREEMPT_RESCHED */
+ if (need_resched_lazy())
+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
+#else
if (test_preempt_need_resched())
trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+#endif
+
+ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+ (preempt_lazy_count() & 0xff) << 16 |
(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}

@@ -4287,15 +4295,17 @@ unsigned long trace_total_entries(struct trace_array *tr)

static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off/BH-disabled\n"
- "# | / _----=> need-resched \n"
- "# || / _---=> hardirq/softirq \n"
- "# ||| / _--=> preempt-depth \n"
- "# |||| / _-=> migrate-disable \n"
- "# ||||| / delay \n"
- "# cmd pid |||||| time | caller \n"
- "# \\ / |||||| \\ | / \n");
+ seq_puts(m, "# _--------=> CPU# \n"
+ "# / _-------=> irqs-off/BH-disabled\n"
+ "# | / _------=> need-resched \n"
+ "# || / _-----=> need-resched-lazy\n"
+ "# ||| / _----=> hardirq/softirq \n"
+ "# |||| / _---=> preempt-depth \n"
+ "# ||||| / _--=> preempt-lazy-depth\n"
+ "# |||||| / _-=> migrate-disable \n"
+ "# ||||||| / delay \n"
+ "# cmd pid |||||||| time | caller \n"
+ "# \\ / |||||||| \\ | / \n");
}

static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -4329,14 +4339,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file

print_event_info(buf, m);

- seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
- seq_printf(m, "# %.*s|||| / delay\n", prec, space);
- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
+ seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space);
+ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space);
+ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space);
+ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space);
+ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space);
+ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
+ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space);
+ seq_printf(m, "# %.*s|||||| / delay\n", prec, space);
+ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | ");
}

void
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 654ffa40457aa..b2d52f8355b70 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -208,6 +208,7 @@ static int trace_define_common_fields(void)
/* Holds both preempt_count and migrate_disable */
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
+ __common_field(unsigned char, preempt_lazy_count);

return ret;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bd475a00f96d1..89d4a3bfdc6d5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
char hardsoft_irq;
char need_resched;
+ char need_resched_lazy;
char irqs_off;
int hardirq;
int softirq;
@@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)

switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
TRACE_FLAG_PREEMPT_RESCHED)) {
+#ifndef CONFIG_PREEMPT_LAZY
case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
need_resched = 'N';
break;
+#endif
case TRACE_FLAG_NEED_RESCHED:
need_resched = 'n';
break;
+#ifndef CONFIG_PREEMPT_LAZY
case TRACE_FLAG_PREEMPT_RESCHED:
need_resched = 'p';
break;
+#endif
default:
need_resched = '.';
break;
}

+ need_resched_lazy =
+ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
+
hardsoft_irq =
(nmi && hardirq) ? 'Z' :
nmi ? 'z' :
@@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
softirq ? 's' :
'.' ;

- trace_seq_printf(s, "%c%c%c",
- irqs_off, need_resched, hardsoft_irq);
+ trace_seq_printf(s, "%c%c%c%c",
+ irqs_off, need_resched, need_resched_lazy,
+ hardsoft_irq);

if (entry->preempt_count & 0xf)
trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
else
trace_seq_putc(s, '.');

+ if (entry->preempt_lazy_count)
+ trace_seq_printf(s, "%x", entry->preempt_lazy_count);
+ else
+ trace_seq_putc(s, '.');
+
if (entry->preempt_count & 0xf0)
trace_seq_printf(s, "%x", entry->preempt_count >> 4);
else
diff --git a/localversion-rt b/localversion-rt
new file mode 100644
index 0000000000000..18777ec0c27d4
--- /dev/null
+++ b/localversion-rt
@@ -0,0 +1 @@
+-rt15
diff --git a/net/core/dev.c b/net/core/dev.c
index 1488f700bf819..8c3b70160be8c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4573,15 +4573,6 @@ static void rps_trigger_softirq(void *data)

#endif /* CONFIG_RPS */

-/* Called from hardirq (IPI) context */
-static void trigger_rx_softirq(void *data)
-{
- struct softnet_data *sd = data;
-
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- smp_store_release(&sd->defer_ipi_scheduled, 0);
-}
-
/*
* Check if this softnet_data structure is another cpu one
* If yes, queue it to our IPI list and return 1
@@ -6632,6 +6623,30 @@ static void skb_defer_free_flush(struct softnet_data *sd)
}
}

+#ifndef CONFIG_PREEMPT_RT
+/* Called from hardirq (IPI) context */
+static void trigger_rx_softirq(void *data)
+{
+ struct softnet_data *sd = data;
+
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ smp_store_release(&sd->defer_ipi_scheduled, 0);
+}
+
+#else
+
+static void trigger_rx_softirq(struct work_struct *defer_work)
+{
+ struct softnet_data *sd;
+
+ sd = container_of(defer_work, struct softnet_data, defer_work);
+ smp_store_release(&sd->defer_ipi_scheduled, 0);
+ local_bh_disable();
+ skb_defer_free_flush(sd);
+ local_bh_enable();
+}
+#endif
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -11409,7 +11424,11 @@ static int __init net_dev_init(void)
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
sd->cpu = i;
#endif
+#ifndef CONFIG_PREEMPT_RT
INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
+#else
+ INIT_WORK(&sd->defer_work, trigger_rx_softirq);
+#endif
spin_lock_init(&sd->defer_lock);

init_gro_hash(&sd->backlog);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 14bb41aafee30..3f8dac23205c6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6856,6 +6856,11 @@ nodefer: __kfree_skb(skb);
/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
* if we are unlucky enough (this seems very unlikely).
*/
- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
+ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) {
+#ifndef CONFIG_PREEMPT_RT
smp_call_function_single_async(cpu, &sd->defer_csd);
+#else
+ schedule_work_on(cpu, &sd->defer_work);
+#endif
+ }
}