distribution/packages/kernel/linux/patches/X86_64/patch-5.19-rt10.patch

diff -rupN linux-5.19.16.orig/arch/arm/include/asm/thread_info.h linux-5.19.16/arch/arm/include/asm/thread_info.h
--- linux-5.19.16.orig/arch/arm/include/asm/thread_info.h 2022-10-18 17:21:09.088513860 -0400
+++ linux-5.19.16/arch/arm/include/asm/thread_info.h 2022-10-18 17:21:17.796446755 -0400
@@ -62,6 +62,7 @@ struct cpu_context_save {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
__u32 cpu; /* cpu */
__u32 cpu_domain; /* cpu domain */
struct cpu_context_save cpu_context; /* cpu context */
@@ -133,6 +134,7 @@ extern int vfp_restore_user_hwstate(stru
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 9
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
@@ -147,6 +149,7 @@ extern int vfp_restore_user_hwstate(stru
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
/* Checks for any syscall work in entry-common.S */
@@ -156,7 +159,8 @@ extern int vfp_restore_user_hwstate(stru
/*
* Change these and you break ASM code in entry-common.S
*/
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NOTIFY_SIGNAL)
diff -rupN linux-5.19.16.orig/arch/arm/Kconfig linux-5.19.16/arch/arm/Kconfig
--- linux-5.19.16.orig/arch/arm/Kconfig 2022-10-18 17:21:09.020514385 -0400
+++ linux-5.19.16/arch/arm/Kconfig 2022-10-18 17:21:17.796446755 -0400
@@ -34,6 +34,7 @@ config ARM
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
@@ -71,7 +72,7 @@ config ARM
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
@@ -113,6 +114,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
@@ -128,6 +130,7 @@ config ARM
select OLD_SIGSUSPEND3
select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
select THREAD_INFO_IN_TASK
diff -rupN linux-5.19.16.orig/arch/arm/kernel/asm-offsets.c linux-5.19.16/arch/arm/kernel/asm-offsets.c
--- linux-5.19.16.orig/arch/arm/kernel/asm-offsets.c 2022-10-18 17:21:09.092513830 -0400
+++ linux-5.19.16/arch/arm/kernel/asm-offsets.c 2022-10-18 17:21:17.796446755 -0400
@@ -43,6 +43,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain));
DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context));
diff -rupN linux-5.19.16.orig/arch/arm/kernel/entry-armv.S linux-5.19.16/arch/arm/kernel/entry-armv.S
--- linux-5.19.16.orig/arch/arm/kernel/entry-armv.S 2022-10-18 17:21:09.092513830 -0400
+++ linux-5.19.16/arch/arm/kernel/entry-armv.S 2022-10-18 17:21:17.796446755 -0400
@@ -222,11 +222,18 @@ __irq_svc:
#ifdef CONFIG_PREEMPTION
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exception
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY
blne svc_preempt
+1:
#endif
svc_exit r5, irq = 1 @ return from exception
@@ -241,8 +248,14 @@ svc_preempt:
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
- b 1b
+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r0, #0 @ if preempt lazy count != 0
+ beq 1b
+ ret r8 @ go again
+
#endif
__und_fault:
diff -rupN linux-5.19.16.orig/arch/arm/kernel/irq.c linux-5.19.16/arch/arm/kernel/irq.c
--- linux-5.19.16.orig/arch/arm/kernel/irq.c 2022-10-18 17:21:09.092513830 -0400
+++ linux-5.19.16/arch/arm/kernel/irq.c 2022-10-18 17:21:17.796446755 -0400
@@ -70,6 +70,7 @@ static void __init init_irq_stacks(void)
}
}
+#ifndef CONFIG_PREEMPT_RT
static void ____do_softirq(void *arg)
{
__do_softirq();
@@ -80,7 +81,7 @@ void do_softirq_own_stack(void)
call_with_stack(____do_softirq, NULL,
__this_cpu_read(irq_stack_ptr));
}
-
+#endif
#endif
int arch_show_interrupts(struct seq_file *p, int prec)
diff -rupN linux-5.19.16.orig/arch/arm/kernel/signal.c linux-5.19.16/arch/arm/kernel/signal.c
--- linux-5.19.16.orig/arch/arm/kernel/signal.c 2022-10-18 17:21:09.092513830 -0400
+++ linux-5.19.16/arch/arm/kernel/signal.c 2022-10-18 17:21:17.796446755 -0400
@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, un
*/
trace_hardirqs_off();
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
diff -rupN linux-5.19.16.orig/arch/arm/mm/fault.c linux-5.19.16/arch/arm/mm/fault.c
--- linux-5.19.16.orig/arch/arm/mm/fault.c 2022-10-18 17:21:09.124513584 -0400
+++ linux-5.19.16/arch/arm/mm/fault.c 2022-10-18 17:21:17.796446755 -0400
@@ -417,6 +417,9 @@ do_translation_fault(unsigned long addr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;
@@ -487,6 +490,9 @@ do_translation_fault(unsigned long addr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}
diff -rupN linux-5.19.16.orig/arch/arm64/include/asm/preempt.h linux-5.19.16/arch/arm64/include/asm/preempt.h
--- linux-5.19.16.orig/arch/arm64/include/asm/preempt.h 2022-10-18 17:21:09.164513276 -0400
+++ linux-5.19.16/arch/arm64/include/asm/preempt.h 2022-10-18 17:21:17.796446755 -0400
@@ -71,13 +71,36 @@ static inline bool __preempt_count_dec_a
* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
* pair.
*/
- return !pc || !READ_ONCE(ti->preempt_count);
+ if (!pc || !READ_ONCE(ti->preempt_count))
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if ((pc & ~PREEMPT_NEED_RESCHED))
+ return false;
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
}
static inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ if (pc == preempt_offset)
+ return true;
+
+ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
+ return false;
+
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
u64 pc = READ_ONCE(current_thread_info()->preempt_count);
return pc == preempt_offset;
+#endif
}
#ifdef CONFIG_PREEMPTION
diff -rupN linux-5.19.16.orig/arch/arm64/include/asm/thread_info.h linux-5.19.16/arch/arm64/include/asm/thread_info.h
--- linux-5.19.16.orig/arch/arm64/include/asm/thread_info.h 2022-10-18 17:21:09.164513276 -0400
+++ linux-5.19.16/arch/arm64/include/asm/thread_info.h 2022-10-18 17:21:17.796446755 -0400
@@ -26,6 +26,7 @@ struct thread_info {
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
union {
u64 preempt_count; /* 0 => preemptible, <0 => bug */
struct {
@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_str
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 7
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
@@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_str
#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
_TIF_NOTIFY_SIGNAL)
@@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_str
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_SYSCALL_EMU)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
#ifdef CONFIG_SHADOW_CALL_STACK
#define INIT_SCS \
.scs_base = init_shadow_call_stack, \
diff -rupN linux-5.19.16.orig/arch/arm64/Kconfig linux-5.19.16/arch/arm64/Kconfig
--- linux-5.19.16.orig/arch/arm64/Kconfig 2022-10-18 17:21:09.128513553 -0400
+++ linux-5.19.16/arch/arm64/Kconfig 2022-10-18 17:21:17.796446755 -0400
@@ -93,6 +93,7 @@ config ARM64
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select ARCH_SUPPORTS_RT
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -197,6 +198,7 @@ config ARM64
select HAVE_PERF_USER_STACK_DUMP
select HAVE_PREEMPT_DYNAMIC_KEY
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_PREEMPT_LAZY
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_FUNCTION_ARG_ACCESS_API
select MMU_GATHER_RCU_TABLE_FREE
diff -rupN linux-5.19.16.orig/arch/arm64/kernel/asm-offsets.c linux-5.19.16/arch/arm64/kernel/asm-offsets.c
--- linux-5.19.16.orig/arch/arm64/kernel/asm-offsets.c 2022-10-18 17:21:09.164513276 -0400
+++ linux-5.19.16/arch/arm64/kernel/asm-offsets.c 2022-10-18 17:21:17.796446755 -0400
@@ -32,6 +32,7 @@ int main(void)
DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu));
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
diff -rupN linux-5.19.16.orig/arch/arm64/kernel/signal.c linux-5.19.16/arch/arm64/kernel/signal.c
--- linux-5.19.16.orig/arch/arm64/kernel/signal.c 2022-10-18 17:21:09.168513245 -0400
+++ linux-5.19.16/arch/arm64/kernel/signal.c 2022-10-18 17:21:17.796446755 -0400
@@ -1099,7 +1099,7 @@ static void do_signal(struct pt_regs *re
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
- if (thread_flags & _TIF_NEED_RESCHED) {
+ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
diff -rupN linux-5.19.16.orig/arch/parisc/kernel/irq.c linux-5.19.16/arch/parisc/kernel/irq.c
--- linux-5.19.16.orig/arch/parisc/kernel/irq.c 2022-10-18 17:21:09.252512598 -0400
+++ linux-5.19.16/arch/parisc/kernel/irq.c 2022-10-18 17:21:17.796446755 -0400
@@ -480,10 +480,12 @@ static void execute_on_irq_stack(void *f
*irq_stack_in_use = 1;
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
execute_on_irq_stack(__do_softirq, 0);
}
+#endif
#endif /* CONFIG_IRQSTACKS */
/* ONLY called from entry.S:intr_extint() */
diff -rupN linux-5.19.16.orig/arch/powerpc/include/asm/stackprotector.h linux-5.19.16/arch/powerpc/include/asm/stackprotector.h
--- linux-5.19.16.orig/arch/powerpc/include/asm/stackprotector.h 2022-10-18 17:21:09.272512444 -0400
+++ linux-5.19.16/arch/powerpc/include/asm/stackprotector.h 2022-10-18 17:21:17.796446755 -0400
@@ -24,7 +24,11 @@ static __always_inline void boot_init_st
unsigned long canary;
/* Try to get a semi random initial value. */
+#ifdef CONFIG_PREEMPT_RT
+ canary = (unsigned long)&canary;
+#else
canary = get_random_canary();
+#endif
canary ^= mftb();
canary ^= LINUX_VERSION_CODE;
canary &= CANARY_MASK;
diff -rupN linux-5.19.16.orig/arch/powerpc/include/asm/thread_info.h linux-5.19.16/arch/powerpc/include/asm/thread_info.h
--- linux-5.19.16.orig/arch/powerpc/include/asm/thread_info.h 2022-10-18 17:21:09.276512413 -0400
+++ linux-5.19.16/arch/powerpc/include/asm/thread_info.h 2022-10-18 17:21:17.796446755 -0400
@@ -53,6 +53,8 @@
struct thread_info {
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ int preempt_lazy_count; /* 0 => preemptable,
+ <0 => BUG */
#ifdef CONFIG_SMP
unsigned int cpu;
#endif
@@ -77,6 +79,7 @@ struct thread_info {
#define INIT_THREAD_INFO(tsk) \
{ \
.preempt_count = INIT_PREEMPT_COUNT, \
+ .preempt_lazy_count = 0, \
.flags = 0, \
}
@@ -102,6 +105,7 @@ void arch_setup_new_exec(void);
#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
@@ -117,6 +121,7 @@ void arch_setup_new_exec(void);
#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_32BIT 20 /* 32 bit binary */
+
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -128,6 +133,7 @@ void arch_setup_new_exec(void);
#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_RESTOREALL (1<<TIF_RESTOREALL)
#define _TIF_NOERROR (1<<TIF_NOERROR)
@@ -141,10 +147,12 @@ void arch_setup_new_exec(void);
_TIF_SYSCALL_EMU)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+ _TIF_NEED_RESCHED_LAZY | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
_TIF_NOTIFY_SIGNAL)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
/* Bits in local_flags */
/* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
diff -rupN linux-5.19.16.orig/arch/powerpc/Kconfig linux-5.19.16/arch/powerpc/Kconfig
--- linux-5.19.16.orig/arch/powerpc/Kconfig 2022-10-18 17:21:09.256512567 -0400
+++ linux-5.19.16/arch/powerpc/Kconfig 2022-10-18 17:21:17.796446755 -0400
@@ -150,6 +150,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
@@ -240,8 +241,10 @@ config PPC
select HAVE_PERF_EVENTS_NMI if PPC64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select HAVE_RSEQ
select HAVE_SETUP_PER_CPU_AREA if PPC64
select HAVE_SOFTIRQ_ON_OWN_STACK
diff -rupN linux-5.19.16.orig/arch/powerpc/kernel/interrupt.c linux-5.19.16/arch/powerpc/kernel/interrupt.c
--- linux-5.19.16.orig/arch/powerpc/kernel/interrupt.c 2022-10-18 17:21:09.280512382 -0400
+++ linux-5.19.16/arch/powerpc/kernel/interrupt.c 2022-10-18 17:21:17.796446755 -0400
@@ -345,7 +345,7 @@ again:
ti_flags = read_thread_flags();
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
- if (ti_flags & _TIF_NEED_RESCHED) {
+ if (ti_flags & _TIF_NEED_RESCHED_MASK) {
schedule();
} else {
/*
@@ -549,11 +549,15 @@ notrace unsigned long interrupt_exit_ker
/* Returning to a kernel context with local irqs enabled. */
WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
- if (IS_ENABLED(CONFIG_PREEMPT)) {
+ if (IS_ENABLED(CONFIG_PREEMPTION)) {
/* Return to preemptible kernel context */
if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
if (preempt_count() == 0)
preempt_schedule_irq();
+ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
+ if ((preempt_count() == 0) &&
+ (current_thread_info()->preempt_lazy_count == 0))
+ preempt_schedule_irq();
}
}
diff -rupN linux-5.19.16.orig/arch/powerpc/kernel/irq.c linux-5.19.16/arch/powerpc/kernel/irq.c
--- linux-5.19.16.orig/arch/powerpc/kernel/irq.c 2022-10-18 17:21:09.280512382 -0400
+++ linux-5.19.16/arch/powerpc/kernel/irq.c 2022-10-18 17:21:17.796446755 -0400
@@ -611,6 +611,7 @@ static inline void check_stack_overflow(
}
}
+#ifndef CONFIG_PREEMPT_RT
static __always_inline void call_do_softirq(const void *sp)
{
/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
@@ -629,6 +630,7 @@ static __always_inline void call_do_soft
"r11", "r12"
);
}
+#endif
static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
{
@@ -747,10 +749,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most
void *softirq_ctx[NR_CPUS] __read_mostly;
void *hardirq_ctx[NR_CPUS] __read_mostly;
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
call_do_softirq(softirq_ctx[smp_processor_id()]);
}
+#endif
irq_hw_number_t virq_to_hw(unsigned int virq)
{
diff -rupN linux-5.19.16.orig/arch/powerpc/kernel/traps.c linux-5.19.16/arch/powerpc/kernel/traps.c
--- linux-5.19.16.orig/arch/powerpc/kernel/traps.c 2022-10-18 17:21:09.280512382 -0400
+++ linux-5.19.16/arch/powerpc/kernel/traps.c 2022-10-18 17:21:17.796446755 -0400
@@ -260,12 +260,17 @@ static char *get_mmu_str(void)
static int __die(const char *str, struct pt_regs *regs, long err)
{
+ const char *pr = "";
+
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
PAGE_SIZE / 1024, get_mmu_str(),
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ pr,
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
diff -rupN linux-5.19.16.orig/arch/powerpc/kvm/Kconfig linux-5.19.16/arch/powerpc/kvm/Kconfig
--- linux-5.19.16.orig/arch/powerpc/kvm/Kconfig 2022-10-18 17:21:09.284512352 -0400
+++ linux-5.19.16/arch/powerpc/kvm/Kconfig 2022-10-18 17:21:17.796446755 -0400
@@ -204,6 +204,7 @@ config KVM_E500MC
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
+ depends on !PREEMPT_RT
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
diff -rupN linux-5.19.16.orig/arch/powerpc/platforms/pseries/iommu.c linux-5.19.16/arch/powerpc/platforms/pseries/iommu.c
--- linux-5.19.16.orig/arch/powerpc/platforms/pseries/iommu.c 2022-10-18 17:21:09.296512259 -0400
+++ linux-5.19.16/arch/powerpc/platforms/pseries/iommu.c 2022-10-18 17:21:17.796446755 -0400
@@ -24,6 +24,7 @@
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
+#include <linux/local_lock.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned
return ret;
}
-static DEFINE_PER_CPU(__be64 *, tce_page);
+struct tce_page {
+ __be64 * page;
+ local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(stru
direction, attrs);
}
- local_irq_save(flags); /* to protect tcep and the page behind it */
+ /* to protect tcep and the page behind it */
+ local_lock_irqsave(&tce_page.lock, flags);
- tcep = __this_cpu_read(tce_page);
+ tcep = __this_cpu_read(tce_page.page);
/* This is safe to do since interrupts are off when we're called
* from iommu_alloc{,_sg}()
@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(stru
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
return tce_build_pSeriesLP(tbl->it_index, tcenum,
tceshift,
npages, uaddr, direction, attrs);
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
rpn = __pa(uaddr) >> tceshift;
@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(stru
tcenum += limit;
} while (npages > 0 && !rc);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(
DMA_BIDIRECTIONAL, 0);
}
- local_irq_disable(); /* to protect tcep and the page behind it */
- tcep = __this_cpu_read(tce_page);
+ /* to protect tcep and the page behind it */
+ local_lock_irq(&tce_page.lock);
+ tcep = __this_cpu_read(tce_page.page);
if (!tcep) {
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
if (!tcep) {
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return -ENOMEM;
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(
/* error cleanup: caller will clear whole range */
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return rc;
}
diff -rupN linux-5.19.16.orig/arch/s390/include/asm/softirq_stack.h linux-5.19.16/arch/s390/include/asm/softirq_stack.h
--- linux-5.19.16.orig/arch/s390/include/asm/softirq_stack.h 2022-10-18 17:21:09.312512136 -0400
+++ linux-5.19.16/arch/s390/include/asm/softirq_stack.h 2022-10-18 17:21:17.796446755 -0400
@@ -5,9 +5,10 @@
#include <asm/lowcore.h>
#include <asm/stacktrace.h>
+#ifndef CONFIG_PREEMPT_RT
static inline void do_softirq_own_stack(void)
{
call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
}
-
+#endif
#endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff -rupN linux-5.19.16.orig/arch/sh/kernel/irq.c linux-5.19.16/arch/sh/kernel/irq.c
--- linux-5.19.16.orig/arch/sh/kernel/irq.c 2022-10-18 17:21:09.328512013 -0400
+++ linux-5.19.16/arch/sh/kernel/irq.c 2022-10-18 17:21:17.796446755 -0400
@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu)
hardirq_ctx[cpu] = NULL;
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
struct thread_info *curctx;
@@ -176,6 +177,7 @@ void do_softirq_own_stack(void)
"r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
);
}
+#endif
#else
static inline void handle_one_irq(unsigned int irq)
{
diff -rupN linux-5.19.16.orig/arch/sparc/kernel/irq_64.c linux-5.19.16/arch/sparc/kernel/irq_64.c
--- linux-5.19.16.orig/arch/sparc/kernel/irq_64.c 2022-10-18 17:21:09.344511889 -0400
+++ linux-5.19.16/arch/sparc/kernel/irq_64.c 2022-10-18 17:21:17.800446725 -0400
@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, st
set_irq_regs(old_regs);
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
void *orig_sp, *sp = softirq_stack[smp_processor_id()];
@@ -869,6 +870,7 @@ void do_softirq_own_stack(void)
__asm__ __volatile__("mov %0, %%sp"
: : "r" (orig_sp));
}
+#endif
#ifdef CONFIG_HOTPLUG_CPU
void fixup_irqs(void)
diff -rupN linux-5.19.16.orig/arch/x86/include/asm/preempt.h linux-5.19.16/arch/x86/include/asm/preempt.h
--- linux-5.19.16.orig/arch/x86/include/asm/preempt.h 2022-10-18 17:21:09.364511736 -0400
+++ linux-5.19.16/arch/x86/include/asm/preempt.h 2022-10-18 17:21:17.800446725 -0400
@@ -90,17 +90,48 @@ static __always_inline void __preempt_co
* a decrement which hits zero means we have no preempt_count and should
* reschedule.
*/
-static __always_inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool ____preempt_count_dec_and_test(void)
{
return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
}
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+ if (____preempt_count_dec_and_test())
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if (preempt_count())
+ return false;
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
+}
+
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u32 tmp;
+ tmp = raw_cpu_read_4(__preempt_count);
+ if (tmp == preempt_offset)
+ return true;
+
+ /* preempt count == 0 ? */
+ tmp &= ~PREEMPT_NEED_RESCHED;
+ if (tmp != preempt_offset)
+ return false;
+ /* XXX PREEMPT_LOCK_OFFSET */
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+#endif
}
#ifdef CONFIG_PREEMPTION
diff -rupN linux-5.19.16.orig/arch/x86/include/asm/thread_info.h linux-5.19.16/arch/x86/include/asm/thread_info.h
--- linux-5.19.16.orig/arch/x86/include/asm/thread_info.h 2022-10-18 17:21:09.368511705 -0400
+++ linux-5.19.16/arch/x86/include/asm/thread_info.h 2022-10-18 17:21:17.800446725 -0400
@@ -57,6 +57,8 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
u32 status; /* thread synchronous flags */
+ int preempt_lazy_count; /* 0 => lazy preemptable
+ <0 => BUG */
#ifdef CONFIG_SMP
u32 cpu; /* current CPU */
#endif
@@ -65,6 +67,7 @@ struct thread_info {
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = 0, \
+ .preempt_lazy_count = 0, \
}
#else /* !__ASSEMBLY__ */
@@ -92,6 +95,7 @@ struct thread_info {
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -115,6 +119,7 @@ struct thread_info {
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
diff -rupN linux-5.19.16.orig/arch/x86/Kconfig linux-5.19.16/arch/x86/Kconfig
--- linux-5.19.16.orig/arch/x86/Kconfig 2022-10-18 17:21:09.352511828 -0400
+++ linux-5.19.16/arch/x86/Kconfig 2022-10-18 17:21:17.800446725 -0400
@@ -110,6 +110,7 @@ config X86
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
+ select ARCH_SUPPORTS_RT
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
@@ -244,6 +245,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
diff -rupN linux-5.19.16.orig/block/blk-mq.c linux-5.19.16/block/blk-mq.c
--- linux-5.19.16.orig/block/blk-mq.c 2022-10-18 17:21:09.396511489 -0400
+++ linux-5.19.16/block/blk-mq.c 2022-10-18 17:21:17.800446725 -0400
@@ -2086,14 +2086,10 @@ static void __blk_mq_delay_run_hw_queue(
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
- int cpu = get_cpu();
- if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+ if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
- put_cpu();
return;
}
-
- put_cpu();
}
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
diff -rupN linux-5.19.16.orig/drivers/bcma/driver_gpio.c linux-5.19.16/drivers/bcma/driver_gpio.c
--- linux-5.19.16.orig/drivers/bcma/driver_gpio.c 2022-10-18 17:21:09.432511212 -0400
+++ linux-5.19.16/drivers/bcma/driver_gpio.c 2022-10-18 17:21:17.800446725 -0400
@@ -115,7 +115,7 @@ static irqreturn_t bcma_gpio_irq_handler
return IRQ_NONE;
for_each_set_bit(gpio, &irqs, gc->ngpio)
- generic_handle_irq(irq_find_mapping(gc->irq.domain, gpio));
+ generic_handle_domain_irq_safe(gc->irq.domain, gpio);
bcma_chipco_gpio_polarity(cc, irqs, val & irqs);
return IRQ_HANDLED;
diff -rupN linux-5.19.16.orig/drivers/block/zram/zram_drv.c linux-5.19.16/drivers/block/zram/zram_drv.c
--- linux-5.19.16.orig/drivers/block/zram/zram_drv.c 2022-10-18 17:21:09.436511181 -0400
+++ linux-5.19.16/drivers/block/zram/zram_drv.c 2022-10-18 17:21:17.800446725 -0400
@@ -58,6 +58,40 @@ static void zram_free_page(struct zram *
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
u32 index, int offset, struct bio *bio);
+#ifdef CONFIG_PREEMPT_RT
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
+{
+ size_t index;
+
+ for (index = 0; index < num_pages; index++)
+ spin_lock_init(&zram->table[index].lock);
+}
+
+static int zram_slot_trylock(struct zram *zram, u32 index)
+{
+ int ret;
+
+ ret = spin_trylock(&zram->table[index].lock);
+ if (ret)
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+ return ret;
+}
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+ spin_lock(&zram->table[index].lock);
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+ __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
+ spin_unlock(&zram->table[index].lock);
+}
+
+#else
+
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
static int zram_slot_trylock(struct zram *zram, u32 index)
{
@@ -73,6 +107,7 @@ static void zram_slot_unlock(struct zram
{
bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}
+#endif
static inline bool init_done(struct zram *zram)
{
@@ -1196,6 +1231,7 @@ static bool zram_meta_alloc(struct zram
if (!huge_class_size)
huge_class_size = zs_huge_class_size(zram->mem_pool);
+ zram_meta_init_table_locks(zram, num_pages);
return true;
}
diff -rupN linux-5.19.16.orig/drivers/block/zram/zram_drv.h linux-5.19.16/drivers/block/zram/zram_drv.h
--- linux-5.19.16.orig/drivers/block/zram/zram_drv.h 2022-10-18 17:21:09.436511181 -0400
+++ linux-5.19.16/drivers/block/zram/zram_drv.h 2022-10-18 17:21:17.800446725 -0400
@@ -63,6 +63,9 @@ struct zram_table_entry {
unsigned long element;
};
unsigned long flags;
+#ifdef CONFIG_PREEMPT_RT
+ spinlock_t lock;
+#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
ktime_t ac_time;
#endif
diff -rupN linux-5.19.16.orig/drivers/char/tpm/tpm_tis.c linux-5.19.16/drivers/char/tpm/tpm_tis.c
--- linux-5.19.16.orig/drivers/char/tpm/tpm_tis.c 2022-10-18 17:21:09.444511120 -0400
+++ linux-5.19.16/drivers/char/tpm/tpm_tis.c 2022-10-18 17:21:17.800446725 -0400
@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to
return container_of(data, struct tpm_tis_tcg_phy, priv);
}
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * Flushes previous write operations to chip so that a subsequent
+ * ioread*()s won't stall a cpu.
+ */
+static inline void tpm_tis_flush(void __iomem *iobase)
+{
+ ioread8(iobase + TPM_ACCESS(0));
+}
+#else
+#define tpm_tis_flush(iobase) do { } while (0)
+#endif
+
+static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
+{
+ iowrite8(b, iobase + addr);
+ tpm_tis_flush(iobase);
+}
+
+static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
+{
+ iowrite32(b, iobase + addr);
+ tpm_tis_flush(iobase);
+}
+
static int interrupts = -1;
module_param(interrupts, int, 0444);
MODULE_PARM_DESC(interrupts, "Enable interrupts");
@@ -185,12 +210,12 @@ static int tpm_tcg_write_bytes(struct tp
switch (io_mode) {
case TPM_TIS_PHYS_8:
while (len--)
- iowrite8(*value++, phy->iobase + addr);
+ tpm_tis_iowrite8(*value++, phy->iobase, addr);
break;
case TPM_TIS_PHYS_16:
return -EINVAL;
case TPM_TIS_PHYS_32:
- iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr);
+ tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr);
break;
}
diff -rupN linux-5.19.16.orig/drivers/gpio/gpio-mlxbf2.c linux-5.19.16/drivers/gpio/gpio-mlxbf2.c
--- linux-5.19.16.orig/drivers/gpio/gpio-mlxbf2.c 2022-10-18 17:21:09.528510473 -0400
+++ linux-5.19.16/drivers/gpio/gpio-mlxbf2.c 2022-10-18 17:21:17.800446725 -0400
@@ -273,10 +273,8 @@ static irqreturn_t mlxbf2_gpio_irq_handl
pending = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_CAUSE_EVTEN0);
writel(pending, gs->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
- for_each_set_bit(level, &pending, gc->ngpio) {
- int gpio_irq = irq_find_mapping(gc->irq.domain, level);
- generic_handle_irq(gpio_irq);
- }
+ for_each_set_bit(level, &pending, gc->ngpio)
+ generic_handle_domain_irq_safe(gc->irq.domain, level);
return IRQ_RETVAL(pending);
}
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/display/intel_crtc.c linux-5.19.16/drivers/gpu/drm/i915/display/intel_crtc.c
--- linux-5.19.16.orig/drivers/gpu/drm/i915/display/intel_crtc.c 2022-10-18 17:21:09.804508347 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/display/intel_crtc.c 2022-10-18 17:21:17.800446725 -0400
@@ -522,7 +522,8 @@ void intel_pipe_update_start(struct inte
*/
intel_psr_wait_for_idle_locked(new_crtc_state);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
crtc->debug.min_vbl = min;
crtc->debug.max_vbl = max;
@@ -547,11 +548,13 @@ void intel_pipe_update_start(struct inte
break;
}
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
timeout = schedule_timeout(timeout);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
finish_wait(wq, &wait);
@@ -584,7 +587,8 @@ void intel_pipe_update_start(struct inte
return;
irq_disable:
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
@@ -685,7 +689,8 @@ void intel_pipe_update_end(struct intel_
*/
intel_vrr_send_push(new_crtc_state);
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
if (intel_vgpu_active(dev_priv))
return;
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c linux-5.19.16/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
--- linux-5.19.16.orig/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c 2022-10-18 17:21:09.812508286 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c 2022-10-18 17:21:17.800446725 -0400
@@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct int
/* Kick the work once more to drain the signalers, and disarm the irq */
irq_work_sync(&b->irq_work);
while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
- local_irq_disable();
- signal_irq_work(&b->irq_work);
- local_irq_enable();
+ irq_work_queue(&b->irq_work);
cond_resched();
+ irq_work_sync(&b->irq_work);
}
}
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/gt/intel_execlists_submission.c linux-5.19.16/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
--- linux-5.19.16.orig/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 2022-10-18 17:21:09.812508286 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 2022-10-18 17:21:17.800446725 -0400
@@ -1302,7 +1302,7 @@ static void execlists_dequeue(struct int
* and context switches) submission.
*/
- spin_lock(&sched_engine->lock);
+ spin_lock_irq(&sched_engine->lock);
/*
* If the queue is higher priority than the last
@@ -1402,7 +1402,7 @@ static void execlists_dequeue(struct int
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
return;
}
}
@@ -1428,7 +1428,7 @@ static void execlists_dequeue(struct int
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.sched_engine->lock);
- spin_unlock(&engine->sched_engine->lock);
+ spin_unlock_irq(&engine->sched_engine->lock);
return; /* leave this for another sibling */
}
@@ -1590,7 +1590,7 @@ done:
*/
sched_engine->queue_priority_hint = queue_prio(sched_engine);
i915_sched_engine_reset_on_empty(sched_engine);
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
/*
* We can skip poking the HW if we ended up with exactly the same set
@@ -1616,13 +1616,6 @@ done:
}
}
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
- local_irq_disable(); /* Suspend interrupts across request submission */
- execlists_dequeue(engine);
- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
-}
-
static void clear_ports(struct i915_request **ports, int count)
{
memset_p((void **)ports, NULL, count);
@@ -2468,7 +2461,7 @@ static void execlists_submission_tasklet
}
if (!engine->execlists.pending[0]) {
- execlists_dequeue_irq(engine);
+ execlists_dequeue(engine);
start_timeslice(engine);
}
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/i915_irq.c linux-5.19.16/drivers/gpu/drm/i915/i915_irq.c
--- linux-5.19.16.orig/drivers/gpu/drm/i915/i915_irq.c 2022-10-18 17:21:09.820508224 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/i915_irq.c 2022-10-18 17:21:17.800446725 -0400
@@ -917,7 +917,8 @@ static bool i915_get_crtc_scanoutpos(str
*/
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
/* Get optional system timestamp before query. */
if (stime)
@@ -981,7 +982,8 @@ static bool i915_get_crtc_scanoutpos(str
if (etime)
*etime = ktime_get();
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/i915_request.c linux-5.19.16/drivers/gpu/drm/i915/i915_request.c
--- linux-5.19.16.orig/drivers/gpu/drm/i915/i915_request.c 2022-10-18 17:21:09.820508224 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/i915_request.c 2022-10-18 17:21:17.800446725 -0400
@@ -587,7 +587,6 @@ bool __i915_request_submit(struct i915_r
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*
@@ -696,7 +695,6 @@ void __i915_request_unsubmit(struct i915
*/
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/i915_trace.h linux-5.19.16/drivers/gpu/drm/i915/i915_trace.h
--- linux-5.19.16.orig/drivers/gpu/drm/i915/i915_trace.h 2022-10-18 17:21:09.820508224 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/i915_trace.h 2022-10-18 17:21:17.800446725 -0400
@@ -6,6 +6,10 @@
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _I915_TRACE_H_
+#ifdef CONFIG_PREEMPT_RT
+#define NOTRACE
+#endif
+
#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>
@@ -323,7 +327,7 @@ DEFINE_EVENT(i915_request, i915_request_
TP_ARGS(rq)
);
-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
DEFINE_EVENT(i915_request, i915_request_guc_submit,
TP_PROTO(struct i915_request *rq),
TP_ARGS(rq)
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/i915_utils.h linux-5.19.16/drivers/gpu/drm/i915/i915_utils.h
--- linux-5.19.16.orig/drivers/gpu/drm/i915/i915_utils.h 2022-10-18 17:21:09.820508224 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/i915_utils.h 2022-10-18 17:21:17.800446725 -0400
@@ -334,7 +334,7 @@ wait_remaining_ms_from_jiffies(unsigned
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
#else
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
diff -rupN linux-5.19.16.orig/drivers/gpu/drm/i915/Kconfig linux-5.19.16/drivers/gpu/drm/i915/Kconfig
--- linux-5.19.16.orig/drivers/gpu/drm/i915/Kconfig 2022-10-18 17:21:09.804508347 -0400
+++ linux-5.19.16/drivers/gpu/drm/i915/Kconfig 2022-10-18 17:21:17.800446725 -0400
@@ -3,7 +3,6 @@ config DRM_I915
tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
depends on DRM
depends on X86 && PCI
- depends on !PREEMPT_RT
select INTEL_GTT if X86
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular
diff -rupN linux-5.19.16.orig/drivers/iio/adc/stm32-adc-core.c linux-5.19.16/drivers/iio/adc/stm32-adc-core.c
--- linux-5.19.16.orig/drivers/iio/adc/stm32-adc-core.c 2022-10-18 17:21:09.956507177 -0400
+++ linux-5.19.16/drivers/iio/adc/stm32-adc-core.c 2022-10-18 17:21:17.800446725 -0400
@@ -358,7 +358,7 @@ static void stm32_adc_irq_handler(struct
if ((status & priv->cfg->regs->eoc_msk[i] &&
stm32_adc_eoc_enabled(priv, i)) ||
(status & priv->cfg->regs->ovr_msk[i]))
- generic_handle_irq(irq_find_mapping(priv->domain, i));
+ generic_handle_domain_irq(priv->domain, i);
}
chained_irq_exit(chip, desc);
diff -rupN linux-5.19.16.orig/drivers/pinctrl/pinctrl-amd.c linux-5.19.16/drivers/pinctrl/pinctrl-amd.c
--- linux-5.19.16.orig/drivers/pinctrl/pinctrl-amd.c 2022-10-18 17:21:10.508502926 -0400
+++ linux-5.19.16/drivers/pinctrl/pinctrl-amd.c 2022-10-18 17:21:17.800446725 -0400
@@ -643,7 +643,7 @@ static bool do_amd_gpio_irq_handler(int
if (!(regval & PIN_IRQ_PENDING) ||
!(regval & BIT(INTERRUPT_MASK_OFF)))
continue;
- generic_handle_domain_irq(gc->irq.domain, irqnr + i);
+ generic_handle_domain_irq_safe(gc->irq.domain, irqnr + i);
/* Clear interrupt.
* We must read the pin register again, in case the
diff -rupN linux-5.19.16.orig/drivers/platform/x86/intel/int0002_vgpio.c linux-5.19.16/drivers/platform/x86/intel/int0002_vgpio.c
--- linux-5.19.16.orig/drivers/platform/x86/intel/int0002_vgpio.c 2022-10-18 17:21:10.532502741 -0400
+++ linux-5.19.16/drivers/platform/x86/intel/int0002_vgpio.c 2022-10-18 17:21:17.800446725 -0400
@@ -125,8 +125,7 @@ static irqreturn_t int0002_irq(int irq,
if (!(gpe_sts_reg & GPE0A_PME_B0_STS_BIT))
return IRQ_NONE;
- generic_handle_irq(irq_find_mapping(chip->irq.domain,
- GPE0A_PME_B0_VIRT_GPIO_PIN));
+ generic_handle_domain_irq_safe(chip->irq.domain, GPE0A_PME_B0_VIRT_GPIO_PIN);
pm_wakeup_hard_event(chip->parent);
diff -rupN linux-5.19.16.orig/drivers/ssb/driver_gpio.c linux-5.19.16/drivers/ssb/driver_gpio.c
--- linux-5.19.16.orig/drivers/ssb/driver_gpio.c 2022-10-18 17:21:10.612502125 -0400
+++ linux-5.19.16/drivers/ssb/driver_gpio.c 2022-10-18 17:21:17.800446725 -0400
@@ -132,7 +132,8 @@ static irqreturn_t ssb_gpio_irq_chipco_h
return IRQ_NONE;
for_each_set_bit(gpio, &irqs, bus->gpio.ngpio)
- generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio));
+ generic_handle_domain_irq_safe(bus->irq_domain, gpio);
+
ssb_chipco_gpio_polarity(chipco, irqs, val & irqs);
return IRQ_HANDLED;
@@ -330,7 +331,8 @@ static irqreturn_t ssb_gpio_irq_extif_ha
return IRQ_NONE;
for_each_set_bit(gpio, &irqs, bus->gpio.ngpio)
- generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio));
+ generic_handle_domain_irq_safe(bus->irq_domain, gpio);
+
ssb_extif_gpio_polarity(extif, irqs, val & irqs);
return IRQ_HANDLED;
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_aspeed_vuart.c linux-5.19.16/drivers/tty/serial/8250/8250_aspeed_vuart.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_aspeed_vuart.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_aspeed_vuart.c 2022-10-18 17:21:17.800446725 -0400
@@ -278,7 +278,7 @@ static void __aspeed_vuart_set_throttle(
up->ier &= ~irqs;
if (!throttle)
up->ier |= irqs;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
}
static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle)
{
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_bcm7271.c linux-5.19.16/drivers/tty/serial/8250/8250_bcm7271.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_bcm7271.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_bcm7271.c 2022-10-18 17:21:17.804446694 -0400
@@ -609,7 +609,7 @@ static int brcmuart_startup(struct uart_
* will handle this.
*/
up->ier &= ~UART_IER_RDI;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
priv->tx_running = false;
priv->dma.rx_dma = NULL;
@@ -775,10 +775,12 @@ static int brcmuart_handle_irq(struct ua
unsigned int iir = serial_port_in(p, UART_IIR);
struct brcmuart_priv *priv = p->private_data;
struct uart_8250_port *up = up_to_u8250p(p);
+ unsigned long cs_flags;
unsigned int status;
unsigned long flags;
unsigned int ier;
unsigned int mcr;
+ bool is_console;
int handled = 0;
/*
@@ -789,6 +791,10 @@ static int brcmuart_handle_irq(struct ua
spin_lock_irqsave(&p->lock, flags);
status = serial_port_in(p, UART_LSR);
if ((status & UART_LSR_DR) == 0) {
+ is_console = uart_console(p);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(cs_flags);
ier = serial_port_in(p, UART_IER);
/*
@@ -809,6 +815,9 @@ static int brcmuart_handle_irq(struct ua
serial_port_in(p, UART_RX);
}
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(cs_flags);
+
handled = 1;
}
spin_unlock_irqrestore(&p->lock, flags);
@@ -823,8 +832,10 @@ static enum hrtimer_restart brcmuart_hrt
struct brcmuart_priv *priv = container_of(t, struct brcmuart_priv, hrt);
struct uart_port *p = priv->up;
struct uart_8250_port *up = up_to_u8250p(p);
+ unsigned long cs_flags;
unsigned int status;
unsigned long flags;
+ bool is_console;
if (priv->shutdown)
return HRTIMER_NORESTART;
@@ -846,12 +857,20 @@ static enum hrtimer_restart brcmuart_hrt
/* re-enable receive unless upper layer has disabled it */
if ((up->ier & (UART_IER_RLSI | UART_IER_RDI)) ==
(UART_IER_RLSI | UART_IER_RDI)) {
+ is_console = uart_console(p);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(cs_flags);
+
status = serial_port_in(p, UART_IER);
status |= (UART_IER_RLSI | UART_IER_RDI);
serial_port_out(p, UART_IER, status);
status = serial_port_in(p, UART_MCR);
status |= UART_MCR_RTS;
serial_port_out(p, UART_MCR, status);
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(cs_flags);
}
spin_unlock_irqrestore(&p->lock, flags);
return HRTIMER_NORESTART;
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_core.c linux-5.19.16/drivers/tty/serial/8250/8250_core.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_core.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_core.c 2022-10-18 17:21:17.804446694 -0400
@@ -255,8 +255,11 @@ static void serial8250_timeout(struct ti
static void serial8250_backup_timeout(struct timer_list *t)
{
struct uart_8250_port *up = from_timer(up, t, timer);
+ struct uart_port *port = &up->port;
unsigned int iir, ier = 0, lsr;
+ unsigned long cs_flags;
unsigned long flags;
+ bool is_console;
spin_lock_irqsave(&up->port.lock, flags);
@@ -265,8 +268,16 @@ static void serial8250_backup_timeout(st
* based handler.
*/
if (up->port.irq) {
+ is_console = uart_console(port);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(cs_flags);
+
ier = serial_in(up, UART_IER);
serial_out(up, UART_IER, 0);
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(cs_flags);
}
iir = serial_in(up, UART_IIR);
@@ -289,7 +300,7 @@ static void serial8250_backup_timeout(st
serial8250_tx_chars(up);
if (up->port.irq)
- serial_out(up, UART_IER, ier);
+ serial8250_set_IER(up, ier);
spin_unlock_irqrestore(&up->port.lock, flags);
@@ -569,6 +580,14 @@ serial8250_register_ports(struct uart_dr
#ifdef CONFIG_SERIAL_8250_CONSOLE
+static void univ8250_console_write_atomic(struct console *co, const char *s,
+ unsigned int count)
+{
+ struct uart_8250_port *up = &serial8250_ports[co->index];
+
+ serial8250_console_write_atomic(up, s, count);
+}
+
static void univ8250_console_write(struct console *co, const char *s,
unsigned int count)
{
@@ -662,6 +681,7 @@ static int univ8250_console_match(struct
static struct console univ8250_console = {
.name = "ttyS",
+ .write_atomic = univ8250_console_write_atomic,
.write = univ8250_console_write,
.device = uart_console_device,
.setup = univ8250_console_setup,
@@ -955,7 +975,7 @@ static void serial_8250_overrun_backoff_
spin_lock_irqsave(&port->lock, flags);
up->ier |= UART_IER_RLSI | UART_IER_RDI;
up->port.read_status_mask |= UART_LSR_DR;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
spin_unlock_irqrestore(&port->lock, flags);
}
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_exar.c linux-5.19.16/drivers/tty/serial/8250/8250_exar.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_exar.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_exar.c 2022-10-18 17:21:17.804446694 -0400
@@ -177,6 +177,8 @@ static void xr17v35x_set_divisor(struct
static int xr17v35x_startup(struct uart_port *port)
{
+ struct uart_8250_port *up = up_to_u8250p(port);
+
/*
* First enable access to IER [7:5], ISR [5:4], FCR [5:4],
* MCR [7:5] and MSR [7:0]
@@ -187,7 +189,7 @@ static int xr17v35x_startup(struct uart_
* Make sure all interrups are masked until initialization is
* complete and the FIFOs are cleared
*/
- serial_port_out(port, UART_IER, 0);
+ serial8250_set_IER(up, 0);
return serial8250_do_startup(port);
}
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_fsl.c linux-5.19.16/drivers/tty/serial/8250/8250_fsl.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_fsl.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_fsl.c 2022-10-18 17:21:17.804446694 -0400
@@ -58,7 +58,8 @@ int fsl8250_handle_irq(struct uart_port
if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
unsigned long delay;
- up->ier = port->serial_in(port, UART_IER);
+ up->ier = serial8250_in_IER(up);
+
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
port->ops->stop_rx(port);
} else {
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250.h linux-5.19.16/drivers/tty/serial/8250/8250.h
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250.h 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250.h 2022-10-18 17:21:17.800446725 -0400
@@ -177,12 +177,74 @@ static inline void serial_dl_write(struc
up->dl_write(up, value);
}
+static inline int serial8250_in_IER(struct uart_8250_port *up)
+{
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ bool is_console;
+ int ier;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(flags);
+
+ ier = serial_in(up, UART_IER);
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(flags);
+
+ return ier;
+}
+
+static inline void serial8250_set_IER(struct uart_8250_port *up, int ier)
+{
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ bool is_console;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(flags);
+
+ serial_out(up, UART_IER, ier);
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(flags);
+}
+
+static inline int serial8250_clear_IER(struct uart_8250_port *up)
+{
+ struct uart_port *port = &up->port;
+ unsigned int clearval = 0;
+ unsigned long flags;
+ bool is_console;
+ int prior;
+
+ is_console = uart_console(port);
+
+ if (up->capabilities & UART_CAP_UUE)
+ clearval = UART_IER_UUE;
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(flags);
+
+ prior = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, clearval);
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(flags);
+
+ return prior;
+}
+
static inline bool serial8250_set_THRI(struct uart_8250_port *up)
{
if (up->ier & UART_IER_THRI)
return false;
up->ier |= UART_IER_THRI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
return true;
}
@@ -191,7 +253,7 @@ static inline bool serial8250_clear_THRI
if (!(up->ier & UART_IER_THRI))
return false;
up->ier &= ~UART_IER_THRI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
return true;
}
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_ingenic.c linux-5.19.16/drivers/tty/serial/8250/8250_ingenic.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_ingenic.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_ingenic.c 2022-10-18 17:21:17.804446694 -0400
@@ -146,6 +146,7 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic
static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
{
+ struct uart_8250_port *up = up_to_u8250p(p);
int ier;
switch (offset) {
@@ -167,7 +168,7 @@ static void ingenic_uart_serial_out(stru
* If we have enabled modem status IRQs we should enable
* modem mode.
*/
- ier = p->serial_in(p, UART_IER);
+ ier = serial8250_in_IER(up);
if (ier & UART_IER_MSI)
value |= UART_MCR_MDCE | UART_MCR_FCM;
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_mtk.c linux-5.19.16/drivers/tty/serial/8250/8250_mtk.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_mtk.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_mtk.c 2022-10-18 17:21:17.804446694 -0400
@@ -222,12 +222,40 @@ static void mtk8250_shutdown(struct uart
static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ bool is_console;
+ int ier;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(flags);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier & (~mask));
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(flags);
}
static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ bool is_console;
+ int ier;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(flags);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier | mask);
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(flags);
}
static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_omap.c linux-5.19.16/drivers/tty/serial/8250/8250_omap.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_omap.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_omap.c 2022-10-18 17:21:17.804446694 -0400
@@ -325,7 +325,7 @@ static void omap8250_restore_regs(struct
/* drop TCR + TLR access, we setup XON/XOFF later */
serial8250_out_MCR(up, up->mcr);
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
serial_dl_write(up, priv->quot);
@@ -512,7 +512,7 @@ static void omap_8250_pm(struct uart_por
serial_out(up, UART_EFR, efr | UART_EFR_ECB);
serial_out(up, UART_LCR, 0);
- serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0);
+ serial8250_set_IER(up, (state != 0) ? UART_IERX_SLEEP : 0);
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
serial_out(up, UART_EFR, efr);
serial_out(up, UART_LCR, 0);
@@ -633,7 +633,7 @@ static irqreturn_t omap8250_irq(int irq,
if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) {
unsigned long delay;
- up->ier = port->serial_in(port, UART_IER);
+ up->ier = serial8250_in_IER(up);
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
port->ops->stop_rx(port);
} else {
@@ -693,7 +693,7 @@ static int omap_8250_startup(struct uart
goto err;
up->ier = UART_IER_RLSI | UART_IER_RDI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
#ifdef CONFIG_PM
up->capabilities |= UART_CAP_RPM;
@@ -734,7 +734,7 @@ static void omap_8250_shutdown(struct ua
serial_out(up, UART_OMAP_EFR2, 0x0);
up->ier = 0;
- serial_out(up, UART_IER, 0);
+ serial8250_set_IER(up, 0);
if (up->dma)
serial8250_release_dma(up);
@@ -782,7 +782,7 @@ static void omap_8250_unthrottle(struct
up->dma->rx_dma(up);
up->ier |= UART_IER_RLSI | UART_IER_RDI;
port->read_status_mask |= UART_LSR_DR;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
spin_unlock_irqrestore(&port->lock, flags);
pm_runtime_mark_last_busy(port->dev);
@@ -873,7 +873,7 @@ static void __dma_rx_complete(void *para
__dma_rx_do_complete(p);
if (!priv->throttled) {
p->ier |= UART_IER_RLSI | UART_IER_RDI;
- serial_out(p, UART_IER, p->ier);
+ serial8250_set_IER(p, p->ier);
if (!(priv->habit & UART_HAS_EFR2))
omap_8250_rx_dma(p);
}
@@ -930,7 +930,7 @@ static int omap_8250_rx_dma(struct uart_
* callback to run.
*/
p->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
- serial_out(p, UART_IER, p->ier);
+ serial8250_set_IER(p, p->ier);
}
goto out;
}
@@ -1146,12 +1146,12 @@ static void am654_8250_handle_rx_dma(str
* periodic timeouts, re-enable interrupts.
*/
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
omap_8250_rx_dma_flush(up);
serial_in(up, UART_IIR);
serial_out(up, UART_OMAP_EFR2, 0x0);
up->ier |= UART_IER_RLSI | UART_IER_RDI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
}
}
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/8250_port.c linux-5.19.16/drivers/tty/serial/8250/8250_port.c
--- linux-5.19.16.orig/drivers/tty/serial/8250/8250_port.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/8250_port.c 2022-10-18 17:21:17.804446694 -0400
@@ -749,7 +749,7 @@ static void serial8250_set_sleep(struct
serial_out(p, UART_EFR, UART_EFR_ECB);
serial_out(p, UART_LCR, 0);
}
- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
if (p->capabilities & UART_CAP_EFR) {
serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
serial_out(p, UART_EFR, efr);
@@ -1023,8 +1023,11 @@ static int broken_efr(struct uart_8250_p
*/
static void autoconfig_16550a(struct uart_8250_port *up)
{
+ struct uart_port *port = &up->port;
unsigned char status1, status2;
unsigned int iersave;
+ unsigned long flags;
+ bool is_console;
up->port.type = PORT_16550A;
up->capabilities |= UART_CAP_FIFO;
@@ -1135,6 +1138,11 @@ static void autoconfig_16550a(struct uar
return;
}
+ is_console = uart_console(port);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(flags);
+
/*
* Try writing and reading the UART_IER_UUE bit (b6).
* If it works, this is probably one of the Xscale platform's
@@ -1170,6 +1178,9 @@ static void autoconfig_16550a(struct uar
}
serial_out(up, UART_IER, iersave);
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(flags);
+
/*
* We distinguish between 16550A and U6 16550A by counting
* how many bytes are in the FIFO.
@@ -1192,8 +1203,10 @@ static void autoconfig(struct uart_8250_
unsigned char status1, scratch, scratch2, scratch3;
unsigned char save_lcr, save_mcr;
struct uart_port *port = &up->port;
+ unsigned long cs_flags;
unsigned long flags;
unsigned int old_capabilities;
+ bool is_console;
if (!port->iobase && !port->mapbase && !port->membase)
return;
@@ -1211,6 +1224,11 @@ static void autoconfig(struct uart_8250_
up->bugs = 0;
if (!(port->flags & UPF_BUGGY_UART)) {
+ is_console = uart_console(port);
+
+ if (is_console)
+ printk_cpu_sync_get_irqsave(cs_flags);
+
/*
* Do a simple existence test first; if we fail this,
* there's no point trying anything else.
@@ -1240,6 +1258,10 @@ static void autoconfig(struct uart_8250_
#endif
scratch3 = serial_in(up, UART_IER) & 0x0f;
serial_out(up, UART_IER, scratch);
+
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(cs_flags);
+
if (scratch2 != 0 || scratch3 != 0x0F) {
/*
* We failed; there's nothing here
@@ -1337,10 +1359,7 @@ static void autoconfig(struct uart_8250_
serial8250_out_MCR(up, save_mcr);
serial8250_clear_fifos(up);
serial_in(up, UART_RX);
- if (up->capabilities & UART_CAP_UUE)
- serial_out(up, UART_IER, UART_IER_UUE);
- else
- serial_out(up, UART_IER, 0);
+ serial8250_clear_IER(up);
out_unlock:
spin_unlock_irqrestore(&port->lock, flags);
@@ -1366,7 +1385,9 @@ static void autoconfig_irq(struct uart_8
unsigned char save_mcr, save_ier;
unsigned char save_ICP = 0;
unsigned int ICP = 0;
+ unsigned long flags;
unsigned long irqs;
+ bool is_console;
int irq;
if (port->flags & UPF_FOURPORT) {
@@ -1376,8 +1397,12 @@ static void autoconfig_irq(struct uart_8
inb_p(ICP);
}
- if (uart_console(port))
+ is_console = uart_console(port);
+
+ if (is_console) {
console_lock();
+ printk_cpu_sync_get_irqsave(flags);
+ }
/* forget possible initially masked and pending IRQ */
probe_irq_off(probe_irq_on());
@@ -1409,8 +1434,10 @@ static void autoconfig_irq(struct uart_8
if (port->flags & UPF_FOURPORT)
outb_p(save_ICP, ICP);
- if (uart_console(port))
+ if (is_console) {
+ printk_cpu_sync_put_irqrestore(flags);
console_unlock();
+ }
port->irq = (irq > 0) ? irq : 0;
}
@@ -1423,7 +1450,7 @@ static void serial8250_stop_rx(struct ua
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
up->port.read_status_mask &= ~UART_LSR_DR;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
serial8250_rpm_put(up);
}
@@ -1453,7 +1480,7 @@ void serial8250_em485_stop_tx(struct uar
serial8250_clear_and_reinit_fifos(p);
p->ier |= UART_IER_RLSI | UART_IER_RDI;
- serial_port_out(&p->port, UART_IER, p->ier);
+ serial8250_set_IER(p, p->ier);
}
}
EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
@@ -1705,7 +1732,7 @@ static void serial8250_disable_ms(struct
mctrl_gpio_disable_ms(up->gpios);
up->ier &= ~UART_IER_MSI;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
}
static void serial8250_enable_ms(struct uart_port *port)
@@ -1721,7 +1748,7 @@ static void serial8250_enable_ms(struct
up->ier |= UART_IER_MSI;
serial8250_rpm_get(up);
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
serial8250_rpm_put(up);
}
@@ -2147,14 +2174,7 @@ static void serial8250_put_poll_char(str
struct uart_8250_port *up = up_to_u8250p(port);
serial8250_rpm_get(up);
- /*
- * First save the IER then disable the interrupts
- */
- ier = serial_port_in(port, UART_IER);
- if (up->capabilities & UART_CAP_UUE)
- serial_port_out(port, UART_IER, UART_IER_UUE);
- else
- serial_port_out(port, UART_IER, 0);
+ ier = serial8250_clear_IER(up);
wait_for_xmitr(up, BOTH_EMPTY);
/*
@@ -2167,7 +2187,7 @@ static void serial8250_put_poll_char(str
* and restore the IER
*/
wait_for_xmitr(up, BOTH_EMPTY);
- serial_port_out(port, UART_IER, ier);
+ serial8250_set_IER(up, ier);
serial8250_rpm_put(up);
}
@@ -2176,8 +2196,10 @@ static void serial8250_put_poll_char(str
int serial8250_do_startup(struct uart_port *port)
{
struct uart_8250_port *up = up_to_u8250p(port);
+ unsigned long cs_flags;
unsigned long flags;
unsigned char lsr, iir;
+ bool is_console;
int retval;
if (!port->fifosize)
@@ -2197,7 +2219,7 @@ int serial8250_do_startup(struct uart_po
up->acr = 0;
serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B);
serial_port_out(port, UART_EFR, UART_EFR_ECB);
- serial_port_out(port, UART_IER, 0);
+ serial8250_set_IER(up, 0);
serial_port_out(port, UART_LCR, 0);
serial_icr_write(up, UART_CSR, 0); /* Reset the UART */
serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B);
@@ -2207,7 +2229,7 @@ int serial8250_do_startup(struct uart_po
if (port->type == PORT_DA830) {
/* Reset the port */
- serial_port_out(port, UART_IER, 0);
+ serial8250_set_IER(up, 0);
serial_port_out(port, UART_DA830_PWREMU_MGMT, 0);
mdelay(10);
@@ -2302,6 +2324,8 @@ int serial8250_do_startup(struct uart_po
if (port->irq && (up->port.flags & UPF_SHARE_IRQ))
up->port.irqflags |= IRQF_SHARED;
+ is_console = uart_console(port);
+
if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) {
unsigned char iir1;
@@ -2318,6 +2342,9 @@ int serial8250_do_startup(struct uart_po
*/
spin_lock_irqsave(&port->lock, flags);
+ if (is_console)
+ printk_cpu_sync_get_irqsave(cs_flags);
+
wait_for_xmitr(up, UART_LSR_THRE);
serial_port_out_sync(port, UART_IER, UART_IER_THRI);
udelay(1); /* allow THRE to set */
@@ -2328,6 +2355,9 @@ int serial8250_do_startup(struct uart_po
iir = serial_port_in(port, UART_IIR);
serial_port_out(port, UART_IER, 0);
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(cs_flags);
+
spin_unlock_irqrestore(&port->lock, flags);
if (port->irqflags & IRQF_SHARED)
@@ -2384,10 +2414,14 @@ int serial8250_do_startup(struct uart_po
* Do a quick test to see if we receive an interrupt when we enable
* the TX irq.
*/
+ if (is_console)
+ printk_cpu_sync_get_irqsave(cs_flags);
serial_port_out(port, UART_IER, UART_IER_THRI);
lsr = serial_port_in(port, UART_LSR);
iir = serial_port_in(port, UART_IIR);
serial_port_out(port, UART_IER, 0);
+ if (is_console)
+ printk_cpu_sync_put_irqrestore(cs_flags);
if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) {
if (!(up->bugs & UART_BUG_TXEN)) {
@@ -2419,7 +2453,7 @@ dont_test_tx_en:
if (up->dma) {
const char *msg = NULL;
- if (uart_console(port))
+ if (is_console)
msg = "forbid DMA for kernel console";
else if (serial8250_request_dma(up))
msg = "failed to request DMA";
@@ -2470,7 +2504,7 @@ void serial8250_do_shutdown(struct uart_
*/
spin_lock_irqsave(&port->lock, flags);
up->ier = 0;
- serial_port_out(port, UART_IER, 0);
+ serial8250_set_IER(up, 0);
spin_unlock_irqrestore(&port->lock, flags);
synchronize_irq(port->irq);
@@ -2836,7 +2870,7 @@ serial8250_do_set_termios(struct uart_po
if (up->capabilities & UART_CAP_RTOIE)
up->ier |= UART_IER_RTOIE;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
if (up->capabilities & UART_CAP_EFR) {
unsigned char efr = 0;
@@ -3304,7 +3338,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default
#ifdef CONFIG_SERIAL_8250_CONSOLE
-static void serial8250_console_putchar(struct uart_port *port, unsigned char ch)
+static void serial8250_console_putchar_locked(struct uart_port *port, unsigned char ch)
{
struct uart_8250_port *up = up_to_u8250p(port);
@@ -3312,6 +3346,18 @@ static void serial8250_console_putchar(s
serial_port_out(port, UART_TX, ch);
}
+static void serial8250_console_putchar(struct uart_port *port, unsigned char ch)
+{
+ struct uart_8250_port *up = up_to_u8250p(port);
+ unsigned long flags;
+
+ wait_for_xmitr(up, UART_LSR_THRE);
+
+ printk_cpu_sync_get_irqsave(flags);
+ serial8250_console_putchar_locked(port, ch);
+ printk_cpu_sync_put_irqrestore(flags);
+}
+
/*
* Restore serial console when h/w power-off detected
*/
@@ -3333,6 +3379,32 @@ static void serial8250_console_restore(s
serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
}
+void serial8250_console_write_atomic(struct uart_8250_port *up,
+ const char *s, unsigned int count)
+{
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ unsigned int ier;
+
+ printk_cpu_sync_get_irqsave(flags);
+
+ touch_nmi_watchdog();
+
+ ier = serial8250_clear_IER(up);
+
+ if (atomic_fetch_inc(&up->console_printing)) {
+ uart_console_write(port, "\n", 1,
+ serial8250_console_putchar_locked);
+ }
+ uart_console_write(port, s, count, serial8250_console_putchar_locked);
+ atomic_dec(&up->console_printing);
+
+ wait_for_xmitr(up, BOTH_EMPTY);
+ serial8250_set_IER(up, ier);
+
+ printk_cpu_sync_put_irqrestore(flags);
+}
+
/*
* Print a string to the serial port using the device FIFO
*
@@ -3378,24 +3450,12 @@ void serial8250_console_write(struct uar
struct uart_port *port = &up->port;
unsigned long flags;
unsigned int ier, use_fifo;
- int locked = 1;
touch_nmi_watchdog();
- if (oops_in_progress)
- locked = spin_trylock_irqsave(&port->lock, flags);
- else
- spin_lock_irqsave(&port->lock, flags);
-
- /*
- * First save the IER then disable the interrupts
- */
- ier = serial_port_in(port, UART_IER);
+ spin_lock_irqsave(&port->lock, flags);
- if (up->capabilities & UART_CAP_UUE)
- serial_port_out(port, UART_IER, UART_IER_UUE);
- else
- serial_port_out(port, UART_IER, 0);
+ ier = serial8250_clear_IER(up);
/* check scratch reg to see if port powered off during system sleep */
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
@@ -3429,10 +3489,12 @@ void serial8250_console_write(struct uar
*/
!(up->port.flags & UPF_CONS_FLOW);
+ atomic_inc(&up->console_printing);
if (likely(use_fifo))
serial8250_console_fifo_write(up, s, count);
else
uart_console_write(port, s, count, serial8250_console_putchar);
+ atomic_dec(&up->console_printing);
/*
* Finally, wait for transmitter to become empty
@@ -3445,8 +3507,7 @@ void serial8250_console_write(struct uar
if (em485->tx_stopped)
up->rs485_stop_tx(up);
}
-
- serial_port_out(port, UART_IER, ier);
+ serial8250_set_IER(up, ier);
/*
* The receive handling will happen properly because the
@@ -3458,8 +3519,7 @@ void serial8250_console_write(struct uar
if (up->msr_saved_flags)
serial8250_modem_status(up);
- if (locked)
- spin_unlock_irqrestore(&port->lock, flags);
+ spin_unlock_irqrestore(&port->lock, flags);
}
static unsigned int probe_baud(struct uart_port *port)
@@ -3479,6 +3539,7 @@ static unsigned int probe_baud(struct ua
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
{
+ struct uart_8250_port *up = up_to_u8250p(port);
int baud = 9600;
int bits = 8;
int parity = 'n';
@@ -3488,6 +3549,8 @@ int serial8250_console_setup(struct uart
if (!port->iobase && !port->membase)
return -ENODEV;
+ atomic_set(&up->console_printing, 0);
+
if (options)
uart_parse_options(options, &baud, &parity, &bits, &flow);
else if (probe)
diff -rupN linux-5.19.16.orig/drivers/tty/serial/8250/Kconfig linux-5.19.16/drivers/tty/serial/8250/Kconfig
--- linux-5.19.16.orig/drivers/tty/serial/8250/Kconfig 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/8250/Kconfig 2022-10-18 17:21:17.804446694 -0400
@@ -9,6 +9,7 @@ config SERIAL_8250
depends on !S390
select SERIAL_CORE
select SERIAL_MCTRL_GPIO if GPIOLIB
+ select HAVE_ATOMIC_CONSOLE
help
This selects whether you want to include the driver for the standard
serial ports. The standard answer is Y. People who might say N
diff -rupN linux-5.19.16.orig/drivers/tty/serial/amba-pl011.c linux-5.19.16/drivers/tty/serial/amba-pl011.c
--- linux-5.19.16.orig/drivers/tty/serial/amba-pl011.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/amba-pl011.c 2022-10-18 17:21:17.804446694 -0400
@@ -2308,18 +2308,24 @@ pl011_console_write(struct console *co,
{
struct uart_amba_port *uap = amba_ports[co->index];
unsigned int old_cr = 0, new_cr;
- unsigned long flags;
+ unsigned long flags = 0;
int locked = 1;
clk_enable(uap->clk);
- local_irq_save(flags);
+ /*
+ * local_irq_save(flags);
+ *
+ * This local_irq_save() is nonsense. If we come in via sysrq
+	 * handling, then interrupts are already disabled. Aside from
+	 * that, the port.sysrq check is racy on SMP regardless.
+ */
if (uap->port.sysrq)
locked = 0;
else if (oops_in_progress)
- locked = spin_trylock(&uap->port.lock);
+ locked = spin_trylock_irqsave(&uap->port.lock, flags);
else
- spin_lock(&uap->port.lock);
+ spin_lock_irqsave(&uap->port.lock, flags);
/*
* First save the CR then disable the interrupts
@@ -2345,8 +2351,7 @@ pl011_console_write(struct console *co,
pl011_write(old_cr, uap, REG_CR);
if (locked)
- spin_unlock(&uap->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&uap->port.lock, flags);
clk_disable(uap->clk);
}
diff -rupN linux-5.19.16.orig/drivers/tty/serial/omap-serial.c linux-5.19.16/drivers/tty/serial/omap-serial.c
--- linux-5.19.16.orig/drivers/tty/serial/omap-serial.c 2022-10-18 17:21:10.672501663 -0400
+++ linux-5.19.16/drivers/tty/serial/omap-serial.c 2022-10-18 17:21:17.804446694 -0400
@@ -1241,13 +1241,10 @@ serial_omap_console_write(struct console
unsigned int ier;
int locked = 1;
- local_irq_save(flags);
- if (up->port.sysrq)
- locked = 0;
- else if (oops_in_progress)
- locked = spin_trylock(&up->port.lock);
+ if (up->port.sysrq || oops_in_progress)
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
else
- spin_lock(&up->port.lock);
+ spin_lock_irqsave(&up->port.lock, flags);
/*
* First save the IER then disable the interrupts
@@ -1274,8 +1271,7 @@ serial_omap_console_write(struct console
check_modem_status(up);
if (locked)
- spin_unlock(&up->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&up->port.lock, flags);
}
static int __init
diff -rupN linux-5.19.16.orig/drivers/tty/sysrq.c linux-5.19.16/drivers/tty/sysrq.c
--- linux-5.19.16.orig/drivers/tty/sysrq.c 2022-10-18 17:21:10.676501631 -0400
+++ linux-5.19.16/drivers/tty/sysrq.c 2022-10-18 17:21:17.804446694 -0400
@@ -581,6 +581,7 @@ void __handle_sysrq(int key, bool check_
rcu_sysrq_start();
rcu_read_lock();
+ printk_prefer_direct_enter();
/*
* Raise the apparent loglevel to maximum so that the sysrq header
* is shown to provide the user with positive feedback. We do not
@@ -622,6 +623,7 @@ void __handle_sysrq(int key, bool check_
pr_cont("\n");
console_loglevel = orig_log_level;
}
+ printk_prefer_direct_exit();
rcu_read_unlock();
rcu_sysrq_end();
diff -rupN linux-5.19.16.orig/fs/dcache.c linux-5.19.16/fs/dcache.c
--- linux-5.19.16.orig/fs/dcache.c 2022-10-18 17:21:10.732501201 -0400
+++ linux-5.19.16/fs/dcache.c 2022-10-18 17:21:17.804446694 -0400
@@ -2239,6 +2239,7 @@ struct dentry *d_add_ci(struct dentry *d
}
}
res = d_splice_alias(inode, found);
+ d_lookup_done(found);
if (res) {
dput(found);
return res;
@@ -2563,7 +2564,15 @@ EXPORT_SYMBOL(d_rehash);
static inline unsigned start_dir_add(struct inode *dir)
{
-
+ /*
+ * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
+ * kernels spin_lock() implicitly disables preemption, but not on
+ * PREEMPT_RT. So for RT it has to be done explicitly to protect
+ * the sequence count write side critical section against a reader
+ * or another writer preempting, which would result in a live lock.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
for (;;) {
unsigned n = dir->i_dir_seq;
if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2572,9 +2581,13 @@ static inline unsigned start_dir_add(str
}
}
-static inline void end_dir_add(struct inode *dir, unsigned n)
+static inline void end_dir_add(struct inode *dir, unsigned int n,
+ wait_queue_head_t *d_wait)
{
smp_store_release(&dir->i_dir_seq, n + 2);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+ wake_up_all(d_wait);
}
static void d_wait_lookup(struct dentry *dentry)
@@ -2701,32 +2714,50 @@ mismatch:
}
EXPORT_SYMBOL(d_alloc_parallel);
-void __d_lookup_done(struct dentry *dentry)
+/*
+ * - Unhash the dentry
+ * - Retrieve and clear the waitqueue head in dentry
+ * - Return the waitqueue head
+ */
+static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
{
- struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
- dentry->d_name.hash);
+ wait_queue_head_t *d_wait;
+ struct hlist_bl_head *b;
+
+ lockdep_assert_held(&dentry->d_lock);
+
+ b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash);
hlist_bl_lock(b);
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
- wake_up_all(dentry->d_wait);
+ d_wait = dentry->d_wait;
dentry->d_wait = NULL;
hlist_bl_unlock(b);
INIT_HLIST_NODE(&dentry->d_u.d_alias);
INIT_LIST_HEAD(&dentry->d_lru);
+ return d_wait;
+}
+
+void __d_lookup_unhash_wake(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ wake_up_all(__d_lookup_unhash(dentry));
+ spin_unlock(&dentry->d_lock);
}
-EXPORT_SYMBOL(__d_lookup_done);
+EXPORT_SYMBOL(__d_lookup_unhash_wake);
/* inode->i_lock held if inode is non-NULL */
static inline void __d_add(struct dentry *dentry, struct inode *inode)
{
+ wait_queue_head_t *d_wait;
struct inode *dir = NULL;
unsigned n;
spin_lock(&dentry->d_lock);
if (unlikely(d_in_lookup(dentry))) {
dir = dentry->d_parent->d_inode;
n = start_dir_add(dir);
- __d_lookup_done(dentry);
+ d_wait = __d_lookup_unhash(dentry);
}
if (inode) {
unsigned add_flags = d_flags_for_inode(inode);
@@ -2738,7 +2769,7 @@ static inline void __d_add(struct dentry
}
__d_rehash(dentry);
if (dir)
- end_dir_add(dir, n);
+ end_dir_add(dir, n, d_wait);
spin_unlock(&dentry->d_lock);
if (inode)
spin_unlock(&inode->i_lock);
@@ -2885,6 +2916,7 @@ static void __d_move(struct dentry *dent
bool exchange)
{
struct dentry *old_parent, *p;
+ wait_queue_head_t *d_wait;
struct inode *dir = NULL;
unsigned n;
@@ -2915,7 +2947,7 @@ static void __d_move(struct dentry *dent
if (unlikely(d_in_lookup(target))) {
dir = target->d_parent->d_inode;
n = start_dir_add(dir);
- __d_lookup_done(target);
+ d_wait = __d_lookup_unhash(target);
}
write_seqcount_begin(&dentry->d_seq);
@@ -2951,7 +2983,7 @@ static void __d_move(struct dentry *dent
write_seqcount_end(&dentry->d_seq);
if (dir)
- end_dir_add(dir, n);
+ end_dir_add(dir, n, d_wait);
if (dentry->d_parent != old_parent)
spin_unlock(&dentry->d_parent->d_lock);
diff -rupN linux-5.19.16.orig/include/asm-generic/softirq_stack.h linux-5.19.16/include/asm-generic/softirq_stack.h
--- linux-5.19.16.orig/include/asm-generic/softirq_stack.h 2022-10-18 17:21:10.800500677 -0400
+++ linux-5.19.16/include/asm-generic/softirq_stack.h 2022-10-18 17:21:17.804446694 -0400
@@ -2,7 +2,7 @@
#ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
#define __ASM_GENERIC_SOFTIRQ_STACK_H
-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT)
void do_softirq_own_stack(void);
#else
static inline void do_softirq_own_stack(void)
diff -rupN linux-5.19.16.orig/include/linux/console.h linux-5.19.16/include/linux/console.h
--- linux-5.19.16.orig/include/linux/console.h 2022-10-18 17:21:10.828500461 -0400
+++ linux-5.19.16/include/linux/console.h 2022-10-18 17:21:17.804446694 -0400
@@ -16,6 +16,7 @@
#include <linux/atomic.h>
#include <linux/types.h>
+#include <linux/mutex.h>
struct vc_data;
struct console_font_op;
@@ -137,9 +138,19 @@ static inline int con_debug_leave(void)
#define CON_BRL (32) /* Used for a braille device */
#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+struct console_atomic_data {
+ u64 seq;
+ char *text;
+ char *ext_text;
+ char *dropped_text;
+};
+#endif
+
struct console {
char name[16];
void (*write)(struct console *, const char *, unsigned);
+ void (*write_atomic)(struct console *, const char *, unsigned);
int (*read)(struct console *, char *, unsigned);
struct tty_driver *(*device)(struct console *, int *);
void (*unblank)(void);
@@ -152,7 +163,26 @@ struct console {
uint ispeed;
uint ospeed;
u64 seq;
- unsigned long dropped;
+ atomic_long_t dropped;
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+ struct console_atomic_data *atomic_data;
+#endif
+ struct task_struct *thread;
+ bool blocked;
+
+ /*
+ * The per-console lock is used by printing kthreads to synchronize
+ * this console with callers of console_lock(). This is necessary in
+ * order to allow printing kthreads to run in parallel to each other,
+ * while each safely accessing the @blocked field and synchronizing
+ * against direct printing via console_lock/console_unlock.
+ *
+ * Note: For synchronizing against direct printing via
+ * console_trylock/console_unlock, see the static global
+ * variable @console_kthreads_active.
+ */
+ struct mutex lock;
+
void *data;
struct console *next;
};
@@ -167,6 +197,7 @@ extern int console_set_on_cmdline;
extern struct console *early_console;
enum con_flush_mode {
+ CONSOLE_ATOMIC_FLUSH_PENDING,
CONSOLE_FLUSH_PENDING,
CONSOLE_REPLAY_ALL,
};
diff -rupN linux-5.19.16.orig/include/linux/dcache.h linux-5.19.16/include/linux/dcache.h
--- linux-5.19.16.orig/include/linux/dcache.h 2022-10-18 17:21:10.828500461 -0400
+++ linux-5.19.16/include/linux/dcache.h 2022-10-18 17:21:17.804446694 -0400
@@ -349,7 +349,7 @@ static inline void dont_mount(struct den
spin_unlock(&dentry->d_lock);
}
-extern void __d_lookup_done(struct dentry *);
+extern void __d_lookup_unhash_wake(struct dentry *dentry);
static inline int d_in_lookup(const struct dentry *dentry)
{
@@ -358,11 +358,8 @@ static inline int d_in_lookup(const stru
static inline void d_lookup_done(struct dentry *dentry)
{
- if (unlikely(d_in_lookup(dentry))) {
- spin_lock(&dentry->d_lock);
- __d_lookup_done(dentry);
- spin_unlock(&dentry->d_lock);
- }
+ if (unlikely(d_in_lookup(dentry)))
+ __d_lookup_unhash_wake(dentry);
}
extern void dput(struct dentry *);
diff -rupN linux-5.19.16.orig/include/linux/entry-common.h linux-5.19.16/include/linux/entry-common.h
--- linux-5.19.16.orig/include/linux/entry-common.h 2022-10-18 17:21:10.832500430 -0400
+++ linux-5.19.16/include/linux/entry-common.h 2022-10-18 17:21:17.804446694 -0400
@@ -57,9 +57,15 @@
# define ARCH_EXIT_TO_USER_MODE_WORK (0)
#endif
+#ifdef CONFIG_PREEMPT_LAZY
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+#else
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED)
+#endif
+
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
+ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
diff -rupN linux-5.19.16.orig/include/linux/interrupt.h linux-5.19.16/include/linux/interrupt.h
--- linux-5.19.16.orig/include/linux/interrupt.h 2022-10-18 17:21:10.836500399 -0400
+++ linux-5.19.16/include/linux/interrupt.h 2022-10-18 17:21:17.804446694 -0400
@@ -605,6 +605,35 @@ extern void __raise_softirq_irqoff(unsig
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
+#ifdef CONFIG_PREEMPT_RT
+DECLARE_PER_CPU(struct task_struct *, timersd);
+DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
+
+extern void raise_timer_softirq(void);
+extern void raise_hrtimer_softirq(void);
+
+static inline unsigned int local_pending_timers(void)
+{
+ return __this_cpu_read(pending_timer_softirq);
+}
+
+#else
+static inline void raise_timer_softirq(void)
+{
+ raise_softirq(TIMER_SOFTIRQ);
+}
+
+static inline void raise_hrtimer_softirq(void)
+{
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+}
+
+static inline unsigned int local_pending_timers(void)
+{
+ return local_softirq_pending();
+}
+#endif
+
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
static inline struct task_struct *this_cpu_ksoftirqd(void)
diff -rupN linux-5.19.16.orig/include/linux/irqdesc.h linux-5.19.16/include/linux/irqdesc.h
--- linux-5.19.16.orig/include/linux/irqdesc.h 2022-10-18 17:21:10.840500369 -0400
+++ linux-5.19.16/include/linux/irqdesc.h 2022-10-18 17:21:17.804446694 -0400
@@ -169,6 +169,7 @@ int generic_handle_irq_safe(unsigned int
* conversion failed.
*/
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
+int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq);
int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq);
#endif
diff -rupN linux-5.19.16.orig/include/linux/lockdep.h linux-5.19.16/include/linux/lockdep.h
--- linux-5.19.16.orig/include/linux/lockdep.h 2022-10-18 17:21:10.840500369 -0400
+++ linux-5.19.16/include/linux/lockdep.h 2022-10-18 17:21:17.804446694 -0400
@@ -435,7 +435,6 @@ enum xhlock_context_t {
XHLOCK_CTX_NR,
};
-#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0)
/*
* To initialize a lockdep_map statically use this macro.
* Note that _name must not be NULL.
diff -rupN linux-5.19.16.orig/include/linux/preempt.h linux-5.19.16/include/linux/preempt.h
--- linux-5.19.16.orig/include/linux/preempt.h 2022-10-18 17:21:10.848500307 -0400
+++ linux-5.19.16/include/linux/preempt.h 2022-10-18 17:21:17.804446694 -0400
@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val);
#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)
+#ifdef CONFIG_PREEMPT_LAZY
+#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
+#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
+#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
+#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
+#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
+#else
+#define add_preempt_lazy_count(val) do { } while (0)
+#define sub_preempt_lazy_count(val) do { } while (0)
+#define inc_preempt_lazy_count() do { } while (0)
+#define dec_preempt_lazy_count() do { } while (0)
+#define preempt_lazy_count() (0)
+#endif
+
#ifdef CONFIG_PREEMPT_COUNT
#define preempt_disable() \
@@ -204,6 +218,12 @@ do { \
barrier(); \
} while (0)
+#define preempt_lazy_disable() \
+do { \
+ inc_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define sched_preempt_enable_no_resched() \
do { \
barrier(); \
@@ -235,6 +255,18 @@ do { \
__preempt_schedule(); \
} while (0)
+/*
+ * open code preempt_check_resched() because it is not exported to modules and
+ * used by local_unlock() or bpf_enable_instrumentation().
+ */
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+ if (should_resched(0)) \
+ __preempt_schedule(); \
+} while (0)
+
#else /* !CONFIG_PREEMPTION */
#define preempt_enable() \
do { \
@@ -242,6 +274,12 @@ do { \
preempt_count_dec(); \
} while (0)
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define preempt_enable_notrace() \
do { \
barrier(); \
@@ -282,6 +320,9 @@ do { \
#define preempt_enable_notrace() barrier()
#define preemptible() 0
+#define preempt_lazy_disable() barrier()
+#define preempt_lazy_enable() barrier()
+
#endif /* CONFIG_PREEMPT_COUNT */
#ifdef MODULE
@@ -300,7 +341,7 @@ do { \
} while (0)
#define preempt_fold_need_resched() \
do { \
- if (tif_need_resched()) \
+ if (tif_need_resched_now()) \
set_preempt_need_resched(); \
} while (0)
@@ -416,8 +457,15 @@ extern void migrate_enable(void);
#else
-static inline void migrate_disable(void) { }
-static inline void migrate_enable(void) { }
+static inline void migrate_disable(void)
+{
+ preempt_lazy_disable();
+}
+
+static inline void migrate_enable(void)
+{
+ preempt_lazy_enable();
+}
#endif /* CONFIG_SMP */
diff -rupN linux-5.19.16.orig/include/linux/printk.h linux-5.19.16/include/linux/printk.h
--- linux-5.19.16.orig/include/linux/printk.h 2022-10-18 17:21:10.848500307 -0400
+++ linux-5.19.16/include/linux/printk.h 2022-10-18 17:21:17.804446694 -0400
@@ -169,7 +169,11 @@ extern void __printk_safe_exit(void);
#define printk_deferred_enter __printk_safe_enter
#define printk_deferred_exit __printk_safe_exit
+extern void printk_prefer_direct_enter(void);
+extern void printk_prefer_direct_exit(void);
+
extern bool pr_flush(int timeout_ms, bool reset_on_progress);
+extern void try_block_console_kthreads(int timeout_ms);
/*
* Please don't use printk_ratelimit(), because it shares ratelimiting state
@@ -221,11 +225,23 @@ static inline void printk_deferred_exit(
{
}
+static inline void printk_prefer_direct_enter(void)
+{
+}
+
+static inline void printk_prefer_direct_exit(void)
+{
+}
+
static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
{
return true;
}
+static inline void try_block_console_kthreads(int timeout_ms)
+{
+}
+
static inline int printk_ratelimit(void)
{
return 0;
diff -rupN linux-5.19.16.orig/include/linux/sched.h linux-5.19.16/include/linux/sched.h
--- linux-5.19.16.orig/include/linux/sched.h 2022-10-18 17:21:10.852500276 -0400
+++ linux-5.19.16/include/linux/sched.h 2022-10-18 17:21:17.804446694 -0400
@@ -2026,6 +2026,43 @@ static inline int test_tsk_need_resched(
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
+#ifdef CONFIG_PREEMPT_LAZY
+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
+}
+
+static inline int need_resched_lazy(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#else
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
+static inline int need_resched_lazy(void) { return 0; }
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#endif
+
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
diff -rupN linux-5.19.16.orig/include/linux/serial_8250.h linux-5.19.16/include/linux/serial_8250.h
--- linux-5.19.16.orig/include/linux/serial_8250.h 2022-10-18 17:21:10.852500276 -0400
+++ linux-5.19.16/include/linux/serial_8250.h 2022-10-18 17:21:17.804446694 -0400
@@ -7,6 +7,7 @@
#ifndef _LINUX_SERIAL_8250_H
#define _LINUX_SERIAL_8250_H
+#include <linux/atomic.h>
#include <linux/serial_core.h>
#include <linux/serial_reg.h>
#include <linux/platform_device.h>
@@ -123,6 +124,8 @@ struct uart_8250_port {
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
unsigned char msr_saved_flags;
+ atomic_t console_printing;
+
struct uart_8250_dma *dma;
const struct uart_8250_ops *ops;
@@ -178,6 +181,8 @@ void serial8250_init_port(struct uart_82
void serial8250_set_defaults(struct uart_8250_port *up);
void serial8250_console_write(struct uart_8250_port *up, const char *s,
unsigned int count);
+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
+ unsigned int count);
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
int serial8250_console_exit(struct uart_port *port);
diff -rupN linux-5.19.16.orig/include/linux/thread_info.h linux-5.19.16/include/linux/thread_info.h
--- linux-5.19.16.orig/include/linux/thread_info.h 2022-10-18 17:21:10.892499968 -0400
+++ linux-5.19.16/include/linux/thread_info.h 2022-10-18 17:21:17.804446694 -0400
@@ -177,7 +177,17 @@ static __always_inline unsigned long rea
clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
#endif /* !CONFIG_GENERIC_ENTRY */
-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifdef CONFIG_PREEMPT_LAZY
+#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
+ test_thread_flag(TIF_NEED_RESCHED_LAZY))
+#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
+#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)
+
+#else
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_lazy() 0
+#endif
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
static inline int arch_within_stack_frames(const void * const stack,
diff -rupN linux-5.19.16.orig/include/linux/trace_events.h linux-5.19.16/include/linux/trace_events.h
--- linux-5.19.16.orig/include/linux/trace_events.h 2022-10-18 17:21:10.892499968 -0400
+++ linux-5.19.16/include/linux/trace_events.h 2022-10-18 17:21:17.804446694 -0400
@@ -70,6 +70,7 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
+ unsigned char preempt_lazy_count;
};
#define TRACE_EVENT_TYPE_MAX \
@@ -158,9 +159,10 @@ static inline void tracing_generic_entry
unsigned int trace_ctx)
{
entry->preempt_count = trace_ctx & 0xff;
+ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff;
entry->pid = current->pid;
entry->type = type;
- entry->flags = trace_ctx >> 16;
+ entry->flags = trace_ctx >> 24;
}
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
@@ -171,7 +173,13 @@ enum trace_flag_type {
TRACE_FLAG_NEED_RESCHED = 0x04,
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
+#ifdef CONFIG_PREEMPT_LAZY
+ TRACE_FLAG_PREEMPT_RESCHED = 0x00,
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20,
+#else
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x00,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
+#endif
TRACE_FLAG_NMI = 0x40,
TRACE_FLAG_BH_OFF = 0x80,
};
diff -rupN linux-5.19.16.orig/init/Kconfig linux-5.19.16/init/Kconfig
--- linux-5.19.16.orig/init/Kconfig 2022-10-18 17:21:10.948499537 -0400
+++ linux-5.19.16/init/Kconfig 2022-10-18 17:21:17.808446663 -0400
@@ -1574,6 +1574,10 @@ config PRINTK
very difficult to diagnose system problems, saying N here is
strongly discouraged.
+config HAVE_ATOMIC_CONSOLE
+ bool
+ default n
+
config BUG
bool "BUG() support" if EXPERT
default y
diff -rupN linux-5.19.16.orig/kernel/entry/common.c linux-5.19.16/kernel/entry/common.c
--- linux-5.19.16.orig/kernel/entry/common.c 2022-10-18 17:21:10.956499475 -0400
+++ linux-5.19.16/kernel/entry/common.c 2022-10-18 17:21:17.808446663 -0400
@@ -153,7 +153,7 @@ static unsigned long exit_to_user_mode_l
local_irq_enable_exit_to_user(ti_work);
- if (ti_work & _TIF_NEED_RESCHED)
+ if (ti_work & _TIF_NEED_RESCHED_MASK)
schedule();
if (ti_work & _TIF_UPROBE)
@@ -381,7 +381,7 @@ void raw_irqentry_exit_cond_resched(void
rcu_irq_exit_check_preempt();
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
WARN_ON_ONCE(!on_thread_stack());
- if (need_resched())
+ if (should_resched(0))
preempt_schedule_irq();
}
}
diff -rupN linux-5.19.16.orig/kernel/hung_task.c linux-5.19.16/kernel/hung_task.c
--- linux-5.19.16.orig/kernel/hung_task.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/hung_task.c 2022-10-18 17:21:17.808446663 -0400
@@ -127,6 +127,8 @@ static void check_hung_task(struct task_
* complain:
*/
if (sysctl_hung_task_warnings) {
+ printk_prefer_direct_enter();
+
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
@@ -142,6 +144,8 @@ static void check_hung_task(struct task_
if (sysctl_hung_task_all_cpu_backtrace)
hung_task_show_all_bt = true;
+
+ printk_prefer_direct_exit();
}
touch_nmi_watchdog();
@@ -204,12 +208,17 @@ static void check_hung_uninterruptible_t
}
unlock:
rcu_read_unlock();
- if (hung_task_show_lock)
+ if (hung_task_show_lock) {
+ printk_prefer_direct_enter();
debug_show_all_locks();
+ printk_prefer_direct_exit();
+ }
if (hung_task_show_all_bt) {
hung_task_show_all_bt = false;
+ printk_prefer_direct_enter();
trigger_all_cpu_backtrace();
+ printk_prefer_direct_exit();
}
if (hung_task_call_panic)
diff -rupN linux-5.19.16.orig/kernel/irq/irqdesc.c linux-5.19.16/kernel/irq/irqdesc.c
--- linux-5.19.16.orig/kernel/irq/irqdesc.c 2022-10-18 17:21:10.960499444 -0400
+++ linux-5.19.16/kernel/irq/irqdesc.c 2022-10-18 17:21:17.808446663 -0400
@@ -705,6 +705,30 @@ int generic_handle_domain_irq(struct irq
}
EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
+ /**
+ * generic_handle_domain_irq_safe - Invoke the handler for a HW irq belonging
+ * to a domain from any context.
+ * @domain: The domain where to perform the lookup
+ * @hwirq: The HW irq number to convert to a logical one
+ *
+ * Returns: 0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process context). It
+ * will report an error if not invoked from IRQ context and the irq has been
+ * marked to enforce IRQ-context only.
+ */
+int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = handle_irq_desc(irq_resolve_mapping(domain, hwirq));
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe);
+
/**
* generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging
* to a domain.
diff -rupN linux-5.19.16.orig/kernel/Kconfig.preempt linux-5.19.16/kernel/Kconfig.preempt
--- linux-5.19.16.orig/kernel/Kconfig.preempt 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/Kconfig.preempt 2022-10-18 17:21:17.808446663 -0400
@@ -1,5 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
+config HAVE_PREEMPT_LAZY
+ bool
+
+config PREEMPT_LAZY
+ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
+
config PREEMPT_NONE_BUILD
bool
diff -rupN linux-5.19.16.orig/kernel/ksysfs.c linux-5.19.16/kernel/ksysfs.c
--- linux-5.19.16.orig/kernel/ksysfs.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/ksysfs.c 2022-10-18 17:21:17.808446663 -0400
@@ -137,6 +137,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
#endif /* CONFIG_CRASH_CORE */
+#if defined(CONFIG_PREEMPT_RT)
+static ssize_t realtime_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", 1);
+}
+KERNEL_ATTR_RO(realtime);
+#endif
+
/* whether file capabilities are enabled */
static ssize_t fscaps_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -228,6 +237,9 @@ static struct attribute * kernel_attrs[]
&rcu_expedited_attr.attr,
&rcu_normal_attr.attr,
#endif
+#ifdef CONFIG_PREEMPT_RT
+ &realtime_attr.attr,
+#endif
NULL
};
diff -rupN linux-5.19.16.orig/kernel/panic.c linux-5.19.16/kernel/panic.c
--- linux-5.19.16.orig/kernel/panic.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/panic.c 2022-10-18 17:21:17.808446663 -0400
@@ -257,7 +257,6 @@ void panic(const char *fmt, ...)
panic_smp_self_stop();
console_verbose();
- bust_spinlocks(1);
va_start(args, fmt);
len = vscnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
@@ -274,6 +273,11 @@ void panic(const char *fmt, ...)
dump_stack();
#endif
+ /* If atomic consoles are available, flush the kernel log. */
+ console_flush_on_panic(CONSOLE_ATOMIC_FLUSH_PENDING);
+
+ bust_spinlocks(1);
+
/*
* If kgdb is enabled, give it a chance to run before we stop all
* the other CPUs or else we won't be able to debug processes left
@@ -297,6 +301,7 @@ void panic(const char *fmt, ...)
* unfortunately means it may not be hardened to work in a
* panic situation.
*/
+ try_block_console_kthreads(10000);
smp_send_stop();
} else {
/*
@@ -304,6 +309,7 @@ void panic(const char *fmt, ...)
* kmsg_dump, we will need architecture dependent extra
* works in addition to stopping other CPUs.
*/
+ try_block_console_kthreads(10000);
crash_smp_send_stop();
}
@@ -603,6 +609,8 @@ void __warn(const char *file, int line,
{
disable_trace_on_warning();
+ printk_prefer_direct_enter();
+
if (file)
pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
raw_smp_processor_id(), current->pid, file, line,
@@ -632,6 +640,8 @@ void __warn(const char *file, int line,
/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
+
+ printk_prefer_direct_exit();
}
#ifndef __WARN_FLAGS
diff -rupN linux-5.19.16.orig/kernel/printk/internal.h linux-5.19.16/kernel/printk/internal.h
--- linux-5.19.16.orig/kernel/printk/internal.h 2022-10-18 17:21:10.960499444 -0400
+++ linux-5.19.16/kernel/printk/internal.h 2022-10-18 17:21:17.808446663 -0400
@@ -20,6 +20,8 @@ enum printk_info_flags {
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
+extern bool block_console_kthreads;
+
__printf(4, 0)
int vprintk_store(int facility, int level,
const struct dev_printk_info *dev_info,
diff -rupN linux-5.19.16.orig/kernel/printk/printk.c linux-5.19.16/kernel/printk/printk.c
--- linux-5.19.16.orig/kernel/printk/printk.c 2022-10-18 17:21:10.960499444 -0400
+++ linux-5.19.16/kernel/printk/printk.c 2022-10-18 17:21:17.808446663 -0400
@@ -44,6 +44,7 @@
#include <linux/irq_work.h>
#include <linux/ctype.h>
#include <linux/uio.h>
+#include <linux/clocksource.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
@@ -224,6 +225,36 @@ int devkmsg_sysctl_set_loglvl(struct ctl
static int nr_ext_console_drivers;
/*
+ * Used to synchronize printing kthreads against direct printing via
+ * console_trylock/console_unlock.
+ *
+ * Values:
+ * -1 = console kthreads atomically blocked (via global trylock)
+ * 0 = no kthread printing, console not locked (via trylock)
+ * >0 = kthread(s) actively printing
+ *
+ * Note: For synchronizing against direct printing via
+ * console_lock/console_unlock, see the @lock variable in
+ * struct console.
+ */
+static atomic_t console_kthreads_active = ATOMIC_INIT(0);
+
+#define console_kthreads_atomic_tryblock() \
+ (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0)
+#define console_kthreads_atomic_unblock() \
+ atomic_cmpxchg(&console_kthreads_active, -1, 0)
+#define console_kthreads_atomically_blocked() \
+ (atomic_read(&console_kthreads_active) == -1)
+
+#define console_kthread_printing_tryenter() \
+ atomic_inc_unless_negative(&console_kthreads_active)
+#define console_kthread_printing_exit() \
+ atomic_dec(&console_kthreads_active)
+
+/* Block console kthreads to avoid processing new messages. */
+bool block_console_kthreads;
+
+/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
* macros instead of functions so that _RET_IP_ contains useful information.
*/
@@ -271,14 +302,49 @@ static bool panic_in_progress(void)
}
/*
- * This is used for debugging the mess that is the VT code by
- * keeping track if we have the console semaphore held. It's
- * definitely not the perfect debug tool (we don't know if _WE_
- * hold it and are racing, but it helps tracking those weird code
- * paths in the console code where we end up in places I want
- * locked without the console semaphore held).
+ * Tracks whether kthread printers are all blocked. A value of true implies
+ * that the console is locked via console_lock() or the console is suspended.
+ * Writing to this variable requires holding @console_sem.
+ */
+static bool console_kthreads_blocked;
+
+/*
+ * Block all kthread printers from a schedulable context.
+ *
+ * Requires holding @console_sem.
*/
-static int console_locked, console_suspended;
+static void console_kthreads_block(void)
+{
+ struct console *con;
+
+ for_each_console(con) {
+ mutex_lock(&con->lock);
+ con->blocked = true;
+ mutex_unlock(&con->lock);
+ }
+
+ console_kthreads_blocked = true;
+}
+
+/*
+ * Unblock all kthread printers from a schedulable context.
+ *
+ * Requires holding @console_sem.
+ */
+static void console_kthreads_unblock(void)
+{
+ struct console *con;
+
+ for_each_console(con) {
+ mutex_lock(&con->lock);
+ con->blocked = false;
+ mutex_unlock(&con->lock);
+ }
+
+ console_kthreads_blocked = false;
+}
+
+static int console_suspended;
/*
* Array of consoles built from command line options (console=)
@@ -361,7 +427,75 @@ static int console_msg_format = MSG_FORM
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_MUTEX(syslog_lock);
+/*
+ * A flag to signify if printk_activate_kthreads() has already started the
+ * kthread printers. If true, any later registered consoles must start their
+ * own kthread directly. The flag is write protected by the console_lock.
+ */
+static bool printk_kthreads_available;
+
#ifdef CONFIG_PRINTK
+static atomic_t printk_prefer_direct = ATOMIC_INIT(0);
+
+/**
+ * printk_prefer_direct_enter - cause printk() calls to attempt direct
+ * printing to all enabled consoles
+ *
+ * Since it is not possible to call into the console printing code from any
+ * context, there is no guarantee that direct printing will occur.
+ *
+ * This globally affects all printk() callers.
+ *
+ * Context: Any context.
+ */
+void printk_prefer_direct_enter(void)
+{
+ atomic_inc(&printk_prefer_direct);
+}
+
+/**
+ * printk_prefer_direct_exit - restore printk() behavior
+ *
+ * Context: Any context.
+ */
+void printk_prefer_direct_exit(void)
+{
+ WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0);
+}
+
+/*
+ * Calling printk() always wakes kthread printers so that they can
+ * flush the new message to their respective consoles. Also, if direct
+ * printing is allowed, printk() tries to flush the messages directly.
+ *
+ * Direct printing is allowed in situations when the kthreads
+ * are not available or the system is in a problematic state.
+ *
+ * See the implementation for details about possible races.
+ */
+static inline bool allow_direct_printing(void)
+{
+ /*
+ * Checking kthread availability is a possible race because the
+ * kthread printers can become permanently disabled during runtime.
+ * However, doing that requires holding the console_lock, so any
+ * pending messages will be direct printed by console_unlock().
+ */
+ if (!printk_kthreads_available)
+ return true;
+
+ /*
+ * Prefer direct printing when the system is in a problematic state.
+ * The context that sets this state will always see the updated value.
+ * The other contexts do not care. Anyway, direct printing is just a
+ * best effort. The direct output is only possible when console_lock
+ * is not already taken and no kthread printers are actively printing.
+ */
+ return (system_state > SYSTEM_RUNNING ||
+ oops_in_progress ||
+ atomic_read(&printk_prefer_direct));
+}
+
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
@@ -1850,6 +1984,7 @@ static int console_lock_spinning_disable
return 1;
}
+#if !IS_ENABLED(CONFIG_PREEMPT_RT)
/**
* console_trylock_spinning - try to get console_lock by busy waiting
*
@@ -1923,6 +2058,7 @@ static int console_trylock_spinning(void
return 1;
}
+#endif /* CONFIG_PREEMPT_RT */
/*
* Call the specified console driver, asking it to write out the specified
@@ -1930,19 +2066,28 @@ static int console_trylock_spinning(void
* dropped, a dropped message will be written out first.
*/
static void call_console_driver(struct console *con, const char *text, size_t len,
- char *dropped_text)
+ char *dropped_text, bool atomic_printing)
{
+ unsigned long dropped = 0;
size_t dropped_len;
- if (con->dropped && dropped_text) {
+ if (dropped_text)
+ dropped = atomic_long_xchg_relaxed(&con->dropped, 0);
+
+ if (dropped) {
dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX,
"** %lu printk messages dropped **\n",
- con->dropped);
- con->dropped = 0;
- con->write(con, dropped_text, dropped_len);
+ dropped);
+ if (atomic_printing)
+ con->write_atomic(con, dropped_text, dropped_len);
+ else
+ con->write(con, dropped_text, dropped_len);
}
- con->write(con, text, len);
+ if (atomic_printing)
+ con->write_atomic(con, text, len);
+ else
+ con->write(con, text, len);
}
/*
@@ -2252,10 +2397,22 @@ asmlinkage int vprintk_emit(int facility
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
/* If called from the scheduler, we can not call up(). */
- if (!in_sched) {
+ if (!in_sched && allow_direct_printing()) {
+#if IS_ENABLED(CONFIG_PREEMPT_RT)
+ /*
+ * Use the non-spinning trylock since PREEMPT_RT does not
+ * support console lock handovers.
+ *
+ * Direct printing will most likely involve taking spinlocks.
+ * For PREEMPT_RT, this is only allowed if in a preemptible
+ * context.
+ */
+ if (preemptible() && console_trylock())
+ console_unlock();
+#else
/*
* The caller may be holding system-critical or
- * timing-sensitive locks. Disable preemption during
+ * timing-sensitive locks. Disable preemption during direct
* printing of all remaining records to all consoles so that
* this context can return as soon as possible. Hopefully
* another printk() caller will take over the printing.
@@ -2270,6 +2427,7 @@ asmlinkage int vprintk_emit(int facility
if (console_trylock_spinning())
console_unlock();
preempt_enable();
+#endif
}
wake_up_klogd();
@@ -2296,8 +2454,80 @@ asmlinkage __visible int _printk(const c
}
EXPORT_SYMBOL(_printk);
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+static void __free_atomic_data(struct console_atomic_data *d)
+{
+ kfree(d->text);
+ kfree(d->ext_text);
+ kfree(d->dropped_text);
+}
+
+static void free_atomic_data(struct console_atomic_data *d)
+{
+ int count = 1;
+ int i;
+
+ if (!d)
+ return;
+
+#ifdef CONFIG_HAVE_NMI
+ count = 2;
+#endif
+
+ for (i = 0; i < count; i++)
+ __free_atomic_data(&d[i]);
+ kfree(d);
+}
+
+static int __alloc_atomic_data(struct console_atomic_data *d, short flags)
+{
+ d->text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
+ if (!d->text)
+ return -1;
+
+ if (flags & CON_EXTENDED) {
+ d->ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+ if (!d->ext_text)
+ return -1;
+ } else {
+ d->dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL);
+ if (!d->dropped_text)
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct console_atomic_data *alloc_atomic_data(short flags)
+{
+ struct console_atomic_data *d;
+ int count = 1;
+ int i;
+
+#ifdef CONFIG_HAVE_NMI
+ count = 2;
+#endif
+
+ d = kzalloc(sizeof(*d) * count, GFP_KERNEL);
+ if (!d)
+ goto err_out;
+
+ for (i = 0; i < count; i++) {
+ if (__alloc_atomic_data(&d[i], flags) != 0)
+ goto err_out;
+ }
+
+ return d;
+err_out:
+ free_atomic_data(d);
+ return NULL;
+}
+#endif /* CONFIG_HAVE_ATOMIC_CONSOLE */
+
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
+static void printk_start_kthread(struct console *con);
+
#else /* CONFIG_PRINTK */
#define CONSOLE_LOG_MAX 0
@@ -2308,6 +2538,8 @@ static bool __pr_flush(struct console *c
#define prb_first_valid_seq(rb) 0
#define prb_next_seq(rb) 0
+#define free_atomic_data(d)
+
static u64 syslog_seq;
static size_t record_print_text(const struct printk_record *r,
@@ -2326,11 +2558,13 @@ static ssize_t msg_print_ext_body(char *
static void console_lock_spinning_enable(void) { }
static int console_lock_spinning_disable_and_check(void) { return 0; }
static void call_console_driver(struct console *con, const char *text, size_t len,
- char *dropped_text)
+ char *dropped_text, bool atomic_printing)
{
}
static bool suppress_message_printing(int level) { return false; }
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
+static void printk_start_kthread(struct console *con) { }
+static bool allow_direct_printing(void) { return true; }
#endif /* CONFIG_PRINTK */
@@ -2549,6 +2783,14 @@ static int console_cpu_notify(unsigned i
/* If trylock fails, someone else is doing the printing */
if (console_trylock())
console_unlock();
+ else {
+ /*
+ * If a new CPU comes online, the conditions for
+ * printer_should_wake() may have changed for some
+ * kthread printer with !CON_ANYTIME.
+ */
+ wake_up_klogd();
+ }
}
return 0;
}
@@ -2568,7 +2810,7 @@ void console_lock(void)
down_console_sem();
if (console_suspended)
return;
- console_locked = 1;
+ console_kthreads_block();
console_may_schedule = 1;
}
EXPORT_SYMBOL(console_lock);
@@ -2589,15 +2831,30 @@ int console_trylock(void)
up_console_sem();
return 0;
}
- console_locked = 1;
+ if (!console_kthreads_atomic_tryblock()) {
+ up_console_sem();
+ return 0;
+ }
console_may_schedule = 0;
return 1;
}
EXPORT_SYMBOL(console_trylock);
+/*
+ * This is used to help make sure that certain paths within the VT code are
+ * running with the console lock held. It is definitely not the perfect debug
+ * tool (it is not known if the VT code is the task holding the console lock),
+ * but it helps track those weird code paths in the console code such as
+ * when the console is suspended: where the console is not locked but no
+ * console printing may occur.
+ *
+ * Note: This returns true when the console is suspended but is not locked.
+ * This is intentional because the VT code must consider that situation
+ * the same as if the console was locked.
+ */
int is_console_locked(void)
{
- return console_locked;
+ return (console_kthreads_blocked || atomic_read(&console_kthreads_active));
}
EXPORT_SYMBOL(is_console_locked);
@@ -2620,18 +2877,9 @@ static bool abandon_console_lock_in_pani
return atomic_read(&panic_cpu) != raw_smp_processor_id();
}
-/*
- * Check if the given console is currently capable and allowed to print
- * records.
- *
- * Requires the console_lock.
- */
-static inline bool console_is_usable(struct console *con)
+static inline bool __console_is_usable(short flags)
{
- if (!(con->flags & CON_ENABLED))
- return false;
-
- if (!con->write)
+ if (!(flags & CON_ENABLED))
return false;
/*
@@ -2640,18 +2888,116 @@ static inline bool console_is_usable(str
* cope (CON_ANYTIME) don't call them until this CPU is officially up.
*/
if (!cpu_online(raw_smp_processor_id()) &&
- !(con->flags & CON_ANYTIME))
+ !(flags & CON_ANYTIME))
return false;
return true;
}
+/*
+ * Check if the given console is currently capable and allowed to print
+ * records.
+ *
+ * Requires holding the console_lock.
+ */
+static inline bool console_is_usable(struct console *con, bool atomic_printing)
+{
+ if (atomic_printing) {
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+ if (!con->write_atomic)
+ return false;
+ if (!con->atomic_data)
+ return false;
+#else
+ return false;
+#endif
+ } else if (!con->write) {
+ return false;
+ }
+
+ return __console_is_usable(con->flags);
+}
+
static void __console_unlock(void)
{
- console_locked = 0;
+ /*
+ * Depending on whether console_lock() or console_trylock() was used,
+ * appropriately allow the kthread printers to continue.
+ */
+ if (console_kthreads_blocked)
+ console_kthreads_unblock();
+ else
+ console_kthreads_atomic_unblock();
+
+ /*
+ * New records may have arrived while the console was locked.
+ * Wake the kthread printers to print them.
+ */
+ wake_up_klogd();
+
up_console_sem();
}
+static u64 read_console_seq(struct console *con)
+{
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+ unsigned long flags;
+ u64 seq2;
+ u64 seq;
+
+ if (!con->atomic_data)
+ return con->seq;
+
+ printk_cpu_sync_get_irqsave(flags);
+
+ seq = con->seq;
+ seq2 = con->atomic_data[0].seq;
+ if (seq2 > seq)
+ seq = seq2;
+#ifdef CONFIG_HAVE_NMI
+ seq2 = con->atomic_data[1].seq;
+ if (seq2 > seq)
+ seq = seq2;
+#endif
+
+ printk_cpu_sync_put_irqrestore(flags);
+
+ return seq;
+#else /* CONFIG_HAVE_ATOMIC_CONSOLE */
+ return con->seq;
+#endif
+}
+
+static void write_console_seq(struct console *con, u64 val, bool atomic_printing)
+{
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+ unsigned long flags;
+ u64 *seq;
+
+ if (!con->atomic_data) {
+ con->seq = val;
+ return;
+ }
+
+ printk_cpu_sync_get_irqsave(flags);
+
+ if (atomic_printing) {
+ seq = &con->atomic_data[0].seq;
+#ifdef CONFIG_HAVE_NMI
+ if (in_nmi())
+ seq = &con->atomic_data[1].seq;
+#endif
+ } else {
+ seq = &con->seq;
+ }
+ *seq = val;
+
+ printk_cpu_sync_put_irqrestore(flags);
+#else /* CONFIG_HAVE_ATOMIC_CONSOLE */
+ con->seq = val;
+#endif
+}
+
/*
* Print one record for the given console. The record printed is whatever
* record is the next available record for the given console.
@@ -2664,36 +3010,47 @@ static void __console_unlock(void)
* If dropped messages should be printed, @dropped_text is a buffer of size
* DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL.
*
+ * @atomic_printing specifies if atomic printing should be used.
+ *
* @handover will be set to true if a printk waiter has taken over the
* console_lock, in which case the caller is no longer holding the
- * console_lock. Otherwise it is set to false.
+ * console_lock. Otherwise it is set to false. A NULL pointer may be provided
+ * to disable allowing the console_lock to be taken over by a printk waiter.
*
* Returns false if the given console has no next record to print, otherwise
* true.
*
- * Requires the console_lock.
+ * Requires the console_lock if @handover is non-NULL.
+ * Requires con->lock otherwise.
*/
-static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
- char *dropped_text, bool *handover)
+static bool __console_emit_next_record(struct console *con, char *text, char *ext_text,
+ char *dropped_text, bool atomic_printing,
+ bool *handover)
{
- static int panic_console_dropped;
+ static atomic_t panic_console_dropped = ATOMIC_INIT(0);
struct printk_info info;
struct printk_record r;
unsigned long flags;
char *write_text;
size_t len;
+ u64 seq;
prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
- *handover = false;
+ if (handover)
+ *handover = false;
- if (!prb_read_valid(prb, con->seq, &r))
+ seq = read_console_seq(con);
+
+ if (!prb_read_valid(prb, seq, &r))
return false;
- if (con->seq != r.info->seq) {
- con->dropped += r.info->seq - con->seq;
- con->seq = r.info->seq;
- if (panic_in_progress() && panic_console_dropped++ > 10) {
+ if (seq != r.info->seq) {
+ atomic_long_add((unsigned long)(r.info->seq - seq), &con->dropped);
+ write_console_seq(con, r.info->seq, atomic_printing);
+ seq = r.info->seq;
+ if (panic_in_progress() &&
+ atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) {
suppress_panic_printk = 1;
pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
}
@@ -2701,7 +3058,7 @@ static bool console_emit_next_record(str
/* Skip record that has level above the console loglevel. */
if (suppress_message_printing(r.info->level)) {
- con->seq++;
+ write_console_seq(con, seq + 1, atomic_printing);
goto skip;
}
@@ -2715,32 +3072,66 @@ static bool console_emit_next_record(str
len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
}
- /*
- * While actively printing out messages, if another printk()
- * were to occur on another CPU, it may wait for this one to
- * finish. This task can not be preempted if there is a
- * waiter waiting to take over.
- *
- * Interrupts are disabled because the hand over to a waiter
- * must not be interrupted until the hand over is completed
- * (@console_waiter is cleared).
- */
- printk_safe_enter_irqsave(flags);
- console_lock_spinning_enable();
+ if (handover) {
+ /*
+ * While actively printing out messages, if another printk()
+ * were to occur on another CPU, it may wait for this one to
+ * finish. This task can not be preempted if there is a
+ * waiter waiting to take over.
+ *
+ * Interrupts are disabled because the hand over to a waiter
+ * must not be interrupted until the hand over is completed
+ * (@console_waiter is cleared).
+ */
+ printk_safe_enter_irqsave(flags);
+ console_lock_spinning_enable();
- stop_critical_timings(); /* don't trace print latency */
- call_console_driver(con, write_text, len, dropped_text);
- start_critical_timings();
+ /* don't trace irqsoff print latency */
+ stop_critical_timings();
+ }
- con->seq++;
+ call_console_driver(con, write_text, len, dropped_text, atomic_printing);
- *handover = console_lock_spinning_disable_and_check();
- printk_safe_exit_irqrestore(flags);
+ write_console_seq(con, seq + 1, atomic_printing);
+
+ if (handover) {
+ start_critical_timings();
+ *handover = console_lock_spinning_disable_and_check();
+ printk_safe_exit_irqrestore(flags);
+ }
skip:
return true;
}
/*
+ * Print a record for a given console, but allow another printk() caller to
+ * take over the console_lock and continue printing.
+ *
+ * Requires the console_lock, but depending on @handover after the call, the
+ * caller may no longer have the console_lock.
+ *
+ * See __console_emit_next_record() for argument and return details.
+ */
+static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text,
+ char *dropped_text, bool *handover)
+{
+ /*
+ * Handovers are only supported if threaded printers are atomically
+ * blocked. The context taking over the console_lock may be atomic.
+ *
+ * PREEMPT_RT also does not support handovers because the spinning
+ * waiter can cause large latencies.
+ */
+ if (!console_kthreads_atomically_blocked() ||
+ IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ *handover = false;
+ handover = NULL;
+ }
+
+ return __console_emit_next_record(con, text, ext_text, dropped_text, false, handover);
+}
+
+/*
* Print out all remaining records to all consoles.
*
* @do_cond_resched is set by the caller. It can be true only in schedulable
@@ -2758,8 +3149,8 @@ skip:
* were flushed to all usable consoles. A returned false informs the caller
* that everything was not flushed (either there were no usable consoles or
* another context has taken over printing or it is a panic situation and this
- * is not the panic CPU). Regardless the reason, the caller should assume it
- * is not useful to immediately try again.
+ * is not the panic CPU or direct printing is not preferred). Regardless of the
+ * reason, the caller should assume it is not useful to immediately try again.
*
* Requires the console_lock.
*/
@@ -2776,24 +3167,26 @@ static bool console_flush_all(bool do_co
*handover = false;
do {
+ /* Let the kthread printers do the work if they can. */
+ if (!allow_direct_printing())
+ return false;
+
any_progress = false;
for_each_console(con) {
bool progress;
- if (!console_is_usable(con))
+ if (!console_is_usable(con, false))
continue;
any_usable = true;
if (con->flags & CON_EXTENDED) {
/* Extended consoles do not print "dropped messages". */
- progress = console_emit_next_record(con, &text[0],
- &ext_text[0], NULL,
- handover);
+ progress = console_emit_next_record_transferable(con, &text[0],
+ &ext_text[0], NULL, handover);
} else {
- progress = console_emit_next_record(con, &text[0],
- NULL, &dropped_text[0],
- handover);
+ progress = console_emit_next_record_transferable(con, &text[0],
+ NULL, &dropped_text[0], handover);
}
if (*handover)
return false;
@@ -2818,6 +3211,68 @@ static bool console_flush_all(bool do_co
return any_usable;
}
+#if defined(CONFIG_HAVE_ATOMIC_CONSOLE) && defined(CONFIG_PRINTK)
+static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
+ char *dropped_text, bool atomic_printing);
+
+static void atomic_console_flush_all(void)
+{
+ unsigned long flags;
+ struct console *con;
+ bool any_progress;
+ int index = 0;
+
+ if (console_suspended)
+ return;
+
+#ifdef CONFIG_HAVE_NMI
+ if (in_nmi())
+ index = 1;
+#endif
+
+ printk_cpu_sync_get_irqsave(flags);
+
+ do {
+ any_progress = false;
+
+ for_each_console(con) {
+ bool progress;
+
+ if (!console_is_usable(con, true))
+ continue;
+
+ if (con->flags & CON_EXTENDED) {
+ /* Extended consoles do not print "dropped messages". */
+ progress = console_emit_next_record(con,
+ &con->atomic_data->text[index],
+ &con->atomic_data->ext_text[index],
+ NULL,
+ true);
+ } else {
+ progress = console_emit_next_record(con,
+ &con->atomic_data->text[index],
+ NULL,
+ &con->atomic_data->dropped_text[index],
+ true);
+ }
+
+ if (!progress)
+ continue;
+ any_progress = true;
+
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ rcu_cpu_stall_reset();
+ touch_nmi_watchdog();
+ }
+ } while (any_progress);
+
+ printk_cpu_sync_put_irqrestore(flags);
+}
+#else /* CONFIG_HAVE_ATOMIC_CONSOLE && CONFIG_PRINTK */
+#define atomic_console_flush_all()
+#endif
+
/**
* console_unlock - unlock the console system
*
@@ -2908,10 +3363,13 @@ void console_unblank(void)
if (oops_in_progress) {
if (down_trylock_console_sem() != 0)
return;
+ if (!console_kthreads_atomic_tryblock()) {
+ up_console_sem();
+ return;
+ }
} else
console_lock();
- console_locked = 1;
console_may_schedule = 0;
for_each_console(c)
if ((c->flags & CON_ENABLED) && c->unblank)
@@ -2930,6 +3388,11 @@ void console_unblank(void)
*/
void console_flush_on_panic(enum con_flush_mode mode)
{
+ if (mode == CONSOLE_ATOMIC_FLUSH_PENDING) {
+ atomic_console_flush_all();
+ return;
+ }
+
/*
* If someone else is holding the console lock, trylock will fail
* and may_schedule may be set. Ignore and proceed to unlock so
@@ -2946,7 +3409,7 @@ void console_flush_on_panic(enum con_flu
seq = prb_first_valid_seq(prb);
for_each_console(c)
- c->seq = seq;
+ write_console_seq(c, seq, false);
}
console_unlock();
}
@@ -3189,16 +3652,27 @@ void register_console(struct console *ne
if (newcon->flags & CON_EXTENDED)
nr_ext_console_drivers++;
- newcon->dropped = 0;
+ atomic_long_set(&newcon->dropped, 0);
+ newcon->thread = NULL;
+ newcon->blocked = true;
+ mutex_init(&newcon->lock);
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+ newcon->atomic_data = NULL;
+#endif
+
if (newcon->flags & CON_PRINTBUFFER) {
/* Get a consistent copy of @syslog_seq. */
mutex_lock(&syslog_lock);
- newcon->seq = syslog_seq;
+ write_console_seq(newcon, syslog_seq, false);
mutex_unlock(&syslog_lock);
} else {
/* Begin with next message. */
- newcon->seq = prb_next_seq(prb);
+ write_console_seq(newcon, prb_next_seq(prb), false);
}
+
+ if (printk_kthreads_available)
+ printk_start_kthread(newcon);
+
console_unlock();
console_sysfs_notify();
@@ -3225,6 +3699,7 @@ EXPORT_SYMBOL(register_console);
int unregister_console(struct console *console)
{
+ struct task_struct *thd;
struct console *con;
int res;
@@ -3265,9 +3740,26 @@ int unregister_console(struct console *c
console_drivers->flags |= CON_CONSDEV;
console->flags &= ~CON_ENABLED;
+
+ /*
+ * console->thread can only be cleared under the console lock. But
+ * stopping the thread must be done without the console lock. The
+ * task that clears @thread is the task that stops the kthread.
+ */
+ thd = console->thread;
+ console->thread = NULL;
+
console_unlock();
+
+ if (thd)
+ kthread_stop(thd);
+
console_sysfs_notify();
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+ free_atomic_data(console->atomic_data);
+#endif
+
if (console->exit)
res = console->exit(console);
@@ -3361,6 +3853,20 @@ static int __init printk_late_init(void)
}
late_initcall(printk_late_init);
+static int __init printk_activate_kthreads(void)
+{
+ struct console *con;
+
+ console_lock();
+ printk_kthreads_available = true;
+ for_each_console(con)
+ printk_start_kthread(con);
+ console_unlock();
+
+ return 0;
+}
+early_initcall(printk_activate_kthreads);
+
#if defined CONFIG_PRINTK
/* If @con is specified, only wait for that console. Otherwise wait for all. */
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress)
@@ -3384,7 +3890,7 @@ static bool __pr_flush(struct console *c
for_each_console(c) {
if (con && con != c)
continue;
- if (!console_is_usable(c))
+ if (!console_is_usable(c, false))
continue;
printk_seq = c->seq;
if (printk_seq < seq)
@@ -3444,11 +3950,215 @@ bool pr_flush(int timeout_ms, bool reset
}
EXPORT_SYMBOL(pr_flush);
+static void __printk_fallback_preferred_direct(void)
+{
+ printk_prefer_direct_enter();
+ pr_err("falling back to preferred direct printing\n");
+ printk_kthreads_available = false;
+}
+
+/*
+ * Enter preferred direct printing, but never exit. Mark console threads as
+ * unavailable. The system is then forever in preferred direct printing and
+ * any printing threads will exit.
+ *
+ * Must *not* be called under console_lock. Use
+ * __printk_fallback_preferred_direct() if already holding console_lock.
+ */
+static void printk_fallback_preferred_direct(void)
+{
+ console_lock();
+ __printk_fallback_preferred_direct();
+ console_unlock();
+}
+
+/*
+ * Print a record for a given console, not allowing another printk() caller
+ * to take over. This is appropriate for contexts that do not have the
+ * console_lock.
+ *
+ * See __console_emit_next_record() for argument and return details.
+ */
+static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
+ char *dropped_text, bool atomic_printing)
+{
+ return __console_emit_next_record(con, text, ext_text, dropped_text,
+ atomic_printing, NULL);
+}
+
+static bool printer_should_wake(struct console *con, u64 seq)
+{
+ short flags;
+
+ if (kthread_should_stop() || !printk_kthreads_available)
+ return true;
+
+ if (con->blocked ||
+ console_kthreads_atomically_blocked() ||
+ block_console_kthreads ||
+ system_state > SYSTEM_RUNNING ||
+ oops_in_progress) {
+ return false;
+ }
+
+ /*
+ * This is an unsafe read from con->flags, but a false positive is
+ * not a problem. Worst case it would allow the printer to wake up
+ * although it is disabled. But the printer will notice that when
+ * attempting to print and instead go back to sleep.
+ */
+ flags = data_race(READ_ONCE(con->flags));
+
+ if (!__console_is_usable(flags))
+ return false;
+
+ return prb_read_valid(prb, seq, NULL);
+}
+
+static int printk_kthread_func(void *data)
+{
+ struct console *con = data;
+ char *dropped_text = NULL;
+ char *ext_text = NULL;
+ u64 seq = 0;
+ char *text;
+ int error;
+
+#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
+ if (con->write_atomic)
+ con->atomic_data = alloc_atomic_data(con->flags);
+#endif
+
+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
+ if (!text) {
+ con_printk(KERN_ERR, con, "failed to allocate text buffer\n");
+ printk_fallback_preferred_direct();
+ goto out;
+ }
+
+ if (con->flags & CON_EXTENDED) {
+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+ if (!ext_text) {
+ con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n");
+ printk_fallback_preferred_direct();
+ goto out;
+ }
+ } else {
+ dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL);
+ if (!dropped_text) {
+ con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n");
+ printk_fallback_preferred_direct();
+ goto out;
+ }
+ }
+
+ con_printk(KERN_INFO, con, "printing thread started\n");
+
+ for (;;) {
+ /*
+ * Guarantee this task is visible on the waitqueue before
+ * checking the wake condition.
+ *
+ * The full memory barrier within set_current_state() of
+ * prepare_to_wait_event() pairs with the full memory barrier
+ * within wq_has_sleeper().
+ *
+ * This pairs with __wake_up_klogd:A.
+ */
+ error = wait_event_interruptible(log_wait,
+ printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */
+
+ if (kthread_should_stop() || !printk_kthreads_available)
+ break;
+
+ if (error)
+ continue;
+
+ error = mutex_lock_interruptible(&con->lock);
+ if (error)
+ continue;
+
+ if (con->blocked ||
+ !console_kthread_printing_tryenter()) {
+ /* Another context has locked the console_lock. */
+ mutex_unlock(&con->lock);
+ continue;
+ }
+
+ /*
+ * Although this context has not locked the console_lock, it
+ * is known that the console_lock is not locked and it is not
+ * possible for any other context to lock the console_lock.
+ * Therefore it is safe to read con->flags.
+ */
+
+ if (!__console_is_usable(con->flags)) {
+ console_kthread_printing_exit();
+ mutex_unlock(&con->lock);
+ continue;
+ }
+
+ /*
+ * Even though the printk kthread is always preemptible, it is
+ * still not allowed to call cond_resched() from within
+ * console drivers. The task may become non-preemptible in the
+ * console driver call chain. For example, vt_console_print()
+ * takes a spinlock and then can call into fbcon_redraw(),
+ * which can conditionally invoke cond_resched().
+ */
+ console_may_schedule = 0;
+ console_emit_next_record(con, text, ext_text, dropped_text, false);
+
+ seq = con->seq;
+
+ console_kthread_printing_exit();
+
+ mutex_unlock(&con->lock);
+ }
+
+ con_printk(KERN_INFO, con, "printing thread stopped\n");
+out:
+ kfree(dropped_text);
+ kfree(ext_text);
+ kfree(text);
+
+ console_lock();
+ /*
+ * If this kthread is being stopped by another task, con->thread will
+ * already be NULL. That is fine. The important thing is that it is
+ * NULL after the kthread exits.
+ */
+ con->thread = NULL;
+ console_unlock();
+
+ return 0;
+}
+
+/* Must be called under console_lock. */
+static void printk_start_kthread(struct console *con)
+{
+ /*
+ * Do not start a kthread if there is no write() callback. The
+ * kthreads assume the write() callback exists.
+ */
+ if (!con->write)
+ return;
+
+ con->thread = kthread_run(printk_kthread_func, con,
+ "pr/%s%d", con->name, con->index);
+ if (IS_ERR(con->thread)) {
+ con->thread = NULL;
+ con_printk(KERN_ERR, con, "unable to start printing thread\n");
+ __printk_fallback_preferred_direct();
+ return;
+ }
+}
+
/*
* Delayed printk version, for scheduler-internal messages:
*/
-#define PRINTK_PENDING_WAKEUP 0x01
-#define PRINTK_PENDING_OUTPUT 0x02
+#define PRINTK_PENDING_WAKEUP 0x01
+#define PRINTK_PENDING_DIRECT_OUTPUT 0x02
static DEFINE_PER_CPU(int, printk_pending);
@@ -3456,10 +4166,14 @@ static void wake_up_klogd_work_func(stru
{
int pending = this_cpu_xchg(printk_pending, 0);
- if (pending & PRINTK_PENDING_OUTPUT) {
+ if (pending & PRINTK_PENDING_DIRECT_OUTPUT) {
+ printk_prefer_direct_enter();
+
/* If trylock fails, someone else is doing the printing */
if (console_trylock())
console_unlock();
+
+ printk_prefer_direct_exit();
}
if (pending & PRINTK_PENDING_WAKEUP)
@@ -3484,10 +4198,11 @@ static void __wake_up_klogd(int val)
* prepare_to_wait_event(), which is called after ___wait_event() adds
* the waiter but before it has checked the wait condition.
*
- * This pairs with devkmsg_read:A and syslog_print:A.
+ * This pairs with devkmsg_read:A, syslog_print:A, and
+ * printk_kthread_func:A.
*/
if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
- (val & PRINTK_PENDING_OUTPUT)) {
+ (val & PRINTK_PENDING_DIRECT_OUTPUT)) {
this_cpu_or(printk_pending, val);
irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
}
@@ -3505,7 +4220,17 @@ void defer_console_output(void)
* New messages may have been added directly to the ringbuffer
* using vprintk_store(), so wake any waiters as well.
*/
- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
+ int val = PRINTK_PENDING_WAKEUP;
+
+ /*
+ * Make sure that some context will print the messages when direct
+ * printing is allowed. This happens in situations where the kthreads
+ * may not be reliable or may even be unusable.
+ */
+ if (allow_direct_printing())
+ val |= PRINTK_PENDING_DIRECT_OUTPUT;
+
+ __wake_up_klogd(val);
}
void printk_trigger_flush(void)
diff -rupN linux-5.19.16.orig/kernel/printk/printk_safe.c linux-5.19.16/kernel/printk/printk_safe.c
--- linux-5.19.16.orig/kernel/printk/printk_safe.c 2022-10-18 17:21:10.960499444 -0400
+++ linux-5.19.16/kernel/printk/printk_safe.c 2022-10-18 17:21:17.808446663 -0400
@@ -8,7 +8,9 @@
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/printk.h>
+#include <linux/console.h>
#include <linux/kprobes.h>
+#include <linux/delay.h>
#include "internal.h"
@@ -50,3 +52,33 @@ asmlinkage int vprintk(const char *fmt,
return vprintk_default(fmt, args);
}
EXPORT_SYMBOL(vprintk);
+
+/**
+ * try_block_console_kthreads() - Try to block console kthreads and
+ * make the global console_lock() available
+ *
+ * @timeout_ms: The maximum time (in ms) to wait.
+ *
+ * Prevent console kthreads from starting to process new messages. Wait
+ * until the global console_lock() becomes available.
+ *
+ * Context: Can be called in any context.
+ */
+void try_block_console_kthreads(int timeout_ms)
+{
+ block_console_kthreads = true;
+
+ /* Do not wait when the console lock could not be safely taken. */
+ if (this_cpu_read(printk_context) || in_nmi())
+ return;
+
+ while (timeout_ms > 0) {
+ if (console_trylock()) {
+ console_unlock();
+ return;
+ }
+
+ udelay(1000);
+ timeout_ms -= 1;
+ }
+}
diff -rupN linux-5.19.16.orig/kernel/rcu/rcutorture.c linux-5.19.16/kernel/rcu/rcutorture.c
--- linux-5.19.16.orig/kernel/rcu/rcutorture.c 2022-10-18 17:21:10.960499444 -0400
+++ linux-5.19.16/kernel/rcu/rcutorture.c 2022-10-18 17:22:27.075909103 -0400
@@ -2086,6 +2086,12 @@ static int rcutorture_booster_init(unsig
WARN_ON_ONCE(!t);
sp.sched_priority = 2;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+#ifdef CONFIG_PREEMPT_RT
+ t = per_cpu(timersd, cpu);
+ WARN_ON_ONCE(!t);
+ sp.sched_priority = 2;
+ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+#endif
}
/* Don't allow time recalculation while creating a new task. */
diff -rupN linux-5.19.16.orig/kernel/rcu/tree_stall.h linux-5.19.16/kernel/rcu/tree_stall.h
--- linux-5.19.16.orig/kernel/rcu/tree_stall.h 2022-10-18 17:21:10.960499444 -0400
+++ linux-5.19.16/kernel/rcu/tree_stall.h 2022-10-18 17:21:17.808446663 -0400
@@ -647,6 +647,7 @@ static void print_cpu_stall(unsigned lon
* See Documentation/RCU/stallwarn.rst for info on how to debug
* RCU CPU stall warnings.
*/
+ printk_prefer_direct_enter();
trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected"));
pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
@@ -684,6 +685,7 @@ static void print_cpu_stall(unsigned lon
*/
set_tsk_need_resched(current);
set_preempt_need_resched();
+ printk_prefer_direct_exit();
}
static void check_cpu_stall(struct rcu_data *rdp)
diff -rupN linux-5.19.16.orig/kernel/reboot.c linux-5.19.16/kernel/reboot.c
--- linux-5.19.16.orig/kernel/reboot.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/reboot.c 2022-10-18 17:21:17.808446663 -0400
@@ -82,6 +82,7 @@ void kernel_restart_prepare(char *cmd)
{
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
system_state = SYSTEM_RESTART;
+ try_block_console_kthreads(10000);
usermodehelper_disable();
device_shutdown();
}
@@ -270,6 +271,7 @@ static void kernel_shutdown_prepare(enum
blocking_notifier_call_chain(&reboot_notifier_list,
(state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL);
system_state = state;
+ try_block_console_kthreads(10000);
usermodehelper_disable();
device_shutdown();
}
@@ -819,9 +821,11 @@ static int __orderly_reboot(void)
ret = run_cmd(reboot_cmd);
if (ret) {
+ printk_prefer_direct_enter();
pr_warn("Failed to start orderly reboot: forcing the issue\n");
emergency_sync();
kernel_restart(NULL);
+ printk_prefer_direct_exit();
}
return ret;
@@ -834,6 +838,7 @@ static int __orderly_poweroff(bool force
ret = run_cmd(poweroff_cmd);
if (ret && force) {
+ printk_prefer_direct_enter();
pr_warn("Failed to start orderly shutdown: forcing the issue\n");
/*
@@ -843,6 +848,7 @@ static int __orderly_poweroff(bool force
*/
emergency_sync();
kernel_power_off();
+ printk_prefer_direct_exit();
}
return ret;
@@ -900,6 +906,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot);
*/
static void hw_failure_emergency_poweroff_func(struct work_struct *work)
{
+ printk_prefer_direct_enter();
+
/*
* We have reached here after the emergency shutdown waiting period has
* expired. This means orderly_poweroff has not been able to shut off
@@ -916,6 +924,8 @@ static void hw_failure_emergency_powerof
*/
pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n");
emergency_restart();
+
+ printk_prefer_direct_exit();
}
static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work,
@@ -954,11 +964,13 @@ void hw_protection_shutdown(const char *
{
static atomic_t allow_proceed = ATOMIC_INIT(1);
+ printk_prefer_direct_enter();
+
pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason);
/* Shutdown should be initiated only once. */
if (!atomic_dec_and_test(&allow_proceed))
- return;
+ goto out;
/*
* Queue a backup emergency shutdown in the event of
@@ -966,6 +978,8 @@ void hw_protection_shutdown(const char *
*/
hw_failure_emergency_poweroff(ms_until_forced);
orderly_poweroff(true);
+out:
+ printk_prefer_direct_exit();
}
EXPORT_SYMBOL_GPL(hw_protection_shutdown);
diff -rupN linux-5.19.16.orig/kernel/sched/core.c linux-5.19.16/kernel/sched/core.c
--- linux-5.19.16.orig/kernel/sched/core.c 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/sched/core.c 2022-10-18 17:21:17.808446663 -0400
@@ -1052,6 +1052,46 @@ void resched_curr(struct rq *rq)
trace_sched_wake_idle_without_ipi(cpu);
}
+#ifdef CONFIG_PREEMPT_LAZY
+
+static int tsk_is_polling(struct task_struct *p)
+{
+#ifdef TIF_POLLING_NRFLAG
+ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+#else
+ return 0;
+#endif
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ int cpu;
+
+ if (!sched_feat(PREEMPT_LAZY)) {
+ resched_curr(rq);
+ return;
+ }
+
+ if (test_tsk_need_resched(curr))
+ return;
+
+ if (test_tsk_need_resched_lazy(curr))
+ return;
+
+ set_tsk_need_resched_lazy(curr);
+
+ cpu = cpu_of(rq);
+ if (cpu == smp_processor_id())
+ return;
+
+ /* NEED_RESCHED_LAZY must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(curr))
+ smp_send_reschedule(cpu);
+}
+#endif
+
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -2233,6 +2273,7 @@ void migrate_disable(void)
preempt_disable();
this_rq()->nr_pinned++;
p->migration_disabled = 1;
+ preempt_lazy_disable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_disable);
@@ -2264,6 +2305,7 @@ void migrate_enable(void)
barrier();
p->migration_disabled = 0;
this_rq()->nr_pinned--;
+ preempt_lazy_enable();
preempt_enable();
}
EXPORT_SYMBOL_GPL(migrate_enable);
@@ -3257,6 +3299,70 @@ out:
}
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_PREEMPT_RT
+
+/*
+ * Consider:
+ *
+ * set_special_state(X);
+ *
+ * do_things()
+ * // Somewhere in there is an rtlock that can be contended:
+ * current_save_and_set_rtlock_wait_state();
+ * [...]
+ * schedule_rtlock(); (A)
+ * [...]
+ * current_restore_rtlock_saved_state();
+ *
+ * schedule(); (B)
+ *
+ * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an
+ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its
+ * state to X. For things like ptrace (X=TASK_TRACED), the task could have more
+ * work to do upon acquiring the lock in do_things() before whoever called
+ * wait_task_inactive() should return. IOW, we have to wait for:
+ *
+ * p.saved_state = TASK_RUNNING
+ * p.__state = X
+ *
+ * which implies the task isn't blocked on an RT lock and got to schedule() (B).
+ *
+ * Also see comments in ttwu_state_match().
+ */
+
+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
+{
+ unsigned long flags;
+ bool mismatch;
+
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
+ mismatch = READ_ONCE(p->__state) != match_state &&
+ READ_ONCE(p->saved_state) != match_state;
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ return mismatch;
+}
+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
+ bool *wait)
+{
+ if (READ_ONCE(p->__state) == match_state)
+ return true;
+ if (READ_ONCE(p->saved_state) != match_state)
+ return false;
+ *wait = true;
+ return true;
+}
+#else
+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state)
+{
+ return READ_ONCE(p->__state) != match_state;
+}
+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state,
+ bool *wait)
+{
+ return READ_ONCE(p->__state) == match_state;
+}
+#endif
+
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -3275,7 +3381,7 @@ out:
*/
unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
{
- int running, queued;
+ bool running, wait;
struct rq_flags rf;
unsigned long ncsw;
struct rq *rq;
@@ -3301,7 +3407,7 @@ unsigned long wait_task_inactive(struct
* is actually now running somewhere else!
*/
while (task_running(rq, p)) {
- if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
+ if (match_state && state_mismatch(p, match_state))
return 0;
cpu_relax();
}
@@ -3314,10 +3420,12 @@ unsigned long wait_task_inactive(struct
rq = task_rq_lock(p, &rf);
trace_sched_wait_task(p);
running = task_running(rq, p);
- queued = task_on_rq_queued(p);
+ wait = task_on_rq_queued(p);
ncsw = 0;
- if (!match_state || READ_ONCE(p->__state) == match_state)
+
+ if (!match_state || state_match(p, match_state, &wait))
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+
task_rq_unlock(rq, p, &rf);
/*
@@ -3346,7 +3454,7 @@ unsigned long wait_task_inactive(struct
* running right now), it's preempted, and we should
* yield - it could be a while.
*/
- if (unlikely(queued)) {
+ if (unlikely(wait)) {
ktime_t to = NSEC_PER_SEC / HZ;
set_current_state(TASK_UNINTERRUPTIBLE);
@@ -4563,6 +4671,9 @@ int sched_fork(unsigned long clone_flags
p->on_cpu = 0;
#endif
init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(p)->preempt_lazy_count = 0;
+#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
@@ -6431,6 +6542,7 @@ static void __sched notrace __schedule(u
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
+ clear_tsk_need_resched_lazy(prev);
clear_preempt_need_resched();
#ifdef CONFIG_SCHED_DEBUG
rq->last_seen_need_resched_ns = 0;
@@ -6645,6 +6757,30 @@ static void __sched notrace preempt_sche
} while (need_resched());
}
+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is set, allow the task to be scheduled away, since the
+ * flag is set by an RT task. Otherwise try to avoid being scheduled out as
+ * long as the preempt_lazy_count counter is > 0.
+ */
+static __always_inline int preemptible_lazy(void)
+{
+ if (test_thread_flag(TIF_NEED_RESCHED))
+ return 1;
+ if (current_thread_info()->preempt_lazy_count)
+ return 0;
+ return 1;
+}
+
+#else
+
+static inline int preemptible_lazy(void)
+{
+ return 1;
+}
+
+#endif
+
#ifdef CONFIG_PREEMPTION
/*
* This is the entry point to schedule() from in-kernel preemption
@@ -6658,6 +6794,8 @@ asmlinkage __visible void __sched notrac
*/
if (likely(!preemptible()))
return;
+ if (!preemptible_lazy())
+ return;
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
@@ -6705,6 +6843,9 @@ asmlinkage __visible void __sched notrac
if (likely(!preemptible()))
return;
+ if (!preemptible_lazy())
+ return;
+
do {
/*
* Because the function tracer can trace preempt_count_sub()
@@ -8961,7 +9102,9 @@ void __init init_idle(struct task_struct
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
/*
* The idle tasks have their own, simple scheduling class:
*/
diff -rupN linux-5.19.16.orig/kernel/sched/fair.c linux-5.19.16/kernel/sched/fair.c
--- linux-5.19.16.orig/kernel/sched/fair.c 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/sched/fair.c 2022-10-18 17:21:17.812446632 -0400
@@ -4493,7 +4493,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
/*
* The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours.
@@ -4517,7 +4517,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq
return;
if (delta > ideal_runtime)
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}
static void
@@ -4663,7 +4663,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc
* validating it and just reschedule.
*/
if (queued) {
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
return;
}
/*
@@ -4812,7 +4812,7 @@ static void __account_cfs_rq_runtime(str
* hierarchy can be throttled
*/
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
- resched_curr(rq_of(cfs_rq));
+ resched_curr_lazy(rq_of(cfs_rq));
}
static __always_inline
@@ -5575,7 +5575,7 @@ static void hrtick_start_fair(struct rq
if (delta < 0) {
if (task_current(rq, p))
- resched_curr(rq);
+ resched_curr_lazy(rq);
return;
}
hrtick_start(rq, delta);
@@ -7194,7 +7194,7 @@ static void check_preempt_wakeup(struct
return;
preempt:
- resched_curr(rq);
+ resched_curr_lazy(rq);
/*
* Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved
@@ -11338,7 +11338,7 @@ static void task_fork_fair(struct task_s
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
- resched_curr(rq);
+ resched_curr_lazy(rq);
}
se->vruntime -= cfs_rq->min_vruntime;
@@ -11365,7 +11365,7 @@ prio_changed_fair(struct rq *rq, struct
*/
if (task_current(rq, p)) {
if (p->prio > oldprio)
- resched_curr(rq);
+ resched_curr_lazy(rq);
} else
check_preempt_curr(rq, p, 0);
}
diff -rupN linux-5.19.16.orig/kernel/sched/features.h linux-5.19.16/kernel/sched/features.h
--- linux-5.19.16.orig/kernel/sched/features.h 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/sched/features.h 2022-10-18 17:21:17.812446632 -0400
@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
#ifdef CONFIG_PREEMPT_RT
SCHED_FEAT(TTWU_QUEUE, false)
+# ifdef CONFIG_PREEMPT_LAZY
+SCHED_FEAT(PREEMPT_LAZY, true)
+# endif
#else
/*
diff -rupN linux-5.19.16.orig/kernel/sched/sched.h linux-5.19.16/kernel/sched/sched.h
--- linux-5.19.16.orig/kernel/sched/sched.h 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/sched/sched.h 2022-10-18 17:21:17.812446632 -0400
@@ -2314,6 +2314,15 @@ extern void reweight_task(struct task_st
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
+#ifdef CONFIG_PREEMPT_LAZY
+extern void resched_curr_lazy(struct rq *rq);
+#else
+static inline void resched_curr_lazy(struct rq *rq)
+{
+ resched_curr(rq);
+}
+#endif
+
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
diff -rupN linux-5.19.16.orig/kernel/signal.c linux-5.19.16/kernel/signal.c
--- linux-5.19.16.orig/kernel/signal.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/signal.c 2022-10-18 17:21:17.812446632 -0400
@@ -2297,13 +2297,13 @@ static int ptrace_stop(int exit_code, in
/*
* Don't want to allow preemption here, because
* sys_ptrace() needs this task to be inactive.
- *
- * XXX: implement read_unlock_no_resched().
*/
- preempt_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
read_unlock(&tasklist_lock);
cgroup_enter_frozen();
- preempt_enable_no_resched();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable_no_resched();
freezable_schedule();
cgroup_leave_frozen(true);
diff -rupN linux-5.19.16.orig/kernel/softirq.c linux-5.19.16/kernel/softirq.c
--- linux-5.19.16.orig/kernel/softirq.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/softirq.c 2022-10-18 17:21:17.812446632 -0400
@@ -637,6 +637,17 @@ static inline void tick_irq_exit(void)
#endif
}
+DEFINE_PER_CPU(struct task_struct *, timersd);
+DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
+
+static void wake_timersd(void)
+{
+ struct task_struct *tsk = __this_cpu_read(timersd);
+
+ if (tsk)
+ wake_up_process(tsk);
+}
+
static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -648,6 +659,8 @@ static inline void __irq_exit_rcu(void)
preempt_count_sub(HARDIRQ_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !in_interrupt() && local_pending_timers())
+ wake_timersd();
tick_irq_exit();
}
@@ -976,11 +989,69 @@ static struct smp_hotplug_thread softirq
.thread_comm = "ksoftirqd/%u",
};
+static void timersd_setup(unsigned int cpu)
+{
+ sched_set_fifo_low(current);
+}
+
+static int timersd_should_run(unsigned int cpu)
+{
+ return local_pending_timers();
+}
+
+static void run_timersd(unsigned int cpu)
+{
+ unsigned int timer_si;
+
+ ksoftirqd_run_begin();
+
+ timer_si = local_pending_timers();
+ __this_cpu_write(pending_timer_softirq, 0);
+ or_softirq_pending(timer_si);
+
+ __do_softirq();
+
+ ksoftirqd_run_end();
+}
+
+#ifdef CONFIG_PREEMPT_RT
+static void raise_ktimers_thread(unsigned int nr)
+{
+ trace_softirq_raise(nr);
+ __this_cpu_or(pending_timer_softirq, 1 << nr);
+}
+
+void raise_hrtimer_softirq(void)
+{
+ raise_ktimers_thread(HRTIMER_SOFTIRQ);
+}
+
+void raise_timer_softirq(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ raise_ktimers_thread(TIMER_SOFTIRQ);
+ wake_timersd();
+ local_irq_restore(flags);
+}
+#endif
+
+static struct smp_hotplug_thread timer_threads = {
+ .store = &timersd,
+ .setup = timersd_setup,
+ .thread_should_run = timersd_should_run,
+ .thread_fn = run_timersd,
+ .thread_comm = "ktimers/%u",
+};
+
static __init int spawn_ksoftirqd(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ BUG_ON(smpboot_register_percpu_thread(&timer_threads));
return 0;
}
diff -rupN linux-5.19.16.orig/kernel/time/hrtimer.c linux-5.19.16/kernel/time/hrtimer.c
--- linux-5.19.16.orig/kernel/time/hrtimer.c 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/time/hrtimer.c 2022-10-18 17:21:17.812446632 -0400
@@ -1805,7 +1805,7 @@ retry:
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void)
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
diff -rupN linux-5.19.16.orig/kernel/time/tick-sched.c linux-5.19.16/kernel/time/tick-sched.c
--- linux-5.19.16.orig/kernel/time/tick-sched.c 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/time/tick-sched.c 2022-10-18 17:21:17.812446632 -0400
@@ -779,7 +779,7 @@ static void tick_nohz_restart(struct tic
static inline bool local_timer_softirq_pending(void)
{
- return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+ return local_pending_timers() & BIT(TIMER_SOFTIRQ);
}
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
diff -rupN linux-5.19.16.orig/kernel/time/timer.c linux-5.19.16/kernel/time/timer.c
--- linux-5.19.16.orig/kernel/time/timer.c 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/time/timer.c 2022-10-18 17:21:17.812446632 -0400
@@ -1822,7 +1822,7 @@ static void run_local_timers(void)
if (time_before(jiffies, base->next_expiry))
return;
}
- raise_softirq(TIMER_SOFTIRQ);
+ raise_timer_softirq();
}
/*
diff -rupN linux-5.19.16.orig/kernel/trace/trace.c linux-5.19.16/kernel/trace/trace.c
--- linux-5.19.16.orig/kernel/trace/trace.c 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/trace/trace.c 2022-10-18 17:21:17.812446632 -0400
@@ -2625,11 +2625,19 @@ unsigned int tracing_gen_ctx_irq_test(un
if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
trace_flags |= TRACE_FLAG_BH_OFF;
- if (tif_need_resched())
+ if (tif_need_resched_now())
trace_flags |= TRACE_FLAG_NEED_RESCHED;
+#ifdef CONFIG_PREEMPT_LAZY
+ /* Run out of bits. Share the LAZY and PREEMPT_RESCHED */
+ if (need_resched_lazy())
+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
+#else
if (test_preempt_need_resched())
trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+#endif
+
+ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+ (preempt_lazy_count() & 0xff) << 16 |
(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
@@ -4215,15 +4223,17 @@ unsigned long trace_total_entries(struct
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off/BH-disabled\n"
- "# | / _----=> need-resched \n"
- "# || / _---=> hardirq/softirq \n"
- "# ||| / _--=> preempt-depth \n"
- "# |||| / _-=> migrate-disable \n"
- "# ||||| / delay \n"
- "# cmd pid |||||| time | caller \n"
- "# \\ / |||||| \\ | / \n");
+ seq_puts(m, "# _--------=> CPU# \n"
+ "# / _-------=> irqs-off/BH-disabled\n"
+ "# | / _------=> need-resched \n"
+ "# || / _-----=> need-resched-lazy\n"
+ "# ||| / _----=> hardirq/softirq \n"
+ "# |||| / _---=> preempt-depth \n"
+ "# ||||| / _--=> preempt-lazy-depth\n"
+ "# |||||| / _-=> migrate-disable \n"
+ "# ||||||| / delay \n"
+ "# cmd pid |||||||| time | caller \n"
+ "# \\ / |||||||| \\ | / \n");
}
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -4257,14 +4267,16 @@ static void print_func_help_header_irq(s
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
- seq_printf(m, "# %.*s|||| / delay\n", prec, space);
- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
+ seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space);
+ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space);
+ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space);
+ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space);
+ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space);
+ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
+ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space);
+ seq_printf(m, "# %.*s|||||| / delay\n", prec, space);
+ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | ");
}
void
diff -rupN linux-5.19.16.orig/kernel/trace/trace_events.c linux-5.19.16/kernel/trace/trace_events.c
--- linux-5.19.16.orig/kernel/trace/trace_events.c 2022-10-18 17:21:10.964499413 -0400
+++ linux-5.19.16/kernel/trace/trace_events.c 2022-10-18 17:21:17.812446632 -0400
@@ -193,6 +193,7 @@ static int trace_define_common_fields(vo
/* Holds both preempt_count and migrate_disable */
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
+ __common_field(unsigned char, preempt_lazy_count);
return ret;
}
diff -rupN linux-5.19.16.orig/kernel/trace/trace_output.c linux-5.19.16/kernel/trace/trace_output.c
--- linux-5.19.16.orig/kernel/trace/trace_output.c 2022-10-18 17:21:10.968499383 -0400
+++ linux-5.19.16/kernel/trace/trace_output.c 2022-10-18 17:21:17.812446632 -0400
@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq
{
char hardsoft_irq;
char need_resched;
+ char need_resched_lazy;
char irqs_off;
int hardirq;
int softirq;
@@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq
switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
TRACE_FLAG_PREEMPT_RESCHED)) {
+#ifndef CONFIG_PREEMPT_LAZY
case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
need_resched = 'N';
break;
+#endif
case TRACE_FLAG_NEED_RESCHED:
need_resched = 'n';
break;
+#ifndef CONFIG_PREEMPT_LAZY
case TRACE_FLAG_PREEMPT_RESCHED:
need_resched = 'p';
break;
+#endif
default:
need_resched = '.';
break;
}
+ need_resched_lazy =
+ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
+
hardsoft_irq =
(nmi && hardirq) ? 'Z' :
nmi ? 'z' :
@@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq
softirq ? 's' :
'.' ;
- trace_seq_printf(s, "%c%c%c",
- irqs_off, need_resched, hardsoft_irq);
+ trace_seq_printf(s, "%c%c%c%c",
+ irqs_off, need_resched, need_resched_lazy,
+ hardsoft_irq);
if (entry->preempt_count & 0xf)
trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
else
trace_seq_putc(s, '.');
+ if (entry->preempt_lazy_count)
+ trace_seq_printf(s, "%x", entry->preempt_lazy_count);
+ else
+ trace_seq_putc(s, '.');
+
if (entry->preempt_count & 0xf0)
trace_seq_printf(s, "%x", entry->preempt_count >> 4);
else
diff -rupN linux-5.19.16.orig/kernel/watchdog.c linux-5.19.16/kernel/watchdog.c
--- linux-5.19.16.orig/kernel/watchdog.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/watchdog.c 2022-10-18 17:21:17.812446632 -0400
@@ -424,6 +424,8 @@ static enum hrtimer_restart watchdog_tim
/* Start period for the next softlockup warning. */
update_report_ts();
+ printk_prefer_direct_enter();
+
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
@@ -442,6 +444,8 @@ static enum hrtimer_restart watchdog_tim
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
+
+ printk_prefer_direct_exit();
}
return HRTIMER_RESTART;
diff -rupN linux-5.19.16.orig/kernel/watchdog_hld.c linux-5.19.16/kernel/watchdog_hld.c
--- linux-5.19.16.orig/kernel/watchdog_hld.c 2022-10-18 17:21:10.952499506 -0400
+++ linux-5.19.16/kernel/watchdog_hld.c 2022-10-18 17:21:17.812446632 -0400
@@ -135,6 +135,8 @@ static void watchdog_overflow_callback(s
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
+ printk_prefer_direct_enter();
+
pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
this_cpu);
print_modules();
@@ -155,6 +157,8 @@ static void watchdog_overflow_callback(s
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
+ printk_prefer_direct_exit();
+
__this_cpu_write(hard_watchdog_warn, true);
return;
}
diff -rupN linux-5.19.16.orig/lib/vsprintf.c linux-5.19.16/lib/vsprintf.c
--- linux-5.19.16.orig/lib/vsprintf.c 2022-10-18 17:21:10.972499352 -0400
+++ linux-5.19.16/lib/vsprintf.c 2022-10-18 17:21:17.812446632 -0400
@@ -750,37 +750,40 @@ static int __init debug_boot_weak_hash_e
}
early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable);
-static DEFINE_STATIC_KEY_FALSE(filled_random_ptr_key);
+static bool filled_random_ptr_key;
+static siphash_key_t ptr_key __read_mostly;
-static void enable_ptr_key_workfn(struct work_struct *work)
+static void fill_ptr_key_workfn(struct work_struct *work)
{
- static_branch_enable(&filled_random_ptr_key);
+ int ret;
+
+ ret = get_random_bytes_wait(&ptr_key, sizeof(ptr_key));
+ if (WARN_ON(ret < 0))
+ return;
+ /* Pairs with smp_rmb() before reading ptr_key. */
+ smp_wmb();
+ WRITE_ONCE(filled_random_ptr_key, true);
+}
+
+static int vsprintf_init_hashval(void)
+{
+ static DECLARE_WORK(fill_ptr_key_work, fill_ptr_key_workfn);
+
+ queue_work(system_unbound_wq, &fill_ptr_key_work);
+ return 0;
}
+subsys_initcall(vsprintf_init_hashval)
/* Maps a pointer to a 32 bit unique identifier. */
static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out)
{
- static siphash_key_t ptr_key __read_mostly;
unsigned long hashval;
- if (!static_branch_likely(&filled_random_ptr_key)) {
- static bool filled = false;
- static DEFINE_SPINLOCK(filling);
- static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
- unsigned long flags;
-
- if (!system_unbound_wq || !rng_is_initialized() ||
- !spin_trylock_irqsave(&filling, flags))
- return -EAGAIN;
-
- if (!filled) {
- get_random_bytes(&ptr_key, sizeof(ptr_key));
- queue_work(system_unbound_wq, &enable_ptr_key_work);
- filled = true;
- }
- spin_unlock_irqrestore(&filling, flags);
- }
+ if (!READ_ONCE(filled_random_ptr_key))
+ return -EBUSY;
+ /* Pairs with smp_wmb() after writing ptr_key. */
+ smp_rmb();
#ifdef CONFIG_64BIT
hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
diff -rupN linux-5.19.16.orig/localversion-rt linux-5.19.16/localversion-rt
--- linux-5.19.16.orig/localversion-rt 1969-12-31 19:00:00.000000000 -0500
+++ linux-5.19.16/localversion-rt 2022-10-18 17:21:17.816446601 -0400
@@ -0,0 +1 @@
+-rt10