diff --git a/packages/kernel/linux/patches/AMD64/001-patch-6.1-rc7-rt5.patch b/packages/kernel/linux/patches/AMD64/001-patch-6.1-rc7-rt5.patch deleted file mode 100644 index e69de29bb..000000000 diff --git a/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch b/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch new file mode 100644 index 000000000..1228bf9fd --- /dev/null +++ b/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch @@ -0,0 +1,7875 @@ +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index e24a9820e12fa..ba2cf1cec3d9c 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -34,6 +34,7 @@ config ARM + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST +@@ -72,7 +73,7 @@ config ARM + select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT + select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 +- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU ++ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT + select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL +@@ -117,6 +118,8 @@ config ARM + select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM ++ select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ +diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h +index 7f092cb55a417..ffcbf8ebed4bf 100644 +--- a/arch/arm/include/asm/thread_info.h ++++ b/arch/arm/include/asm/thread_info.h +@@ -62,6 +62,7 @@ struct cpu_context_save { + struct thread_info { + unsigned long flags; /* low level flags */ + int preempt_count; /* 0 => preemptable, <0 => bug */ ++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ + __u32 cpu; /* cpu */ + __u32 cpu_domain; /* cpu domain */ + struct cpu_context_save cpu_context; /* cpu context */ +@@ -129,6 +130,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ + #define TIF_UPROBE 3 /* breakpointed or singlestepping */ + #define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 5 + + #define TIF_USING_IWMMXT 17 + #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ +@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) + + /* Checks for any syscall work in entry-common.S */ +@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + /* + * Change these and you break ASM code in entry-common.S + */ +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) + +diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c 
+index 38121c59cbc26..c6fafd53d5bea 100644 +--- a/arch/arm/kernel/asm-offsets.c ++++ b/arch/arm/kernel/asm-offsets.c +@@ -43,6 +43,7 @@ int main(void) + BLANK(); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); ++ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); + DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); +diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S +index c39303e5c2347..cfb4660e9feab 100644 +--- a/arch/arm/kernel/entry-armv.S ++++ b/arch/arm/kernel/entry-armv.S +@@ -222,11 +222,18 @@ ENDPROC(__dabt_svc) + + #ifdef CONFIG_PREEMPTION + ldr r8, [tsk, #TI_PREEMPT] @ get preempt count +- ldr r0, [tsk, #TI_FLAGS] @ get flags + teq r8, #0 @ if preempt count != 0 ++ bne 1f @ return from exeption ++ ldr r0, [tsk, #TI_FLAGS] @ get flags ++ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set ++ blne svc_preempt @ preempt! ++ ++ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count ++ teq r8, #0 @ if preempt lazy count != 0 + movne r0, #0 @ force flags to 0 +- tst r0, #_TIF_NEED_RESCHED ++ tst r0, #_TIF_NEED_RESCHED_LAZY + blne svc_preempt ++1: + #endif + + svc_exit r5, irq = 1 @ return from exception +@@ -241,8 +248,14 @@ ENDPROC(__irq_svc) + 1: bl preempt_schedule_irq @ irq en/disable is done inside + ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS + tst r0, #_TIF_NEED_RESCHED ++ bne 1b ++ tst r0, #_TIF_NEED_RESCHED_LAZY + reteq r8 @ go again +- b 1b ++ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count ++ teq r0, #0 @ if preempt lazy count != 0 ++ beq 1b ++ ret r8 @ go again ++ + #endif + + __und_fault: +diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c +index e07f359254c3c..b50a3248e79f3 100644 +--- a/arch/arm/kernel/signal.c ++++ b/arch/arm/kernel/signal.c +@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) + */ + trace_hardirqs_off(); + do { +- if (likely(thread_flags & _TIF_NEED_RESCHED)) { ++ if (likely(thread_flags & (_TIF_NEED_RESCHED | ++ _TIF_NEED_RESCHED_LAZY))) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) +diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c +index 2418f1efabd87..79ab2138ab0a8 100644 +--- a/arch/arm/mm/fault.c ++++ b/arch/arm/mm/fault.c +@@ -436,6 +436,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + if (addr < TASK_SIZE) + return do_page_fault(addr, fsr, regs); + ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); ++ + if (user_mode(regs)) + goto bad_area; + +@@ -506,6 +509,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + static int + do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + { ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); ++ + do_bad_area(addr, fsr, regs); + return 0; + } +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 1023e896d46b8..29fcf54cf68ad 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -95,6 +95,7 @@ config ARM64 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PAGE_TABLE_CHECK ++ select ARCH_SUPPORTS_RT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_BPF_JIT + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT +@@ -207,6 +208,7 @@ config ARM64 + select HAVE_PERF_USER_STACK_DUMP + select 
HAVE_PREEMPT_DYNAMIC_KEY + select HAVE_REGS_AND_STACK_ACCESS_API ++ select HAVE_PREEMPT_LAZY + select HAVE_POSIX_CPU_TIMERS_TASK_WORK + select HAVE_FUNCTION_ARG_ACCESS_API + select MMU_GATHER_RCU_TABLE_FREE +diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h +index 0159b625cc7f0..a5486918e5eeb 100644 +--- a/arch/arm64/include/asm/preempt.h ++++ b/arch/arm64/include/asm/preempt.h +@@ -71,13 +71,36 @@ static inline bool __preempt_count_dec_and_test(void) + * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE + * pair. + */ +- return !pc || !READ_ONCE(ti->preempt_count); ++ if (!pc || !READ_ONCE(ti->preempt_count)) ++ return true; ++#ifdef CONFIG_PREEMPT_LAZY ++ if ((pc & ~PREEMPT_NEED_RESCHED)) ++ return false; ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else ++ return false; ++#endif + } + + static inline bool should_resched(int preempt_offset) + { ++#ifdef CONFIG_PREEMPT_LAZY ++ u64 pc = READ_ONCE(current_thread_info()->preempt_count); ++ if (pc == preempt_offset) ++ return true; ++ ++ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset) ++ return false; ++ ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else + u64 pc = READ_ONCE(current_thread_info()->preempt_count); + return pc == preempt_offset; ++#endif + } + + #ifdef CONFIG_PREEMPTION +diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h +index 848739c15de82..4b7148fd5551f 100644 +--- a/arch/arm64/include/asm/thread_info.h ++++ b/arch/arm64/include/asm/thread_info.h +@@ -26,6 +26,7 @@ struct thread_info { + #ifdef CONFIG_ARM64_SW_TTBR0_PAN + u64 ttbr0; /* saved TTBR0_EL1 */ + #endif ++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ + union { + u64 preempt_count; /* 0 => preemptible, <0 => bug */ + struct { +@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst, + #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ + #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ + #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 7 + #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ + #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ + #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ +@@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_struct *dst, + #define _TIF_SVE (1 << TIF_SVE) + #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ + _TIF_NOTIFY_SIGNAL) +@@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_struct *dst, + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_SYSCALL_EMU) + ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++ + #ifdef CONFIG_SHADOW_CALL_STACK + #define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ +diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c +index ae345b06e9f7e..da7e10ff11a81 100644 +--- a/arch/arm64/kernel/asm-offsets.c ++++ b/arch/arm64/kernel/asm-offsets.c +@@ -33,6 +33,7 @@ int main(void) + DEFINE(TSK_TI_CPU, offsetof(struct 
task_struct, thread_info.cpu)); + DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); ++ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); + #ifdef CONFIG_ARM64_SW_TTBR0_PAN + DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); + #endif +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index 06a02707f4882..e7f5284d5daf1 100644 +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -1278,7 +1278,7 @@ static void do_signal(struct pt_regs *regs) + void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) + { + do { +- if (thread_flags & _TIF_NEED_RESCHED) { ++ if (thread_flags & _TIF_NEED_RESCHED_MASK) { + /* Unmask Debug and SError for the next task */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index a6c4407d3ec83..25f98b854d32f 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -154,6 +154,7 @@ config PPC + select ARCH_STACKWALK + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_MEMTEST +@@ -247,8 +248,10 @@ config PPC + select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_RSEQ + select HAVE_SETUP_PER_CPU_AREA if PPC64 + select HAVE_SOFTIRQ_ON_OWN_STACK +diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h +index 283c346478565..4727f40052ddd 100644 +--- a/arch/powerpc/include/asm/stackprotector.h ++++ b/arch/powerpc/include/asm/stackprotector.h +@@ -19,8 +19,13 @@ + */ + static __always_inline void boot_init_stack_canary(void) + { +- unsigned long canary = get_random_canary(); ++ unsigned long canary; + ++#ifndef CONFIG_PREEMPT_RT ++ canary = get_random_canary(); ++#else ++ canary = ((unsigned long)&canary) & CANARY_MASK; ++#endif + current->stack_canary = canary; + #ifdef CONFIG_PPC64 + get_paca()->canary = canary; +diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h +index af58f1ed3952e..520864de8bb27 100644 +--- a/arch/powerpc/include/asm/thread_info.h ++++ b/arch/powerpc/include/asm/thread_info.h +@@ -53,6 +53,8 @@ + struct thread_info { + int preempt_count; /* 0 => preemptable, + <0 => BUG */ ++ int preempt_lazy_count; /* 0 => preemptable, ++ <0 => BUG */ + #ifdef CONFIG_SMP + unsigned int cpu; + #endif +@@ -77,6 +79,7 @@ struct thread_info { + #define INIT_THREAD_INFO(tsk) \ + { \ + .preempt_count = INIT_PREEMPT_COUNT, \ ++ .preempt_lazy_count = 0, \ + .flags = 0, \ + } + +@@ -102,6 +105,7 @@ void arch_setup_new_exec(void); + #define TIF_PATCH_PENDING 6 /* pending live patching update */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SINGLESTEP 8 /* singlestepping active */ ++#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ + #define TIF_SECCOMP 10 /* secure computing */ + #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ + #define TIF_NOERROR 12 /* Force successful syscall return */ +@@ -117,6 +121,7 @@ void arch_setup_new_exec(void); + #define 
TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ + #define TIF_32BIT 20 /* 32 bit binary */ + ++ + /* as above, but as bit values */ + #define _TIF_SYSCALL_TRACE (1<msr & MSR_EE)); + again: +- if (IS_ENABLED(CONFIG_PREEMPT)) { ++ if (IS_ENABLED(CONFIG_PREEMPTION)) { + /* Return to preemptible kernel context */ + if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { + if (preempt_count() == 0) + preempt_schedule_irq(); ++ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) { ++ if ((preempt_count() == 0) && ++ (current_thread_info()->preempt_lazy_count == 0)) ++ preempt_schedule_irq(); + } + } + +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 9bdd79aa51cfc..038f8355b29ca 100644 +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -261,12 +261,17 @@ static char *get_mmu_str(void) + + static int __die(const char *str, struct pt_regs *regs, long err) + { ++ const char *pr = ""; ++ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); + ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; ++ + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", + PAGE_SIZE / 1024, get_mmu_str(), +- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", ++ pr, + IS_ENABLED(CONFIG_SMP) ? " SMP" : "", + IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", + debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", +diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig +index a9f57dad6d916..a0b528d4bb7cd 100644 +--- a/arch/powerpc/kvm/Kconfig ++++ b/arch/powerpc/kvm/Kconfig +@@ -225,6 +225,7 @@ config KVM_E500MC + config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM && PPC_E500 ++ depends on !PREEMPT_RT + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD + select HAVE_KVM_IRQ_ROUTING +diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c +index 9d229ef7f86ef..ada817c49b722 100644 +--- a/arch/powerpc/perf/imc-pmu.c ++++ b/arch/powerpc/perf/imc-pmu.c +@@ -51,7 +51,7 @@ static int trace_imc_mem_size; + * core and trace-imc + */ + static struct imc_pmu_ref imc_global_refc = { +- .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), ++ .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock), + .id = 0, + .refc = 0, + }; +diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig +index 21b22bf16ce66..b506377a16a74 100644 +--- a/arch/powerpc/platforms/pseries/Kconfig ++++ b/arch/powerpc/platforms/pseries/Kconfig +@@ -2,6 +2,7 @@ + config PPC_PSERIES + depends on PPC64 && PPC_BOOK3S + bool "IBM pSeries & new (POWER5-based) iSeries" ++ select GENERIC_ALLOCATOR + select HAVE_PCSPKR_PLATFORM + select MPIC + select OF_DYNAMIC +diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c +index c74b71d4733d4..64ba14baabd30 100644 +--- a/arch/powerpc/platforms/pseries/iommu.c ++++ b/arch/powerpc/platforms/pseries/iommu.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + return ret; + } + +-static DEFINE_PER_CPU(__be64 *, tce_page); ++struct tce_page { ++ __be64 * page; ++ local_lock_t lock; ++}; ++static DEFINE_PER_CPU(struct tce_page, tce_page) = { ++ .lock = INIT_LOCAL_LOCK(lock), ++}; + + static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, 
+ long npages, unsigned long uaddr, +@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + direction, attrs); + } + +- local_irq_save(flags); /* to protect tcep and the page behind it */ ++ /* to protect tcep and the page behind it */ ++ local_lock_irqsave(&tce_page.lock, flags); + +- tcep = __this_cpu_read(tce_page); ++ tcep = __this_cpu_read(tce_page.page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() +@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tceshift, + npages, uaddr, direction, attrs); + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + rpn = __pa(uaddr) >> tceshift; +@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcenum += limit; + } while (npages > 0 && !rc); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; +@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + DMA_BIDIRECTIONAL, 0); + } + +- local_irq_disable(); /* to protect tcep and the page behind it */ +- tcep = __this_cpu_read(tce_page); ++ /* to protect tcep and the page behind it */ ++ local_lock_irq(&tce_page.lock); ++ tcep = __this_cpu_read(tce_page.page); + + if (!tcep) { + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + if (!tcep) { +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return -ENOMEM; + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; +@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + + /* error cleanup: caller will clear whole range */ + +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return rc; + } + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index a825bf031f495..dcbf3c08926b1 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -114,6 +114,7 @@ config X86 + select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG + select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_LTO_CLANG_THIN ++ select ARCH_SUPPORTS_RT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_MEMTEST + select ARCH_USE_QUEUED_RWLOCKS +@@ -252,6 +253,7 @@ config X86 + select HAVE_PCI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_MERGE_VMAS + select HAVE_POSIX_CPU_TIMERS_TASK_WORK +diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h +index 2d13f25b1bd8f..5b096893f6a21 100644 +--- a/arch/x86/include/asm/preempt.h ++++ b/arch/x86/include/asm/preempt.h +@@ -90,18 +90,49 @@ static __always_inline void __preempt_count_sub(int val) + * a decrement which hits zero means we have no preempt_count and should + * reschedule. 
+ */ +-static __always_inline bool __preempt_count_dec_and_test(void) ++static __always_inline bool ____preempt_count_dec_and_test(void) + { + return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, + __percpu_arg([var])); + } + ++static __always_inline bool __preempt_count_dec_and_test(void) ++{ ++ if (____preempt_count_dec_and_test()) ++ return true; ++#ifdef CONFIG_PREEMPT_LAZY ++ if (preempt_count()) ++ return false; ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else ++ return false; ++#endif ++} ++ + /* + * Returns true when we need to resched and can (barring IRQ state). + */ + static __always_inline bool should_resched(int preempt_offset) + { ++#ifdef CONFIG_PREEMPT_LAZY ++ u32 tmp; ++ tmp = raw_cpu_read_4(pcpu_hot.preempt_count); ++ if (tmp == preempt_offset) ++ return true; ++ ++ /* preempt count == 0 ? */ ++ tmp &= ~PREEMPT_NEED_RESCHED; ++ if (tmp != preempt_offset) ++ return false; ++ /* XXX PREEMPT_LOCK_OFFSET */ ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else + return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); ++#endif + } + + #ifdef CONFIG_PREEMPTION +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index f1cccba52eb97..c8697ca0378f4 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -57,6 +57,8 @@ struct thread_info { + unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ + u32 status; /* thread synchronous flags */ ++ int preempt_lazy_count; /* 0 => lazy preemptable ++ <0 => BUG */ + #ifdef CONFIG_SMP + u32 cpu; /* current CPU */ + #endif +@@ -65,6 +67,7 @@ struct thread_info { + #define INIT_THREAD_INFO(tsk) \ + { \ + .flags = 0, \ ++ .preempt_lazy_count = 0, \ + } + + #else /* !__ASSEMBLY__ */ +@@ -92,6 +95,7 @@ struct thread_info { + #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ + #define TIF_NOTSC 16 /* TSC is not accessible in userland */ + #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ + #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ + #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ + #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +@@ -115,6 +119,7 @@ struct thread_info { + #define _TIF_NOCPUID (1 << TIF_NOCPUID) + #define _TIF_NOTSC (1 << TIF_NOTSC) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) + #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) + #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index aa490da3cef23..d73b6d32bd827 100644 +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -57,6 +57,40 @@ static void zram_free_page(struct zram *zram, size_t index); + static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio); + ++#ifdef CONFIG_PREEMPT_RT ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) ++{ ++ size_t index; ++ ++ for (index = 0; index < num_pages; index++) ++ spin_lock_init(&zram->table[index].lock); ++} ++ ++static int zram_slot_trylock(struct zram *zram, u32 index) ++{ ++ int ret; ++ ++ ret = 
spin_trylock(&zram->table[index].lock); ++ if (ret) ++ __set_bit(ZRAM_LOCK, &zram->table[index].flags); ++ return ret; ++} ++ ++static void zram_slot_lock(struct zram *zram, u32 index) ++{ ++ spin_lock(&zram->table[index].lock); ++ __set_bit(ZRAM_LOCK, &zram->table[index].flags); ++} ++ ++static void zram_slot_unlock(struct zram *zram, u32 index) ++{ ++ __clear_bit(ZRAM_LOCK, &zram->table[index].flags); ++ spin_unlock(&zram->table[index].lock); ++} ++ ++#else ++ ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } + + static int zram_slot_trylock(struct zram *zram, u32 index) + { +@@ -72,6 +106,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) + { + bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); + } ++#endif + + static inline bool init_done(struct zram *zram) + { +@@ -1311,6 +1346,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) + + if (!huge_class_size) + huge_class_size = zs_huge_class_size(zram->mem_pool); ++ zram_meta_init_table_locks(zram, num_pages); + return true; + } + +diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h +index c5254626f051f..a6b24dfec95fb 100644 +--- a/drivers/block/zram/zram_drv.h ++++ b/drivers/block/zram/zram_drv.h +@@ -69,6 +69,9 @@ struct zram_table_entry { + unsigned long element; + }; + unsigned long flags; ++#ifdef CONFIG_PREEMPT_RT ++ spinlock_t lock; ++#endif + #ifdef CONFIG_ZRAM_MEMORY_TRACKING + ktime_t ac_time; + #endif +diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c +index ed5dabd3c72d6..450d7985ff346 100644 +--- a/drivers/char/tpm/tpm_tis.c ++++ b/drivers/char/tpm/tpm_tis.c +@@ -50,6 +50,45 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da + return container_of(data, struct tpm_tis_tcg_phy, priv); + } + ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * Flush previous write operations with a dummy read operation to the ++ * TPM MMIO base address. ++ */ ++static inline void tpm_tis_flush(void __iomem *iobase) ++{ ++ ioread8(iobase + TPM_ACCESS(0)); ++} ++#else ++#define tpm_tis_flush(iobase) do { } while (0) ++#endif ++ ++/* ++ * Write a byte word to the TPM MMIO address, and flush the write queue. ++ * The flush ensures that the data is sent immediately over the bus and not ++ * aggregated with further requests and transferred later in a batch. The large ++ * write requests can lead to unwanted latency spikes by blocking the CPU until ++ * the complete batch has been transferred. ++ */ ++static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite8(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ ++/* ++ * Write a 32-bit word to the TPM MMIO address, and flush the write queue. ++ * The flush ensures that the data is sent immediately over the bus and not ++ * aggregated with further requests and transferred later in a batch. The large ++ * write requests can lead to unwanted latency spikes by blocking the CPU until ++ * the complete batch has been transferred. 
++ */ ++static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite32(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ + static int interrupts = -1; + module_param(interrupts, int, 0444); + MODULE_PARM_DESC(interrupts, "Enable interrupts"); +@@ -186,12 +225,12 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, + switch (io_mode) { + case TPM_TIS_PHYS_8: + while (len--) +- iowrite8(*value++, phy->iobase + addr); ++ tpm_tis_iowrite8(*value++, phy->iobase, addr); + break; + case TPM_TIS_PHYS_16: + return -EINVAL; + case TPM_TIS_PHYS_32: +- iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr); ++ tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr); + break; + } + +diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig +index 98f4e44976e09..d4dba1f89fde5 100644 +--- a/drivers/gpu/drm/i915/Kconfig ++++ b/drivers/gpu/drm/i915/Kconfig +@@ -3,7 +3,6 @@ config DRM_I915 + tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" + depends on DRM + depends on X86 && PCI +- depends on !PREEMPT_RT + select INTEL_GTT if X86 + select INTERVAL_TREE + # we need shmfs for the swappable backing store, and in particular +diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c +index d5b5d40ed817f..710e51d2377fe 100644 +--- a/drivers/gpu/drm/i915/display/intel_crtc.c ++++ b/drivers/gpu/drm/i915/display/intel_crtc.c +@@ -520,7 +520,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + */ + intel_psr_wait_for_idle_locked(new_crtc_state); + +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + + crtc->debug.min_vbl = min; + crtc->debug.max_vbl = max; +@@ -545,11 +546,13 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + break; + } + +- local_irq_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_enable(); + + timeout = schedule_timeout(timeout); + +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + } + + finish_wait(wq, &wait); +@@ -582,7 +585,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + return; + + irq_disable: +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + } + + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) +@@ -691,7 +695,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) + if (new_crtc_state->seamless_m_n && intel_crtc_needs_fastset(new_crtc_state)) + intel_crtc_update_active_timings(new_crtc_state); + +- local_irq_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_enable(); + + if (intel_vgpu_active(dev_priv)) + return; +diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c +index 4c83e2320bcac..2dd4ac8b30266 100644 +--- a/drivers/gpu/drm/i915/display/intel_vblank.c ++++ b/drivers/gpu/drm/i915/display/intel_vblank.c +@@ -293,7 +293,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, + */ + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + +- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + + /* Get optional system timestamp before query. */ + if (stime) +@@ -358,7 +359,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, + if (etime) + *etime = ktime_get(); + +- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. 
*/ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + +diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +index ecc990ec1b952..8d04b10681f0d 100644 +--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c ++++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +@@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) + /* Kick the work once more to drain the signalers, and disarm the irq */ + irq_work_sync(&b->irq_work); + while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { +- local_irq_disable(); +- signal_irq_work(&b->irq_work); +- local_irq_enable(); ++ irq_work_queue(&b->irq_work); + cond_resched(); ++ irq_work_sync(&b->irq_work); + } + } + +diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +index 750326434677f..a2658a8ff7353 100644 +--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c ++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +@@ -1303,7 +1303,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + * and context switches) submission. + */ + +- spin_lock(&sched_engine->lock); ++ spin_lock_irq(&sched_engine->lock); + + /* + * If the queue is higher priority than the last +@@ -1403,7 +1403,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + return; + } + } +@@ -1429,7 +1429,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + + if (last && !can_merge_rq(last, rq)) { + spin_unlock(&ve->base.sched_engine->lock); +- spin_unlock(&engine->sched_engine->lock); ++ spin_unlock_irq(&engine->sched_engine->lock); + return; /* leave this for another sibling */ + } + +@@ -1591,7 +1591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + */ + sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + + /* + * We can skip poking the HW if we ended up with exactly the same set +@@ -1617,13 +1617,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + } + } + +-static void execlists_dequeue_irq(struct intel_engine_cs *engine) +-{ +- local_irq_disable(); /* Suspend interrupts across request submission */ +- execlists_dequeue(engine); +- local_irq_enable(); /* flush irq_work (e.g. 
breadcrumb enabling) */ +-} +- + static void clear_ports(struct i915_request **ports, int count) + { + memset_p((void **)ports, NULL, count); +@@ -2477,7 +2470,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) + } + + if (!engine->execlists.pending[0]) { +- execlists_dequeue_irq(engine); ++ execlists_dequeue(engine); + start_timeslice(engine); + } + +diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c +index 7503dcb9043bb..799fb8083470f 100644 +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -613,7 +613,6 @@ bool __i915_request_submit(struct i915_request *request) + + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +@@ -722,7 +721,6 @@ void __i915_request_unsubmit(struct i915_request *request) + */ + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h +index f6f9228a13518..0ff1b60be8382 100644 +--- a/drivers/gpu/drm/i915/i915_trace.h ++++ b/drivers/gpu/drm/i915/i915_trace.h +@@ -6,6 +6,10 @@ + #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) + #define _I915_TRACE_H_ + ++#ifdef CONFIG_PREEMPT_RT ++#define NOTRACE ++#endif ++ + #include + #include + #include +@@ -322,7 +326,7 @@ DEFINE_EVENT(i915_request, i915_request_add, + TP_ARGS(rq) + ); + +-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) ++#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) + DEFINE_EVENT(i915_request, i915_request_guc_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) +diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h +index 2c430c0c3badd..7ec828637d622 100644 +--- a/drivers/gpu/drm/i915/i915_utils.h ++++ b/drivers/gpu/drm/i915/i915_utils.h +@@ -288,7 +288,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) + #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + + /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ +-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) ++#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) + #else + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h +index 1e8fe44a7099f..b336d00d7988e 100644 +--- a/drivers/tty/serial/8250/8250.h ++++ b/drivers/tty/serial/8250/8250.h +@@ -177,12 +177,277 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) + up->dl_write(up, value); + } + ++static inline bool serial8250_is_console(struct uart_port *port) ++{ ++ return uart_console(port) && !hlist_unhashed_lockless(&port->cons->node); ++} ++ ++/** ++ * serial8250_init_wctxt - Initialize a write context for ++ * non-console-printing usage ++ * @wctxt: The write context to initialize ++ * @cons: The console to assign to the write context ++ * ++ * In order to mark an unsafe region, drivers must acquire the console. This ++ * requires providing an initialized write context (even if that driver will ++ * not be doing any printing). ++ * ++ * This function should not be used for console printing contexts. 
++ */ ++static inline void serial8250_init_wctxt(struct cons_write_context *wctxt, ++ struct console *cons) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ ++ memset(wctxt, 0, sizeof(*wctxt)); ++ ctxt->console = cons; ++ ctxt->prio = CONS_PRIO_NORMAL; ++} ++ ++/** ++ * __serial8250_console_acquire - Acquire a console for ++ * non-console-printing usage ++ * @wctxt: An uninitialized write context to use for acquiring ++ * @cons: The console to assign to the write context ++ * ++ * The caller is holding the port->lock. ++ * The caller is holding the console_srcu_read_lock. ++ * ++ * This function should not be used for console printing contexts. ++ */ ++static inline void __serial8250_console_acquire(struct cons_write_context *wctxt, ++ struct console *cons) ++{ ++ for (;;) { ++ serial8250_init_wctxt(wctxt, cons); ++ if (console_try_acquire(wctxt)) ++ break; ++ cpu_relax(); ++ } ++} ++ ++/** ++ * serial8250_enter_unsafe - Mark the beginning of an unsafe region for ++ * non-console-printing usage ++ * @up: The port that is entering the unsafe state ++ * ++ * The caller should ensure @up is a console before calling this function. ++ * ++ * The caller is holding the port->lock. ++ * This function takes the console_srcu_read_lock and becomes owner of the ++ * console associated with @up. ++ * ++ * This function should not be used for console printing contexts. ++ */ ++static inline void serial8250_enter_unsafe(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ ++ lockdep_assert_held_once(&port->lock); ++ ++ for (;;) { ++ up->cookie = console_srcu_read_lock(); ++ ++ __serial8250_console_acquire(&up->wctxt, port->cons); ++ ++ if (console_enter_unsafe(&up->wctxt)) ++ break; ++ ++ console_srcu_read_unlock(up->cookie); ++ cpu_relax(); ++ } ++} ++ ++/** ++ * serial8250_exit_unsafe - Mark the end of an unsafe region for ++ * non-console-printing usage ++ * @up: The port that is exiting the unsafe state ++ * ++ * The caller is holding the port->lock. ++ * This function releases ownership of the console associated with @up and ++ * releases the console_srcu_read_lock. ++ * ++ * This function should not be used for console printing contexts. ++ */ ++static inline void serial8250_exit_unsafe(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ ++ lockdep_assert_held_once(&port->lock); ++ ++ if (console_exit_unsafe(&up->wctxt)) ++ console_release(&up->wctxt); ++ ++ console_srcu_read_unlock(up->cookie); ++} ++ ++/** ++ * serial8250_in_IER - Read the IER register for ++ * non-console-printing usage ++ * @up: The port to work on ++ * ++ * Returns: The value read from IER ++ * ++ * The caller is holding the port->lock. ++ * ++ * This is the top-level function for non-console-printing contexts to ++ * read the IER register. The caller does not need to care if @up is a ++ * console before calling this function. ++ * ++ * This function should not be used for printing contexts. ++ */ ++static inline int serial8250_in_IER(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int ier; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ ++ ier = serial_in(up, UART_IER); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ ++ return ier; ++} ++ ++/** ++ * __serial8250_set_IER - Directly write to the IER register ++ * @up: The port to work on ++ * @wctxt: The current write context ++ * @ier: The value to write ++ * ++ * Returns: True if IER was written to. 
False otherwise ++ * ++ * The caller is holding the port->lock. ++ * The caller is holding the console_srcu_read_unlock. ++ * The caller is the owner of the console associated with @up. ++ * ++ * This function should only be directly called within console printing ++ * contexts. Other contexts should use serial8250_set_IER(). ++ */ ++static inline bool __serial8250_set_IER(struct uart_8250_port *up, ++ struct cons_write_context *wctxt, ++ int ier) ++{ ++ if (wctxt && !console_can_proceed(wctxt)) ++ return false; ++ serial_out(up, UART_IER, ier); ++ return true; ++} ++ ++/** ++ * serial8250_set_IER - Write a new value to the IER register for ++ * non-console-printing usage ++ * @up: The port to work on ++ * @ier: The value to write ++ * ++ * The caller is holding the port->lock. ++ * ++ * This is the top-level function for non-console-printing contexts to ++ * write to the IER register. The caller does not need to care if @up is a ++ * console before calling this function. ++ * ++ * This function should not be used for printing contexts. ++ */ ++static inline void serial8250_set_IER(struct uart_8250_port *up, int ier) ++{ ++ struct uart_port *port = &up->port; ++ bool is_console; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) { ++ serial8250_enter_unsafe(up); ++ while (!__serial8250_set_IER(up, &up->wctxt, ier)) { ++ console_srcu_read_unlock(up->cookie); ++ console_enter_unsafe(&up->wctxt); ++ } ++ serial8250_exit_unsafe(up); ++ } else { ++ __serial8250_set_IER(up, NULL, ier); ++ } ++} ++ ++/** ++ * __serial8250_clear_IER - Directly clear the IER register ++ * @up: The port to work on ++ * @wctxt: The current write context ++ * @prior: Gets set to the previous value of IER ++ * ++ * Returns: True if IER was cleared and @prior points to the previous ++ * value of IER. False otherwise and @prior is invalid ++ * ++ * The caller is holding the port->lock. ++ * The caller is holding the console_srcu_read_unlock. ++ * The caller is the owner of the console associated with @up. ++ * ++ * This function should only be directly called within console printing ++ * contexts. Other contexts should use serial8250_clear_IER(). ++ */ ++static inline bool __serial8250_clear_IER(struct uart_8250_port *up, ++ struct cons_write_context *wctxt, ++ int *prior) ++{ ++ unsigned int clearval = 0; ++ ++ if (up->capabilities & UART_CAP_UUE) ++ clearval = UART_IER_UUE; ++ ++ *prior = serial_in(up, UART_IER); ++ if (wctxt && !console_can_proceed(wctxt)) ++ return false; ++ serial_out(up, UART_IER, clearval); ++ return true; ++} ++ ++/** ++ * serial8250_clear_IER - Clear the IER register for ++ * non-console-printing usage ++ * @up: The port to work on ++ * ++ * Returns: The previous value of IER ++ * ++ * The caller is holding the port->lock. ++ * ++ * This is the top-level function for non-console-printing contexts to ++ * clear the IER register. The caller does not need to care if @up is a ++ * console before calling this function. ++ * ++ * This function should not be used for printing contexts. 
++ */ ++static inline int serial8250_clear_IER(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int prior; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) { ++ serial8250_enter_unsafe(up); ++ while (!__serial8250_clear_IER(up, &up->wctxt, &prior)) { ++ console_srcu_read_unlock(up->cookie); ++ console_enter_unsafe(&up->wctxt); ++ } ++ serial8250_exit_unsafe(up); ++ } else { ++ __serial8250_clear_IER(up, NULL, &prior); ++ } ++ ++ return prior; ++} ++ + static inline bool serial8250_set_THRI(struct uart_8250_port *up) + { + if (up->ier & UART_IER_THRI) + return false; + up->ier |= UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + return true; + } + +@@ -191,7 +456,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) + if (!(up->ier & UART_IER_THRI)) + return false; + up->ier &= ~UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + return true; + } + +diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c +index 9d2a7856784f7..7cc6b527c088b 100644 +--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c ++++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c +@@ -278,7 +278,7 @@ static void __aspeed_vuart_set_throttle(struct uart_8250_port *up, + up->ier &= ~irqs; + if (!throttle) + up->ier |= irqs; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle) + { +diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c +index f801b1f5b46c0..a29f5f45d22f2 100644 +--- a/drivers/tty/serial/8250/8250_bcm7271.c ++++ b/drivers/tty/serial/8250/8250_bcm7271.c +@@ -606,8 +606,10 @@ static int brcmuart_startup(struct uart_port *port) + * Disable the Receive Data Interrupt because the DMA engine + * will handle this. 
+ */ ++ spin_lock_irq(&port->lock); + up->ier &= ~UART_IER_RDI; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); ++ spin_unlock_irq(&port->lock); + + priv->tx_running = false; + priv->dma.rx_dma = NULL; +@@ -787,6 +789,12 @@ static int brcmuart_handle_irq(struct uart_port *p) + spin_lock_irqsave(&p->lock, flags); + status = serial_port_in(p, UART_LSR); + if ((status & UART_LSR_DR) == 0) { ++ bool is_console; ++ ++ is_console = serial8250_is_console(p); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); + + ier = serial_port_in(p, UART_IER); + /* +@@ -807,6 +815,9 @@ static int brcmuart_handle_irq(struct uart_port *p) + serial_port_in(p, UART_RX); + } + ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + handled = 1; + } + spin_unlock_irqrestore(&p->lock, flags); +@@ -844,12 +855,22 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t) + /* re-enable receive unless upper layer has disabled it */ + if ((up->ier & (UART_IER_RLSI | UART_IER_RDI)) == + (UART_IER_RLSI | UART_IER_RDI)) { ++ bool is_console; ++ ++ is_console = serial8250_is_console(p); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + status = serial_port_in(p, UART_IER); + status |= (UART_IER_RLSI | UART_IER_RDI); + serial_port_out(p, UART_IER, status); + status = serial_port_in(p, UART_MCR); + status |= UART_MCR_RTS; + serial_port_out(p, UART_MCR, status); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + spin_unlock_irqrestore(&p->lock, flags); + return HRTIMER_NORESTART; +diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c +index ab63c308be0a2..8e89bffa1f121 100644 +--- a/drivers/tty/serial/8250/8250_core.c ++++ b/drivers/tty/serial/8250/8250_core.c +@@ -256,6 +256,7 @@ static void serial8250_timeout(struct timer_list *t) + static void serial8250_backup_timeout(struct timer_list *t) + { + struct uart_8250_port *up = from_timer(up, t, timer); ++ struct uart_port *port = &up->port; + unsigned int iir, ier = 0, lsr; + unsigned long flags; + +@@ -266,8 +267,23 @@ static void serial8250_backup_timeout(struct timer_list *t) + * based handler. + */ + if (up->port.irq) { ++ bool is_console; ++ ++ /* ++ * Do not use serial8250_clear_IER() because this code ++ * ignores capabilties. 
++ */ ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + ier = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + + iir = serial_in(up, UART_IIR); +@@ -290,7 +306,7 @@ static void serial8250_backup_timeout(struct timer_list *t) + serial8250_tx_chars(up); + + if (up->port.irq) +- serial_out(up, UART_IER, ier); ++ serial8250_set_IER(up, ier); + + spin_unlock_irqrestore(&up->port.lock, flags); + +@@ -576,12 +592,30 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) + + #ifdef CONFIG_SERIAL_8250_CONSOLE + +-static void univ8250_console_write(struct console *co, const char *s, +- unsigned int count) ++static void univ8250_console_port_lock(struct console *con, bool do_lock, unsigned long *flags) ++{ ++ struct uart_8250_port *up = &serial8250_ports[con->index]; ++ ++ if (do_lock) ++ spin_lock_irqsave(&up->port.lock, *flags); ++ else ++ spin_unlock_irqrestore(&up->port.lock, *flags); ++} ++ ++static bool univ8250_console_write_atomic(struct console *co, ++ struct cons_write_context *wctxt) + { + struct uart_8250_port *up = &serial8250_ports[co->index]; + +- serial8250_console_write(up, s, count); ++ return serial8250_console_write_atomic(up, wctxt); ++} ++ ++static bool univ8250_console_write_thread(struct console *co, ++ struct cons_write_context *wctxt) ++{ ++ struct uart_8250_port *up = &serial8250_ports[co->index]; ++ ++ return serial8250_console_write_thread(up, wctxt); + } + + static int univ8250_console_setup(struct console *co, char *options) +@@ -669,12 +703,14 @@ static int univ8250_console_match(struct console *co, char *name, int idx, + + static struct console univ8250_console = { + .name = "ttyS", +- .write = univ8250_console_write, ++ .write_atomic = univ8250_console_write_atomic, ++ .write_thread = univ8250_console_write_thread, ++ .port_lock = univ8250_console_port_lock, + .device = uart_console_device, + .setup = univ8250_console_setup, + .exit = univ8250_console_exit, + .match = univ8250_console_match, +- .flags = CON_PRINTBUFFER | CON_ANYTIME, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NO_BKL, + .index = -1, + .data = &serial8250_reg, + }; +@@ -962,7 +998,7 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work) + spin_lock_irqsave(&port->lock, flags); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + up->port.read_status_mask |= UART_LSR_DR; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + spin_unlock_irqrestore(&port->lock, flags); + } + +diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c +index 64770c62bbec5..ccb70b20b1f4f 100644 +--- a/drivers/tty/serial/8250/8250_exar.c ++++ b/drivers/tty/serial/8250/8250_exar.c +@@ -185,6 +185,10 @@ static void xr17v35x_set_divisor(struct uart_port *p, unsigned int baud, + + static int xr17v35x_startup(struct uart_port *port) + { ++ struct uart_8250_port *up = up_to_u8250p(port); ++ ++ spin_lock_irq(&port->lock); ++ + /* + * First enable access to IER [7:5], ISR [5:4], FCR [5:4], + * MCR [7:5] and MSR [7:0] +@@ -195,7 +199,9 @@ static int xr17v35x_startup(struct uart_port *port) + * Make sure all interrups are masked until initialization is + * complete and the FIFOs are cleared + */ +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); ++ ++ spin_unlock_irq(&port->lock); + + return serial8250_do_startup(port); + } +diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c 
+index 8adfaa183f778..eaf148245a10d 100644 +--- a/drivers/tty/serial/8250/8250_fsl.c ++++ b/drivers/tty/serial/8250/8250_fsl.c +@@ -58,7 +58,8 @@ int fsl8250_handle_irq(struct uart_port *port) + if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { + unsigned long delay; + +- up->ier = port->serial_in(port, UART_IER); ++ up->ier = serial8250_in_IER(up); ++ + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { +diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c +index fb1d5ec0940e6..bf7ab55c8923f 100644 +--- a/drivers/tty/serial/8250/8250_mtk.c ++++ b/drivers/tty/serial/8250/8250_mtk.c +@@ -222,12 +222,38 @@ static void mtk8250_shutdown(struct uart_port *port) + + static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int ier; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier & (~mask)); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + + static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int ier; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier | mask); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + + static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) +diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c +index 734f092ef839a..bfa50a26349dd 100644 +--- a/drivers/tty/serial/8250/8250_omap.c ++++ b/drivers/tty/serial/8250/8250_omap.c +@@ -334,8 +334,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) + + /* drop TCR + TLR access, we setup XON/XOFF later */ + serial8250_out_MCR(up, mcr); +- +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_dl_write(up, priv->quot); +@@ -523,16 +522,21 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, + u8 efr; + + pm_runtime_get_sync(port->dev); ++ ++ spin_lock_irq(&port->lock); ++ + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + efr = serial_in(up, UART_EFR); + serial_out(up, UART_EFR, efr | UART_EFR_ECB); + serial_out(up, UART_LCR, 0); + +- serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0); ++ serial8250_set_IER(up, (state != 0) ? 
UART_IERX_SLEEP : 0); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(up, UART_EFR, efr); + serial_out(up, UART_LCR, 0); + ++ spin_unlock_irq(&port->lock); ++ + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); + } +@@ -649,7 +653,8 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) + if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) { + unsigned long delay; + +- up->ier = port->serial_in(port, UART_IER); ++ spin_lock(&port->lock); ++ up->ier = serial8250_in_IER(up); + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { +@@ -658,6 +663,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) + */ + cancel_delayed_work(&up->overrun_backoff); + } ++ spin_unlock(&port->lock); + + delay = msecs_to_jiffies(up->overrun_backoff_time_ms); + schedule_delayed_work(&up->overrun_backoff, delay); +@@ -707,8 +713,10 @@ static int omap_8250_startup(struct uart_port *port) + if (ret < 0) + goto err; + ++ spin_lock_irq(&port->lock); + up->ier = UART_IER_RLSI | UART_IER_RDI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); ++ spin_unlock_irq(&port->lock); + + #ifdef CONFIG_PM + up->capabilities |= UART_CAP_RPM; +@@ -748,8 +756,10 @@ static void omap_8250_shutdown(struct uart_port *port) + if (priv->habit & UART_HAS_EFR2) + serial_out(up, UART_OMAP_EFR2, 0x0); + ++ spin_lock_irq(&port->lock); + up->ier = 0; +- serial_out(up, UART_IER, 0); ++ serial8250_set_IER(up, 0); ++ spin_unlock_irq(&port->lock); + + if (up->dma) + serial8250_release_dma(up); +@@ -797,7 +807,7 @@ static void omap_8250_unthrottle(struct uart_port *port) + up->dma->rx_dma(up); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + port->read_status_mask |= UART_LSR_DR; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + spin_unlock_irqrestore(&port->lock, flags); + + pm_runtime_mark_last_busy(port->dev); +@@ -956,7 +966,7 @@ static void __dma_rx_complete(void *param) + __dma_rx_do_complete(p); + if (!priv->throttled) { + p->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_out(p, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + if (!(priv->habit & UART_HAS_EFR2)) + omap_8250_rx_dma(p); + } +@@ -1013,7 +1023,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p) + * callback to run. + */ + p->ier &= ~(UART_IER_RLSI | UART_IER_RDI); +- serial_out(p, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + } + goto out; + } +@@ -1226,12 +1236,12 @@ static void am654_8250_handle_rx_dma(struct uart_8250_port *up, u8 iir, + * periodic timeouts, re-enable interrupts. 
+ */ + up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + omap_8250_rx_dma_flush(up); + serial_in(up, UART_IIR); + serial_out(up, UART_OMAP_EFR2, 0x0); + up->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + } + +@@ -1717,12 +1727,16 @@ static int omap8250_runtime_resume(struct device *dev) + + up = serial8250_get_port(priv->line); + ++ spin_lock_irq(&up->port.lock); ++ + if (omap8250_lost_context(up)) + omap8250_restore_regs(up); + + if (up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2)) + omap_8250_rx_dma(up); + ++ spin_unlock_irq(&up->port.lock); ++ + priv->latency = priv->calc_latency; + schedule_work(&priv->qos_work); + return 0; +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c +index fe8d79c4ae95e..68f01f8bdf64b 100644 +--- a/drivers/tty/serial/8250/8250_port.c ++++ b/drivers/tty/serial/8250/8250_port.c +@@ -745,6 +745,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial8250_rpm_get(p); + + if (p->capabilities & UART_CAP_SLEEP) { ++ spin_lock_irq(&p->port.lock); + if (p->capabilities & UART_CAP_EFR) { + lcr = serial_in(p, UART_LCR); + efr = serial_in(p, UART_EFR); +@@ -752,25 +753,18 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial_out(p, UART_EFR, UART_EFR_ECB); + serial_out(p, UART_LCR, 0); + } +- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); ++ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0); + if (p->capabilities & UART_CAP_EFR) { + serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(p, UART_EFR, efr); + serial_out(p, UART_LCR, lcr); + } ++ spin_unlock_irq(&p->port.lock); + } + + serial8250_rpm_put(p); + } + +-static void serial8250_clear_IER(struct uart_8250_port *up) +-{ +- if (up->capabilities & UART_CAP_UUE) +- serial_out(up, UART_IER, UART_IER_UUE); +- else +- serial_out(up, UART_IER, 0); +-} +- + #ifdef CONFIG_SERIAL_8250_RSA + /* + * Attempts to turn on the RSA FIFO. Returns zero on failure. +@@ -1034,8 +1028,10 @@ static int broken_efr(struct uart_8250_port *up) + */ + static void autoconfig_16550a(struct uart_8250_port *up) + { ++ struct uart_port *port = &up->port; + unsigned char status1, status2; + unsigned int iersave; ++ bool is_console; + + up->port.type = PORT_16550A; + up->capabilities |= UART_CAP_FIFO; +@@ -1151,6 +1147,11 @@ static void autoconfig_16550a(struct uart_8250_port *up) + return; + } + ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + /* + * Try writing and reading the UART_IER_UUE bit (b6). + * If it works, this is probably one of the Xscale platform's +@@ -1186,6 +1187,9 @@ static void autoconfig_16550a(struct uart_8250_port *up) + } + serial_out(up, UART_IER, iersave); + ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + /* + * We distinguish between 16550A and U6 16550A by counting + * how many bytes are in the FIFO. +@@ -1227,6 +1231,13 @@ static void autoconfig(struct uart_8250_port *up) + up->bugs = 0; + + if (!(port->flags & UPF_BUGGY_UART)) { ++ bool is_console; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + /* + * Do a simple existence test first; if we fail this, + * there's no point trying anything else. 
+@@ -1256,6 +1267,10 @@ static void autoconfig(struct uart_8250_port *up) + #endif + scratch3 = serial_in(up, UART_IER) & UART_IER_ALL_INTR; + serial_out(up, UART_IER, scratch); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + if (scratch2 != 0 || scratch3 != UART_IER_ALL_INTR) { + /* + * We failed; there's nothing here +@@ -1377,6 +1392,7 @@ static void autoconfig_irq(struct uart_8250_port *up) + unsigned char save_ICP = 0; + unsigned int ICP = 0; + unsigned long irqs; ++ bool is_console; + int irq; + + if (port->flags & UPF_FOURPORT) { +@@ -1386,8 +1402,12 @@ static void autoconfig_irq(struct uart_8250_port *up) + inb_p(ICP); + } + +- if (uart_console(port)) ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) { + console_lock(); ++ serial8250_enter_unsafe(up); ++ } + + /* forget possible initially masked and pending IRQ */ + probe_irq_off(probe_irq_on()); +@@ -1419,8 +1439,10 @@ static void autoconfig_irq(struct uart_8250_port *up) + if (port->flags & UPF_FOURPORT) + outb_p(save_ICP, ICP); + +- if (uart_console(port)) ++ if (is_console) { ++ serial8250_exit_unsafe(up); + console_unlock(); ++ } + + port->irq = (irq > 0) ? irq : 0; + } +@@ -1433,7 +1455,7 @@ static void serial8250_stop_rx(struct uart_port *port) + + up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); + up->port.read_status_mask &= ~UART_LSR_DR; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + serial8250_rpm_put(up); + } +@@ -1463,7 +1485,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p) + serial8250_clear_and_reinit_fifos(p); + + p->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_port_out(&p->port, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + } + } + EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); +@@ -1710,7 +1732,7 @@ static void serial8250_disable_ms(struct uart_port *port) + mctrl_gpio_disable_ms(up->gpios); + + up->ier &= ~UART_IER_MSI; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + + static void serial8250_enable_ms(struct uart_port *port) +@@ -1726,7 +1748,7 @@ static void serial8250_enable_ms(struct uart_port *port) + up->ier |= UART_IER_MSI; + + serial8250_rpm_get(up); +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + serial8250_rpm_put(up); + } + +@@ -2176,9 +2198,10 @@ static void serial8250_put_poll_char(struct uart_port *port, + serial8250_rpm_get(up); + /* + * First save the IER then disable the interrupts ++ * ++ * Best-effort IER access because other CPUs are quiesced. 
+ */ +- ier = serial_port_in(port, UART_IER); +- serial8250_clear_IER(up); ++ __serial8250_clear_IER(up, NULL, &ier); + + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); + /* +@@ -2191,7 +2214,7 @@ static void serial8250_put_poll_char(struct uart_port *port, + * and restore the IER + */ + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); +- serial_port_out(port, UART_IER, ier); ++ __serial8250_set_IER(up, NULL, ier); + serial8250_rpm_put(up); + } + +@@ -2202,6 +2225,7 @@ int serial8250_do_startup(struct uart_port *port) + struct uart_8250_port *up = up_to_u8250p(port); + unsigned long flags; + unsigned char iir; ++ bool is_console; + int retval; + u16 lsr; + +@@ -2219,21 +2243,25 @@ int serial8250_do_startup(struct uart_port *port) + serial8250_rpm_get(up); + if (port->type == PORT_16C950) { + /* Wake up and initialize UART */ ++ spin_lock_irqsave(&port->lock, flags); + up->acr = 0; + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + serial_port_out(port, UART_LCR, 0); + serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); + serial_port_out(port, UART_LCR, 0); ++ spin_unlock_irqrestore(&port->lock, flags); + } + + if (port->type == PORT_DA830) { + /* Reset the port */ +- serial_port_out(port, UART_IER, 0); ++ spin_lock_irqsave(&port->lock, flags); ++ serial8250_set_IER(up, 0); + serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); ++ spin_unlock_irqrestore(&port->lock, flags); + mdelay(10); + + /* Enable Tx, Rx and free run mode */ +@@ -2331,6 +2359,8 @@ int serial8250_do_startup(struct uart_port *port) + if (retval) + goto out; + ++ is_console = serial8250_is_console(port); ++ + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { + unsigned char iir1; + +@@ -2347,6 +2377,9 @@ int serial8250_do_startup(struct uart_port *port) + */ + spin_lock_irqsave(&port->lock, flags); + ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); + udelay(1); /* allow THRE to set */ +@@ -2357,6 +2390,9 @@ int serial8250_do_startup(struct uart_port *port) + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + spin_unlock_irqrestore(&port->lock, flags); + + if (port->irqflags & IRQF_SHARED) +@@ -2411,10 +2447,14 @@ int serial8250_do_startup(struct uart_port *port) + * Do a quick test to see if we receive an interrupt when we enable + * the TX irq. 
+ */ ++ if (is_console) ++ serial8250_enter_unsafe(up); + serial_port_out(port, UART_IER, UART_IER_THRI); + lsr = serial_port_in(port, UART_LSR); + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); ++ if (is_console) ++ serial8250_exit_unsafe(up); + + if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { + if (!(up->bugs & UART_BUG_TXEN)) { +@@ -2446,7 +2486,7 @@ int serial8250_do_startup(struct uart_port *port) + if (up->dma) { + const char *msg = NULL; + +- if (uart_console(port)) ++ if (is_console) + msg = "forbid DMA for kernel console"; + else if (serial8250_request_dma(up)) + msg = "failed to request DMA"; +@@ -2497,7 +2537,7 @@ void serial8250_do_shutdown(struct uart_port *port) + */ + spin_lock_irqsave(&port->lock, flags); + up->ier = 0; +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + spin_unlock_irqrestore(&port->lock, flags); + + synchronize_irq(port->irq); +@@ -2863,7 +2903,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, + if (up->capabilities & UART_CAP_RTOIE) + up->ier |= UART_IER_RTOIE; + +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + if (up->capabilities & UART_CAP_EFR) { + unsigned char efr = 0; +@@ -3328,12 +3368,21 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); + + #ifdef CONFIG_SERIAL_8250_CONSOLE + +-static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) ++static bool serial8250_console_putchar(struct uart_port *port, unsigned char ch, ++ struct cons_write_context *wctxt) + { + struct uart_8250_port *up = up_to_u8250p(port); + + wait_for_xmitr(up, UART_LSR_THRE); ++ if (!console_can_proceed(wctxt)) ++ return false; + serial_port_out(port, UART_TX, ch); ++ if (ch == '\n') ++ up->console_newline_needed = false; ++ else ++ up->console_newline_needed = true; ++ ++ return true; + } + + /* +@@ -3362,33 +3411,119 @@ static void serial8250_console_restore(struct uart_8250_port *up) + serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); + } + +-/* +- * Print a string to the serial port using the device FIFO +- * +- * It sends fifosize bytes and then waits for the fifo +- * to get empty. 
+- */ +-static void serial8250_console_fifo_write(struct uart_8250_port *up, +- const char *s, unsigned int count) ++static bool __serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt, ++ const char *s, unsigned int count, ++ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *)) + { +- int i; +- const char *end = s + count; +- unsigned int fifosize = up->tx_loadsz; +- bool cr_sent = false; ++ bool finished = false; ++ unsigned int i; + +- while (s != end) { +- wait_for_lsr(up, UART_LSR_THRE); +- +- for (i = 0; i < fifosize && s != end; ++i) { +- if (*s == '\n' && !cr_sent) { +- serial_out(up, UART_TX, '\r'); +- cr_sent = true; +- } else { +- serial_out(up, UART_TX, *s++); +- cr_sent = false; +- } ++ for (i = 0; i < count; i++, s++) { ++ if (*s == '\n') { ++ if (!putchar(port, '\r', wctxt)) ++ goto out; + } ++ if (!putchar(port, *s, wctxt)) ++ goto out; + } ++ finished = true; ++out: ++ return finished; ++} ++ ++static bool serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt, ++ const char *s, unsigned int count, ++ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *)) ++{ ++ return __serial8250_console_write(port, wctxt, s, count, putchar); ++} ++ ++static bool atomic_print_line(struct uart_8250_port *up, ++ struct cons_write_context *wctxt) ++{ ++ struct uart_port *port = &up->port; ++ ++ if (up->console_newline_needed && ++ !__serial8250_console_write(port, wctxt, "\n", 1, serial8250_console_putchar)) { ++ return false; ++ } ++ ++ return __serial8250_console_write(port, wctxt, wctxt->outbuf, wctxt->len, ++ serial8250_console_putchar); ++} ++ ++static void atomic_console_reacquire(struct cons_write_context *wctxt, ++ struct cons_write_context *wctxt_init) ++{ ++ memcpy(wctxt, wctxt_init, sizeof(*wctxt)); ++ while (!console_try_acquire(wctxt)) { ++ cpu_relax(); ++ memcpy(wctxt, wctxt_init, sizeof(*wctxt)); ++ } ++} ++ ++bool serial8250_console_write_atomic(struct uart_8250_port *up, ++ struct cons_write_context *wctxt) ++{ ++ struct cons_write_context wctxt_init = { }; ++ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt); ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ bool finished = false; ++ unsigned int ier; ++ ++ touch_nmi_watchdog(); ++ ++ /* With write_atomic, another context may hold the port->lock. */ ++ ++ ctxt_init->console = ctxt->console; ++ ctxt_init->prio = ctxt->prio; ++ ctxt_init->thread = ctxt->thread; ++ ++ /* ++ * Enter unsafe in order to disable interrupts. If the console is ++ * lost before the interrupts are disabled, bail out because another ++ * context took over the printing. If the console is lost after the ++ * interrutps are disabled, the console must be reacquired in order ++ * to re-enable the interrupts. However in that case no printing is ++ * allowed because another context took over the printing. ++ */ ++ ++ if (!console_enter_unsafe(wctxt)) ++ return false; ++ ++ if (!__serial8250_clear_IER(up, wctxt, &ier)) ++ return false; ++ ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ ++ if (!atomic_print_line(up, wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ ++ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); ++ finished = true; ++enable_irq: ++ /* ++ * Enter unsafe in order to enable interrupts. 
If the console is ++ * lost before the interrupts are enabled, the console must be ++ * reacquired in order to re-enable the interrupts. ++ */ ++ for (;;) { ++ if (console_enter_unsafe(wctxt) && ++ __serial8250_set_IER(up, wctxt, ier)) { ++ break; ++ } ++ ++ /* HW-IRQs still disabled. Reacquire to enable them. */ ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ } ++ console_exit_unsafe(wctxt); ++ ++ return finished; + } + + /* +@@ -3400,78 +3535,116 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, + * Doing runtime PM is really a bad idea for the kernel console. + * Thus, we assume the function is called when device is powered up. + */ +-void serial8250_console_write(struct uart_8250_port *up, const char *s, +- unsigned int count) ++bool serial8250_console_write_thread(struct uart_8250_port *up, ++ struct cons_write_context *wctxt) + { ++ struct cons_write_context wctxt_init = { }; ++ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt); ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct uart_8250_em485 *em485 = up->em485; + struct uart_port *port = &up->port; +- unsigned long flags; +- unsigned int ier, use_fifo; +- int locked = 1; ++ unsigned int count = wctxt->len; ++ const char *s = wctxt->outbuf; ++ bool rs485_started = false; ++ bool finished = false; ++ unsigned int ier; + +- touch_nmi_watchdog(); +- +- if (oops_in_progress) +- locked = spin_trylock_irqsave(&port->lock, flags); +- else +- spin_lock_irqsave(&port->lock, flags); ++ ctxt_init->console = ctxt->console; ++ ctxt_init->prio = ctxt->prio; ++ ctxt_init->thread = ctxt->thread; + + /* +- * First save the IER then disable the interrupts ++ * Enter unsafe in order to disable interrupts. If the console is ++ * lost before the interrupts are disabled, bail out because another ++ * context took over the printing. If the console is lost after the ++ * interrutps are disabled, the console must be reacquired in order ++ * to re-enable the interrupts. However in that case no printing is ++ * allowed because another context took over the printing. 
+ */ +- ier = serial_port_in(port, UART_IER); +- serial8250_clear_IER(up); ++ ++ if (!console_enter_unsafe(wctxt)) ++ return false; ++ ++ if (!__serial8250_clear_IER(up, wctxt, &ier)) ++ return false; ++ ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + + /* check scratch reg to see if port powered off during system sleep */ + if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { ++ if (!console_enter_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + serial8250_console_restore(up); ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + up->canary = 0; + } + + if (em485) { +- if (em485->tx_stopped) ++ if (em485->tx_stopped) { ++ if (!console_enter_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + up->rs485_start_tx(up); +- mdelay(port->rs485.delay_rts_before_send); ++ rs485_started = true; ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ } ++ if (port->rs485.delay_rts_before_send) { ++ mdelay(port->rs485.delay_rts_before_send); ++ if (!console_can_proceed(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ } + } + +- use_fifo = (up->capabilities & UART_CAP_FIFO) && +- /* +- * BCM283x requires to check the fifo +- * after each byte. +- */ +- !(up->capabilities & UART_CAP_MINI) && +- /* +- * tx_loadsz contains the transmit fifo size +- */ +- up->tx_loadsz > 1 && +- (up->fcr & UART_FCR_ENABLE_FIFO) && +- port->state && +- test_bit(TTY_PORT_INITIALIZED, &port->state->port.iflags) && +- /* +- * After we put a data in the fifo, the controller will send +- * it regardless of the CTS state. Therefore, only use fifo +- * if we don't use control flow. +- */ +- !(up->port.flags & UPF_CONS_FLOW); ++ if (!serial8250_console_write(port, wctxt, s, count, serial8250_console_putchar)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + +- if (likely(use_fifo)) +- serial8250_console_fifo_write(up, s, count); +- else +- uart_console_write(port, s, count, serial8250_console_putchar); +- +- /* +- * Finally, wait for transmitter to become empty +- * and restore the IER +- */ + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); +- ++ finished = true; ++enable_irq: ++ /* ++ * Enter unsafe in order to stop rs485_tx. If the console is ++ * lost before the rs485_tx is stopped, the console must be ++ * reacquired in order to stop rs485_tx. ++ */ + if (em485) { + mdelay(port->rs485.delay_rts_after_send); +- if (em485->tx_stopped) ++ if (em485->tx_stopped && rs485_started) { ++ while (!console_enter_unsafe(wctxt)) ++ atomic_console_reacquire(wctxt, &wctxt_init); + up->rs485_stop_tx(up); ++ if (!console_exit_unsafe(wctxt)) ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ } + } + +- serial_port_out(port, UART_IER, ier); ++ /* ++ * Enter unsafe in order to enable interrupts. If the console is ++ * lost before the interrupts are enabled, the console must be ++ * reacquired in order to re-enable the interrupts. 
++ */ ++ for (;;) { ++ if (console_enter_unsafe(wctxt) && ++ __serial8250_set_IER(up, wctxt, ier)) { ++ break; ++ } ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ } + + /* + * The receive handling will happen properly because the +@@ -3483,8 +3656,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + if (up->msr_saved_flags) + serial8250_modem_status(up); + +- if (locked) +- spin_unlock_irqrestore(&port->lock, flags); ++ console_exit_unsafe(wctxt); ++ ++ return finished; + } + + static unsigned int probe_baud(struct uart_port *port) +@@ -3504,6 +3678,7 @@ static unsigned int probe_baud(struct uart_port *port) + + int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + { ++ struct uart_8250_port *up = up_to_u8250p(port); + int baud = 9600; + int bits = 8; + int parity = 'n'; +@@ -3513,6 +3688,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + if (!port->iobase && !port->membase) + return -ENODEV; + ++ up->console_newline_needed = false; ++ + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + else if (probe) +diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig +index 5313aa31930f4..16715f01bdb5a 100644 +--- a/drivers/tty/serial/8250/Kconfig ++++ b/drivers/tty/serial/8250/Kconfig +@@ -9,6 +9,7 @@ config SERIAL_8250 + depends on !S390 + select SERIAL_CORE + select SERIAL_MCTRL_GPIO if GPIOLIB ++ select HAVE_ATOMIC_CONSOLE + help + This selects whether you want to include the driver for the standard + serial ports. The standard answer is Y. People who might say N +diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c +index d8c2f3455eeba..a4e142ac6ec46 100644 +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -2319,18 +2319,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) + { + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0, new_cr; +- unsigned long flags; ++ unsigned long flags = 0; + int locked = 1; + + clk_enable(uap->clk); + +- local_irq_save(flags); ++ /* ++ * local_irq_save(flags); ++ * ++ * This local_irq_save() is nonsense. If we come in via sysrq ++ * handling then interrupts are already disabled. Aside of ++ * that the port.sysrq check is racy on SMP regardless. 
++ */ + if (uap->port.sysrq) + locked = 0; + else if (oops_in_progress) +- locked = spin_trylock(&uap->port.lock); ++ locked = spin_trylock_irqsave(&uap->port.lock, flags); + else +- spin_lock(&uap->port.lock); ++ spin_lock_irqsave(&uap->port.lock, flags); + + /* + * First save the CR then disable the interrupts +@@ -2356,8 +2362,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) + pl011_write(old_cr, uap, REG_CR); + + if (locked) +- spin_unlock(&uap->port.lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&uap->port.lock, flags); + + clk_disable(uap->clk); + } +diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c +index 82d35dbbfa6cb..511cf17d87958 100644 +--- a/drivers/tty/serial/omap-serial.c ++++ b/drivers/tty/serial/omap-serial.c +@@ -1219,13 +1219,10 @@ serial_omap_console_write(struct console *co, const char *s, + unsigned int ier; + int locked = 1; + +- local_irq_save(flags); +- if (up->port.sysrq) +- locked = 0; +- else if (oops_in_progress) +- locked = spin_trylock(&up->port.lock); ++ if (up->port.sysrq || oops_in_progress) ++ locked = spin_trylock_irqsave(&up->port.lock, flags); + else +- spin_lock(&up->port.lock); ++ spin_lock_irqsave(&up->port.lock, flags); + + /* + * First save the IER then disable the interrupts +@@ -1252,8 +1249,7 @@ serial_omap_console_write(struct console *co, const char *s, + check_modem_status(up); + + if (locked) +- spin_unlock(&up->port.lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&up->port.lock, flags); + } + + static int __init +diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c +index 728cb72be0666..409892b777d16 100644 +--- a/drivers/tty/serial/serial_core.c ++++ b/drivers/tty/serial/serial_core.c +@@ -2336,8 +2336,11 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) + * able to Re-start_rx later. 
+ */ + if (!console_suspend_enabled && uart_console(uport)) { +- if (uport->ops->start_rx) ++ if (uport->ops->start_rx) { ++ spin_lock_irq(&uport->lock); + uport->ops->stop_rx(uport); ++ spin_unlock_irq(&uport->lock); ++ } + goto unlock; + } + +@@ -2430,8 +2433,11 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) + if (console_suspend_enabled) + uart_change_pm(state, UART_PM_STATE_ON); + uport->ops->set_termios(uport, &termios, NULL); +- if (!console_suspend_enabled && uport->ops->start_rx) ++ if (!console_suspend_enabled && uport->ops->start_rx) { ++ spin_lock_irq(&uport->lock); + uport->ops->start_rx(uport); ++ spin_unlock_irq(&uport->lock); ++ } + if (console_suspend_enabled) + console_start(uport->cons); + } +diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c +index 8e3de07f103da..af933c7a61d98 100644 +--- a/drivers/tty/tty_io.c ++++ b/drivers/tty/tty_io.c +@@ -3543,8 +3543,13 @@ static ssize_t show_cons_active(struct device *dev, + for_each_console(c) { + if (!c->device) + continue; +- if (!c->write) +- continue; ++ if (c->flags & CON_NO_BKL) { ++ if (!(c->write_thread || c->write_atomic)) ++ continue; ++ } else { ++ if (!c->write) ++ continue; ++ } + if ((c->flags & CON_ENABLED) == 0) + continue; + cs[i++] = c; +diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c +index e0758fe7936dc..ab9f42d478c8c 100644 +--- a/fs/proc/consoles.c ++++ b/fs/proc/consoles.c +@@ -21,12 +21,14 @@ static int show_console_dev(struct seq_file *m, void *v) + { CON_ENABLED, 'E' }, + { CON_CONSDEV, 'C' }, + { CON_BOOT, 'B' }, ++ { CON_NO_BKL, 'N' }, + { CON_PRINTBUFFER, 'p' }, + { CON_BRL, 'b' }, + { CON_ANYTIME, 'a' }, + }; + char flags[ARRAY_SIZE(con_flags) + 1]; + struct console *con = v; ++ char con_write = '-'; + unsigned int a; + dev_t dev = 0; + +@@ -57,9 +59,15 @@ static int show_console_dev(struct seq_file *m, void *v) + seq_setwidth(m, 21 - 1); + seq_printf(m, "%s%d", con->name, con->index); + seq_pad(m, ' '); +- seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', +- con->write ? 'W' : '-', con->unblank ? 'U' : '-', +- flags); ++ if (con->flags & CON_NO_BKL) { ++ if (con->write_thread || con->write_atomic) ++ con_write = 'W'; ++ } else { ++ if (con->write) ++ con_write = 'W'; ++ } ++ seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write, ++ con->unblank ? 'U' : '-', flags); + if (dev) + seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); + +diff --git a/include/linux/console.h b/include/linux/console.h +index d3195664baa5a..1e9d5bc8fa76e 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -16,7 +16,9 @@ + + #include + #include ++#include + #include ++#include + #include + + struct vc_data; +@@ -154,6 +156,10 @@ static inline int con_debug_leave(void) + * receiving the printk spam for obvious reasons. + * @CON_EXTENDED: The console supports the extended output format of + * /dev/kmesg which requires a larger output buffer. ++ * @CON_SUSPENDED: Indicates if a console is suspended. If true, the ++ * printing callbacks must not be called. ++ * @CON_NO_BKL: Console can operate outside of the BKL style console_lock ++ * constraints. 
+ */ + enum cons_flags { + CON_PRINTBUFFER = BIT(0), +@@ -163,8 +169,133 @@ enum cons_flags { + CON_ANYTIME = BIT(4), + CON_BRL = BIT(5), + CON_EXTENDED = BIT(6), ++ CON_SUSPENDED = BIT(7), ++ CON_NO_BKL = BIT(8), + }; + ++/** ++ * struct cons_state - console state for NOBKL consoles ++ * @atom: Compound of the state fields for atomic operations ++ * @seq: Sequence for record tracking (64bit only) ++ * @bits: Compound of the state bits below ++ * ++ * @locked: Console is locked by a writer ++ * @unsafe: Console is busy in a non takeover region ++ * @thread: Current owner is the printk thread ++ * @cur_prio: The priority of the current output ++ * @req_prio: The priority of a handover request ++ * @cpu: The CPU on which the writer runs ++ * ++ * To be used for state read and preparation of atomic_long_cmpxchg() ++ * operations. ++ * ++ * The @req_prio field is particularly important to allow spin-waiting to ++ * timeout and give up without the risk of it being assigned the lock ++ * after giving up. The @req_prio field has a nice side-effect that it ++ * also makes it possible for a single read+cmpxchg in the common case of ++ * acquire and release. ++ */ ++struct cons_state { ++ union { ++ unsigned long atom; ++ struct { ++#ifdef CONFIG_64BIT ++ u32 seq; ++#endif ++ union { ++ u32 bits; ++ struct { ++ u32 locked : 1; ++ u32 unsafe : 1; ++ u32 thread : 1; ++ u32 cur_prio : 2; ++ u32 req_prio : 2; ++ u32 cpu : 18; ++ }; ++ }; ++ }; ++ }; ++}; ++ ++/** ++ * cons_prio - console writer priority for NOBKL consoles ++ * @CONS_PRIO_NONE: Unused ++ * @CONS_PRIO_NORMAL: Regular printk ++ * @CONS_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) ++ * @CONS_PRIO_PANIC: Panic output ++ * @CONS_PRIO_MAX: The number of priority levels ++ * ++ * Emergency output can carefully takeover the console even without consent ++ * of the owner, ideally only when @cons_state::unsafe is not set. Panic ++ * output can ignore the unsafe flag as a last resort. If panic output is ++ * active no takeover is possible until the panic output releases the ++ * console. ++ */ ++enum cons_prio { ++ CONS_PRIO_NONE = 0, ++ CONS_PRIO_NORMAL, ++ CONS_PRIO_EMERGENCY, ++ CONS_PRIO_PANIC, ++ CONS_PRIO_MAX, ++}; ++ ++struct console; ++struct printk_buffers; ++ ++/** ++ * struct cons_context - Context for console acquire/release ++ * @console: The associated console ++ * @state: The state at acquire time ++ * @old_state: The old state when try_acquire() failed for analysis ++ * by the caller ++ * @hov_state: The handover state for spin and cleanup ++ * @req_state: The request state for spin and cleanup ++ * @spinwait_max_us: Limit for spinwait acquire ++ * @oldseq: The sequence number at acquire() ++ * @newseq: The sequence number for progress ++ * @prio: Priority of the context ++ * @pbufs: Pointer to the text buffer for this context ++ * @dropped: Dropped counter for the current context ++ * @thread: The acquire is printk thread context ++ * @hostile: Hostile takeover requested. 
Cleared on normal ++ * acquire or friendly handover ++ * @spinwait: Spinwait on acquire if possible ++ * @backlog: Ringbuffer has pending records ++ */ ++struct cons_context { ++ struct console *console; ++ struct cons_state state; ++ struct cons_state old_state; ++ struct cons_state hov_state; ++ struct cons_state req_state; ++ u64 oldseq; ++ u64 newseq; ++ unsigned int spinwait_max_us; ++ enum cons_prio prio; ++ struct printk_buffers *pbufs; ++ unsigned long dropped; ++ unsigned int thread : 1; ++ unsigned int hostile : 1; ++ unsigned int spinwait : 1; ++ unsigned int backlog : 1; ++}; ++ ++/** ++ * struct cons_write_context - Context handed to the write callbacks ++ * @ctxt: The core console context ++ * @outbuf: Pointer to the text buffer for output ++ * @len: Length to write ++ * @unsafe: Invoked in unsafe state due to force takeover ++ */ ++struct cons_write_context { ++ struct cons_context __private ctxt; ++ char *outbuf; ++ unsigned int len; ++ bool unsafe; ++}; ++ ++struct cons_context_data; ++ + /** + * struct console - The console descriptor structure + * @name: The name of the console driver +@@ -184,6 +315,18 @@ enum cons_flags { + * @dropped: Number of unreported dropped ringbuffer records + * @data: Driver private data + * @node: hlist node for the console list ++ * ++ * @atomic_state: State array for NOBKL consoles; real and handover ++ * @atomic_seq: Sequence for record tracking (32bit only) ++ * @thread_pbufs: Pointer to thread private buffer ++ * @kthread: Pointer to kernel thread ++ * @rcuwait: RCU wait for the kernel thread ++ * @irq_work: IRQ work for thread wakeup ++ * @kthread_waiting: Indicator whether the kthread is waiting to be woken ++ * @write_atomic: Write callback for atomic context ++ * @write_thread: Write callback for printk threaded printing ++ * @port_lock: Callback to lock/unlock the port lock ++ * @pcpu_data: Pointer to percpu context data + */ + struct console { + char name[16]; +@@ -203,6 +346,23 @@ struct console { + unsigned long dropped; + void *data; + struct hlist_node node; ++ ++ /* NOBKL console specific members */ ++ atomic_long_t __private atomic_state[2]; ++#ifndef CONFIG_64BIT ++ atomic_t __private atomic_seq; ++#endif ++ struct printk_buffers *thread_pbufs; ++ struct task_struct *kthread; ++ struct rcuwait rcuwait; ++ struct irq_work irq_work; ++ atomic_t kthread_waiting; ++ ++ bool (*write_atomic)(struct console *con, struct cons_write_context *wctxt); ++ bool (*write_thread)(struct console *con, struct cons_write_context *wctxt); ++ void (*port_lock)(struct console *con, bool do_lock, unsigned long *flags); ++ ++ struct cons_context_data __percpu *pcpu_data; + }; + + #ifdef CONFIG_LOCKDEP +@@ -329,6 +489,24 @@ static inline bool console_is_registered(const struct console *con) + lockdep_assert_console_list_lock_held(); \ + hlist_for_each_entry(con, &console_list, node) + ++#ifdef CONFIG_PRINTK ++extern enum cons_prio cons_atomic_enter(enum cons_prio prio); ++extern void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio); ++extern bool console_can_proceed(struct cons_write_context *wctxt); ++extern bool console_enter_unsafe(struct cons_write_context *wctxt); ++extern bool console_exit_unsafe(struct cons_write_context *wctxt); ++extern bool console_try_acquire(struct cons_write_context *wctxt); ++extern bool console_release(struct cons_write_context *wctxt); ++#else ++static inline enum cons_prio cons_atomic_enter(enum cons_prio prio) { return CONS_PRIO_NONE; } ++static inline void cons_atomic_exit(enum cons_prio prio, enum 
cons_prio prev_prio) { } ++static inline bool console_can_proceed(struct cons_write_context *wctxt) { return false; } ++static inline bool console_enter_unsafe(struct cons_write_context *wctxt) { return false; } ++static inline bool console_exit_unsafe(struct cons_write_context *wctxt) { return false; } ++static inline bool console_try_acquire(struct cons_write_context *wctxt) { return false; } ++static inline bool console_release(struct cons_write_context *wctxt) { return false; } ++#endif ++ + extern int console_set_on_cmdline; + extern struct console *early_console; + +diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h +index d95ab85f96ba5..3dc3704a3cdbb 100644 +--- a/include/linux/entry-common.h ++++ b/include/linux/entry-common.h +@@ -57,9 +57,15 @@ + # define ARCH_EXIT_TO_USER_MODE_WORK (0) + #endif + ++#ifdef CONFIG_PREEMPT_LAZY ++# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++#else ++# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED) ++#endif ++ + #define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ +- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ++ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + ARCH_EXIT_TO_USER_MODE_WORK) + + /** +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index a92bce40b04b3..bf82980f569df 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -605,6 +605,35 @@ extern void __raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq(unsigned int nr); + ++#ifdef CONFIG_PREEMPT_RT ++DECLARE_PER_CPU(struct task_struct *, timersd); ++DECLARE_PER_CPU(unsigned long, pending_timer_softirq); ++ ++extern void raise_timer_softirq(void); ++extern void raise_hrtimer_softirq(void); ++ ++static inline unsigned int local_pending_timers(void) ++{ ++ return __this_cpu_read(pending_timer_softirq); ++} ++ ++#else ++static inline void raise_timer_softirq(void) ++{ ++ raise_softirq(TIMER_SOFTIRQ); ++} ++ ++static inline void raise_hrtimer_softirq(void) ++{ ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++} ++ ++static inline unsigned int local_pending_timers(void) ++{ ++ return local_softirq_pending(); ++} ++#endif ++ + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); + + static inline struct task_struct *this_cpu_ksoftirqd(void) +diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h +index 09d4f17c8d3b6..7376c1df9c901 100644 +--- a/include/linux/io-mapping.h ++++ b/include/linux/io-mapping.h +@@ -69,7 +69,10 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ migrate_disable(); + pagefault_disable(); + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); + } +@@ -79,7 +82,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) + { + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); +- preempt_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++ migrate_enable(); + } + + static inline void __iomem * +@@ -162,7 +168,10 @@ static inline void __iomem * + io_mapping_map_atomic_wc(struct io_mapping *mapping, + unsigned long offset) + { +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ migrate_disable(); + pagefault_disable(); + return io_mapping_map_wc(mapping, offset, 
PAGE_SIZE); + } +@@ -172,7 +181,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) + { + io_mapping_unmap(vaddr); + pagefault_enable(); +- preempt_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++ migrate_enable(); + } + + static inline void __iomem * +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index c35f04f636f15..0c014424b1312 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3202,7 +3202,11 @@ struct softnet_data { + int defer_count; + int defer_ipi_scheduled; + struct sk_buff *defer_list; ++#ifndef CONFIG_PREEMPT_RT + call_single_data_t defer_csd; ++#else ++ struct work_struct defer_work; ++#endif + }; + + static inline void input_queue_head_incr(struct softnet_data *sd) +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 0df425bf9bd75..05338f00a5907 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val); + #define preempt_count_inc() preempt_count_add(1) + #define preempt_count_dec() preempt_count_sub(1) + ++#ifdef CONFIG_PREEMPT_LAZY ++#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) ++#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) ++#define inc_preempt_lazy_count() add_preempt_lazy_count(1) ++#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) ++#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) ++#else ++#define add_preempt_lazy_count(val) do { } while (0) ++#define sub_preempt_lazy_count(val) do { } while (0) ++#define inc_preempt_lazy_count() do { } while (0) ++#define dec_preempt_lazy_count() do { } while (0) ++#define preempt_lazy_count() (0) ++#endif ++ + #ifdef CONFIG_PREEMPT_COUNT + + #define preempt_disable() \ +@@ -204,6 +218,12 @@ do { \ + barrier(); \ + } while (0) + ++#define preempt_lazy_disable() \ ++do { \ ++ inc_preempt_lazy_count(); \ ++ barrier(); \ ++} while (0) ++ + #define sched_preempt_enable_no_resched() \ + do { \ + barrier(); \ +@@ -218,15 +238,21 @@ do { \ + #define preempt_enable() \ + do { \ + barrier(); \ +- if (unlikely(preempt_count_dec_and_test())) \ ++ if (unlikely(preempt_count_dec_and_test())) { \ ++ instrumentation_begin(); \ + __preempt_schedule(); \ ++ instrumentation_end(); \ ++ } \ + } while (0) + + #define preempt_enable_notrace() \ + do { \ + barrier(); \ +- if (unlikely(__preempt_count_dec_and_test())) \ ++ if (unlikely(__preempt_count_dec_and_test())) { \ ++ instrumentation_begin(); \ + __preempt_schedule_notrace(); \ ++ instrumentation_end(); \ ++ } \ + } while (0) + + #define preempt_check_resched() \ +@@ -235,6 +261,18 @@ do { \ + __preempt_schedule(); \ + } while (0) + ++/* ++ * open code preempt_check_resched() because it is not exported to modules and ++ * used by local_unlock() or bpf_enable_instrumentation(). 
++ */ ++#define preempt_lazy_enable() \ ++do { \ ++ dec_preempt_lazy_count(); \ ++ barrier(); \ ++ if (should_resched(0)) \ ++ __preempt_schedule(); \ ++} while (0) ++ + #else /* !CONFIG_PREEMPTION */ + #define preempt_enable() \ + do { \ +@@ -242,6 +280,12 @@ do { \ + preempt_count_dec(); \ + } while (0) + ++#define preempt_lazy_enable() \ ++do { \ ++ dec_preempt_lazy_count(); \ ++ barrier(); \ ++} while (0) ++ + #define preempt_enable_notrace() \ + do { \ + barrier(); \ +@@ -282,6 +326,9 @@ do { \ + #define preempt_enable_notrace() barrier() + #define preemptible() 0 + ++#define preempt_lazy_disable() barrier() ++#define preempt_lazy_enable() barrier() ++ + #endif /* CONFIG_PREEMPT_COUNT */ + + #ifdef MODULE +@@ -300,7 +347,7 @@ do { \ + } while (0) + #define preempt_fold_need_resched() \ + do { \ +- if (tif_need_resched()) \ ++ if (tif_need_resched_now()) \ + set_preempt_need_resched(); \ + } while (0) + +@@ -416,8 +463,15 @@ extern void migrate_enable(void); + + #else + +-static inline void migrate_disable(void) { } +-static inline void migrate_enable(void) { } ++static inline void migrate_disable(void) ++{ ++ preempt_lazy_disable(); ++} ++ ++static inline void migrate_enable(void) ++{ ++ preempt_lazy_enable(); ++} + + #endif /* CONFIG_SMP */ + +diff --git a/include/linux/printk.h b/include/linux/printk.h +index 8ef499ab3c1ed..b55662624ff87 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -139,6 +139,7 @@ void early_printk(const char *s, ...) { } + #endif + + struct dev_printk_info; ++struct cons_write_context; + + #ifdef CONFIG_PRINTK + asmlinkage __printf(4, 0) +@@ -157,15 +158,17 @@ int _printk(const char *fmt, ...); + */ + __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); + +-extern void __printk_safe_enter(void); +-extern void __printk_safe_exit(void); ++extern void __printk_safe_enter(unsigned long *flags); ++extern void __printk_safe_exit(unsigned long *flags); ++extern void __printk_deferred_enter(void); ++extern void __printk_deferred_exit(void); + /* + * The printk_deferred_enter/exit macros are available only as a hack for + * some code paths that need to defer all printk console printing. Interrupts + * must be disabled for the deferred duration. 
+ */ +-#define printk_deferred_enter __printk_safe_enter +-#define printk_deferred_exit __printk_safe_exit ++#define printk_deferred_enter() __printk_deferred_enter() ++#define printk_deferred_exit() __printk_deferred_exit() + + /* + * Please don't use printk_ratelimit(), because it shares ratelimiting state +@@ -192,6 +195,8 @@ void show_regs_print_info(const char *log_lvl); + extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; + extern asmlinkage void dump_stack(void) __cold; + void printk_trigger_flush(void); ++extern void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, ++ bool skip_unsafe); + #else + static inline __printf(1, 0) + int vprintk(const char *s, va_list args) +@@ -271,6 +276,12 @@ static inline void dump_stack(void) + static inline void printk_trigger_flush(void) + { + } ++ ++static inline void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, ++ bool skip_unsafe) ++{ ++} ++ + #endif + + #ifdef CONFIG_SMP +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 63d242164b1a9..593d7e7c2e4c7 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -303,6 +303,11 @@ extern long schedule_timeout_idle(long timeout); + asmlinkage void schedule(void); + extern void schedule_preempt_disabled(void); + asmlinkage void preempt_schedule_irq(void); ++ ++extern void sched_submit_work(void); ++extern void sched_resume_work(void); ++extern void schedule_rtmutex(void); ++ + #ifdef CONFIG_PREEMPT_RT + extern void schedule_rtlock(void); + #endif +@@ -2059,6 +2064,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); ++} ++ ++static inline int need_resched_lazy(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline int need_resched_now(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED); ++} ++ ++#else ++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } ++static inline int need_resched_lazy(void) { return 0; } ++ ++static inline int need_resched_now(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED); ++} ++ ++#endif ++ + /* + * cond_resched() and cond_resched_lock(): latency reduction via + * explicit rescheduling in places that are safe. 
The return +diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h +index 20099268fa257..b38ce53576000 100644 +--- a/include/linux/sched/signal.h ++++ b/include/linux/sched/signal.h +@@ -22,6 +22,7 @@ struct sighand_struct { + refcount_t count; + wait_queue_head_t signalfd_wqh; + struct k_sigaction action[_NSIG]; ++ struct sigqueue *sigqueue_cache; + }; + + /* +@@ -135,7 +136,7 @@ struct signal_struct { + #ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ +- int posix_timer_id; ++ unsigned int next_posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ +@@ -349,6 +350,7 @@ extern int send_sig(int, struct task_struct *, int); + extern int zap_other_threads(struct task_struct *p); + extern struct sigqueue *sigqueue_alloc(void); + extern void sigqueue_free(struct sigqueue *); ++extern void sigqueue_free_cached_entry(struct sigqueue *q); + extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); + extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h +index 19376bee96676..9055a22992edc 100644 +--- a/include/linux/serial_8250.h ++++ b/include/linux/serial_8250.h +@@ -125,6 +125,8 @@ struct uart_8250_port { + #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; + ++ bool console_newline_needed; ++ + struct uart_8250_dma *dma; + const struct uart_8250_ops *ops; + +@@ -139,6 +141,9 @@ struct uart_8250_port { + /* Serial port overrun backoff */ + struct delayed_work overrun_backoff; + u32 overrun_backoff_time_ms; ++ ++ struct cons_write_context wctxt; ++ int cookie; + }; + + static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) +@@ -178,8 +183,10 @@ void serial8250_tx_chars(struct uart_8250_port *up); + unsigned int serial8250_modem_status(struct uart_8250_port *up); + void serial8250_init_port(struct uart_8250_port *up); + void serial8250_set_defaults(struct uart_8250_port *up); +-void serial8250_console_write(struct uart_8250_port *up, const char *s, +- unsigned int count); ++bool serial8250_console_write_atomic(struct uart_8250_port *up, ++ struct cons_write_context *wctxt); ++bool serial8250_console_write_thread(struct uart_8250_port *up, ++ struct cons_write_context *wctxt); + int serial8250_console_setup(struct uart_port *port, char *options, bool probe); + int serial8250_console_exit(struct uart_port *port); + +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index c02646884fa83..c1acb81b81948 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -178,6 +178,26 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti + #endif /* !CONFIG_GENERIC_ENTRY */ + + #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H ++# ifdef CONFIG_PREEMPT_LAZY ++ ++static __always_inline bool tif_need_resched(void) ++{ ++ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY); ++} ++ ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return arch_test_bit(TIF_NEED_RESCHED, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return arch_test_bit(TIF_NEED_RESCHED_LAZY, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++# else /* !CONFIG_PREEMPT_LAZY */ + + static __always_inline bool tif_need_resched(void) + { +@@ -185,7 +205,38 @@ static __always_inline bool tif_need_resched(void) + (unsigned long 
*)(¤t_thread_info()->flags)); + } + +-#else ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return tif_need_resched(); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return false; ++} ++ ++# endif /* CONFIG_PREEMPT_LAZY */ ++#else /* !_ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ ++# ifdef CONFIG_PREEMPT_LAZY ++ ++static __always_inline bool tif_need_resched(void) ++{ ++ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY); ++} ++ ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return test_bit(TIF_NEED_RESCHED, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return test_bit(TIF_NEED_RESCHED_LAZY, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++# else /* !CONFIG_PREEMPT_LAZY */ + + static __always_inline bool tif_need_resched(void) + { +@@ -193,6 +244,17 @@ static __always_inline bool tif_need_resched(void) + (unsigned long *)(¤t_thread_info()->flags)); + } + ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return tif_need_resched(); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return false; ++} ++ ++# endif /* !CONFIG_PREEMPT_LAZY */ + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ + + #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index 0e373222a6df8..47017fcf5481f 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -70,6 +70,7 @@ struct trace_entry { + unsigned char flags; + unsigned char preempt_count; + int pid; ++ unsigned char preempt_lazy_count; + }; + + #define TRACE_EVENT_TYPE_MAX \ +@@ -158,9 +159,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, + unsigned int trace_ctx) + { + entry->preempt_count = trace_ctx & 0xff; ++ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; + entry->pid = current->pid; + entry->type = type; +- entry->flags = trace_ctx >> 16; ++ entry->flags = trace_ctx >> 24; + } + + unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); +@@ -171,7 +173,13 @@ enum trace_flag_type { + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, ++#ifdef CONFIG_PREEMPT_LAZY ++ TRACE_FLAG_PREEMPT_RESCHED = 0x00, ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20, ++#else ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x00, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, ++#endif + TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_BH_OFF = 0x80, + }; +diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h +index 3e8619c72f774..b4bc2828fa09f 100644 +--- a/include/trace/events/timer.h ++++ b/include/trace/events/timer.h +@@ -158,7 +158,11 @@ DEFINE_EVENT(timer_class, timer_cancel, + { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ + { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ + { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ +- { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) ++ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \ ++ { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \ ++ { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \ ++ { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \ ++ { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) + + /** + * hrtimer_init - called when the hrtimer is initialized +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index c2f1fd95a8214..260c08efeb486 100644 +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -1,5 +1,11 @@ + # SPDX-License-Identifier: GPL-2.0-only + 
++config HAVE_PREEMPT_LAZY ++ bool ++ ++config PREEMPT_LAZY ++ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT ++ + config PREEMPT_NONE_BUILD + bool + +diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c +index 5c7e9ba7cd6b2..e9139dfc1f0a8 100644 +--- a/kernel/debug/kdb/kdb_io.c ++++ b/kernel/debug/kdb/kdb_io.c +@@ -576,6 +576,8 @@ static void kdb_msg_write(const char *msg, int msg_len) + continue; + if (c == dbg_io_ops->cons) + continue; ++ if (!c->write) ++ continue; + /* + * Set oops_in_progress to encourage the console drivers to + * disregard their internal spin locks: in the current calling +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index be61332c66b54..c6301e520d47b 100644 +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -155,7 +155,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + + local_irq_enable_exit_to_user(ti_work); + +- if (ti_work & _TIF_NEED_RESCHED) ++ if (ti_work & _TIF_NEED_RESCHED_MASK) + schedule(); + + if (ti_work & _TIF_UPROBE) +@@ -386,7 +386,7 @@ void raw_irqentry_exit_cond_resched(void) + rcu_irq_exit_check_preempt(); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + WARN_ON_ONCE(!on_thread_stack()); +- if (need_resched()) ++ if (should_resched(0)) + preempt_schedule_irq(); + } + } +diff --git a/kernel/fork.c b/kernel/fork.c +index ea332319dffea..d78954a3834cc 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1665,6 +1665,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) + RCU_INIT_POINTER(tsk->sighand, sig); + if (!sig) + return -ENOMEM; ++ sig->sigqueue_cache = NULL; + + refcount_set(&sig->count, 1); + spin_lock_irq(¤t->sighand->siglock); +@@ -1681,7 +1682,17 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) + void __cleanup_sighand(struct sighand_struct *sighand) + { + if (refcount_dec_and_test(&sighand->count)) { ++ struct sigqueue *sigqueue = NULL; ++ + signalfd_cleanup(sighand); ++ spin_lock_irq(&sighand->siglock); ++ if (sighand->sigqueue_cache) { ++ sigqueue = sighand->sigqueue_cache; ++ sighand->sigqueue_cache = NULL; ++ } ++ spin_unlock_irq(&sighand->siglock); ++ ++ sigqueue_free_cached_entry(sigqueue); + /* + * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it + * without an RCU grace period, see __lock_task_sighand(). 
+diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c +index 0408aab80941b..ee063d28f94f8 100644 +--- a/kernel/ksysfs.c ++++ b/kernel/ksysfs.c +@@ -167,6 +167,15 @@ KERNEL_ATTR_RO(vmcoreinfo); + + #endif /* CONFIG_CRASH_CORE */ + ++#if defined(CONFIG_PREEMPT_RT) ++static ssize_t realtime_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%d\n", 1); ++} ++KERNEL_ATTR_RO(realtime); ++#endif ++ + /* whether file capabilities are enabled */ + static ssize_t fscaps_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +@@ -259,6 +268,9 @@ static struct attribute * kernel_attrs[] = { + #ifndef CONFIG_TINY_RCU + &rcu_expedited_attr.attr, + &rcu_normal_attr.attr, ++#endif ++#ifdef CONFIG_PREEMPT_RT ++ &realtime_attr.attr, + #endif + NULL + }; +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 728f434de2bbf..479a9487edcc2 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, + return try_cmpxchg_acquire(&lock->owner, &old, new); + } + ++static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) ++{ ++ return rt_mutex_cmpxchg_acquire(lock, NULL, current); ++} ++ + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, + struct task_struct *old, + struct task_struct *new) +@@ -297,6 +302,24 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, + + } + ++static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); ++ ++static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) ++{ ++ /* ++ * With debug enabled rt_mutex_cmpxchg trylock() will always fail, ++ * which will unconditionally invoke sched_submit/resume_work() in ++ * the slow path of __rt_mutex_lock() and __ww_rt_mutex_lock() even ++ * in the non-contended case. ++ * ++ * Avoid that by using rt_mutex_slow_trylock() which is covered by ++ * the debug code and can acquire a non-contended rtmutex. On ++ * success the callsite avoids the sched_submit/resume_work() ++ * dance. ++ */ ++ return rt_mutex_slowtrylock(lock); ++} ++ + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, + struct task_struct *old, + struct task_struct *new) +@@ -1555,7 +1578,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, + raw_spin_unlock_irq(&lock->wait_lock); + + if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) +- schedule(); ++ schedule_rtmutex(); + + raw_spin_lock_irq(&lock->wait_lock); + set_current_state(state); +@@ -1584,7 +1607,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, + WARN(1, "rtmutex deadlock detected\n"); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); +- schedule(); ++ schedule_rtmutex(); + } + } + +@@ -1679,6 +1702,12 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + unsigned long flags; + int ret; + ++ /* ++ * The task is about to sleep. Invoke sched_submit_work() before ++ * blocking as that might take locks and corrupt tsk::pi_blocked_on. 
++ */ ++ sched_submit_work(); ++ + /* + * Technically we could use raw_spin_[un]lock_irq() here, but this can + * be called in early boot if the cmpxchg() fast path is disabled +@@ -1691,13 +1720,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + ++ sched_resume_work(); + return ret; + } + + static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, + unsigned int state) + { +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ lockdep_assert(!current->pi_blocked_on); ++ ++ if (likely(rt_mutex_try_acquire(lock))) + return 0; + + return rt_mutex_slowlock(lock, NULL, state); +diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c +index c201aadb93017..5be92ca5afabc 100644 +--- a/kernel/locking/rwbase_rt.c ++++ b/kernel/locking/rwbase_rt.c +@@ -72,15 +72,6 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + int ret; + + raw_spin_lock_irq(&rtm->wait_lock); +- /* +- * Allow readers, as long as the writer has not completely +- * acquired the semaphore for write. +- */ +- if (atomic_read(&rwb->readers) != WRITER_BIAS) { +- atomic_inc(&rwb->readers); +- raw_spin_unlock_irq(&rtm->wait_lock); +- return 0; +- } + + /* + * Call into the slow lock path with the rtmutex->wait_lock +@@ -140,10 +131,23 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, + unsigned int state) + { ++ int ret; ++ ++ lockdep_assert(!current->pi_blocked_on); ++ + if (rwbase_read_trylock(rwb)) + return 0; + +- return __rwbase_read_lock(rwb, state); ++ /* ++ * The task is about to sleep. For rwsems this submits work as that ++ * might take locks and corrupt tsk::pi_blocked_on. Must be ++ * explicit here because __rwbase_read_lock() cannot invoke ++ * rt_mutex_slowlock(). NOP for rwlocks. ++ */ ++ rwbase_sched_submit_work(); ++ ret = __rwbase_read_lock(rwb, state); ++ rwbase_sched_resume_work(); ++ return ret; + } + + static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, +@@ -239,7 +243,10 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, + struct rt_mutex_base *rtm = &rwb->rtmutex; + unsigned long flags; + +- /* Take the rtmutex as a first step */ ++ /* ++ * Take the rtmutex as a first step. For rwsem this will also ++ * invoke sched_submit_work() to flush IO and workers. 
++ */ + if (rwbase_rtmutex_lock_state(rtm, state)) + return -EINTR; + +diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c +index 9eabd585ce7af..e304db9ebfd95 100644 +--- a/kernel/locking/rwsem.c ++++ b/kernel/locking/rwsem.c +@@ -1415,6 +1415,12 @@ static inline void __downgrade_write(struct rw_semaphore *sem) + #define rwbase_rtmutex_lock_state(rtm, state) \ + __rt_mutex_lock(rtm, state) + ++#define rwbase_sched_submit_work() \ ++ sched_submit_work() ++ ++#define rwbase_sched_resume_work() \ ++ sched_resume_work() ++ + #define rwbase_rtmutex_slowlock_locked(rtm, state) \ + __rt_mutex_slowlock_locked(rtm, NULL, state) + +diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c +index 48a19ed8486d8..9fe282cd145d9 100644 +--- a/kernel/locking/spinlock_rt.c ++++ b/kernel/locking/spinlock_rt.c +@@ -37,6 +37,8 @@ + + static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) + { ++ lockdep_assert(!current->pi_blocked_on); ++ + if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) + rtlock_slowlock(rtm); + } +@@ -159,6 +161,9 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state) + return 0; + } + ++static __always_inline void rwbase_sched_submit_work(void) { } ++static __always_inline void rwbase_sched_resume_work(void) { } ++ + static __always_inline int + rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state) + { +diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c +index d1473c624105c..c7196de838edc 100644 +--- a/kernel/locking/ww_rt_mutex.c ++++ b/kernel/locking/ww_rt_mutex.c +@@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, + } + mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip); + +- if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) { ++ if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) { + if (ww_ctx) + ww_mutex_set_context_fastpath(lock, ww_ctx); + return 0; +diff --git a/kernel/panic.c b/kernel/panic.c +index 5cfea8302d23a..190f7f2bc6cfd 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -275,6 +275,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec) + */ + void panic(const char *fmt, ...) + { ++ enum cons_prio prev_prio; + static char buf[1024]; + va_list args; + long i, i_next = 0, len; +@@ -322,6 +323,8 @@ void panic(const char *fmt, ...) + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) + panic_smp_self_stop(); + ++ prev_prio = cons_atomic_enter(CONS_PRIO_PANIC); ++ + console_verbose(); + bust_spinlocks(1); + va_start(args, fmt); +@@ -382,6 +385,8 @@ void panic(const char *fmt, ...) + if (_crash_kexec_post_notifiers) + __crash_kexec(NULL); + ++ cons_atomic_flush(NULL, true); ++ + console_unblank(); + + /* +@@ -406,6 +411,7 @@ void panic(const char *fmt, ...) + * We can't use the "normal" timers since we just panicked. + */ + pr_emerg("Rebooting in %d seconds..\n", panic_timeout); ++ cons_atomic_flush(NULL, true); + + for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { + touch_nmi_watchdog(); +@@ -424,6 +430,7 @@ void panic(const char *fmt, ...) + */ + if (panic_reboot_mode != REBOOT_UNDEFINED) + reboot_mode = panic_reboot_mode; ++ cons_atomic_flush(NULL, true); + emergency_restart(); + } + #ifdef __sparc__ +@@ -436,12 +443,16 @@ void panic(const char *fmt, ...) 
+ } + #endif + #if defined(CONFIG_S390) ++ cons_atomic_flush(NULL, true); + disabled_wait(); + #endif + pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); + + /* Do not scroll important messages printed above */ + suppress_printk = 1; ++ ++ cons_atomic_exit(CONS_PRIO_PANIC, prev_prio); ++ + local_irq_enable(); + for (i = 0; ; i += PANIC_TIMER_STEP) { + touch_softlockup_watchdog(); +@@ -652,6 +663,10 @@ struct warn_args { + void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args) + { ++ enum cons_prio prev_prio; ++ ++ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY); ++ + disable_trace_on_warning(); + + if (file) +@@ -682,6 +697,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + + /* Just a warning, don't kill lockdep. */ + add_taint(taint, LOCKDEP_STILL_OK); ++ ++ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio); + } + + #ifndef __WARN_FLAGS +diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile +index f5b388e810b9f..b36683bd2f821 100644 +--- a/kernel/printk/Makefile ++++ b/kernel/printk/Makefile +@@ -1,6 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0-only + obj-y = printk.o +-obj-$(CONFIG_PRINTK) += printk_safe.o ++obj-$(CONFIG_PRINTK) += printk_safe.o printk_nobkl.o + obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o + obj-$(CONFIG_PRINTK_INDEX) += index.o + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 2a17704136f1d..6631fd70542f9 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -3,6 +3,8 @@ + * internal.h - printk internal definitions + */ + #include ++#include ++#include "printk_ringbuffer.h" + + #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) + void __init printk_sysctl_init(void); +@@ -12,8 +14,13 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + #define printk_sysctl_init() do { } while (0) + #endif + +-#ifdef CONFIG_PRINTK ++#define con_printk(lvl, con, fmt, ...) \ ++ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ ++ (con->flags & CON_NO_BKL) ? "" : "legacy ", \ ++ (con->flags & CON_BOOT) ? "boot" : "", \ ++ con->name, con->index, ##__VA_ARGS__) + ++#ifdef CONFIG_PRINTK + #ifdef CONFIG_PRINTK_CALLER + #define PRINTK_PREFIX_MAX 48 + #else +@@ -35,6 +42,12 @@ enum printk_info_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; + ++extern struct printk_ringbuffer *prb; ++extern bool have_bkl_console; ++extern bool printk_threads_enabled; ++ ++extern bool have_boot_console; ++ + __printf(4, 0) + int vprintk_store(int facility, int level, + const struct dev_printk_info *dev_info, +@@ -45,28 +58,98 @@ __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); + + bool printk_percpu_data_ready(void); + ++/* ++ * The printk_safe_enter()/_exit() macros mark code blocks using locks that ++ * would lead to deadlock if an interrupting context were to call printk() ++ * while the interrupted context was within such code blocks. ++ * ++ * When a CPU is in such a code block, an interrupting context calling ++ * printk() will only log the new message to the lockless ringbuffer and ++ * then trigger console printing using irqwork. 
++ */ ++ + #define printk_safe_enter_irqsave(flags) \ + do { \ +- local_irq_save(flags); \ +- __printk_safe_enter(); \ ++ __printk_safe_enter(&flags); \ + } while (0) + + #define printk_safe_exit_irqrestore(flags) \ + do { \ +- __printk_safe_exit(); \ +- local_irq_restore(flags); \ ++ __printk_safe_exit(&flags); \ + } while (0) + + void defer_console_output(void); + + u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags); ++ ++u64 cons_read_seq(struct console *con); ++void cons_nobkl_cleanup(struct console *con); ++bool cons_nobkl_init(struct console *con); ++bool cons_alloc_percpu_data(struct console *con); ++void cons_kthread_create(struct console *con); ++void cons_wake_threads(void); ++void cons_force_seq(struct console *con, u64 seq); ++void console_bkl_kthread_create(void); ++ ++/* ++ * Check if the given console is currently capable and allowed to print ++ * records. If the caller only works with certain types of consoles, the ++ * caller is responsible for checking the console type before calling ++ * this function. ++ */ ++static inline bool console_is_usable(struct console *con, short flags) ++{ ++ if (!(flags & CON_ENABLED)) ++ return false; ++ ++ if ((flags & CON_SUSPENDED)) ++ return false; ++ ++ /* ++ * The usability of a console varies depending on whether ++ * it is a NOBKL console or not. ++ */ ++ ++ if (flags & CON_NO_BKL) { ++ if (have_boot_console) ++ return false; ++ ++ } else { ++ if (!con->write) ++ return false; ++ /* ++ * Console drivers may assume that per-cpu resources have ++ * been allocated. So unless they're explicitly marked as ++ * being able to cope (CON_ANYTIME) don't call them until ++ * this CPU is officially up. ++ */ ++ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) ++ return false; ++ } ++ ++ return true; ++} ++ ++/** ++ * cons_kthread_wake - Wake up a printk thread ++ * @con: Console to operate on ++ */ ++static inline void cons_kthread_wake(struct console *con) ++{ ++ rcuwait_wake_up(&con->rcuwait); ++} ++ + #else + + #define PRINTK_PREFIX_MAX 0 + #define PRINTK_MESSAGE_MAX 0 + #define PRINTKRB_RECORD_MAX 0 + ++static inline void cons_kthread_wake(struct console *con) { } ++static inline void cons_kthread_create(struct console *con) { } ++#define printk_threads_enabled (false) ++ + /* + * In !PRINTK builds we still export console_sem + * semaphore and some of console functions (console_unlock()/etc.), so +@@ -76,8 +159,15 @@ u16 printk_parse_prefix(const char *text, int *level, + #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) + + static inline bool printk_percpu_data_ready(void) { return false; } ++static inline bool cons_nobkl_init(struct console *con) { return true; } ++static inline void cons_nobkl_cleanup(struct console *con) { } ++static inline bool console_is_usable(struct console *con, short flags) { return false; } ++static inline void cons_force_seq(struct console *con, u64 seq) { } ++ + #endif /* CONFIG_PRINTK */ + ++extern bool have_boot_console; ++ + /** + * struct printk_buffers - Buffers to read/format/output printk messages. + * @outbuf: After formatting, contains text to output. +@@ -103,3 +193,28 @@ struct printk_message { + u64 seq; + unsigned long dropped; + }; ++ ++/** ++ * struct cons_context_data - console context data ++ * @wctxt: Write context per priority level ++ * @pbufs: Buffer for storing the text ++ * ++ * Used for early boot and for per CPU data. ++ * ++ * The write contexts are allocated to avoid having them on stack, e.g. 
in ++ * warn() or panic(). ++ */ ++struct cons_context_data { ++ struct cons_write_context wctxt[CONS_PRIO_MAX]; ++ struct printk_buffers pbufs; ++}; ++ ++bool printk_get_next_message(struct printk_message *pmsg, u64 seq, ++ bool is_extended, bool may_supress); ++ ++#ifdef CONFIG_PRINTK ++ ++void console_prepend_dropped(struct printk_message *pmsg, ++ unsigned long dropped); ++ ++#endif +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index fd0c9f913940a..e2466366d4f84 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -442,6 +442,21 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + /* syslog_lock protects syslog_* variables and write access to clear_seq. */ + static DEFINE_MUTEX(syslog_lock); + ++/* ++ * Specifies if a BKL console was ever registered. Used to determine if the ++ * console lock/unlock dance is needed for console printing. ++ */ ++bool have_bkl_console; ++ ++/* ++ * Specifies if a boot console is registered. Used to determine if NOBKL ++ * consoles may be used since NOBKL consoles cannot synchronize with boot ++ * consoles. ++ */ ++bool have_boot_console; ++ ++static int unregister_console_locked(struct console *console); ++ + #ifdef CONFIG_PRINTK + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ +@@ -492,7 +507,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, + + static struct printk_ringbuffer printk_rb_dynamic; + +-static struct printk_ringbuffer *prb = &printk_rb_static; ++struct printk_ringbuffer *prb = &printk_rb_static; + + /* + * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before +@@ -696,9 +711,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, + return len; + } + +-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, +- bool is_extended, bool may_supress); +- + /* /dev/kmsg - userspace message inject/listen interface */ + struct devkmsg_user { + atomic64_t seq; +@@ -1100,7 +1112,19 @@ static inline void log_buf_add_cpu(void) {} + + static void __init set_percpu_data_ready(void) + { ++ struct hlist_node *tmp; ++ struct console *con; ++ ++ console_list_lock(); ++ ++ hlist_for_each_entry_safe(con, tmp, &console_list, node) { ++ if (!cons_alloc_percpu_data(con)) ++ unregister_console_locked(con); ++ } ++ + __printk_percpu_data_ready = true; ++ ++ console_list_unlock(); + } + + static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, +@@ -2276,6 +2300,7 @@ asmlinkage int vprintk_emit(int facility, int level, + const struct dev_printk_info *dev_info, + const char *fmt, va_list args) + { ++ struct cons_write_context wctxt = { }; + int printed_len; + bool in_sched = false; + +@@ -2296,16 +2321,25 @@ asmlinkage int vprintk_emit(int facility, int level, + + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + ++ /* ++ * The caller may be holding system-critical or ++ * timing-sensitive locks. Disable preemption during ++ * printing of all remaining records to all consoles so that ++ * this context can return as soon as possible. Hopefully ++ * another printk() caller will take over the printing. ++ */ ++ preempt_disable(); ++ ++ /* ++ * Flush the non-BKL consoles. This only leads to direct atomic ++ * printing for non-BKL consoles that do not have a printer ++ * thread available. Otherwise the printer thread will perform ++ * the printing. ++ */ ++ cons_atomic_flush(&wctxt, true); ++ + /* If called from the scheduler, we can not call up(). 
*/ +- if (!in_sched) { +- /* +- * The caller may be holding system-critical or +- * timing-sensitive locks. Disable preemption during +- * printing of all remaining records to all consoles so that +- * this context can return as soon as possible. Hopefully +- * another printk() caller will take over the printing. +- */ +- preempt_disable(); ++ if (!in_sched && have_bkl_console && !IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * Try to acquire and then immediately release the console + * semaphore. The release will print out buffers. With the +@@ -2314,10 +2348,15 @@ asmlinkage int vprintk_emit(int facility, int level, + */ + if (console_trylock_spinning()) + console_unlock(); +- preempt_enable(); + } + +- wake_up_klogd(); ++ preempt_enable(); ++ ++ cons_wake_threads(); ++ if (in_sched) ++ defer_console_output(); ++ else ++ wake_up_klogd(); + return printed_len; + } + EXPORT_SYMBOL(vprintk_emit); +@@ -2556,10 +2595,26 @@ MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to hig + */ + void suspend_console(void) + { ++ struct console *con; ++ + if (!console_suspend_enabled) + return; + pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); + pr_flush(1000, true); ++ ++ console_list_lock(); ++ for_each_console(con) ++ console_srcu_write_flags(con, con->flags | CON_SUSPENDED); ++ console_list_unlock(); ++ ++ /* ++ * Ensure that all SRCU list walks have completed. All printing ++ * contexts must be able to see that they are suspended so that it ++ * is guaranteed that all printing has stopped when this function ++ * completes. ++ */ ++ synchronize_srcu(&console_srcu); ++ + console_lock(); + console_suspended = 1; + up_console_sem(); +@@ -2567,11 +2622,39 @@ void suspend_console(void) + + void resume_console(void) + { ++ struct console *con; ++ short flags; ++ int cookie; ++ + if (!console_suspend_enabled) + return; + down_console_sem(); + console_suspended = 0; + console_unlock(); ++ ++ console_list_lock(); ++ for_each_console(con) ++ console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); ++ console_list_unlock(); ++ ++ /* ++ * Ensure that all SRCU list walks have completed. All printing ++ * contexts must be able to see they are no longer suspended so ++ * that they are guaranteed to wake up and resume printing. ++ */ ++ synchronize_srcu(&console_srcu); ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ flags = console_srcu_read_flags(con); ++ if (flags & CON_NO_BKL) ++ cons_kthread_wake(con); ++ } ++ console_srcu_read_unlock(cookie); ++ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && have_bkl_console) ++ wake_up_interruptible(&log_wait); ++ + pr_flush(1000, true); + } + +@@ -2586,7 +2669,7 @@ void resume_console(void) + */ + static int console_cpu_notify(unsigned int cpu) + { +- if (!cpuhp_tasks_frozen) { ++ if (!cpuhp_tasks_frozen && have_bkl_console) { + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); +@@ -2661,33 +2744,6 @@ static bool abandon_console_lock_in_panic(void) + return atomic_read(&panic_cpu) != raw_smp_processor_id(); + } + +-/* +- * Check if the given console is currently capable and allowed to print +- * records. +- * +- * Requires the console_srcu_read_lock. +- */ +-static inline bool console_is_usable(struct console *con) +-{ +- short flags = console_srcu_read_flags(con); +- +- if (!(flags & CON_ENABLED)) +- return false; +- +- if (!con->write) +- return false; +- +- /* +- * Console drivers may assume that per-cpu resources have been +- * allocated. 
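/*
 * Illustration only, not part of the patch: after vprintk_store() the
 * emitting context either flushes the legacy consoles itself via a
 * console-lock trylock or leaves them to whichever context currently
 * owns the lock; that owner keeps re-checking for newly stored records
 * before it finally drops the lock. The user-space sketch below models
 * just that hand-off, with a pthread mutex standing in for the console
 * semaphore; the kernel additionally defers to irq_work, the NOBKL
 * atomic flush and the printer threads introduced by this patch.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define N_RECORDS 10000

static atomic_long head;	/* next sequence to store ("ringbuffer") */
static atomic_long done;	/* next sequence to print ("console") */
static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with console_lock held; prints everything and drops the lock. */
static void flush_and_unlock(void)
{
	do {
		long limit = atomic_load(&head);

		while (atomic_load(&done) < limit)
			atomic_fetch_add(&done, 1);	/* "print" one record */

		pthread_mutex_unlock(&console_lock);
		/* Records stored after the flush? Retake the lock if possible,
		 * otherwise their emitter (or the new owner) handles them. */
	} while (atomic_load(&done) < atomic_load(&head) &&
		 pthread_mutex_trylock(&console_lock) == 0);
}

static void emit(void)
{
	atomic_fetch_add(&head, 1);	/* vprintk_store() */
	if (pthread_mutex_trylock(&console_lock) == 0)
		flush_and_unlock();
	/* trylock failed: the current owner re-checks before returning */
}

static void *producer(void *unused)
{
	for (int i = 0; i < N_RECORDS; i++)
		emit();
	return unused;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, producer, NULL);
	pthread_create(&b, NULL, producer, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("stored %ld, printed %ld\n", atomic_load(&head), atomic_load(&done));
	return 0;
}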
So unless they're explicitly marked as being able to +- * cope (CON_ANYTIME) don't call them until this CPU is officially up. +- */ +- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) +- return false; +- +- return true; +-} +- + static void __console_unlock(void) + { + console_locked = 0; +@@ -2709,7 +2765,7 @@ static void __console_unlock(void) + * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. + */ + #ifdef CONFIG_PRINTK +-static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) ++void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) + { + struct printk_buffers *pbufs = pmsg->pbufs; + const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); +@@ -2741,7 +2797,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d + pmsg->outbuf_len += len; + } + #else +-#define console_prepend_dropped(pmsg, dropped) ++static inline void console_prepend_dropped(struct printk_message *pmsg, ++ unsigned long dropped) { } + #endif /* CONFIG_PRINTK */ + + /* +@@ -2763,8 +2820,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d + * of @pmsg are valid. (See the documentation of struct printk_message + * for information about the @pmsg fields.) + */ +-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, +- bool is_extended, bool may_suppress) ++bool printk_get_next_message(struct printk_message *pmsg, u64 seq, ++ bool is_extended, bool may_suppress) + { + static int panic_console_dropped; + +@@ -2933,9 +2990,14 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { ++ short flags = console_srcu_read_flags(con); + bool progress; + +- if (!console_is_usable(con)) ++ /* console_flush_all() is only for legacy consoles. */ ++ if (flags & CON_NO_BKL) ++ continue; ++ ++ if (!console_is_usable(con, flags)) + continue; + any_usable = true; + +@@ -2973,30 +3035,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + return false; + } + +-/** +- * console_unlock - unblock the console subsystem from printing +- * +- * Releases the console_lock which the caller holds to block printing of +- * the console subsystem. +- * +- * While the console_lock was held, console output may have been buffered +- * by printk(). If this is the case, console_unlock(); emits +- * the output prior to releasing the lock. +- * +- * console_unlock(); may be called from any context. +- */ +-void console_unlock(void) ++static u64 console_flush_and_unlock(void) + { + bool do_cond_resched; + bool handover; + bool flushed; + u64 next_seq; + +- if (console_suspended) { +- up_console_sem(); +- return; +- } +- + /* + * Console drivers are called with interrupts disabled, so + * @console_may_schedule should be cleared before; however, we may +@@ -3033,6 +3078,39 @@ void console_unlock(void) + * fails, another context is already handling the printing. + */ + } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); ++ ++ return next_seq; ++} ++ ++/** ++ * console_unlock - unblock the console subsystem from printing ++ * ++ * Releases the console_lock which the caller holds to block printing of ++ * the console subsystem. ++ * ++ * While the console_lock was held, console output may have been buffered ++ * by printk(). If this is the case, console_unlock(); emits ++ * the output prior to releasing the lock. 
++ * ++ * console_unlock(); may be called from any context. ++ */ ++void console_unlock(void) ++{ ++ if (console_suspended) { ++ up_console_sem(); ++ return; ++ } ++ ++ /* ++ * PREEMPT_RT relies on kthread and atomic consoles for printing. ++ * It never attempts to print from console_unlock(). ++ */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ __console_unlock(); ++ return; ++ } ++ ++ console_flush_and_unlock(); + } + EXPORT_SYMBOL(console_unlock); + +@@ -3057,6 +3135,9 @@ void console_unblank(void) + struct console *c; + int cookie; + ++ if (!have_bkl_console) ++ return; ++ + /* + * Stop console printing because the unblank() callback may + * assume the console is not within its write() callback. +@@ -3065,6 +3146,10 @@ void console_unblank(void) + * In that case, attempt a trylock as best-effort. + */ + if (oops_in_progress) { ++ /* Semaphores are not NMI-safe. */ ++ if (in_nmi()) ++ return; ++ + if (down_trylock_console_sem() != 0) + return; + } else +@@ -3094,23 +3179,46 @@ void console_unblank(void) + */ + void console_flush_on_panic(enum con_flush_mode mode) + { ++ struct console *c; ++ short flags; ++ int cookie; ++ u64 seq; ++ ++ seq = prb_first_valid_seq(prb); ++ ++ /* ++ * Safely flush the atomic consoles before trying to flush any ++ * BKL/legacy consoles. ++ */ ++ if (mode == CONSOLE_REPLAY_ALL) { ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(c) { ++ flags = console_srcu_read_flags(c); ++ if (flags & CON_NO_BKL) ++ cons_force_seq(c, seq); ++ } ++ console_srcu_read_unlock(cookie); ++ } ++ cons_atomic_flush(NULL, true); ++ ++ if (!have_bkl_console) ++ return; ++ + /* + * If someone else is holding the console lock, trylock will fail + * and may_schedule may be set. Ignore and proceed to unlock so + * that messages are flushed out. As this can be called from any + * context and we don't want to get preempted while flushing, + * ensure may_schedule is cleared. ++ * ++ * Since semaphores are not NMI-safe, the console lock must be ++ * ignored if the panic is in NMI context. + */ +- console_trylock(); ++ if (!in_nmi()) ++ console_trylock(); + console_may_schedule = 0; + + if (mode == CONSOLE_REPLAY_ALL) { +- struct console *c; +- int cookie; +- u64 seq; +- +- seq = prb_first_valid_seq(prb); +- + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + /* +@@ -3122,7 +3230,8 @@ void console_flush_on_panic(enum con_flush_mode mode) + } + console_srcu_read_unlock(cookie); + } +- console_unlock(); ++ if (!in_nmi()) ++ console_unlock(); + } + + /* +@@ -3179,13 +3288,118 @@ EXPORT_SYMBOL(console_stop); + + void console_start(struct console *console) + { ++ short flags; ++ + console_list_lock(); + console_srcu_write_flags(console, console->flags | CON_ENABLED); ++ flags = console->flags; + console_list_unlock(); ++ ++ /* ++ * Ensure that all SRCU list walks have completed. The related ++ * printing context must be able to see it is enabled so that ++ * it is guaranteed to wake up and resume printing. 
++ */ ++ synchronize_srcu(&console_srcu); ++ ++ if (flags & CON_NO_BKL) ++ cons_kthread_wake(console); ++ else if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ wake_up_interruptible(&log_wait); ++ + __pr_flush(console, 1000, true); + } + EXPORT_SYMBOL(console_start); + ++static struct task_struct *console_bkl_kthread; ++ ++static bool printer_should_wake(u64 seq) ++{ ++ bool available = false; ++ struct console *con; ++ int cookie; ++ ++ if (kthread_should_stop()) ++ return true; ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ short flags = console_srcu_read_flags(con); ++ ++ if (flags & CON_NO_BKL) ++ continue; ++ if (!console_is_usable(con, flags)) ++ continue; ++ /* ++ * It is safe to read @seq because only this ++ * thread context updates @seq. ++ */ ++ if (prb_read_valid(prb, con->seq, NULL)) { ++ available = true; ++ break; ++ } ++ } ++ console_srcu_read_unlock(cookie); ++ ++ return available; ++} ++ ++static int console_bkl_kthread_func(void *unused) ++{ ++ u64 seq = 0; ++ int error; ++ ++ for (;;) { ++ error = wait_event_interruptible(log_wait, printer_should_wake(seq)); ++ ++ if (kthread_should_stop()) ++ break; ++ ++ if (error) ++ continue; ++ ++ console_lock(); ++ if (console_suspended) ++ up_console_sem(); ++ else ++ seq = console_flush_and_unlock(); ++ } ++ return 0; ++} ++ ++void console_bkl_kthread_create(void) ++{ ++ struct task_struct *kt; ++ struct console *c; ++ ++ lockdep_assert_held(&console_mutex); ++ ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ return; ++ ++ if (!printk_threads_enabled || console_bkl_kthread) ++ return; ++ ++ for_each_console(c) { ++ if (c->flags & CON_BOOT) ++ return; ++ } ++ ++ kt = kthread_run(console_bkl_kthread_func, NULL, "pr/bkl"); ++ if (IS_ERR(kt)) { ++ pr_err("unable to start BKL printing thread\n"); ++ return; ++ } ++ ++ console_bkl_kthread = kt; ++ ++ /* ++ * It is important that console printing threads are scheduled ++ * shortly after a printk call and with generous runtime budgets. ++ */ ++ sched_set_normal(console_bkl_kthread, -20); ++} ++ + static int __read_mostly keep_bootcon; + + static int __init keep_bootcon_setup(char *str) +@@ -3269,11 +3483,6 @@ static void try_enable_default_console(struct console *newcon) + newcon->flags |= CON_CONSDEV; + } + +-#define con_printk(lvl, con, fmt, ...) \ +- printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ +- (con->flags & CON_BOOT) ? "boot" : "", \ +- con->name, con->index, ##__VA_ARGS__) +- + static void console_init_seq(struct console *newcon, bool bootcon_registered) + { + struct console *con; +@@ -3338,8 +3547,6 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) + #define console_first() \ + hlist_entry(console_list.first, struct console, node) + +-static int unregister_console_locked(struct console *console); +- + /* + * The console driver calls this routine during kernel initialization + * to register the console printing procedure with printk() and to +@@ -3431,6 +3638,16 @@ void register_console(struct console *newcon) + newcon->dropped = 0; + console_init_seq(newcon, bootcon_registered); + ++ if (!(newcon->flags & CON_NO_BKL)) { ++ have_bkl_console = true; ++ console_bkl_kthread_create(); ++ } else if (!cons_nobkl_init(newcon)) { ++ goto unlock; ++ } ++ ++ if (newcon->flags & CON_BOOT) ++ have_boot_console = true; ++ + /* + * Put this console in the list - keep the + * preferred driver at the head of the list. 
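/*
 * Illustration only, not part of the patch: on PREEMPT_RT the legacy
 * consoles are flushed by the dedicated "pr/bkl" kthread created above,
 * which sleeps on log_wait until printer_should_wake() reports pending
 * records and then prints under the console lock. A condensed user-space
 * analogue of that wait/flush loop, with a pthread condition variable
 * standing in for log_wait (control flow only, none of the kernel's
 * console locking):
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static long stored, printed;
static bool stop;

/* Rough analogue of printer_should_wake(). */
static bool should_wake(void)
{
	return stop || printed < stored;
}

static void *printer_thread(void *unused)
{
	pthread_mutex_lock(&lock);
	for (;;) {
		while (!should_wake())
			pthread_cond_wait(&wake, &lock);  /* wait_event_interruptible() */
		if (stop && printed == stored)
			break;
		while (printed < stored) {
			long seq = printed++;

			pthread_mutex_unlock(&lock);
			printf("printing record %ld\n", seq);  /* console_flush_and_unlock() */
			pthread_mutex_lock(&lock);
		}
	}
	pthread_mutex_unlock(&lock);
	return unused;
}

static void emit_record(void)
{
	pthread_mutex_lock(&lock);
	stored++;			/* vprintk_store() */
	pthread_cond_signal(&wake);	/* wake_up_interruptible(&log_wait) */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t kt;

	pthread_create(&kt, NULL, printer_thread, NULL);
	for (int i = 0; i < 5; i++)
		emit_record();

	pthread_mutex_lock(&lock);
	stop = true;			/* kthread_should_stop() */
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);
	pthread_join(kt, NULL);
	return 0;
}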
+@@ -3474,6 +3691,9 @@ void register_console(struct console *newcon) + if (con->flags & CON_BOOT) + unregister_console_locked(con); + } ++ ++ /* All boot consoles have been unregistered. */ ++ have_boot_console = false; + } + unlock: + console_list_unlock(); +@@ -3483,11 +3703,13 @@ EXPORT_SYMBOL(register_console); + /* Must be called under console_list_lock(). */ + static int unregister_console_locked(struct console *console) + { ++ struct console *c; ++ bool is_boot_con; + int res; + + lockdep_assert_console_list_lock_held(); + +- con_printk(KERN_INFO, console, "disabled\n"); ++ is_boot_con = console->flags & CON_BOOT; + + res = _braille_unregister_console(console); + if (res < 0) +@@ -3495,12 +3717,13 @@ static int unregister_console_locked(struct console *console) + if (res > 0) + return 0; + +- /* Disable it unconditionally */ +- console_srcu_write_flags(console, console->flags & ~CON_ENABLED); +- + if (!console_is_registered_locked(console)) + return -ENODEV; + ++ console_srcu_write_flags(console, console->flags & ~CON_ENABLED); ++ ++ con_printk(KERN_INFO, console, "disabled\n"); ++ + hlist_del_init_rcu(&console->node); + + /* +@@ -3522,11 +3745,23 @@ static int unregister_console_locked(struct console *console) + */ + synchronize_srcu(&console_srcu); + ++ if (console->flags & CON_NO_BKL) ++ cons_nobkl_cleanup(console); ++ + console_sysfs_notify(); + + if (console->exit) + res = console->exit(console); + ++ /* ++ * Each time a boot console unregisters, try to start up the printing ++ * threads. They will only start if this was the last boot console. ++ */ ++ if (is_boot_con) { ++ for_each_console(c) ++ cons_kthread_create(c); ++ } ++ + return res; + } + +@@ -3688,31 +3923,36 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + + /* + * Hold the console_lock to guarantee safe access to +- * console->seq and to prevent changes to @console_suspended +- * until all consoles have been processed. ++ * console->seq. + */ + console_lock(); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { ++ short flags; ++ + if (con && con != c) + continue; +- if (!console_is_usable(c)) ++ ++ flags = console_srcu_read_flags(c); ++ ++ if (!console_is_usable(c, flags)) + continue; ++ ++ /* ++ * Since the console is locked, use this opportunity ++ * to update console->seq for NOBKL consoles. ++ */ ++ if (flags & CON_NO_BKL) ++ c->seq = cons_read_seq(c); ++ + printk_seq = c->seq; + if (printk_seq < seq) + diff += seq - printk_seq; + } + console_srcu_read_unlock(cookie); + +- /* +- * If consoles are suspended, it cannot be expected that they +- * make forward progress, so timeout immediately. @diff is +- * still used to return a valid flush status. +- */ +- if (console_suspended) +- remaining = 0; +- else if (diff != last_diff && reset_on_progress) ++ if (diff != last_diff && reset_on_progress) + remaining = timeout_ms; + + console_unlock(); +@@ -3770,9 +4010,17 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) + int pending = this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_OUTPUT) { +- /* If trylock fails, someone else is doing the printing */ +- if (console_trylock()) +- console_unlock(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ /* The BKL thread waits on @log_wait. */ ++ pending |= PRINTK_PENDING_WAKEUP; ++ } else { ++ /* ++ * If trylock fails, some other context ++ * will do the printing. 
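/*
 * Illustration only, not part of the patch: the deferred-printing hook
 * around this point records what needs doing as bits in a per-CPU
 * printk_pending word and lets the irq_work handler consume all of them
 * with one xchg. A stand-alone model of that set-then-consume pattern
 * (a single global word here, with the handler called directly instead
 * of being queued as irq_work):
 */
#include <stdatomic.h>
#include <stdio.h>

#define DEMO_PENDING_WAKEUP	0x01
#define DEMO_PENDING_OUTPUT	0x02

static atomic_int demo_pending;

/* Rough analogue of __wake_up_klogd(): note the work, kick the worker. */
static void demo_request(int val)
{
	atomic_fetch_or(&demo_pending, val);
	/* the kernel queues irq_work here; the demo runs the handler inline */
}

/* Rough analogue of wake_up_klogd_work_func(): consume everything at once. */
static void demo_handler(void)
{
	int pending = atomic_exchange(&demo_pending, 0);

	if (pending & DEMO_PENDING_OUTPUT)
		printf("flush consoles (or wake the BKL thread on PREEMPT_RT)\n");
	if (pending & DEMO_PENDING_WAKEUP)
		printf("wake up klogd/log_wait waiters\n");
}

int main(void)
{
	demo_request(DEMO_PENDING_WAKEUP);
	demo_request(DEMO_PENDING_WAKEUP | DEMO_PENDING_OUTPUT);
	demo_handler();		/* both requests are handled in one pass */
	return 0;
}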
++ */ ++ if (console_trylock()) ++ console_unlock(); ++ } + } + + if (pending & PRINTK_PENDING_WAKEUP) +@@ -3807,33 +4055,58 @@ static void __wake_up_klogd(int val) + preempt_enable(); + } + ++/** ++ * wake_up_klogd - Wake kernel logging daemon ++ * ++ * Use this function when new records have been added to the ringbuffer ++ * and the console printing for those records is handled elsewhere. In ++ * this case only the logging daemon needs to be woken. ++ * ++ * Context: Any context. ++ */ + void wake_up_klogd(void) + { + __wake_up_klogd(PRINTK_PENDING_WAKEUP); + } + ++/** ++ * defer_console_output - Wake kernel logging daemon and trigger ++ * console printing in a deferred context ++ * ++ * Use this function when new records have been added to the ringbuffer ++ * but the current context is unable to perform the console printing. ++ * This function also wakes the logging daemon. ++ * ++ * Context: Any context. ++ */ + void defer_console_output(void) + { ++ int val = PRINTK_PENDING_WAKEUP; ++ + /* + * New messages may have been added directly to the ringbuffer + * using vprintk_store(), so wake any waiters as well. + */ +- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); ++ if (have_bkl_console) ++ val |= PRINTK_PENDING_OUTPUT; ++ __wake_up_klogd(val); + } + + void printk_trigger_flush(void) + { ++ struct cons_write_context wctxt = { }; ++ ++ preempt_disable(); ++ cons_atomic_flush(&wctxt, true); ++ preempt_enable(); ++ ++ cons_wake_threads(); + defer_console_output(); + } + + int vprintk_deferred(const char *fmt, va_list args) + { +- int r; +- +- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); +- defer_console_output(); +- +- return r; ++ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); + } + + int _printk_deferred(const char *fmt, ...) +diff --git a/kernel/printk/printk_nobkl.c b/kernel/printk/printk_nobkl.c +new file mode 100644 +index 0000000000000..e0b818a4f8b38 +--- /dev/null ++++ b/kernel/printk/printk_nobkl.c +@@ -0,0 +1,1825 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++// Copyright (C) 2022 Linutronix GmbH, John Ogness ++// Copyright (C) 2022 Intel, Thomas Gleixner ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "printk_ringbuffer.h" ++#include "internal.h" ++/* ++ * Printk implementation for consoles that do not depend on the BKL style ++ * console_lock mechanism. ++ * ++ * Console is locked on a CPU when state::locked is set and state:cpu == ++ * current CPU. This is valid for the current execution context. ++ * ++ * Nesting execution contexts on the same CPU can carefully take over ++ * if the driver allows reentrancy via state::unsafe = false. When the ++ * interrupted context resumes it checks the state before entering ++ * an unsafe region and aborts the operation if it detects a takeover. ++ * ++ * In case of panic or emergency the nesting context can take over the ++ * console forcefully. The write callback is then invoked with the unsafe ++ * flag set in the write context data, which allows the driver side to avoid ++ * locks and to evaluate the driver state so it can use an emergency path ++ * or repair the state instead of blindly assuming that it works. ++ * ++ * If the interrupted context touches the assigned record buffer after ++ * takeover, it does not cause harm because at the same execution level ++ * there is no concurrency on the same CPU. A threaded printer always has ++ * its own record buffer so it can never interfere with any of the per CPU ++ * record buffers. 
++ * ++ * A concurrent writer on a different CPU can request to take over the ++ * console by: ++ * ++ * 1) Carefully writing the desired state into state[REQ] ++ * if there is no same or higher priority request pending. ++ * This locks state[REQ] except for higher priority ++ * waiters. ++ * ++ * 2) Setting state[CUR].req_prio unless a same or higher ++ * priority waiter won the race. ++ * ++ * 3) Carefully spin on state[CUR] until that is locked with the ++ * expected state. When the state is not the expected one then it ++ * has to verify that state[REQ] is still the same and that ++ * state[CUR] has not been taken over or unlocked. ++ * ++ * The unlocker hands over to state[REQ], but only if state[CUR] ++ * matches. ++ * ++ * In case that the owner does not react on the request and does not make ++ * observable progress, the waiter will timeout and can then decide to do ++ * a hostile takeover. ++ */ ++ ++#define copy_full_state(_dst, _src) do { _dst = _src; } while (0) ++#define copy_bit_state(_dst, _src) do { _dst.bits = _src.bits; } while (0) ++ ++#ifdef CONFIG_64BIT ++#define copy_seq_state64(_dst, _src) do { _dst.seq = _src.seq; } while (0) ++#else ++#define copy_seq_state64(_dst, _src) do { } while (0) ++#endif ++ ++enum state_selector { ++ CON_STATE_CUR, ++ CON_STATE_REQ, ++}; ++ ++/** ++ * cons_state_set - Helper function to set the console state ++ * @con: Console to update ++ * @which: Selects real state or handover state ++ * @new: The new state to write ++ * ++ * Only to be used when the console is not yet or no longer visible in the ++ * system. Otherwise use cons_state_try_cmpxchg(). ++ */ ++static inline void cons_state_set(struct console *con, enum state_selector which, ++ struct cons_state *new) ++{ ++ atomic_long_set(&ACCESS_PRIVATE(con, atomic_state[which]), new->atom); ++} ++ ++/** ++ * cons_state_read - Helper function to read the console state ++ * @con: Console to update ++ * @which: Selects real state or handover state ++ * @state: The state to store the result ++ */ ++static inline void cons_state_read(struct console *con, enum state_selector which, ++ struct cons_state *state) ++{ ++ state->atom = atomic_long_read(&ACCESS_PRIVATE(con, atomic_state[which])); ++} ++ ++/** ++ * cons_state_try_cmpxchg() - Helper function for atomic_long_try_cmpxchg() on console state ++ * @con: Console to update ++ * @which: Selects real state or handover state ++ * @old: Old/expected state ++ * @new: New state ++ * ++ * Returns: True on success, false on fail ++ */ ++static inline bool cons_state_try_cmpxchg(struct console *con, ++ enum state_selector which, ++ struct cons_state *old, ++ struct cons_state *new) ++{ ++ return atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_state[which]), ++ &old->atom, new->atom); ++} ++ ++/** ++ * cons_state_full_match - Check whether the full state matches ++ * @cur: The state to check ++ * @prev: The previous state ++ * ++ * Returns: True if matching, false otherwise. ++ * ++ * Check the full state including state::seq on 64bit. For take over ++ * detection. ++ */ ++static inline bool cons_state_full_match(struct cons_state cur, ++ struct cons_state prev) ++{ ++ /* ++ * req_prio can be set by a concurrent writer for friendly ++ * handover. Ignore it in the comparison. ++ */ ++ cur.req_prio = prev.req_prio; ++ return cur.atom == prev.atom; ++} ++ ++/** ++ * cons_state_bits_match - Check for matching state bits ++ * @cur: The state to check ++ * @prev: The previous state ++ * ++ * Returns: True if state matches, false otherwise. 
++ * ++ * Contrary to cons_state_full_match this checks only the bits and ignores ++ * a sequence change on 64bits. On 32bit the two functions are identical. ++ */ ++static inline bool cons_state_bits_match(struct cons_state cur, struct cons_state prev) ++{ ++ /* ++ * req_prio can be set by a concurrent writer for friendly ++ * handover. Ignore it in the comparison. ++ */ ++ cur.req_prio = prev.req_prio; ++ return cur.bits == prev.bits; ++} ++ ++/** ++ * cons_check_panic - Check whether a remote CPU is in panic ++ * ++ * Returns: True if a remote CPU is in panic, false otherwise. ++ */ ++static inline bool cons_check_panic(void) ++{ ++ unsigned int pcpu = atomic_read(&panic_cpu); ++ ++ return pcpu != PANIC_CPU_INVALID && pcpu != smp_processor_id(); ++} ++ ++static struct cons_context_data early_cons_ctxt_data __initdata; ++ ++/** ++ * cons_context_set_pbufs - Set the output text buffer for the current context ++ * @ctxt: Pointer to the acquire context ++ * ++ * Buffer selection: ++ * 1) Early boot uses the global (initdata) buffer ++ * 2) Printer threads use the dynamically allocated per-console buffers ++ * 3) All other contexts use the per CPU buffers ++ * ++ * This guarantees that there is no concurrency on the output records ever. ++ * Early boot and per CPU nesting is not a problem. The takeover logic ++ * tells the interrupted context that the buffer has been overwritten. ++ * ++ * There are two critical regions that matter: ++ * ++ * 1) Context is filling the buffer with a record. After interruption ++ * it continues to sprintf() the record and before it goes to ++ * write it out, it checks the state, notices the takeover, discards ++ * the content and backs out. ++ * ++ * 2) Context is in a unsafe critical region in the driver. After ++ * interruption it might read overwritten data from the output ++ * buffer. When it leaves the critical region it notices and backs ++ * out. Hostile takeovers in driver critical regions are best effort ++ * and there is not much that can be done about that. ++ */ ++static __ref void cons_context_set_pbufs(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ ++ /* Thread context or early boot? */ ++ if (ctxt->thread) ++ ctxt->pbufs = con->thread_pbufs; ++ else if (!con->pcpu_data) ++ ctxt->pbufs = &early_cons_ctxt_data.pbufs; ++ else ++ ctxt->pbufs = &(this_cpu_ptr(con->pcpu_data)->pbufs); ++} ++ ++/** ++ * cons_seq_init - Helper function to initialize the console sequence ++ * @con: Console to work on ++ * ++ * Set @con->atomic_seq to the starting record, or if that record no ++ * longer exists, the oldest available record. For init only. Do not ++ * use for runtime updates. ++ */ ++static void cons_seq_init(struct console *con) ++{ ++ u32 seq = (u32)max_t(u64, con->seq, prb_first_valid_seq(prb)); ++#ifdef CONFIG_64BIT ++ struct cons_state state; ++ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ state.seq = seq; ++ cons_state_set(con, CON_STATE_CUR, &state); ++#else ++ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq); ++#endif ++} ++ ++/** ++ * cons_force_seq - Force a specified sequence number for a console ++ * @con: Console to work on ++ * @seq: Sequence number to force ++ * ++ * This function is only intended to be used in emergency situations. 
In ++ * particular: console_flush_on_panic(CONSOLE_REPLAY_ALL) ++ */ ++void cons_force_seq(struct console *con, u64 seq) ++{ ++#ifdef CONFIG_64BIT ++ struct cons_state old; ++ struct cons_state new; ++ ++ do { ++ cons_state_read(con, CON_STATE_CUR, &old); ++ copy_bit_state(new, old); ++ new.seq = seq; ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)); ++#else ++ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq); ++#endif ++} ++ ++static inline u64 cons_expand_seq(u64 seq) ++{ ++ u64 rbseq; ++ ++ /* ++ * The provided sequence is only the lower 32bits of the ringbuffer ++ * sequence. It needs to be expanded to 64bit. Get the next sequence ++ * number from the ringbuffer and fold it. ++ */ ++ rbseq = prb_next_seq(prb); ++ seq = rbseq - ((u32)rbseq - (u32)seq); ++ ++ return seq; ++} ++ ++/** ++ * cons_read_seq - Read the current console sequence ++ * @con: Console to read the sequence of ++ * ++ * Returns: Sequence number of the next record to print on @con. ++ */ ++u64 cons_read_seq(struct console *con) ++{ ++ u64 seq; ++#ifdef CONFIG_64BIT ++ struct cons_state state; ++ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ seq = state.seq; ++#else ++ seq = atomic_read(&ACCESS_PRIVATE(con, atomic_seq)); ++#endif ++ return cons_expand_seq(seq); ++} ++ ++/** ++ * cons_context_set_seq - Setup the context with the next sequence to print ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * On return the retrieved sequence number is stored in ctxt->oldseq. ++ * ++ * The sequence number is safe in forceful takeover situations. ++ * ++ * Either the writer succeeded to update before it got interrupted ++ * or it failed. In the latter case the takeover will print the ++ * same line again. ++ * ++ * The sequence is only the lower 32bits of the ringbuffer sequence. The ++ * ringbuffer must be 2^31 records ahead to get out of sync. This needs ++ * some care when starting a console, i.e setting the sequence to 0 is ++ * wrong. It has to be set to the oldest valid sequence in the ringbuffer ++ * as that cannot be more than 2^31 records away ++ * ++ * On 64bit the 32bit sequence is part of console::state, which is saved ++ * in @ctxt->state. This prevents the 32bit update race. ++ */ ++static void cons_context_set_seq(struct cons_context *ctxt) ++{ ++#ifdef CONFIG_64BIT ++ ctxt->oldseq = ctxt->state.seq; ++#else ++ ctxt->oldseq = atomic_read(&ACCESS_PRIVATE(ctxt->console, atomic_seq)); ++#endif ++ ctxt->oldseq = cons_expand_seq(ctxt->oldseq); ++ ctxt->newseq = ctxt->oldseq; ++} ++ ++/** ++ * cons_seq_try_update - Try to update the console sequence number ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * Returns: True if the console sequence was updated, false otherwise. ++ * ++ * Internal helper as the logic is different on 32bit and 64bit. ++ * ++ * On 32 bit the sequence is separate from state and therefore ++ * subject to a subtle race in the case of hostile takeovers. ++ * ++ * On 64 bit the sequence is part of the state and therefore safe ++ * vs. hostile takeovers. ++ * ++ * In case of fail the console has been taken over and @ctxt is ++ * invalid. Caller has to reacquire the console. ++ */ ++#ifdef CONFIG_64BIT ++static bool cons_seq_try_update(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state old; ++ struct cons_state new; ++ ++ cons_state_read(con, CON_STATE_CUR, &old); ++ do { ++ /* Make sure this context is still the owner. 
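/*
 * Illustration only, not part of the patch: only the low 32 bits of the
 * per-console sequence are kept in the atomic state, so cons_expand_seq()
 * re-anchors them against the ringbuffer's full 64-bit sequence. The
 * stand-alone example below applies the same fold to made-up values and
 * shows why a console must never start more than 2^31 records behind:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t demo_expand_seq(uint64_t rbseq, uint32_t seq32)
{
	/* Same arithmetic as cons_expand_seq(): fold in 32-bit space. */
	return rbseq - (uint32_t)((uint32_t)rbseq - seq32);
}

int main(void)
{
	uint64_t rbseq = 0x100000005ULL;  /* ringbuffer already wrapped: 2^32 + 5 */
	uint32_t seq32 = 0xfffffffe;      /* console stored a pre-wrap position  */

	/* The console is 7 records behind, i.e. 0xfffffffe. */
	printf("expanded: 0x%" PRIx64 "\n", demo_expand_seq(rbseq, seq32));
	return 0;
}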
*/ ++ if (!cons_state_bits_match(old, ctxt->state)) ++ return false; ++ ++ /* Preserve bit state */ ++ copy_bit_state(new, old); ++ new.seq = ctxt->newseq; ++ ++ /* ++ * Can race with hostile takeover or with a handover ++ * request. ++ */ ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)); ++ ++ copy_full_state(ctxt->state, new); ++ ctxt->oldseq = ctxt->newseq; ++ ++ return true; ++} ++#else ++static bool cons_release(struct cons_context *ctxt); ++static bool cons_seq_try_update(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state state; ++ int pcpu; ++ u32 old; ++ u32 new; ++ ++ /* ++ * There is a corner case that needs to be considered here: ++ * ++ * CPU0 CPU1 ++ * printk() ++ * acquire() -> emergency ++ * write() acquire() ++ * update_seq() ++ * state == OK ++ * --> NMI ++ * takeover() ++ * <--- write() ++ * cmpxchg() succeeds update_seq() ++ * cmpxchg() fails ++ * ++ * There is nothing that can be done about this other than having ++ * yet another state bit that needs to be tracked and analyzed, ++ * but fails to cover the problem completely. ++ * ++ * No other scenarios expose such a problem. On same CPU takeovers ++ * the cmpxchg() always fails on the interrupted context after the ++ * interrupting context finished printing, but that's fine as it ++ * does not own the console anymore. The state check after the ++ * failed cmpxchg prevents that. ++ */ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ /* Make sure this context is still the owner. */ ++ if (!cons_state_bits_match(state, ctxt->state)) ++ return false; ++ ++ /* ++ * Get the original sequence number that was retrieved ++ * from @con->atomic_seq. @con->atomic_seq should be still ++ * the same. 32bit truncates. See cons_context_set_seq(). ++ */ ++ old = (u32)ctxt->oldseq; ++ new = (u32)ctxt->newseq; ++ if (atomic_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_seq), &old, new)) { ++ ctxt->oldseq = ctxt->newseq; ++ return true; ++ } ++ ++ /* ++ * Reread the state. If this context does not own the console anymore ++ * then it cannot touch the sequence again. ++ */ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ if (!cons_state_bits_match(state, ctxt->state)) ++ return false; ++ ++ pcpu = atomic_read(&panic_cpu); ++ if (pcpu == smp_processor_id()) { ++ /* ++ * This is the panic CPU. Emitting a warning here does not ++ * help at all. The callchain is clear and the priority is ++ * to get the messages out. In the worst case duplicated ++ * ones. That's a job for postprocessing. ++ */ ++ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), new); ++ ctxt->oldseq = ctxt->newseq; ++ return true; ++ } ++ ++ /* ++ * Only emit a warning when this happens outside of a panic ++ * situation as on panic it's neither useful nor helping to let the ++ * panic CPU get the important stuff out. ++ */ ++ WARN_ON_ONCE(pcpu == PANIC_CPU_INVALID); ++ ++ cons_release(ctxt); ++ return false; ++} ++#endif ++ ++/** ++ * cons_cleanup_handover - Cleanup a handover request ++ * @ctxt: Pointer to acquire context ++ * ++ * @ctxt->hov_state contains the state to clean up ++ */ ++static void cons_cleanup_handover(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state new; ++ ++ /* ++ * No loop required. Either hov_state is still the same or ++ * not. 
++ */ ++ new.atom = 0; ++ cons_state_try_cmpxchg(con, CON_STATE_REQ, &ctxt->hov_state, &new); ++} ++ ++/** ++ * cons_setup_handover - Setup a handover request ++ * @ctxt: Pointer to acquire context ++ * ++ * Returns: True if a handover request was setup, false otherwise. ++ * ++ * On success @ctxt->hov_state contains the requested handover state ++ * ++ * On failure this context is not allowed to request a handover from the ++ * current owner. Reasons would be priority too low or a remote CPU in panic. ++ * In both cases this context should give up trying to acquire the console. ++ */ ++static bool cons_setup_handover(struct cons_context *ctxt) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ struct cons_state old; ++ struct cons_state hstate = { ++ .locked = 1, ++ .cur_prio = ctxt->prio, ++ .cpu = cpu, ++ }; ++ ++ /* ++ * Try to store hstate in @con->atomic_state[REQ]. This might ++ * race with a higher priority waiter. ++ */ ++ cons_state_read(con, CON_STATE_REQ, &old); ++ do { ++ if (cons_check_panic()) ++ return false; ++ ++ /* Same or higher priority waiter exists? */ ++ if (old.cur_prio >= ctxt->prio) ++ return false; ++ ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_REQ, &old, &hstate)); ++ ++ /* Save that state for comparison in spinwait */ ++ copy_full_state(ctxt->hov_state, hstate); ++ return true; ++} ++ ++/** ++ * cons_setup_request - Setup a handover request in state[CUR] ++ * @ctxt: Pointer to acquire context ++ * @old: The state that was used to make the decision to spin wait ++ * ++ * Returns: True if a handover request was setup in state[CUR], false ++ * otherwise. ++ * ++ * On success @ctxt->req_state contains the request state that was set in ++ * state[CUR] ++ * ++ * On failure this context encountered unexpected state values. This ++ * context should retry the full handover request setup process (the ++ * handover request setup by cons_setup_handover() is now invalidated ++ * and must be performed again). ++ */ ++static bool cons_setup_request(struct cons_context *ctxt, struct cons_state old) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state cur; ++ struct cons_state new; ++ ++ /* Now set the request in state[CUR] */ ++ cons_state_read(con, CON_STATE_CUR, &cur); ++ do { ++ if (cons_check_panic()) ++ goto cleanup; ++ ++ /* Bit state changed vs. the decision to spinwait? */ ++ if (!cons_state_bits_match(cur, old)) ++ goto cleanup; ++ ++ /* ++ * A higher or equal priority context already setup a ++ * request? ++ */ ++ if (cur.req_prio >= ctxt->prio) ++ goto cleanup; ++ ++ /* Setup a request for handover. */ ++ copy_full_state(new, cur); ++ new.req_prio = ctxt->prio; ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new)); ++ ++ /* Save that state for comparison in spinwait */ ++ copy_bit_state(ctxt->req_state, new); ++ return true; ++ ++cleanup: ++ cons_cleanup_handover(ctxt); ++ return false; ++} ++ ++/** ++ * cons_try_acquire_spin - Complete the spinwait attempt ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * @ctxt->hov_state contains the handover state that was set in ++ * state[REQ] ++ * @ctxt->req_state contains the request state that was set in ++ * state[CUR] ++ * ++ * Returns: 0 if successfully locked. -EBUSY on timeout. -EAGAIN on ++ * unexpected state values. ++ * ++ * On success @ctxt->state contains the new state that was set in ++ * state[CUR] ++ * ++ * On -EBUSY failure this context timed out. 
This context should either ++ * give up or attempt a hostile takeover. ++ * ++ * On -EAGAIN failure this context encountered unexpected state values. ++ * This context should retry the full handover request setup process (the ++ * handover request setup by cons_setup_handover() is now invalidated and ++ * must be performed again). ++ */ ++static int cons_try_acquire_spin(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state cur; ++ struct cons_state new; ++ int err = -EAGAIN; ++ int timeout; ++ ++ /* Now wait for the other side to hand over */ ++ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { ++ /* Timeout immediately if a remote panic is detected. */ ++ if (cons_check_panic()) ++ break; ++ ++ cons_state_read(con, CON_STATE_CUR, &cur); ++ ++ /* ++ * If the real state of the console matches the handover state ++ * that this context setup, then the handover was a success ++ * and this context is now the owner. ++ * ++ * Note that this might have raced with a new higher priority ++ * requester coming in after the lock was handed over. ++ * However, that requester will see that the owner changes and ++ * setup a new request for the current owner (this context). ++ */ ++ if (cons_state_bits_match(cur, ctxt->hov_state)) ++ goto success; ++ ++ /* ++ * If state changed since the request was made, give up as ++ * it is no longer consistent. This must include ++ * state::req_prio since there could be a higher priority ++ * request available. ++ */ ++ if (cur.bits != ctxt->req_state.bits) ++ goto cleanup; ++ ++ /* ++ * Finally check whether the handover state is still ++ * the same. ++ */ ++ cons_state_read(con, CON_STATE_REQ, &cur); ++ if (cur.atom != ctxt->hov_state.atom) ++ goto cleanup; ++ ++ /* Account time */ ++ if (timeout > 0) ++ udelay(1); ++ } ++ ++ /* ++ * Timeout. Cleanup the handover state and carefully try to reset ++ * req_prio in the real state. The reset is important to ensure ++ * that the owner does not hand over the lock after this context ++ * has given up waiting. ++ */ ++ cons_cleanup_handover(ctxt); ++ ++ cons_state_read(con, CON_STATE_CUR, &cur); ++ do { ++ /* ++ * The timeout might have raced with the owner coming late ++ * and handing it over gracefully. ++ */ ++ if (cons_state_bits_match(cur, ctxt->hov_state)) ++ goto success; ++ ++ /* ++ * Validate that the state matches with the state at request ++ * time. If this check fails, there is already a higher ++ * priority context waiting or the owner has changed (either ++ * by higher priority or by hostile takeover). In all fail ++ * cases this context is no longer in line for a handover to ++ * take place, so no reset is necessary. ++ */ ++ if (cur.bits != ctxt->req_state.bits) ++ goto cleanup; ++ ++ copy_full_state(new, cur); ++ new.req_prio = 0; ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new)); ++ /* Reset worked. Report timeout. */ ++ return -EBUSY; ++ ++success: ++ /* Store the real state */ ++ copy_full_state(ctxt->state, cur); ++ ctxt->hostile = false; ++ err = 0; ++ ++cleanup: ++ cons_cleanup_handover(ctxt); ++ return err; ++} ++ ++/** ++ * __cons_try_acquire - Try to acquire the console for printk output ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * Returns: True if the acquire was successful. False on fail. ++ * ++ * In case of success @ctxt->state contains the acquisition ++ * state. 
++ * ++ * In case of fail @ctxt->old_state contains the state ++ * that was read from @con->state for analysis by the caller. ++ */ ++static bool __cons_try_acquire(struct cons_context *ctxt) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ short flags = console_srcu_read_flags(con); ++ struct cons_state old; ++ struct cons_state new; ++ int err; ++ ++ if (WARN_ON_ONCE(!(flags & CON_NO_BKL))) ++ return false; ++again: ++ cons_state_read(con, CON_STATE_CUR, &old); ++ ++ /* Preserve it for the caller and for spinwait */ ++ copy_full_state(ctxt->old_state, old); ++ ++ if (cons_check_panic()) ++ return false; ++ ++ /* Set up the new state for takeover */ ++ copy_full_state(new, old); ++ new.locked = 1; ++ new.thread = ctxt->thread; ++ new.cur_prio = ctxt->prio; ++ new.req_prio = CONS_PRIO_NONE; ++ new.cpu = cpu; ++ ++ /* Attempt to acquire it directly if unlocked */ ++ if (!old.locked) { ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ ctxt->hostile = false; ++ copy_full_state(ctxt->state, new); ++ goto success; ++ } ++ ++ /* ++ * A threaded printer context will never spin or perform a ++ * hostile takeover. The atomic writer will wake the thread ++ * when it is done with the important output. ++ */ ++ if (ctxt->thread) ++ return false; ++ ++ /* ++ * If the active context is on the same CPU then there is ++ * obviously no handshake possible. ++ */ ++ if (old.cpu == cpu) ++ goto check_hostile; ++ ++ /* ++ * If a handover request with same or higher priority is already ++ * pending then this context cannot setup a handover request. ++ */ ++ if (old.req_prio >= ctxt->prio) ++ goto check_hostile; ++ ++ /* ++ * If the caller did not request spin-waiting then performing a ++ * handover is not an option. ++ */ ++ if (!ctxt->spinwait) ++ goto check_hostile; ++ ++ /* ++ * Setup the request in state[REQ]. If this fails then this ++ * context is not allowed to setup a handover request. ++ */ ++ if (!cons_setup_handover(ctxt)) ++ goto check_hostile; ++ ++ /* ++ * Setup the request in state[CUR]. Hand in the state that was ++ * used to make the decision to spinwait above, for comparison. If ++ * this fails then unexpected state values were encountered and the ++ * full request setup process is retried. ++ */ ++ if (!cons_setup_request(ctxt, old)) ++ goto again; ++ ++ /* ++ * Spin-wait to acquire the console. If this fails then unexpected ++ * state values were encountered (for example, a hostile takeover by ++ * another context) and the full request setup process is retried. ++ */ ++ err = cons_try_acquire_spin(ctxt); ++ if (err) { ++ if (err == -EAGAIN) ++ goto again; ++ goto check_hostile; ++ } ++success: ++ /* Common updates on success */ ++ cons_context_set_seq(ctxt); ++ cons_context_set_pbufs(ctxt); ++ return true; ++ ++check_hostile: ++ if (!ctxt->hostile) ++ return false; ++ ++ if (cons_check_panic()) ++ return false; ++ ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ copy_full_state(ctxt->state, new); ++ goto success; ++} ++ ++/** ++ * cons_try_acquire - Try to acquire the console for printk output ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * Returns: True if the acquire was successful. False on fail. ++ * ++ * In case of success @ctxt->state contains the acquisition ++ * state. ++ * ++ * In case of fail @ctxt->old_state contains the state ++ * that was read from @con->state for analysis by the caller. 
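/*
 * Illustration only, not part of the patch: a deliberately simplified,
 * user-space model of the handover scheme implemented above. It collapses
 * the two state words into one 64-bit atomic, drops priorities, sequence
 * tracking, the unsafe bit and hostile takeovers, and keeps only the
 * core: publish a request, spin with a timeout, and on timeout withdraw
 * the request while racing against a late, friendly handover.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic uint64_t demo_state;	/* low 32 bits: owner, high 32 bits: waiter */

static uint64_t pack(uint32_t owner, uint32_t waiter)
{
	return ((uint64_t)waiter << 32) | owner;
}

static uint32_t owner_of(uint64_t s)  { return (uint32_t)s; }
static uint32_t waiter_of(uint64_t s) { return (uint32_t)(s >> 32); }

static bool demo_try_acquire(uint32_t me, unsigned int spins)
{
	uint64_t s = atomic_load(&demo_state);

	for (;;) {
		if (owner_of(s) == 0) {
			/* Unowned: take it directly (the !old.locked path). */
			if (atomic_compare_exchange_weak(&demo_state, &s, pack(me, 0)))
				return true;
			continue;
		}
		if (waiter_of(s) != 0)
			return false;	/* a request is already pending, give up */
		/* Publish the handover request (state[REQ] / req_prio). */
		if (atomic_compare_exchange_weak(&demo_state, &s, pack(owner_of(s), me)))
			break;
	}

	/* Spin-wait for a friendly handover (cons_try_acquire_spin()). */
	for (unsigned int t = 0; t < spins; t++) {
		s = atomic_load(&demo_state);
		if (owner_of(s) == me)
			return true;
	}

	/* Timeout: withdraw the request, racing against a late handover. */
	for (;;) {
		s = atomic_load(&demo_state);
		if (owner_of(s) == me)
			return true;	/* the owner handed over after all */
		if (waiter_of(s) != me)
			return false;	/* request already gone */
		if (atomic_compare_exchange_weak(&demo_state, &s, pack(owner_of(s), 0)))
			return false;	/* withdrawal succeeded, caller backs out */
	}
}

static void demo_release(uint32_t me)
{
	uint64_t s = atomic_load(&demo_state);

	assert(owner_of(s) == me);
	/* Hand over to a pending waiter in one step, otherwise unlock. */
	while (!atomic_compare_exchange_weak(&demo_state, &s, pack(waiter_of(s), 0)))
		;
}

int main(void)
{
	assert(demo_try_acquire(1, 100));	/* unowned: direct acquire     */
	assert(!demo_try_acquire(2, 100));	/* owner never yields: timeout */
	demo_release(1);			/* no waiter left: unlock      */
	assert(demo_try_acquire(2, 100));
	demo_release(2);
	return 0;
}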
++ */ ++static bool cons_try_acquire(struct cons_context *ctxt) ++{ ++ if (__cons_try_acquire(ctxt)) ++ return true; ++ ++ ctxt->state.atom = 0; ++ return false; ++} ++ ++/** ++ * __cons_release - Release the console after output is done ++ * @ctxt: The acquire context that contains the state ++ * at cons_try_acquire() ++ * ++ * Returns: True if the release was regular ++ * ++ * False if the console is in unusable state or was handed over ++ * with handshake or taken over hostile without handshake. ++ * ++ * The return value tells the caller whether it needs to evaluate further ++ * printing. ++ */ ++static bool __cons_release(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ short flags = console_srcu_read_flags(con); ++ struct cons_state hstate; ++ struct cons_state old; ++ struct cons_state new; ++ ++ if (WARN_ON_ONCE(!(flags & CON_NO_BKL))) ++ return false; ++ ++ cons_state_read(con, CON_STATE_CUR, &old); ++again: ++ if (!cons_state_bits_match(old, ctxt->state)) ++ return false; ++ ++ /* Release it directly when no handover request is pending. */ ++ if (!old.req_prio) ++ goto unlock; ++ ++ /* Read the handover target state */ ++ cons_state_read(con, CON_STATE_REQ, &hstate); ++ ++ /* If the waiter gave up hstate is 0 */ ++ if (!hstate.atom) ++ goto unlock; ++ ++ /* ++ * If a higher priority waiter raced against a lower priority ++ * waiter then unlock instead of handing over to either. The ++ * higher priority waiter will notice the updated state and ++ * retry. ++ */ ++ if (hstate.cur_prio != old.req_prio) ++ goto unlock; ++ ++ /* Switch the state and preserve the sequence on 64bit */ ++ copy_bit_state(new, hstate); ++ copy_seq_state64(new, old); ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ return true; ++ ++unlock: ++ /* Clear the state and preserve the sequence on 64bit */ ++ new.atom = 0; ++ copy_seq_state64(new, old); ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ return true; ++} ++ ++bool printk_threads_enabled __ro_after_init; ++static bool printk_force_atomic __initdata; ++ ++/** ++ * cons_release - Release the console after output is done ++ * @ctxt: The acquire context that contains the state ++ * at cons_try_acquire() ++ * ++ * Returns: True if the release was regular ++ * ++ * False if the console is in unusable state or was handed over ++ * with handshake or taken over hostile without handshake. ++ * ++ * The return value tells the caller whether it needs to evaluate further ++ * printing. ++ */ ++static bool cons_release(struct cons_context *ctxt) ++{ ++ bool ret = __cons_release(ctxt); ++ ++ /* Invalidate the buffer pointer. It is no longer valid. */ ++ ctxt->pbufs = NULL; ++ ++ ctxt->state.atom = 0; ++ return ret; ++} ++ ++bool console_try_acquire(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ ++ return cons_try_acquire(ctxt); ++} ++EXPORT_SYMBOL_GPL(console_try_acquire); ++ ++bool console_release(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ ++ return cons_release(ctxt); ++} ++EXPORT_SYMBOL_GPL(console_release); ++ ++/** ++ * cons_alloc_percpu_data - Allocate percpu data for a console ++ * @con: Console to allocate for ++ * ++ * Returns: True on success. False otherwise and the console cannot be used. ++ * ++ * If it is not yet possible to allocate per CPU data, success is returned. 
++ * When per CPU data becomes possible, set_percpu_data_ready() will call ++ * this function again for all registered consoles. ++ */ ++bool cons_alloc_percpu_data(struct console *con) ++{ ++ if (!printk_percpu_data_ready()) ++ return true; ++ ++ con->pcpu_data = alloc_percpu(typeof(*con->pcpu_data)); ++ if (con->pcpu_data) ++ return true; ++ ++ con_printk(KERN_WARNING, con, "failed to allocate percpu buffers\n"); ++ return false; ++} ++ ++/** ++ * cons_free_percpu_data - Free percpu data of a console on unregister ++ * @con: Console to clean up ++ */ ++static void cons_free_percpu_data(struct console *con) ++{ ++ if (!con->pcpu_data) ++ return; ++ ++ free_percpu(con->pcpu_data); ++ con->pcpu_data = NULL; ++} ++ ++/** ++ * console_can_proceed - Check whether printing can proceed ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked after the record was dumped into the assigned record ++ * buffer and at appropriate safe places in the driver. For unsafe driver ++ * sections see console_enter_unsafe(). ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. ++ */ ++bool console_can_proceed(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ struct cons_state state; ++ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ /* Store it for analysis or reuse */ ++ copy_full_state(ctxt->old_state, state); ++ ++ /* Make sure this context is still the owner. */ ++ if (!cons_state_full_match(state, ctxt->state)) ++ return false; ++ ++ /* ++ * Having a safe point for take over and eventually a few ++ * duplicated characters or a full line is way better than a ++ * hostile takeover. Post processing can take care of the garbage. ++ * Continue if the requested priority is not sufficient. ++ */ ++ if (state.req_prio <= state.cur_prio) ++ return true; ++ ++ /* ++ * A console printer within an unsafe region is allowed to continue. ++ * It can perform the handover when exiting the safe region. Otherwise ++ * a hostile takeover will be necessary. ++ */ ++ if (state.unsafe) ++ return true; ++ ++ /* Release and hand over */ ++ cons_release(ctxt); ++ /* ++ * This does not check whether the handover succeeded. The ++ * outermost callsite has to make the final decision whether printing ++ * should continue or not (via reacquire, possibly hostile). The ++ * console is unlocked already so go back all the way instead of ++ * trying to implement heuristics in tons of places. ++ */ ++ return false; ++} ++EXPORT_SYMBOL_GPL(console_can_proceed); ++ ++/** ++ * __console_update_unsafe - Update the unsafe bit in @con->atomic_state ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked before an unsafe driver section is entered. ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. 
++ * ++ * Internal helper to avoid duplicated code ++ */ ++static bool __console_update_unsafe(struct cons_write_context *wctxt, bool unsafe) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ struct cons_state new; ++ ++ do { ++ if (!console_can_proceed(wctxt)) ++ return false; ++ /* ++ * console_can_proceed() saved the real state in ++ * ctxt->old_state ++ */ ++ copy_full_state(new, ctxt->old_state); ++ new.unsafe = unsafe; ++ ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &ctxt->old_state, &new)); ++ ++ copy_full_state(ctxt->state, new); ++ return true; ++} ++ ++/** ++ * console_enter_unsafe - Enter an unsafe region in the driver ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked before an unsafe driver section is entered. ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. ++ */ ++bool console_enter_unsafe(struct cons_write_context *wctxt) ++{ ++ return __console_update_unsafe(wctxt, true); ++} ++EXPORT_SYMBOL_GPL(console_enter_unsafe); ++ ++/** ++ * console_exit_unsafe - Exit an unsafe region in the driver ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked before an unsafe driver section is exited. ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. ++ */ ++bool console_exit_unsafe(struct cons_write_context *wctxt) ++{ ++ return __console_update_unsafe(wctxt, false); ++} ++EXPORT_SYMBOL_GPL(console_exit_unsafe); ++ ++/** ++ * cons_get_record - Fill the buffer with the next pending ringbuffer record ++ * @wctxt: The write context which will be handed to the write function ++ * ++ * Returns: True if there are records available. If the next record should ++ * be printed, the output buffer is filled and @wctxt->outbuf ++ * points to the text to print. If @wctxt->outbuf is NULL after ++ * the call, the record should not be printed but the caller must ++ * still update the console sequence number. ++ * ++ * False means that there are no pending records anymore and the ++ * printing can stop. 
++ */ ++static bool cons_get_record(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; ++ struct printk_message pmsg = { ++ .pbufs = ctxt->pbufs, ++ }; ++ ++ if (!printk_get_next_message(&pmsg, ctxt->newseq, is_extended, true)) ++ return false; ++ ++ ctxt->newseq = pmsg.seq; ++ ctxt->dropped += pmsg.dropped; ++ ++ if (pmsg.outbuf_len == 0) { ++ wctxt->outbuf = NULL; ++ } else { ++ if (ctxt->dropped && !is_extended) ++ console_prepend_dropped(&pmsg, ctxt->dropped); ++ wctxt->outbuf = &pmsg.pbufs->outbuf[0]; ++ } ++ ++ wctxt->len = pmsg.outbuf_len; ++ ++ return true; ++} ++ ++/** ++ * cons_emit_record - Emit record in the acquired context ++ * @wctxt: The write context that will be handed to the write function ++ * ++ * Returns: False if the operation was aborted (takeover or handover). ++ * True otherwise ++ * ++ * When false is returned, the caller is not allowed to touch console state. ++ * The console is owned by someone else. If the caller wants to print more ++ * it has to reacquire the console first. ++ * ++ * When true is returned, @wctxt->ctxt.backlog indicates whether there are ++ * still records pending in the ringbuffer, ++ */ ++static bool cons_emit_record(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ bool done = false; ++ ++ /* ++ * @con->dropped is not protected in case of hostile takeovers so ++ * the update below is racy. Annotate it accordingly. ++ */ ++ ctxt->dropped = data_race(READ_ONCE(con->dropped)); ++ ++ /* Fill the output buffer with the next record */ ++ ctxt->backlog = cons_get_record(wctxt); ++ if (!ctxt->backlog) ++ return true; ++ ++ /* Safety point. Don't touch state in case of takeover */ ++ if (!console_can_proceed(wctxt)) ++ return false; ++ ++ /* Counterpart to the read above */ ++ WRITE_ONCE(con->dropped, ctxt->dropped); ++ ++ /* ++ * In case of skipped records, Update sequence state in @con. ++ */ ++ if (!wctxt->outbuf) ++ goto update; ++ ++ /* Tell the driver about potential unsafe state */ ++ wctxt->unsafe = ctxt->state.unsafe; ++ ++ if (!ctxt->thread && con->write_atomic) { ++ done = con->write_atomic(con, wctxt); ++ } else if (ctxt->thread && con->write_thread) { ++ done = con->write_thread(con, wctxt); ++ } else { ++ cons_release(ctxt); ++ WARN_ON_ONCE(1); ++ return false; ++ } ++ ++ /* If not done, the write was aborted due to takeover */ ++ if (!done) ++ return false; ++ ++ /* If there was a dropped message, it has now been output. */ ++ if (ctxt->dropped) { ++ ctxt->dropped = 0; ++ /* Counterpart to the read above */ ++ WRITE_ONCE(con->dropped, ctxt->dropped); ++ } ++update: ++ ctxt->newseq++; ++ /* ++ * The sequence update attempt is not part of console_release() ++ * because in panic situations the console is not released by ++ * the panic CPU until all records are written. On 32bit the ++ * sequence is separate from state anyway. ++ */ ++ return cons_seq_try_update(ctxt); ++} ++ ++/** ++ * cons_kthread_should_wakeup - Check whether the printk thread should wakeup ++ * @con: Console to operate on ++ * @ctxt: The acquire context that contains the state ++ * at console_acquire() ++ * ++ * Returns: True if the thread should shutdown or if the console is allowed to ++ * print and a record is available. 
False otherwise ++ * ++ * After the thread wakes up, it must first check if it should shutdown before ++ * attempting any printing. ++ */ ++static bool cons_kthread_should_wakeup(struct console *con, struct cons_context *ctxt) ++{ ++ bool is_usable; ++ short flags; ++ int cookie; ++ ++ if (kthread_should_stop()) ++ return true; ++ ++ cookie = console_srcu_read_lock(); ++ flags = console_srcu_read_flags(con); ++ is_usable = console_is_usable(con, flags); ++ console_srcu_read_unlock(cookie); ++ ++ if (!is_usable) ++ return false; ++ ++ /* This reads state and sequence on 64bit. On 32bit only state */ ++ cons_state_read(con, CON_STATE_CUR, &ctxt->state); ++ ++ /* ++ * Atomic printing is running on some other CPU. The owner ++ * will wake the console thread on unlock if necessary. ++ */ ++ if (ctxt->state.locked) ++ return false; ++ ++ /* Bring the sequence in @ctxt up to date */ ++ cons_context_set_seq(ctxt); ++ ++ return prb_read_valid(prb, ctxt->oldseq, NULL); ++} ++ ++/** ++ * cons_kthread_func - The printk thread function ++ * @__console: Console to operate on ++ */ ++static int cons_kthread_func(void *__console) ++{ ++ struct console *con = __console; ++ struct cons_write_context wctxt = { ++ .ctxt.console = con, ++ .ctxt.prio = CONS_PRIO_NORMAL, ++ .ctxt.thread = 1, ++ }; ++ struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); ++ unsigned long flags; ++ short con_flags; ++ bool backlog; ++ int cookie; ++ int ret; ++ ++ for (;;) { ++ atomic_inc(&con->kthread_waiting); ++ ++ /* ++ * Provides a full memory barrier vs. cons_kthread_wake(). ++ */ ++ ret = rcuwait_wait_event(&con->rcuwait, ++ cons_kthread_should_wakeup(con, ctxt), ++ TASK_INTERRUPTIBLE); ++ ++ atomic_dec(&con->kthread_waiting); ++ ++ if (kthread_should_stop()) ++ break; ++ ++ /* Wait was interrupted by a spurious signal, go back to sleep */ ++ if (ret) ++ continue; ++ ++ for (;;) { ++ cookie = console_srcu_read_lock(); ++ ++ /* ++ * Ensure this stays on the CPU to make handover and ++ * takeover possible. ++ */ ++ if (con->port_lock) ++ con->port_lock(con, true, &flags); ++ else ++ migrate_disable(); ++ ++ /* ++ * Try to acquire the console without attempting to ++ * take over. If an atomic printer wants to hand ++ * back to the thread it simply wakes it up. ++ */ ++ if (!cons_try_acquire(ctxt)) ++ break; ++ ++ con_flags = console_srcu_read_flags(con); ++ ++ if (console_is_usable(con, con_flags)) { ++ /* ++ * If the emit fails, this context is no ++ * longer the owner. Abort the processing and ++ * wait for new records to print. ++ */ ++ if (!cons_emit_record(&wctxt)) ++ break; ++ backlog = ctxt->backlog; ++ } else { ++ backlog = false; ++ } ++ ++ /* ++ * If the release fails, this context was not the ++ * owner. Abort the processing and wait for new ++ * records to print. ++ */ ++ if (!cons_release(ctxt)) ++ break; ++ ++ /* Backlog done? 
*/ ++ if (!backlog) ++ break; ++ ++ if (con->port_lock) ++ con->port_lock(con, false, &flags); ++ else ++ migrate_enable(); ++ ++ console_srcu_read_unlock(cookie); ++ ++ cond_resched(); ++ } ++ if (con->port_lock) ++ con->port_lock(con, false, &flags); ++ else ++ migrate_enable(); ++ ++ console_srcu_read_unlock(cookie); ++ } ++ return 0; ++} ++ ++/** ++ * cons_irq_work - irq work to wake printk thread ++ * @irq_work: The irq work to operate on ++ */ ++static void cons_irq_work(struct irq_work *irq_work) ++{ ++ struct console *con = container_of(irq_work, struct console, irq_work); ++ ++ cons_kthread_wake(con); ++} ++ ++/** ++ * cons_wake_threads - Wake up printing threads ++ * ++ * A printing thread is only woken if it is within the @kthread_waiting ++ * block. If it is not within the block (or enters the block later), it ++ * will see any new records and continue printing on its own. ++ */ ++void cons_wake_threads(void) ++{ ++ struct console *con; ++ int cookie; ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ if (con->kthread && atomic_read(&con->kthread_waiting)) ++ irq_work_queue(&con->irq_work); ++ } ++ console_srcu_read_unlock(cookie); ++} ++ ++/** ++ * struct cons_cpu_state - Per CPU printk context state ++ * @prio: The current context priority level ++ * @nesting: Per priority nest counter ++ */ ++struct cons_cpu_state { ++ enum cons_prio prio; ++ int nesting[CONS_PRIO_MAX]; ++}; ++ ++static DEFINE_PER_CPU(struct cons_cpu_state, cons_pcpu_state); ++static struct cons_cpu_state early_cons_pcpu_state __initdata; ++ ++/** ++ * cons_get_cpu_state - Get the per CPU console state pointer ++ * ++ * Returns either a pointer to the per CPU state of the current CPU or to ++ * the init data state during early boot. ++ */ ++static __ref struct cons_cpu_state *cons_get_cpu_state(void) ++{ ++ if (!printk_percpu_data_ready()) ++ return &early_cons_pcpu_state; ++ ++ return this_cpu_ptr(&cons_pcpu_state); ++} ++ ++/** ++ * cons_get_wctxt - Get the write context for atomic printing ++ * @con: Console to operate on ++ * @prio: Priority of the context ++ * ++ * Returns either the per CPU context or the builtin context for ++ * early boot. 
++ */ ++static __ref struct cons_write_context *cons_get_wctxt(struct console *con, ++ enum cons_prio prio) ++{ ++ if (!con->pcpu_data) ++ return &early_cons_ctxt_data.wctxt[prio]; ++ ++ return &this_cpu_ptr(con->pcpu_data)->wctxt[prio]; ++} ++ ++/** ++ * cons_atomic_try_acquire - Try to acquire the console for atomic printing ++ * @con: The console to acquire ++ * @ctxt: The console context instance to work on ++ * @prio: The priority of the current context ++ */ ++static bool cons_atomic_try_acquire(struct console *con, struct cons_context *ctxt, ++ enum cons_prio prio, bool skip_unsafe) ++{ ++ memset(ctxt, 0, sizeof(*ctxt)); ++ ctxt->console = con; ++ ctxt->spinwait_max_us = 2000; ++ ctxt->prio = prio; ++ ctxt->spinwait = 1; ++ ++ /* Try to acquire it directly or via a friendly handover */ ++ if (cons_try_acquire(ctxt)) ++ return true; ++ ++ /* Investigate whether a hostile takeover is due */ ++ if (ctxt->old_state.cur_prio >= prio) ++ return false; ++ ++ if (!ctxt->old_state.unsafe || !skip_unsafe) ++ ctxt->hostile = 1; ++ return cons_try_acquire(ctxt); ++} ++ ++/** ++ * cons_atomic_flush_con - Flush one console in atomic mode ++ * @wctxt: The write context struct to use for this context ++ * @con: The console to flush ++ * @prio: The priority of the current context ++ * @skip_unsafe: True, to avoid unsafe hostile takeovers ++ */ ++static void cons_atomic_flush_con(struct cons_write_context *wctxt, struct console *con, ++ enum cons_prio prio, bool skip_unsafe) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ bool wake_thread = false; ++ short flags; ++ ++ if (!cons_atomic_try_acquire(con, ctxt, prio, skip_unsafe)) ++ return; ++ ++ do { ++ flags = console_srcu_read_flags(con); ++ ++ if (!console_is_usable(con, flags)) ++ break; ++ ++ /* ++ * For normal prio messages let the printer thread handle ++ * the printing if it is available. ++ */ ++ if (prio <= CONS_PRIO_NORMAL && con->kthread) { ++ wake_thread = true; ++ break; ++ } ++ ++ /* ++ * cons_emit_record() returns false when the console was ++ * handed over or taken over. In both cases the context is ++ * no longer valid. ++ */ ++ if (!cons_emit_record(wctxt)) ++ return; ++ } while (ctxt->backlog); ++ ++ cons_release(ctxt); ++ ++ if (wake_thread && atomic_read(&con->kthread_waiting)) ++ irq_work_queue(&con->irq_work); ++} ++ ++/** ++ * cons_atomic_flush - Flush consoles in atomic mode if required ++ * @printk_caller_wctxt: The write context struct to use for this ++ * context (for printk() context only) ++ * @skip_unsafe: True, to avoid unsafe hostile takeovers ++ */ ++void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, bool skip_unsafe) ++{ ++ struct cons_write_context *wctxt; ++ struct cons_cpu_state *cpu_state; ++ struct console *con; ++ short flags; ++ int cookie; ++ ++ cpu_state = cons_get_cpu_state(); ++ ++ /* ++ * When in an elevated priority, the printk() calls are not ++ * individually flushed. This is to allow the full output to ++ * be dumped to the ringbuffer before starting with printing ++ * the backlog. ++ */ ++ if (cpu_state->prio > CONS_PRIO_NORMAL && printk_caller_wctxt) ++ return; ++ ++ /* ++ * Let the outermost write of this priority print. This avoids ++ * nasty hackery for nested WARN() where the printing itself ++ * generates one. ++ * ++ * cpu_state->prio <= CONS_PRIO_NORMAL is not subject to nesting ++ * and can proceed in order to allow atomic printing when consoles ++ * do not have a printer thread. 
++ */ ++ if (cpu_state->prio > CONS_PRIO_NORMAL && ++ cpu_state->nesting[cpu_state->prio] != 1) ++ return; ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ if (!con->write_atomic) ++ continue; ++ ++ flags = console_srcu_read_flags(con); ++ ++ if (!console_is_usable(con, flags)) ++ continue; ++ ++ if (cpu_state->prio > CONS_PRIO_NORMAL || !con->kthread) { ++ if (printk_caller_wctxt) ++ wctxt = printk_caller_wctxt; ++ else ++ wctxt = cons_get_wctxt(con, cpu_state->prio); ++ cons_atomic_flush_con(wctxt, con, cpu_state->prio, skip_unsafe); ++ } ++ } ++ console_srcu_read_unlock(cookie); ++} ++ ++/** ++ * cons_atomic_enter - Enter a context that enforces atomic printing ++ * @prio: Priority of the context ++ * ++ * Returns: The previous priority that needs to be fed into ++ * the corresponding cons_atomic_exit() ++ */ ++enum cons_prio cons_atomic_enter(enum cons_prio prio) ++{ ++ struct cons_cpu_state *cpu_state; ++ enum cons_prio prev_prio; ++ ++ migrate_disable(); ++ cpu_state = cons_get_cpu_state(); ++ ++ prev_prio = cpu_state->prio; ++ if (prev_prio < prio) ++ cpu_state->prio = prio; ++ ++ /* ++ * Increment the nesting on @cpu_state->prio so a WARN() ++ * nested into a panic printout does not attempt to ++ * scribble state. ++ */ ++ cpu_state->nesting[cpu_state->prio]++; ++ ++ return prev_prio; ++} ++ ++/** ++ * cons_atomic_exit - Exit a context that enforces atomic printing ++ * @prio: Priority of the context to leave ++ * @prev_prio: Priority of the previous context for restore ++ * ++ * @prev_prio is the priority returned by the corresponding cons_atomic_enter(). ++ */ ++void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio) ++{ ++ struct cons_cpu_state *cpu_state; ++ ++ cons_atomic_flush(NULL, true); ++ ++ cpu_state = cons_get_cpu_state(); ++ ++ if (cpu_state->prio == CONS_PRIO_PANIC) ++ cons_atomic_flush(NULL, false); ++ ++ /* ++ * Undo the nesting of cons_atomic_enter() at the CPU state ++ * priority. ++ */ ++ cpu_state->nesting[cpu_state->prio]--; ++ ++ /* ++ * Restore the previous priority, which was returned by ++ * cons_atomic_enter(). ++ */ ++ cpu_state->prio = prev_prio; ++ ++ migrate_enable(); ++} ++ ++/** ++ * cons_kthread_stop - Stop a printk thread ++ * @con: Console to operate on ++ */ ++static void cons_kthread_stop(struct console *con) ++{ ++ lockdep_assert_console_list_lock_held(); ++ ++ if (!con->kthread) ++ return; ++ ++ kthread_stop(con->kthread); ++ con->kthread = NULL; ++ ++ kfree(con->thread_pbufs); ++ con->thread_pbufs = NULL; ++} ++ ++/** ++ * cons_kthread_create - Create a printk thread ++ * @con: Console to operate on ++ * ++ * If it fails, let the console proceed. The atomic part might ++ * be usable and useful. ++ */ ++void cons_kthread_create(struct console *con) ++{ ++ struct task_struct *kt; ++ struct console *c; ++ ++ lockdep_assert_console_list_lock_held(); ++ ++ if (!(con->flags & CON_NO_BKL) || !con->write_thread) ++ return; ++ ++ if (!printk_threads_enabled || con->kthread) ++ return; ++ ++ /* ++ * Printer threads cannot be started as long as any boot console is ++ * registered because there is no way to synchronize the hardware ++ * registers between boot console code and regular console code. 
++ */ ++ for_each_console(c) { ++ if (c->flags & CON_BOOT) ++ return; ++ } ++ have_boot_console = false; ++ ++ con->thread_pbufs = kmalloc(sizeof(*con->thread_pbufs), GFP_KERNEL); ++ if (!con->thread_pbufs) { ++ con_printk(KERN_ERR, con, "failed to allocate printing thread buffers\n"); ++ return; ++ } ++ ++ kt = kthread_run(cons_kthread_func, con, "pr/%s%d", con->name, con->index); ++ if (IS_ERR(kt)) { ++ con_printk(KERN_ERR, con, "failed to start printing thread\n"); ++ kfree(con->thread_pbufs); ++ con->thread_pbufs = NULL; ++ return; ++ } ++ ++ con->kthread = kt; ++ ++ /* ++ * It is important that console printing threads are scheduled ++ * shortly after a printk call and with generous runtime budgets. ++ */ ++ sched_set_normal(con->kthread, -20); ++} ++ ++static int __init printk_setup_threads(void) ++{ ++ struct console *con; ++ ++ if (printk_force_atomic) ++ return 0; ++ ++ console_list_lock(); ++ printk_threads_enabled = true; ++ for_each_console(con) ++ cons_kthread_create(con); ++ if (have_bkl_console) ++ console_bkl_kthread_create(); ++ console_list_unlock(); ++ return 0; ++} ++early_initcall(printk_setup_threads); ++ ++/** ++ * cons_nobkl_init - Initialize the NOBKL console specific data ++ * @con: Console to initialize ++ * ++ * Returns: True on success. False otherwise and the console cannot be used. ++ */ ++bool cons_nobkl_init(struct console *con) ++{ ++ struct cons_state state = { }; ++ ++ if (!cons_alloc_percpu_data(con)) ++ return false; ++ ++ rcuwait_init(&con->rcuwait); ++ atomic_set(&con->kthread_waiting, 0); ++ init_irq_work(&con->irq_work, cons_irq_work); ++ cons_state_set(con, CON_STATE_CUR, &state); ++ cons_state_set(con, CON_STATE_REQ, &state); ++ cons_seq_init(con); ++ cons_kthread_create(con); ++ return true; ++} ++ ++/** ++ * cons_nobkl_cleanup - Cleanup the NOBKL console specific data ++ * @con: Console to cleanup ++ */ ++void cons_nobkl_cleanup(struct console *con) ++{ ++ struct cons_state state = { }; ++ ++ cons_kthread_stop(con); ++ cons_state_set(con, CON_STATE_CUR, &state); ++ cons_state_set(con, CON_STATE_REQ, &state); ++ cons_free_percpu_data(con); ++} ++ ++/** ++ * printk_kthread_shutdown - shutdown all threaded printers ++ * ++ * On system shutdown all threaded printers are stopped. This allows printk ++ * to transition back to atomic printing, thus providing a robust mechanism ++ * for the final shutdown/reboot messages to be output. ++ */ ++static void printk_kthread_shutdown(void) ++{ ++ struct console *con; ++ ++ console_list_lock(); ++ for_each_console(con) { ++ if (con->flags & CON_NO_BKL) ++ cons_kthread_stop(con); ++ } ++ console_list_unlock(); ++} ++ ++static struct syscore_ops printk_syscore_ops = { ++ .shutdown = printk_kthread_shutdown, ++}; ++ ++static int __init printk_init_ops(void) ++{ ++ register_syscore_ops(&printk_syscore_ops); ++ return 0; ++} ++device_initcall(printk_init_ops); +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index ef0f9a2044da1..5c1470bd60bcb 100644 +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -12,18 +12,41 @@ + + #include "internal.h" + +-static DEFINE_PER_CPU(int, printk_context); ++struct printk_context { ++ local_lock_t cpu; ++ int recursion; ++}; ++ ++static DEFINE_PER_CPU(struct printk_context, printk_context) = { ++ .cpu = INIT_LOCAL_LOCK(cpu), ++}; + + /* Can be preempted by NMI. 
*/ +-void __printk_safe_enter(void) ++void __printk_safe_enter(unsigned long *flags) + { +- this_cpu_inc(printk_context); ++ WARN_ON_ONCE(in_nmi()); ++ local_lock_irqsave(&printk_context.cpu, *flags); ++ this_cpu_inc(printk_context.recursion); + } + + /* Can be preempted by NMI. */ +-void __printk_safe_exit(void) ++void __printk_safe_exit(unsigned long *flags) + { +- this_cpu_dec(printk_context); ++ WARN_ON_ONCE(in_nmi()); ++ this_cpu_dec(printk_context.recursion); ++ local_unlock_irqrestore(&printk_context.cpu, *flags); ++} ++ ++void __printk_deferred_enter(void) ++{ ++ WARN_ON_ONCE(!in_atomic()); ++ this_cpu_inc(printk_context.recursion); ++} ++ ++void __printk_deferred_exit(void) ++{ ++ WARN_ON_ONCE(!in_atomic()); ++ this_cpu_dec(printk_context.recursion); + } + + asmlinkage int vprintk(const char *fmt, va_list args) +@@ -38,13 +61,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) + * Use the main logbuf even in NMI. But avoid calling console + * drivers that might have their own locks. + */ +- if (this_cpu_read(printk_context) || in_nmi()) { +- int len; +- +- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); +- defer_console_output(); +- return len; +- } ++ if (this_cpu_read(printk_context.recursion) || in_nmi()) ++ return vprintk_deferred(fmt, args); + + /* No obstacles. */ + return vprintk_default(fmt, args); +diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c +index 8e6c023212cb3..34f8adf7c0a39 100644 +--- a/kernel/rcu/rcutorture.c ++++ b/kernel/rcu/rcutorture.c +@@ -2407,6 +2407,12 @@ static int rcutorture_booster_init(unsigned int cpu) + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#ifdef CONFIG_PREEMPT_RT ++ t = per_cpu(timersd, cpu); ++ WARN_ON_ONCE(!t); ++ sp.sched_priority = 2; ++ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#endif + } + + /* Don't allow time recalculation while creating a new task. */ +diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h +index b10b8349bb2a4..804306204d0d0 100644 +--- a/kernel/rcu/tree_stall.h ++++ b/kernel/rcu/tree_stall.h +@@ -8,6 +8,7 @@ + */ + + #include ++#include + + ////////////////////////////////////////////////////////////////////////////// + // +@@ -582,6 +583,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void) + + static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) + { ++ enum cons_prio prev_prio; + int cpu; + unsigned long flags; + unsigned long gpa; +@@ -597,6 +599,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) + if (rcu_stall_is_suppressed()) + return; + ++ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY); ++ + /* + * OK, time to rat on our buddy... + * See Documentation/RCU/stallwarn.rst for info on how to debug +@@ -651,6 +655,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) + panic_on_rcu_stall(); + + rcu_force_quiescent_state(); /* Kick them all. 
*/ ++ ++ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio); + } + + static void print_cpu_stall(unsigned long gps) +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 0d18c3969f904..a57a1a3beeba1 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1042,6 +1042,46 @@ void resched_curr(struct rq *rq) + trace_sched_wake_idle_without_ipi(cpu); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++ ++static int tsk_is_polling(struct task_struct *p) ++{ ++#ifdef TIF_POLLING_NRFLAG ++ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); ++#else ++ return 0; ++#endif ++} ++ ++void resched_curr_lazy(struct rq *rq) ++{ ++ struct task_struct *curr = rq->curr; ++ int cpu; ++ ++ if (!sched_feat(PREEMPT_LAZY)) { ++ resched_curr(rq); ++ return; ++ } ++ ++ if (test_tsk_need_resched(curr)) ++ return; ++ ++ if (test_tsk_need_resched_lazy(curr)) ++ return; ++ ++ set_tsk_need_resched_lazy(curr); ++ ++ cpu = cpu_of(rq); ++ if (cpu == smp_processor_id()) ++ return; ++ ++ /* NEED_RESCHED_LAZY must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(curr)) ++ smp_send_reschedule(cpu); ++} ++#endif ++ + void resched_cpu(int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -2230,6 +2270,7 @@ void migrate_disable(void) + preempt_disable(); + this_rq()->nr_pinned++; + p->migration_disabled = 1; ++ preempt_lazy_disable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_disable); +@@ -2265,6 +2306,7 @@ void migrate_enable(void) + barrier(); + p->migration_disabled = 0; + this_rq()->nr_pinned--; ++ preempt_lazy_enable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_enable); +@@ -3318,6 +3360,76 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, + } + #endif /* CONFIG_NUMA_BALANCING */ + ++#ifdef CONFIG_PREEMPT_RT ++ ++/* ++ * Consider: ++ * ++ * set_special_state(X); ++ * ++ * do_things() ++ * // Somewhere in there is an rtlock that can be contended: ++ * current_save_and_set_rtlock_wait_state(); ++ * [...] ++ * schedule_rtlock(); (A) ++ * [...] ++ * current_restore_rtlock_saved_state(); ++ * ++ * schedule(); (B) ++ * ++ * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an ++ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its ++ * state to X. For things like ptrace (X=TASK_TRACED), the task could have more ++ * work to do upon acquiring the lock in do_things() before whoever called ++ * wait_task_inactive() should return. IOW, we have to wait for: ++ * ++ * p.saved_state = TASK_RUNNING ++ * p.__state = X ++ * ++ * which implies the task isn't blocked on an RT lock and got to schedule() (B). ++ * ++ * Also see comments in ttwu_state_match(). 
++ */ ++ ++static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) ++{ ++ unsigned long flags; ++ bool mismatch; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ if (READ_ONCE(p->__state) & match_state) ++ mismatch = false; ++ else if (READ_ONCE(p->saved_state) & match_state) ++ mismatch = false; ++ else ++ mismatch = true; ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ return mismatch; ++} ++static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, ++ bool *wait) ++{ ++ if (READ_ONCE(p->__state) & match_state) ++ return true; ++ if (READ_ONCE(p->saved_state) & match_state) { ++ *wait = true; ++ return true; ++ } ++ return false; ++} ++#else ++static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) ++{ ++ return !(READ_ONCE(p->__state) & match_state); ++} ++static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, ++ bool *wait) ++{ ++ return (READ_ONCE(p->__state) & match_state); ++} ++#endif ++ + /* + * wait_task_inactive - wait for a thread to unschedule. + * +@@ -3336,7 +3448,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, + */ + unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) + { +- int running, queued; ++ bool running, wait; + struct rq_flags rf; + unsigned long ncsw; + struct rq *rq; +@@ -3362,7 +3474,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + * is actually now running somewhere else! + */ + while (task_on_cpu(rq, p)) { +- if (!(READ_ONCE(p->__state) & match_state)) ++ if (state_mismatch(p, match_state)) + return 0; + cpu_relax(); + } +@@ -3375,9 +3487,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + rq = task_rq_lock(p, &rf); + trace_sched_wait_task(p); + running = task_on_cpu(rq, p); +- queued = task_on_rq_queued(p); ++ wait = task_on_rq_queued(p); + ncsw = 0; +- if (READ_ONCE(p->__state) & match_state) ++ ++ if (state_match(p, match_state, &wait)) + ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + task_rq_unlock(rq, p, &rf); + +@@ -3407,7 +3520,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + * running right now), it's preempted, and we should + * yield - it could be a while. 
+ */ +- if (unlikely(queued)) { ++ if (unlikely(wait)) { + ktime_t to = NSEC_PER_SEC / HZ; + + set_current_state(TASK_UNINTERRUPTIBLE); +@@ -4712,6 +4825,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + p->on_cpu = 0; + #endif + init_task_preempt_count(p); ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(p)->preempt_lazy_count = 0; ++#endif + #ifdef CONFIG_SMP + plist_node_init(&p->pushable_tasks, MAX_PRIO); + RB_CLEAR_NODE(&p->pushable_dl_tasks); +@@ -6588,6 +6704,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + + next = pick_next_task(rq, prev, &rf); + clear_tsk_need_resched(prev); ++ clear_tsk_need_resched_lazy(prev); + clear_preempt_need_resched(); + #ifdef CONFIG_SCHED_DEBUG + rq->last_seen_need_resched_ns = 0; +@@ -6648,14 +6765,11 @@ void __noreturn do_task_dead(void) + cpu_relax(); + } + +-static inline void sched_submit_work(struct task_struct *tsk) ++void sched_submit_work(void) + { +- unsigned int task_flags; ++ struct task_struct *tsk = current; ++ unsigned int task_flags = tsk->flags; + +- if (task_is_running(tsk)) +- return; +- +- task_flags = tsk->flags; + /* + * If a worker goes to sleep, notify and ask workqueue whether it + * wants to wake up a task to maintain concurrency. +@@ -6681,8 +6795,10 @@ static inline void sched_submit_work(struct task_struct *tsk) + blk_flush_plug(tsk->plug, true); + } + +-static void sched_update_worker(struct task_struct *tsk) ++void sched_resume_work(void) + { ++ struct task_struct *tsk = current; ++ + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { + if (tsk->flags & PF_WQ_WORKER) + wq_worker_running(tsk); +@@ -6691,20 +6807,29 @@ static void sched_update_worker(struct task_struct *tsk) + } + } + +-asmlinkage __visible void __sched schedule(void) ++static void schedule_loop(unsigned int sched_mode) + { +- struct task_struct *tsk = current; +- +- sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(SM_NONE); ++ __schedule(sched_mode); + sched_preempt_enable_no_resched(); + } while (need_resched()); +- sched_update_worker(tsk); ++} ++ ++asmlinkage __visible void __sched schedule(void) ++{ ++ if (!task_is_running(current)) ++ sched_submit_work(); ++ schedule_loop(SM_NONE); ++ sched_resume_work(); + } + EXPORT_SYMBOL(schedule); + ++void schedule_rtmutex(void) ++{ ++ schedule_loop(SM_NONE); ++} ++ + /* + * synchronize_rcu_tasks() makes sure that no task is stuck in preempted + * state (have scheduled out non-voluntarily) by making sure that all +@@ -6764,11 +6889,7 @@ void __sched schedule_preempt_disabled(void) + #ifdef CONFIG_PREEMPT_RT + void __sched notrace schedule_rtlock(void) + { +- do { +- preempt_disable(); +- __schedule(SM_RTLOCK_WAIT); +- sched_preempt_enable_no_resched(); +- } while (need_resched()); ++ schedule_loop(SM_RTLOCK_WAIT); + } + NOKPROBE_SYMBOL(schedule_rtlock); + #endif +@@ -6802,6 +6923,30 @@ static void __sched notrace preempt_schedule_common(void) + } while (need_resched()); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++/* ++ * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is ++ * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as ++ * preempt_lazy_count counter >0. 
++ */ ++static __always_inline int preemptible_lazy(void) ++{ ++ if (test_thread_flag(TIF_NEED_RESCHED)) ++ return 1; ++ if (current_thread_info()->preempt_lazy_count) ++ return 0; ++ return 1; ++} ++ ++#else ++ ++static inline int preemptible_lazy(void) ++{ ++ return 1; ++} ++ ++#endif ++ + #ifdef CONFIG_PREEMPTION + /* + * This is the entry point to schedule() from in-kernel preemption +@@ -6815,6 +6960,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) + */ + if (likely(!preemptible())) + return; ++ if (!preemptible_lazy()) ++ return; + preempt_schedule_common(); + } + NOKPROBE_SYMBOL(preempt_schedule); +@@ -6862,6 +7009,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + if (likely(!preemptible())) + return; + ++ if (!preemptible_lazy()) ++ return; ++ + do { + /* + * Because the function tracer can trace preempt_count_sub() +@@ -9167,7 +9317,9 @@ void __init init_idle(struct task_struct *idle, int cpu) + + /* Set the preempt count _outside_ the spinlocks! */ + init_idle_preempt_count(idle, cpu); +- ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(idle)->preempt_lazy_count = 0; ++#endif + /* + * The idle tasks have their own, simple scheduling class: + */ +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index ed89be0aa6503..46ffbbfde97b0 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4948,7 +4948,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) + + delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; + if (delta_exec > ideal_runtime) { +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + /* + * The current task ran long enough, ensure it doesn't get + * re-elected due to buddy favours. +@@ -4972,7 +4972,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) + return; + + if (delta > ideal_runtime) +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + } + + static void +@@ -5118,7 +5118,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) + * validating it and just reschedule. + */ + if (queued) { +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + return; + } + /* +@@ -5267,7 +5267,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) + * hierarchy can be throttled + */ + if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + } + + static __always_inline +@@ -6142,7 +6142,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) + + if (delta < 0) { + if (task_current(rq, p)) +- resched_curr(rq); ++ resched_curr_lazy(rq); + return; + } + hrtick_start(rq, delta); +@@ -7871,7 +7871,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + return; + + preempt: +- resched_curr(rq); ++ resched_curr_lazy(rq); + /* + * Only set the backward buddy when the current task is still + * on the rq. This can happen when a wakeup gets interleaved +@@ -12036,7 +12036,7 @@ static void task_fork_fair(struct task_struct *p) + * 'current' within the tree based on its new key value. 
+ */ + swap(curr->vruntime, se->vruntime); +- resched_curr(rq); ++ resched_curr_lazy(rq); + } + + se->vruntime -= cfs_rq->min_vruntime; +@@ -12063,7 +12063,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) + */ + if (task_current(rq, p)) { + if (p->prio > oldprio) +- resched_curr(rq); ++ resched_curr_lazy(rq); + } else + check_preempt_curr(rq, p, 0); + } +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index ee7f23c76bd33..e13090e33f3c4 100644 +--- a/kernel/sched/features.h ++++ b/kernel/sched/features.h +@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) + + #ifdef CONFIG_PREEMPT_RT + SCHED_FEAT(TTWU_QUEUE, false) ++# ifdef CONFIG_PREEMPT_LAZY ++SCHED_FEAT(PREEMPT_LAZY, true) ++# endif + #else + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 3e8df6d31c1e3..6f272ef973675 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2370,6 +2370,15 @@ extern void reweight_task(struct task_struct *p, int prio); + extern void resched_curr(struct rq *rq); + extern void resched_cpu(int cpu); + ++#ifdef CONFIG_PREEMPT_LAZY ++extern void resched_curr_lazy(struct rq *rq); ++#else ++static inline void resched_curr_lazy(struct rq *rq) ++{ ++ resched_curr(rq); ++} ++#endif ++ + extern struct rt_bandwidth def_rt_bandwidth; + extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); + extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); +diff --git a/kernel/signal.c b/kernel/signal.c +index 8cb28f1df2941..138d68cfc204d 100644 +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -432,7 +432,18 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, + return NULL; + + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { +- q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); ++ ++ if (!sigqueue_flags) { ++ struct sighand_struct *sighand = t->sighand; ++ ++ lockdep_assert_held(&sighand->siglock); ++ if (sighand->sigqueue_cache) { ++ q = sighand->sigqueue_cache; ++ sighand->sigqueue_cache = NULL; ++ } ++ } ++ if (!q) ++ q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); + } else { + print_dropped_signal(sig); + } +@@ -447,14 +458,43 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, + return q; + } + +-static void __sigqueue_free(struct sigqueue *q) ++static bool sigqueue_cleanup_accounting(struct sigqueue *q) + { + if (q->flags & SIGQUEUE_PREALLOC) +- return; ++ return false; + if (q->ucounts) { + dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); + q->ucounts = NULL; + } ++ return true; ++} ++ ++static void __sigqueue_free(struct sigqueue *q) ++{ ++ if (!sigqueue_cleanup_accounting(q)) ++ return; ++ kmem_cache_free(sigqueue_cachep, q); ++} ++ ++void sigqueue_free_cached_entry(struct sigqueue *q) ++{ ++ if (!q) ++ return; ++ kmem_cache_free(sigqueue_cachep, q); ++} ++ ++static void sigqueue_cache_or_free(struct sigqueue *q) ++{ ++ struct sighand_struct *sighand = current->sighand; ++ ++ if (!sigqueue_cleanup_accounting(q)) ++ return; ++ ++ lockdep_assert_held(&sighand->siglock); ++ if (!sighand->sigqueue_cache) { ++ sighand->sigqueue_cache = q; ++ return; ++ } + kmem_cache_free(sigqueue_cachep, q); + } + +@@ -594,7 +634,7 @@ static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *i + (info->si_code == SI_TIMER) && + (info->si_sys_private); + +- __sigqueue_free(first); ++ sigqueue_cache_or_free(first); + } else { + /* + * Ok, it wasn't in the queue. 
This must be +@@ -2296,15 +2336,31 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, + do_notify_parent_cldstop(current, false, why); + + /* +- * Don't want to allow preemption here, because +- * sys_ptrace() needs this task to be inactive. ++ * The previous do_notify_parent_cldstop() invocation woke ptracer. ++ * One a PREEMPTION kernel this can result in preemption requirement ++ * which will be fulfilled after read_unlock() and the ptracer will be ++ * put on the CPU. ++ * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for ++ * this task wait in schedule(). If this task gets preempted then it ++ * remains enqueued on the runqueue. The ptracer will observe this and ++ * then sleep for a delay of one HZ tick. In the meantime this task ++ * gets scheduled, enters schedule() and will wait for the ptracer. + * +- * XXX: implement read_unlock_no_resched(). ++ * This preemption point is not bad from correctness point of view but ++ * extends the runtime by one HZ tick time due to the ptracer's sleep. ++ * The preempt-disable section ensures that there will be no preemption ++ * between unlock and schedule() and so improving the performance since ++ * the ptracer has no reason to sleep. ++ * ++ * This optimisation is not doable on PREEMPT_RT due to the spinlock_t ++ * within the preempt-disable section. + */ +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + read_unlock(&tasklist_lock); + cgroup_enter_frozen(); +- preempt_enable_no_resched(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable_no_resched(); + schedule(); + cgroup_leave_frozen(true); + +diff --git a/kernel/softirq.c b/kernel/softirq.c +index c8a6913c067d9..af9e879bbbf75 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -80,21 +80,6 @@ static void wakeup_softirqd(void) + wake_up_process(tsk); + } + +-/* +- * If ksoftirqd is scheduled, we do not want to process pending softirqs +- * right now. Let ksoftirqd handle this at its own rate, to get fairness, +- * unless we're doing some of the synchronous softirqs. 
+- */ +-#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ)) +-static bool ksoftirqd_running(unsigned long pending) +-{ +- struct task_struct *tsk = __this_cpu_read(ksoftirqd); +- +- if (pending & SOFTIRQ_NOW_MASK) +- return false; +- return tsk && task_is_running(tsk) && !__kthread_should_park(tsk); +-} +- + #ifdef CONFIG_TRACE_IRQFLAGS + DEFINE_PER_CPU(int, hardirqs_enabled); + DEFINE_PER_CPU(int, hardirq_context); +@@ -236,7 +221,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) + goto out; + + pending = local_softirq_pending(); +- if (!pending || ksoftirqd_running(pending)) ++ if (!pending) + goto out; + + /* +@@ -432,9 +417,6 @@ static inline bool should_wake_ksoftirqd(void) + + static inline void invoke_softirq(void) + { +- if (ksoftirqd_running(local_softirq_pending())) +- return; +- + if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) { + #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK + /* +@@ -468,7 +450,7 @@ asmlinkage __visible void do_softirq(void) + + pending = local_softirq_pending(); + +- if (pending && !ksoftirqd_running(pending)) ++ if (pending) + do_softirq_own_stack(); + + local_irq_restore(flags); +@@ -637,6 +619,24 @@ static inline void tick_irq_exit(void) + #endif + } + ++#ifdef CONFIG_PREEMPT_RT ++DEFINE_PER_CPU(struct task_struct *, timersd); ++DEFINE_PER_CPU(unsigned long, pending_timer_softirq); ++ ++static void wake_timersd(void) ++{ ++ struct task_struct *tsk = __this_cpu_read(timersd); ++ ++ if (tsk) ++ wake_up_process(tsk); ++} ++ ++#else ++ ++static inline void wake_timersd(void) { } ++ ++#endif ++ + static inline void __irq_exit_rcu(void) + { + #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED +@@ -649,6 +649,10 @@ static inline void __irq_exit_rcu(void) + if (!in_interrupt() && local_softirq_pending()) + invoke_softirq(); + ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() && ++ !(in_nmi() | in_hardirq())) ++ wake_timersd(); ++ + tick_irq_exit(); + } + +@@ -976,12 +980,70 @@ static struct smp_hotplug_thread softirq_threads = { + .thread_comm = "ksoftirqd/%u", + }; + ++#ifdef CONFIG_PREEMPT_RT ++static void timersd_setup(unsigned int cpu) ++{ ++ sched_set_fifo_low(current); ++} ++ ++static int timersd_should_run(unsigned int cpu) ++{ ++ return local_pending_timers(); ++} ++ ++static void run_timersd(unsigned int cpu) ++{ ++ unsigned int timer_si; ++ ++ ksoftirqd_run_begin(); ++ ++ timer_si = local_pending_timers(); ++ __this_cpu_write(pending_timer_softirq, 0); ++ or_softirq_pending(timer_si); ++ ++ __do_softirq(); ++ ++ ksoftirqd_run_end(); ++} ++ ++static void raise_ktimers_thread(unsigned int nr) ++{ ++ trace_softirq_raise(nr); ++ __this_cpu_or(pending_timer_softirq, 1 << nr); ++} ++ ++void raise_hrtimer_softirq(void) ++{ ++ raise_ktimers_thread(HRTIMER_SOFTIRQ); ++} ++ ++void raise_timer_softirq(void) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ raise_ktimers_thread(TIMER_SOFTIRQ); ++ wake_timersd(); ++ local_irq_restore(flags); ++} ++ ++static struct smp_hotplug_thread timer_threads = { ++ .store = &timersd, ++ .setup = timersd_setup, ++ .thread_should_run = timersd_should_run, ++ .thread_fn = run_timersd, ++ .thread_comm = "ktimers/%u", ++}; ++#endif ++ + static __init int spawn_ksoftirqd(void) + { + cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, + takeover_tasklets); + BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); +- ++#ifdef CONFIG_PREEMPT_RT ++ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); ++#endif + return 0; + } + early_initcall(spawn_ksoftirqd); +diff 
--git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index e8c08292defcb..10c1246cdba76 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); + } + + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +@@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); + } + + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c +index 808a247205a9a..c0a32aab8f276 100644 +--- a/kernel/time/posix-timers.c ++++ b/kernel/time/posix-timers.c +@@ -140,25 +140,29 @@ static struct k_itimer *posix_timer_by_id(timer_t id) + static int posix_timer_add(struct k_itimer *timer) + { + struct signal_struct *sig = current->signal; +- int first_free_id = sig->posix_timer_id; + struct hlist_head *head; +- int ret = -ENOENT; ++ unsigned int start, id; + +- do { ++ /* Can be written by a different task concurrently in the loop below */ ++ start = READ_ONCE(sig->next_posix_timer_id); ++ ++ for (id = ~start; start != id; id++) { + spin_lock(&hash_lock); +- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; +- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { ++ id = sig->next_posix_timer_id; ++ ++ /* Write the next ID back. Clamp it to the positive space */ ++ WRITE_ONCE(sig->next_posix_timer_id, (id + 1) & INT_MAX); ++ ++ head = &posix_timers_hashtable[hash(sig, id)]; ++ if (!__posix_timers_find(head, sig, id)) { + hlist_add_head_rcu(&timer->t_hash, head); +- ret = sig->posix_timer_id; ++ spin_unlock(&hash_lock); ++ return id; + } +- if (++sig->posix_timer_id < 0) +- sig->posix_timer_id = 0; +- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) +- /* Loop over all possible ids completed */ +- ret = -EAGAIN; + spin_unlock(&hash_lock); +- } while (ret == -ENOENT); +- return ret; ++ } ++ /* POSIX return code when no timer ID could be allocated */ ++ return -EAGAIN; + } + + static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) +@@ -1037,27 +1041,59 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) + } + + /* +- * return timer owned by the process, used by exit_itimers ++ * Delete a timer if it is armed, remove it from the hash and schedule it ++ * for RCU freeing. + */ + static void itimer_delete(struct k_itimer *timer) + { +-retry_delete: +- spin_lock_irq(&timer->it_lock); ++ unsigned long flags; + ++retry_delete: ++ /* ++ * irqsave is required to make timer_wait_running() work. ++ */ ++ spin_lock_irqsave(&timer->it_lock, flags); ++ ++ /* ++ * Even if the timer is not longer accessible from other tasks ++ * it still might be armed and queued in the underlying timer ++ * mechanism. Worse, that timer mechanism might run the expiry ++ * function concurrently. ++ */ + if (timer_delete_hook(timer) == TIMER_RETRY) { +- spin_unlock_irq(&timer->it_lock); ++ /* ++ * Timer is expired concurrently, prevent livelocks ++ * and pointless spinning on RT. 
++ * ++ * The CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y case is ++ * irrelevant here because obviously the exiting task ++ * cannot be expiring timer in task work concurrently. ++ * Ditto for CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n as the ++ * tick interrupt cannot run on this CPU because the above ++ * spin_lock disabled interrupts. ++ * ++ * timer_wait_running() drops timer::it_lock, which opens ++ * the possibility for another task to delete the timer. ++ * ++ * That's not possible here because this is invoked from ++ * do_exit() only for the last thread of the thread group. ++ * So no other task can access that timer. ++ */ ++ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) ++ return; ++ + goto retry_delete; + } + list_del(&timer->list); + +- spin_unlock_irq(&timer->it_lock); ++ spin_unlock_irqrestore(&timer->it_lock, flags); + release_posix_timer(timer, IT_ID_SET); + } + + /* +- * This is called by do_exit or de_thread, only when nobody else can +- * modify the signal->posix_timers list. Yet we need sighand->siglock +- * to prevent the race with /proc/pid/timers. ++ * Invoked from do_exit() when the last thread of a thread group exits. ++ * At that point no other task can access the timers of the dying ++ * task anymore. + */ + void exit_itimers(struct task_struct *tsk) + { +@@ -1067,10 +1103,12 @@ void exit_itimers(struct task_struct *tsk) + if (list_empty(&tsk->signal->posix_timers)) + return; + ++ /* Protect against concurrent read via /proc/$PID/timers */ + spin_lock_irq(&tsk->sighand->siglock); + list_replace_init(&tsk->signal->posix_timers, &timers); + spin_unlock_irq(&tsk->sighand->siglock); + ++ /* The timers are not longer accessible via tsk::signal */ + while (!list_empty(&timers)) { + tmr = list_first_entry(&timers, struct k_itimer, list); + itimer_delete(tmr); +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index a46506f7ec6d0..1ae9e4e8a0715 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -789,7 +789,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) + + static inline bool local_timer_softirq_pending(void) + { +- return local_softirq_pending() & BIT(TIMER_SOFTIRQ); ++ return local_pending_timers() & BIT(TIMER_SOFTIRQ); + } + + static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 63a8ce7177dd4..7cad6fe3c035c 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -2054,7 +2054,7 @@ static void run_local_timers(void) + if (time_before(jiffies, base->next_expiry)) + return; + } +- raise_softirq(TIMER_SOFTIRQ); ++ raise_timer_softirq(); + } + + /* +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 5909aaf2f4c08..2867def70f441 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2694,11 +2694,19 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) + if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) + trace_flags |= TRACE_FLAG_BH_OFF; + +- if (tif_need_resched()) ++ if (tif_need_resched_now()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; ++#ifdef CONFIG_PREEMPT_LAZY ++ /* Run out of bits. 
Share the LAZY and PREEMPT_RESCHED */ ++ if (need_resched_lazy()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; ++#else + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; +- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | ++#endif ++ ++ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | ++ (preempt_lazy_count() & 0xff) << 16 | + (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; + } + +@@ -4287,15 +4295,17 @@ unsigned long trace_total_entries(struct trace_array *tr) + + static void print_lat_help_header(struct seq_file *m) + { +- seq_puts(m, "# _------=> CPU# \n" +- "# / _-----=> irqs-off/BH-disabled\n" +- "# | / _----=> need-resched \n" +- "# || / _---=> hardirq/softirq \n" +- "# ||| / _--=> preempt-depth \n" +- "# |||| / _-=> migrate-disable \n" +- "# ||||| / delay \n" +- "# cmd pid |||||| time | caller \n" +- "# \\ / |||||| \\ | / \n"); ++ seq_puts(m, "# _--------=> CPU# \n" ++ "# / _-------=> irqs-off/BH-disabled\n" ++ "# | / _------=> need-resched \n" ++ "# || / _-----=> need-resched-lazy\n" ++ "# ||| / _----=> hardirq/softirq \n" ++ "# |||| / _---=> preempt-depth \n" ++ "# ||||| / _--=> preempt-lazy-depth\n" ++ "# |||||| / _-=> migrate-disable \n" ++ "# ||||||| / delay \n" ++ "# cmd pid |||||||| time | caller \n" ++ "# \\ / |||||||| \\ | / \n"); + } + + static void print_event_info(struct array_buffer *buf, struct seq_file *m) +@@ -4329,14 +4339,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file + + print_event_info(buf, m); + +- seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); +- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); +- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); +- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); +- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); +- seq_printf(m, "# %.*s|||| / delay\n", prec, space); +- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); +- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); ++ seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space); ++ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); ++ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space); ++ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); ++ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); ++ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); ++ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); ++ seq_printf(m, "# %.*s|||||| / delay\n", prec, space); ++ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); ++ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); + } + + void +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 654ffa40457aa..b2d52f8355b70 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -208,6 +208,7 @@ static int trace_define_common_fields(void) + /* Holds both preempt_count and migrate_disable */ + __common_field(unsigned char, preempt_count); + __common_field(int, pid); ++ __common_field(unsigned char, preempt_lazy_count); + + return ret; + } +diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c +index bd475a00f96d1..89d4a3bfdc6d5 100644 +--- a/kernel/trace/trace_output.c ++++ b/kernel/trace/trace_output.c +@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct 
trace_entry *entry) + { + char hardsoft_irq; + char need_resched; ++ char need_resched_lazy; + char irqs_off; + int hardirq; + int softirq; +@@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + + switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | + TRACE_FLAG_PREEMPT_RESCHED)) { ++#ifndef CONFIG_PREEMPT_LAZY + case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'N'; + break; ++#endif + case TRACE_FLAG_NEED_RESCHED: + need_resched = 'n'; + break; ++#ifndef CONFIG_PREEMPT_LAZY + case TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'p'; + break; ++#endif + default: + need_resched = '.'; + break; + } + ++ need_resched_lazy = ++ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; ++ + hardsoft_irq = + (nmi && hardirq) ? 'Z' : + nmi ? 'z' : +@@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + softirq ? 's' : + '.' ; + +- trace_seq_printf(s, "%c%c%c", +- irqs_off, need_resched, hardsoft_irq); ++ trace_seq_printf(s, "%c%c%c%c", ++ irqs_off, need_resched, need_resched_lazy, ++ hardsoft_irq); + + if (entry->preempt_count & 0xf) + trace_seq_printf(s, "%x", entry->preempt_count & 0xf); + else + trace_seq_putc(s, '.'); + ++ if (entry->preempt_lazy_count) ++ trace_seq_printf(s, "%x", entry->preempt_lazy_count); ++ else ++ trace_seq_putc(s, '.'); ++ + if (entry->preempt_count & 0xf0) + trace_seq_printf(s, "%x", entry->preempt_count >> 4); + else +diff --git a/localversion-rt b/localversion-rt +new file mode 100644 +index 0000000000000..18777ec0c27d4 +--- /dev/null ++++ b/localversion-rt +@@ -0,0 +1 @@ ++-rt15 +diff --git a/net/core/dev.c b/net/core/dev.c +index 1488f700bf819..8c3b70160be8c 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4573,15 +4573,6 @@ static void rps_trigger_softirq(void *data) + + #endif /* CONFIG_RPS */ + +-/* Called from hardirq (IPI) context */ +-static void trigger_rx_softirq(void *data) +-{ +- struct softnet_data *sd = data; +- +- __raise_softirq_irqoff(NET_RX_SOFTIRQ); +- smp_store_release(&sd->defer_ipi_scheduled, 0); +-} +- + /* + * Check if this softnet_data structure is another cpu one + * If yes, queue it to our IPI list and return 1 +@@ -6632,6 +6623,30 @@ static void skb_defer_free_flush(struct softnet_data *sd) + } + } + ++#ifndef CONFIG_PREEMPT_RT ++/* Called from hardirq (IPI) context */ ++static void trigger_rx_softirq(void *data) ++{ ++ struct softnet_data *sd = data; ++ ++ __raise_softirq_irqoff(NET_RX_SOFTIRQ); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++} ++ ++#else ++ ++static void trigger_rx_softirq(struct work_struct *defer_work) ++{ ++ struct softnet_data *sd; ++ ++ sd = container_of(defer_work, struct softnet_data, defer_work); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++ local_bh_disable(); ++ skb_defer_free_flush(sd); ++ local_bh_enable(); ++} ++#endif ++ + static __latent_entropy void net_rx_action(struct softirq_action *h) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); +@@ -11409,7 +11424,11 @@ static int __init net_dev_init(void) + INIT_CSD(&sd->csd, rps_trigger_softirq, sd); + sd->cpu = i; + #endif ++#ifndef CONFIG_PREEMPT_RT + INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); ++#else ++ INIT_WORK(&sd->defer_work, trigger_rx_softirq); ++#endif + spin_lock_init(&sd->defer_lock); + + init_gro_hash(&sd->backlog); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 14bb41aafee30..3f8dac23205c6 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -6856,6 +6856,11 @@ nodefer: 
__kfree_skb(skb); + /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU + * if we are unlucky enough (this seems very unlikely). + */ +- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) ++ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { ++#ifndef CONFIG_PREEMPT_RT + smp_call_function_single_async(cpu, &sd->defer_csd); ++#else ++ schedule_work_on(cpu, &sd->defer_work); ++#endif ++ } + } diff --git a/packages/sysutils/busybox/scripts/fs-resize b/packages/sysutils/busybox/scripts/fs-resize index add0f47cf..0906073c4 100755 --- a/packages/sysutils/busybox/scripts/fs-resize +++ b/packages/sysutils/busybox/scripts/fs-resize @@ -59,9 +59,10 @@ if [ -e /storage/.please_resize_me ] ; then fi fi +# Remove all of the modules that may be loaded to see if /flash will cleanly unmount. for module in $(lsmod | awk '{print $1}') do - rmmod ${module} + rmmod ${module} 2>/dev/null done shutdown -r now &>/dev/null diff --git a/projects/PC/devices/AMD64/linux/linux.x86_64.conf b/projects/PC/devices/AMD64/linux/linux.x86_64.conf index 9c280d779..1ca00ada4 100644 --- a/projects/PC/devices/AMD64/linux/linux.x86_64.conf +++ b/projects/PC/devices/AMD64/linux/linux.x86_64.conf @@ -117,10 +117,12 @@ CONFIG_BPF_SYSCALL=y # CONFIG_BPF_PRELOAD is not set # end of BPF subsystem +CONFIG_HAVE_PREEMPT_LAZY=y CONFIG_PREEMPT_BUILD=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set +CONFIG_PREEMPT_RT=y CONFIG_PREEMPT_COUNT=y CONFIG_PREEMPTION=y CONFIG_PREEMPT_DYNAMIC=y @@ -314,6 +316,7 @@ CONFIG_HAVE_INTEL_TXT=y CONFIG_X86_64_SMP=y CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_FIX_EARLYCON_MEM=y +CONFIG_DYNAMIC_PHYSICAL_MASK=y CONFIG_PGTABLE_LEVELS=4 CONFIG_CC_HAS_SANE_STACKPROTECTOR=y @@ -387,12 +390,12 @@ CONFIG_X86_IO_APIC=y # # Performance monitoring # -CONFIG_PERF_EVENTS_INTEL_UNCORE=y -CONFIG_PERF_EVENTS_INTEL_RAPL=y -CONFIG_PERF_EVENTS_INTEL_CSTATE=y -CONFIG_PERF_EVENTS_AMD_POWER=y -CONFIG_PERF_EVENTS_AMD_UNCORE=y -# CONFIG_PERF_EVENTS_AMD_BRS is not set +CONFIG_PERF_EVENTS_INTEL_UNCORE=m +CONFIG_PERF_EVENTS_INTEL_RAPL=m +CONFIG_PERF_EVENTS_INTEL_CSTATE=m +CONFIG_PERF_EVENTS_AMD_POWER=m +CONFIG_PERF_EVENTS_AMD_UNCORE=m +CONFIG_PERF_EVENTS_AMD_BRS=y # end of Performance monitoring CONFIG_X86_16BIT=y @@ -408,9 +411,11 @@ CONFIG_X86_CPUID=y # CONFIG_X86_5LEVEL is not set CONFIG_X86_DIRECT_GBPAGES=y # CONFIG_X86_CPA_STATISTICS is not set -# CONFIG_AMD_MEM_ENCRYPT is not set +CONFIG_X86_MEM_ENCRYPT=y +CONFIG_AMD_MEM_ENCRYPT=y +# CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set CONFIG_NUMA=y -# CONFIG_AMD_NUMA is not set +CONFIG_AMD_NUMA=y CONFIG_X86_64_ACPI_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_NODES_SHIFT=6 @@ -431,9 +436,9 @@ CONFIG_X86_UMIP=y CONFIG_CC_HAS_IBT=y # CONFIG_X86_KERNEL_IBT is not set CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -CONFIG_X86_INTEL_TSX_MODE_OFF=y +# CONFIG_X86_INTEL_TSX_MODE_OFF is not set # CONFIG_X86_INTEL_TSX_MODE_ON is not set -# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set +CONFIG_X86_INTEL_TSX_MODE_AUTO=y CONFIG_EFI=y CONFIG_EFI_STUB=y CONFIG_EFI_HANDOVER_PROTOCOL=y @@ -541,7 +546,7 @@ CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_PROCESSOR_AGGREGATOR=y CONFIG_ACPI_THERMAL=y -CONFIG_ACPI_PLATFORM_PROFILE=y +CONFIG_ACPI_PLATFORM_PROFILE=m CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y CONFIG_ACPI_TABLE_UPGRADE=y # CONFIG_ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD is not set @@ -566,6 +571,7 @@ CONFIG_HAVE_ACPI_APEI_NMI=y CONFIG_ACPI_PCC=y # CONFIG_ACPI_FFH is not set # CONFIG_PMIC_OPREGION is not set +# 
CONFIG_TPS68470_PMIC_OPREGION is not set CONFIG_ACPI_PRMT=y CONFIG_X86_PM_TIMER=y @@ -593,7 +599,7 @@ CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_X86_INTEL_PSTATE=y CONFIG_X86_PCC_CPUFREQ=m CONFIG_X86_AMD_PSTATE=y -CONFIG_X86_AMD_PSTATE_UT=y +CONFIG_X86_AMD_PSTATE_UT=m CONFIG_X86_ACPI_CPUFREQ=y CONFIG_X86_ACPI_CPUFREQ_CPB=y CONFIG_X86_POWERNOW_K8=y @@ -760,6 +766,7 @@ CONFIG_HAVE_RELIABLE_STACKTRACE=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_COMPAT_OLD_SIGACTION=y CONFIG_COMPAT_32BIT_TIME=y +CONFIG_ARCH_SUPPORTS_RT=y CONFIG_HAVE_ARCH_VMAP_STACK=y CONFIG_VMAP_STACK=y CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y @@ -773,6 +780,7 @@ CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y CONFIG_ARCH_USE_MEMREMAP_PROT=y # CONFIG_LOCK_EVENT_COUNTS is not set CONFIG_ARCH_HAS_MEM_ENCRYPT=y +CONFIG_ARCH_HAS_CC_PLATFORM=y CONFIG_HAVE_STATIC_CALL=y CONFIG_HAVE_STATIC_CALL_INLINE=y CONFIG_HAVE_PREEMPT_DYNAMIC=y @@ -1778,11 +1786,12 @@ CONFIG_EEPROM_EE1004=y # CONFIG_SENSORS_LIS3_I2C is not set CONFIG_ALTERA_STAPL=m -# CONFIG_INTEL_MEI is not set -# CONFIG_INTEL_MEI_ME is not set -# CONFIG_INTEL_MEI_TXE is not set -# CONFIG_INTEL_MEI_HDCP is not set -# CONFIG_INTEL_MEI_PXP is not set +CONFIG_INTEL_MEI=m +CONFIG_INTEL_MEI_ME=m +CONFIG_INTEL_MEI_TXE=m +CONFIG_INTEL_MEI_GSC=m +CONFIG_INTEL_MEI_HDCP=m +CONFIG_INTEL_MEI_PXP=m # CONFIG_VMWARE_VMCI is not set # CONFIG_GENWQE is not set # CONFIG_ECHO is not set @@ -1936,7 +1945,7 @@ CONFIG_SATA_VIA=y # PATA SFF controllers with BMDMA # CONFIG_PATA_ALI=y -CONFIG_PATA_AMD=y +CONFIG_PATA_AMD=m # CONFIG_PATA_ARTOP is not set CONFIG_PATA_ATIIXP=y # CONFIG_PATA_ATP867X is not set @@ -2038,9 +2047,10 @@ CONFIG_VORTEX=y CONFIG_NET_VENDOR_AMAZON=y # CONFIG_ENA_ETHERNET is not set CONFIG_NET_VENDOR_AMD=y -# CONFIG_AMD8111_ETH is not set +CONFIG_AMD8111_ETH=m CONFIG_PCNET32=y -# CONFIG_AMD_XGBE is not set +CONFIG_AMD_XGBE=m +CONFIG_AMD_XGBE_HAVE_ECC=y CONFIG_NET_VENDOR_AQUANTIA=y CONFIG_AQTION=y CONFIG_NET_VENDOR_ARC=y @@ -2201,7 +2211,7 @@ CONFIG_FIXED_PHY=y # # MII PHY device drivers # -CONFIG_AMD_PHY=y +CONFIG_AMD_PHY=m # CONFIG_ADIN_PHY is not set # CONFIG_ADIN1100_PHY is not set # CONFIG_AQUANTIA_PHY is not set @@ -2217,7 +2227,7 @@ CONFIG_BCM_NET_PHYLIB=y # CONFIG_DAVICOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_LXT_PHY is not set -# CONFIG_INTEL_XWAY_PHY is not set +CONFIG_INTEL_XWAY_PHY=m # CONFIG_LSI_ET1011C_PHY is not set CONFIG_MARVELL_PHY=y # CONFIG_MARVELL_10G_PHY is not set @@ -2588,7 +2598,7 @@ CONFIG_NET_FAILOVER=y CONFIG_INPUT=y CONFIG_INPUT_LEDS=y CONFIG_INPUT_FF_MEMLESS=y -# CONFIG_INPUT_SPARSEKMAP is not set +CONFIG_INPUT_SPARSEKMAP=m # CONFIG_INPUT_MATRIXKMAP is not set CONFIG_INPUT_VIVALDIFMAP=y @@ -2978,13 +2988,15 @@ CONFIG_I2C_CCGX_UCSI=y # CONFIG_I2C_ALI1535 is not set # CONFIG_I2C_ALI1563 is not set # CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_AMD8111 is not set -# CONFIG_I2C_AMD_MP2 is not set +CONFIG_I2C_AMD756=m +CONFIG_I2C_AMD756_S4882=m +CONFIG_I2C_AMD8111=m +CONFIG_I2C_AMD_MP2=m CONFIG_I2C_I801=y CONFIG_I2C_ISCH=y # CONFIG_I2C_ISMT is not set CONFIG_I2C_PIIX4=y +# CONFIG_I2C_CHT_WC is not set CONFIG_I2C_NFORCE2=y # CONFIG_I2C_NFORCE2_S4985 is not set CONFIG_I2C_NVIDIA_GPU=m @@ -3062,8 +3074,9 @@ CONFIG_SPI_DW_PCI=m CONFIG_SPI_DW_MMIO=m # CONFIG_SPI_NXP_FLEXSPI is not set CONFIG_SPI_GPIO=m -# CONFIG_SPI_INTEL_PCI is not set -# CONFIG_SPI_INTEL_PLATFORM is not set +CONFIG_SPI_INTEL=m +CONFIG_SPI_INTEL_PCI=m +CONFIG_SPI_INTEL_PLATFORM=m # CONFIG_SPI_MICROCHIP_CORE is not set # CONFIG_SPI_MICROCHIP_CORE_QSPI is not set # 
CONFIG_SPI_LANTIQ_SSC is not set @@ -3184,17 +3197,18 @@ CONFIG_GPIOLIB_IRQCHIP=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_CDEV=y CONFIG_GPIO_CDEV_V1=y +CONFIG_GPIO_GENERIC=m # # Memory mapped GPIO drivers # -# CONFIG_GPIO_AMDPT is not set +CONFIG_GPIO_AMDPT=m # CONFIG_GPIO_DWAPB is not set # CONFIG_GPIO_EXAR is not set # CONFIG_GPIO_GENERIC_PLATFORM is not set # CONFIG_GPIO_MB86S7X is not set # CONFIG_GPIO_VX855 is not set -# CONFIG_GPIO_AMD_FCH is not set +CONFIG_GPIO_AMD_FCH=m # end of Memory mapped GPIO drivers # @@ -3222,12 +3236,15 @@ CONFIG_GPIO_CDEV_V1=y # # MFD GPIO expanders # +# CONFIG_GPIO_CRYSTAL_COVE is not set +# CONFIG_GPIO_TPS68470 is not set +# CONFIG_GPIO_WHISKEY_COVE is not set # end of MFD GPIO expanders # # PCI GPIO expanders # -# CONFIG_GPIO_AMD8111 is not set +CONFIG_GPIO_AMD8111=m # CONFIG_GPIO_ML_IOH is not set # CONFIG_GPIO_PCI_IDIO_16 is not set # CONFIG_GPIO_PCIE_IDIO_24 is not set @@ -3484,6 +3501,7 @@ CONFIG_THERMAL_NETLINK=y CONFIG_THERMAL_STATISTICS=y CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_HWMON=y +CONFIG_THERMAL_ACPI=y CONFIG_THERMAL_WRITABLE_TRIPS=y CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y # CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set @@ -3510,9 +3528,10 @@ CONFIG_INTEL_SOC_DTS_THERMAL=m # CONFIG_INT340X_THERMAL is not set # end of ACPI INT340X thermal drivers -# CONFIG_INTEL_PCH_THERMAL is not set -# CONFIG_INTEL_TCC_COOLING is not set -# CONFIG_INTEL_MENLOW is not set +CONFIG_INTEL_BXT_PMIC_THERMAL=m +CONFIG_INTEL_PCH_THERMAL=m +CONFIG_INTEL_TCC_COOLING=m +CONFIG_INTEL_MENLOW=m CONFIG_INTEL_HFI_THERMAL=y # end of Intel thermal drivers @@ -3560,16 +3579,18 @@ CONFIG_MFD_CORE=y # CONFIG_MFD_MC13XXX_SPI is not set # CONFIG_MFD_MC13XXX_I2C is not set # CONFIG_MFD_MP2629 is not set -# CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set +CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m # CONFIG_LPC_ICH is not set CONFIG_LPC_SCH=y -# CONFIG_INTEL_SOC_PMIC is not set -# CONFIG_INTEL_SOC_PMIC_CHTWC is not set -# CONFIG_INTEL_SOC_PMIC_CHTDC_TI is not set -CONFIG_MFD_INTEL_LPSS=y -CONFIG_MFD_INTEL_LPSS_ACPI=y -CONFIG_MFD_INTEL_LPSS_PCI=y -# CONFIG_MFD_INTEL_PMC_BXT is not set +CONFIG_INTEL_SOC_PMIC=y +CONFIG_INTEL_SOC_PMIC_BXTWC=m +CONFIG_INTEL_SOC_PMIC_CHTWC=y +CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m +CONFIG_INTEL_SOC_PMIC_MRFLD=m +CONFIG_MFD_INTEL_LPSS=m +CONFIG_MFD_INTEL_LPSS_ACPI=m +CONFIG_MFD_INTEL_LPSS_PCI=m +CONFIG_MFD_INTEL_PMC_BXT=m # CONFIG_MFD_IQS62X is not set # CONFIG_MFD_JANZ_CMODIO is not set # CONFIG_MFD_KEMPLD is not set @@ -3688,6 +3709,7 @@ CONFIG_REGULATOR=y # CONFIG_REGULATOR_TPS6507X is not set # CONFIG_REGULATOR_TPS65132 is not set # CONFIG_REGULATOR_TPS6524X is not set +# CONFIG_REGULATOR_TPS68470 is not set # CONFIG_REGULATOR_QCOM_LABIBB is not set CONFIG_RC_CORE=y CONFIG_BPF_LIRC_MODE2=y @@ -4548,7 +4570,7 @@ CONFIG_APERTURE_HELPERS=y CONFIG_VIDEO_NOMODESET=y CONFIG_AGP=y # CONFIG_AGP_AMD64 is not set -CONFIG_AGP_INTEL=y +CONFIG_AGP_INTEL=m # CONFIG_AGP_SIS is not set CONFIG_AGP_VIA=y CONFIG_INTEL_GTT=y @@ -4619,6 +4641,7 @@ CONFIG_DRM_I915_FORCE_PROBE="" CONFIG_DRM_I915_CAPTURE_ERROR=y CONFIG_DRM_I915_COMPRESS_ERROR=y CONFIG_DRM_I915_USERPTR=y +# CONFIG_DRM_I915_PXP is not set # # drm/i915 Debugging @@ -4896,7 +4919,7 @@ CONFIG_SND_EMU10K1X=m CONFIG_SND_ICE1712=m CONFIG_SND_ICE1724=m CONFIG_SND_INTEL8X0=m -# CONFIG_SND_INTEL8X0M is not set +CONFIG_SND_INTEL8X0M=m # CONFIG_SND_KORG1212 is not set # CONFIG_SND_LOLA is not set # CONFIG_SND_LX6464ES is not set @@ -4946,7 +4969,7 @@ CONFIG_SND_HDA_CODEC_CMEDIA=m CONFIG_SND_HDA_CODEC_SI3054=m 
CONFIG_SND_HDA_GENERIC=m CONFIG_SND_HDA_POWER_SAVE_DEFAULT=1 -# CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM is not set +CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM=y # CONFIG_SND_HDA_CTL_DEV_ID is not set # end of HD-Audio @@ -4994,23 +5017,28 @@ CONFIG_SND_SOC_ACPI=m CONFIG_SND_SOC_AMD_ACP=m CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m -# CONFIG_SND_SOC_AMD_ST_ES8336_MACH is not set +CONFIG_SND_SOC_AMD_ST_ES8336_MACH=m CONFIG_SND_SOC_AMD_ACP3x=m CONFIG_SND_SOC_AMD_RENOIR=m CONFIG_SND_SOC_AMD_RENOIR_MACH=m CONFIG_SND_SOC_AMD_ACP5x=m CONFIG_SND_SOC_AMD_VANGOGH_MACH=m CONFIG_SND_SOC_AMD_ACP6x=m -# CONFIG_SND_SOC_AMD_YC_MACH is not set +CONFIG_SND_SOC_AMD_YC_MACH=m CONFIG_SND_AMD_ACP_CONFIG=m CONFIG_SND_SOC_AMD_ACP_COMMON=m -# CONFIG_SND_SOC_AMD_ACP_PCI is not set -# CONFIG_SND_AMD_ASOC_RENOIR is not set -# CONFIG_SND_AMD_ASOC_REMBRANDT is not set -# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set -# CONFIG_SND_SOC_AMD_SOF_MACH is not set -# CONFIG_SND_SOC_AMD_RPL_ACP6x is not set -# CONFIG_SND_SOC_AMD_PS is not set +CONFIG_SND_SOC_AMD_ACP_PDM=m +CONFIG_SND_SOC_AMD_ACP_I2S=m +CONFIG_SND_SOC_AMD_ACP_PCM=m +CONFIG_SND_SOC_AMD_ACP_PCI=m +CONFIG_SND_AMD_ASOC_RENOIR=m +CONFIG_SND_AMD_ASOC_REMBRANDT=m +CONFIG_SND_SOC_AMD_MACH_COMMON=m +CONFIG_SND_SOC_AMD_LEGACY_MACH=m +CONFIG_SND_SOC_AMD_SOF_MACH=m +CONFIG_SND_SOC_AMD_RPL_ACP6x=m +CONFIG_SND_SOC_AMD_PS=m +CONFIG_SND_SOC_AMD_PS_MACH=m # CONFIG_SND_ATMEL_SOC is not set # CONFIG_SND_BCM63XX_I2S_WHISTLER is not set # CONFIG_SND_DESIGNWARE_I2S is not set @@ -5037,7 +5065,7 @@ CONFIG_SND_SOC_AMD_ACP_COMMON=m # CONFIG_SND_SOC_IMG is not set CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y CONFIG_SND_SOC_INTEL_SST=m -# CONFIG_SND_SOC_INTEL_CATPT is not set +CONFIG_SND_SOC_INTEL_CATPT=m CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m @@ -5051,6 +5079,7 @@ CONFIG_SND_SOC_INTEL_CFL=m CONFIG_SND_SOC_INTEL_CML_H=m CONFIG_SND_SOC_INTEL_CML_LP=m CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m +CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m CONFIG_SND_SOC_ACPI_INTEL_MATCH=m @@ -5063,25 +5092,29 @@ CONFIG_SND_SOC_INTEL_AVS=m # # Available DSP configurations # -# CONFIG_SND_SOC_INTEL_AVS_MACH_DA7219 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_DMIC is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_HDAUDIO is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_I2S_TEST is not set +CONFIG_SND_SOC_INTEL_AVS_MACH_DA7219=m +CONFIG_SND_SOC_INTEL_AVS_MACH_DMIC=m +CONFIG_SND_SOC_INTEL_AVS_MACH_HDAUDIO=m +CONFIG_SND_SOC_INTEL_AVS_MACH_I2S_TEST=m # CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98927 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98357A is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98373 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_NAU8825 is not set +CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98357A=m +CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98373=m +CONFIG_SND_SOC_INTEL_AVS_MACH_NAU8825=m # CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set +CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT298=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682=m +CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567=m # end of Intel AVS Machine drivers CONFIG_SND_SOC_INTEL_MACH=y -# 
CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set +CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES=y CONFIG_SND_SOC_INTEL_HDA_DSP_COMMON=m +CONFIG_SND_SOC_INTEL_HASWELL_MACH=m +CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m +CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m +CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m @@ -5091,17 +5124,19 @@ CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH=m -# CONFIG_SND_SOC_INTEL_SKL_RT286_MACH is not set -# CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH is not set -# CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_BXT_RT298_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH is not set +# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set +CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m +CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m +CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m +CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m +CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON=m +CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m +CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m +CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m +CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m +CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m +CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m +CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m # CONFIG_SND_SOC_MTK_BTCVSD is not set # CONFIG_SND_SOC_SOF_TOPLEVEL is not set @@ -5200,8 +5235,9 @@ CONFIG_SND_SOC_MAX98357A=m # CONFIG_SND_SOC_MAX9867 is not set CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_MAX98520 is not set -# CONFIG_SND_SOC_MAX98373_I2C is not set -# CONFIG_SND_SOC_MAX98390 is not set +CONFIG_SND_SOC_MAX98373=m +CONFIG_SND_SOC_MAX98373_I2C=m +CONFIG_SND_SOC_MAX98390=m # CONFIG_SND_SOC_MAX98396 is not set # CONFIG_SND_SOC_MAX9860 is not set # CONFIG_SND_SOC_MSM8916_WCD_ANALOG is not set @@ -5222,15 +5258,27 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_PEB2466 is not set # CONFIG_SND_SOC_RK3328 is not set CONFIG_SND_SOC_RL6231=m +CONFIG_SND_SOC_RL6347A=m +CONFIG_SND_SOC_RT274=m +CONFIG_SND_SOC_RT286=m +CONFIG_SND_SOC_RT298=m +CONFIG_SND_SOC_RT1019=m +CONFIG_SND_SOC_RT5514=m +CONFIG_SND_SOC_RT5514_SPI=m # CONFIG_SND_SOC_RT5616 is not set # CONFIG_SND_SOC_RT5631 is not set CONFIG_SND_SOC_RT5640=m CONFIG_SND_SOC_RT5645=m CONFIG_SND_SOC_RT5651=m # CONFIG_SND_SOC_RT5659 is not set +CONFIG_SND_SOC_RT5660=m +CONFIG_SND_SOC_RT5663=m CONFIG_SND_SOC_RT5670=m +CONFIG_SND_SOC_RT5677=m +CONFIG_SND_SOC_RT5677_SPI=m CONFIG_SND_SOC_RT5682=m CONFIG_SND_SOC_RT5682_I2C=m +CONFIG_SND_SOC_RT5682S=m # CONFIG_SND_SOC_RT9120 is not set # CONFIG_SND_SOC_SGTL5000 is not set # CONFIG_SND_SOC_SIMPLE_AMPLIFIER is not set @@ -5242,7 +5290,7 @@ CONFIG_SND_SOC_SPDIF=m # CONFIG_SND_SOC_SSM2518 is not set # CONFIG_SND_SOC_SSM2602_SPI is not set # CONFIG_SND_SOC_SSM2602_I2C is not set -# CONFIG_SND_SOC_SSM4567 is not set +CONFIG_SND_SOC_SSM4567=m # CONFIG_SND_SOC_STA32X is not set # CONFIG_SND_SOC_STA350 is 
not set # CONFIG_SND_SOC_STI_SAS is not set @@ -5309,6 +5357,7 @@ CONFIG_SND_SOC_TS3A227E=m CONFIG_SND_SOC_NAU8821=m # CONFIG_SND_SOC_NAU8822 is not set CONFIG_SND_SOC_NAU8824=m +CONFIG_SND_SOC_NAU8825=m # CONFIG_SND_SOC_TPA6130A2 is not set # CONFIG_SND_SOC_LPASS_WSA_MACRO is not set # CONFIG_SND_SOC_LPASS_VA_MACRO is not set @@ -5478,13 +5527,14 @@ CONFIG_I2C_HID_CORE=m # # Intel ISH HID support # -# CONFIG_INTEL_ISH_HID is not set +CONFIG_INTEL_ISH_HID=m +CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m # end of Intel ISH HID support # # AMD SFH HID Support # -# CONFIG_AMD_SFH_HID is not set +CONFIG_AMD_SFH_HID=m # end of AMD SFH HID Support CONFIG_USB_OHCI_LITTLE_ENDIAN=y @@ -5689,7 +5739,9 @@ CONFIG_TYPEC=m # CONFIG_TYPEC_TCPM is not set # CONFIG_TYPEC_UCSI is not set # CONFIG_TYPEC_TPS6598X is not set +# CONFIG_TYPEC_ANX7411 is not set # CONFIG_TYPEC_RT1719 is not set +# CONFIG_TYPEC_HD3SS3220 is not set # CONFIG_TYPEC_STUSB160X is not set # CONFIG_TYPEC_WUSB3801 is not set @@ -5699,6 +5751,7 @@ CONFIG_TYPEC=m # CONFIG_TYPEC_MUX_FSA4480 is not set # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_PI3USB30532 is not set +CONFIG_TYPEC_MUX_INTEL_PMC=m # end of USB Type-C Multiplexer/DeMultiplexer Switch support # @@ -5707,7 +5760,8 @@ CONFIG_TYPEC=m # CONFIG_TYPEC_DP_ALTMODE is not set # end of USB Type-C Alternate Mode drivers -# CONFIG_USB_ROLE_SWITCH is not set +CONFIG_USB_ROLE_SWITCH=m +CONFIG_USB_ROLES_INTEL_XHCI=m CONFIG_MMC=y CONFIG_MMC_BLOCK=y CONFIG_MMC_BLOCK_MINORS=32 @@ -5765,7 +5819,7 @@ CONFIG_LEDS_CLASS_FLASH=y # CONFIG_LEDS_DAC124S085 is not set # CONFIG_LEDS_REGULATOR is not set # CONFIG_LEDS_BD2802 is not set -# CONFIG_LEDS_INTEL_SS4200 is not set +CONFIG_LEDS_INTEL_SS4200=m # CONFIG_LEDS_LT3593 is not set # CONFIG_LEDS_TCA6507 is not set # CONFIG_LEDS_TLC591XX is not set @@ -5943,12 +5997,14 @@ CONFIG_DMA_VIRTUAL_CHANNELS=y CONFIG_DMA_ACPI=y # CONFIG_ALTERA_MSGDMA is not set CONFIG_INTEL_IDMA64=m -# CONFIG_INTEL_IDXD is not set +CONFIG_INTEL_IDXD_BUS=m +CONFIG_INTEL_IDXD=m # CONFIG_INTEL_IDXD_COMPAT is not set +CONFIG_INTEL_IDXD_PERFMON=y CONFIG_INTEL_IOATDMA=m # CONFIG_PLX_DMA is not set # CONFIG_XILINX_XDMA is not set -# CONFIG_AMD_PTDMA is not set +CONFIG_AMD_PTDMA=m # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set CONFIG_DW_DMAC_CORE=m @@ -5957,7 +6013,7 @@ CONFIG_DW_DMAC_PCI=m # CONFIG_DW_EDMA is not set CONFIG_HSU_DMA=y # CONFIG_SF_PDMA is not set -# CONFIG_INTEL_LDMA is not set +CONFIG_INTEL_LDMA=y # # DMA Clients @@ -6029,7 +6085,7 @@ CONFIG_RTS5208=y CONFIG_VT6656=m # CONFIG_FB_SM750 is not set CONFIG_STAGING_MEDIA=y -# CONFIG_INTEL_ATOMISP is not set +CONFIG_INTEL_ATOMISP=y # CONFIG_DVB_AV7110 is not set CONFIG_VIDEO_IPU3_IMGU=m # CONFIG_STAGING_MEDIA_DEPRECATED is not set @@ -6062,9 +6118,9 @@ CONFIG_WMI_BMOF=y # CONFIG_ACERHDF is not set # CONFIG_ACER_WIRELESS is not set # CONFIG_ACER_WMI is not set -CONFIG_AMD_PMF=y -CONFIG_AMD_PMC=y -CONFIG_AMD_HSMP=y +CONFIG_AMD_PMF=m +CONFIG_AMD_PMC=m +CONFIG_AMD_HSMP=m # CONFIG_ADV_SWBUTTON is not set # CONFIG_APPLE_GMUX is not set # CONFIG_ASUS_LAPTOP is not set @@ -6082,37 +6138,46 @@ CONFIG_AMD_HSMP=y # CONFIG_SENSORS_HDAPS is not set # CONFIG_THINKPAD_ACPI is not set # CONFIG_THINKPAD_LMI is not set -CONFIG_INTEL_ATOMISP2_PDX86=y -CONFIG_INTEL_ATOMISP2_PM=y # CONFIG_INTEL_IFS is not set -# CONFIG_INTEL_SAR_INT1092 is not set -# CONFIG_INTEL_SKL_INT3472 is not set -# CONFIG_INTEL_PMC_CORE is not set +CONFIG_INTEL_SAR_INT1092=m +CONFIG_INTEL_SKL_INT3472=m +CONFIG_INTEL_PMC_CORE=y 
+CONFIG_INTEL_PMT_CLASS=m +CONFIG_INTEL_PMT_TELEMETRY=m +CONFIG_INTEL_PMT_CRASHLOG=m # # Intel Speed Select Technology interface support # -# CONFIG_INTEL_SPEED_SELECT_INTERFACE is not set +CONFIG_INTEL_SPEED_SELECT_INTERFACE=m # end of Intel Speed Select Technology interface support -# CONFIG_INTEL_WMI_SBL_FW_UPDATE is not set -# CONFIG_INTEL_WMI_THUNDERBOLT is not set +CONFIG_INTEL_TELEMETRY=m +CONFIG_INTEL_WMI=y +CONFIG_INTEL_WMI_SBL_FW_UPDATE=m +CONFIG_INTEL_WMI_THUNDERBOLT=m # # Intel Uncore Frequency Control # -# CONFIG_INTEL_UNCORE_FREQ_CONTROL is not set +CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # end of Intel Uncore Frequency Control -# CONFIG_INTEL_HID_EVENT is not set -# CONFIG_INTEL_VBTN is not set -# CONFIG_INTEL_INT0002_VGPIO is not set -# CONFIG_INTEL_OAKTRAIL is not set -# CONFIG_INTEL_PUNIT_IPC is not set -# CONFIG_INTEL_RST is not set -# CONFIG_INTEL_SMARTCONNECT is not set -# CONFIG_INTEL_TURBO_MAX_3 is not set -# CONFIG_INTEL_VSEC is not set +CONFIG_INTEL_HID_EVENT=m +CONFIG_INTEL_VBTN=m +CONFIG_INTEL_INT0002_VGPIO=m +CONFIG_INTEL_OAKTRAIL=m +CONFIG_INTEL_BXTWC_PMIC_TMU=m +CONFIG_INTEL_CHTDC_TI_PWRBTN=m +CONFIG_INTEL_ISHTP_ECLITE=m +CONFIG_INTEL_MRFLD_PWRBTN=m +CONFIG_INTEL_PUNIT_IPC=m +CONFIG_INTEL_RST=m +CONFIG_INTEL_SDSI=m +CONFIG_INTEL_SMARTCONNECT=m +# CONFIG_INTEL_TPMI is not set +CONFIG_INTEL_TURBO_MAX_3=y +CONFIG_INTEL_VSEC=m # CONFIG_MSI_LAPTOP is not set # CONFIG_MSI_WMI is not set # CONFIG_PCENGINES_APU2 is not set @@ -6132,9 +6197,12 @@ CONFIG_INTEL_ATOMISP2_PM=y # CONFIG_SERIAL_MULTI_INSTANTIATE is not set # CONFIG_MLX_PLATFORM is not set # CONFIG_X86_ANDROID_TABLETS is not set -# CONFIG_INTEL_IPS is not set -# CONFIG_INTEL_SCU_PCI is not set -# CONFIG_INTEL_SCU_PLATFORM is not set +CONFIG_INTEL_IPS=m +CONFIG_INTEL_SCU_IPC=y +CONFIG_INTEL_SCU=y +CONFIG_INTEL_SCU_PCI=y +CONFIG_INTEL_SCU_PLATFORM=m +CONFIG_INTEL_SCU_IPC_UTIL=m # CONFIG_SIEMENS_SIMATIC_IPC is not set # CONFIG_WINMATE_FM07_KEYS is not set # CONFIG_STEAMDECK is not set @@ -6148,6 +6216,7 @@ CONFIG_COMMON_CLK=y # CONFIG_COMMON_CLK_SI5351 is not set # CONFIG_COMMON_CLK_SI544 is not set # CONFIG_COMMON_CLK_CDCE706 is not set +# CONFIG_COMMON_CLK_TPS68470 is not set # CONFIG_COMMON_CLK_CS2000_CP is not set # CONFIG_XILINX_VCU is not set # CONFIG_HWSPINLOCK is not set @@ -6186,7 +6255,7 @@ CONFIG_INTEL_IOMMU=y CONFIG_INTEL_IOMMU_SVM=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_INTEL_IOMMU_FLOPPY_WA=y -# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set +CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON=y CONFIG_INTEL_IOMMU_PERF_EVENTS=y # CONFIG_IOMMUFD is not set CONFIG_IRQ_REMAP=y @@ -6277,7 +6346,7 @@ CONFIG_RESET_CONTROLLER=y # # PHY Subsystem # -# CONFIG_GENERIC_PHY is not set +CONFIG_GENERIC_PHY=y # CONFIG_USB_LGM_PHY is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set @@ -6289,7 +6358,7 @@ CONFIG_RESET_CONTROLLER=y # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set -# CONFIG_PHY_INTEL_LGM_EMMC is not set +CONFIG_PHY_INTEL_LGM_EMMC=m # end of PHY Subsystem CONFIG_POWERCAP=y @@ -6323,7 +6392,13 @@ CONFIG_NVMEM_SYSFS=y # HW tracing support # # CONFIG_STM is not set -# CONFIG_INTEL_TH is not set +CONFIG_INTEL_TH=m +CONFIG_INTEL_TH_PCI=m +CONFIG_INTEL_TH_ACPI=m +CONFIG_INTEL_TH_GTH=m +CONFIG_INTEL_TH_MSU=m +CONFIG_INTEL_TH_PTI=m +# CONFIG_INTEL_TH_DEBUG is not set # end of HW tracing support # CONFIG_FPGA is not set @@ -6523,28 +6598,28 @@ CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y -# CONFIG_NLS_CODEPAGE_737 is not set -# 
CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m # CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_2 is not set @@ -6658,7 +6733,7 @@ CONFIG_CRYPTO_NULL2=y CONFIG_CRYPTO_CRYPTD=y # CONFIG_CRYPTO_AUTHENC is not set # CONFIG_CRYPTO_TEST is not set -CONFIG_CRYPTO_SIMD=y +CONFIG_CRYPTO_SIMD=m # end of Crypto core or helper # @@ -6800,7 +6875,7 @@ CONFIG_CRYPTO_HASH_INFO=y # Accelerated Cryptographic Algorithms for CPU (x86) # CONFIG_CRYPTO_CURVE25519_X86=m -CONFIG_CRYPTO_AES_NI_INTEL=y +CONFIG_CRYPTO_AES_NI_INTEL=m # CONFIG_CRYPTO_BLOWFISH_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set @@ -6830,8 +6905,8 @@ CONFIG_CRYPTO_SHA1_SSSE3=y CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_SHA512_SSSE3=y # CONFIG_CRYPTO_SM3_AVX_X86_64 is not set -# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set -# CONFIG_CRYPTO_CRC32C_INTEL is not set +CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m +CONFIG_CRYPTO_CRC32C_INTEL=m # CONFIG_CRYPTO_CRC32_PCLMUL is not set # end of Accelerated Cryptographic Algorithms for CPU (x86) @@ -6949,7 +7024,9 @@ CONFIG_DMA_OPS=y CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y +CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y CONFIG_SWIOTLB=y +CONFIG_DMA_COHERENT_POOL=y CONFIG_DMA_CMA=y # CONFIG_DMA_PERNUMA_CMA is not set