diff --git a/packages/kernel/linux/patches/AMD64/001-patch-6.1-rc7-rt5.patch b/packages/kernel/linux/patches/AMD64/001-patch-6.1-rc7-rt5.patch deleted file mode 100644 index e69de29bb..000000000 diff --git a/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch b/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch new file mode 100644 index 000000000..1228bf9fd --- /dev/null +++ b/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch @@ -0,0 +1,7875 @@ +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index e24a9820e12fa..ba2cf1cec3d9c 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -34,6 +34,7 @@ config ARM + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST +@@ -72,7 +73,7 @@ config ARM + select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT + select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 +- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU ++ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT + select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL +@@ -117,6 +118,8 @@ config ARM + select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM ++ select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ +diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h +index 7f092cb55a417..ffcbf8ebed4bf 100644 +--- a/arch/arm/include/asm/thread_info.h ++++ b/arch/arm/include/asm/thread_info.h +@@ -62,6 +62,7 @@ struct cpu_context_save { + struct thread_info { + unsigned long flags; /* low level flags */ + int preempt_count; /* 0 => preemptable, <0 => bug */ ++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ + __u32 cpu; /* cpu */ + __u32 cpu_domain; /* cpu domain */ + struct cpu_context_save cpu_context; /* cpu context */ +@@ -129,6 +130,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ + #define TIF_UPROBE 3 /* breakpointed or singlestepping */ + #define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 5 + + #define TIF_USING_IWMMXT 17 + #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ +@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) + + /* Checks for any syscall work in entry-common.S */ +@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + /* + * Change these and you break ASM code in entry-common.S + */ +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) + +diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c 
+index 38121c59cbc26..c6fafd53d5bea 100644 +--- a/arch/arm/kernel/asm-offsets.c ++++ b/arch/arm/kernel/asm-offsets.c +@@ -43,6 +43,7 @@ int main(void) + BLANK(); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); ++ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); + DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); +diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S +index c39303e5c2347..cfb4660e9feab 100644 +--- a/arch/arm/kernel/entry-armv.S ++++ b/arch/arm/kernel/entry-armv.S +@@ -222,11 +222,18 @@ ENDPROC(__dabt_svc) + + #ifdef CONFIG_PREEMPTION + ldr r8, [tsk, #TI_PREEMPT] @ get preempt count +- ldr r0, [tsk, #TI_FLAGS] @ get flags + teq r8, #0 @ if preempt count != 0 ++ bne 1f @ return from exeption ++ ldr r0, [tsk, #TI_FLAGS] @ get flags ++ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set ++ blne svc_preempt @ preempt! ++ ++ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count ++ teq r8, #0 @ if preempt lazy count != 0 + movne r0, #0 @ force flags to 0 +- tst r0, #_TIF_NEED_RESCHED ++ tst r0, #_TIF_NEED_RESCHED_LAZY + blne svc_preempt ++1: + #endif + + svc_exit r5, irq = 1 @ return from exception +@@ -241,8 +248,14 @@ ENDPROC(__irq_svc) + 1: bl preempt_schedule_irq @ irq en/disable is done inside + ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS + tst r0, #_TIF_NEED_RESCHED ++ bne 1b ++ tst r0, #_TIF_NEED_RESCHED_LAZY + reteq r8 @ go again +- b 1b ++ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count ++ teq r0, #0 @ if preempt lazy count != 0 ++ beq 1b ++ ret r8 @ go again ++ + #endif + + __und_fault: +diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c +index e07f359254c3c..b50a3248e79f3 100644 +--- a/arch/arm/kernel/signal.c ++++ b/arch/arm/kernel/signal.c +@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) + */ + trace_hardirqs_off(); + do { +- if (likely(thread_flags & _TIF_NEED_RESCHED)) { ++ if (likely(thread_flags & (_TIF_NEED_RESCHED | ++ _TIF_NEED_RESCHED_LAZY))) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) +diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c +index 2418f1efabd87..79ab2138ab0a8 100644 +--- a/arch/arm/mm/fault.c ++++ b/arch/arm/mm/fault.c +@@ -436,6 +436,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + if (addr < TASK_SIZE) + return do_page_fault(addr, fsr, regs); + ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); ++ + if (user_mode(regs)) + goto bad_area; + +@@ -506,6 +509,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + static int + do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + { ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); ++ + do_bad_area(addr, fsr, regs); + return 0; + } +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 1023e896d46b8..29fcf54cf68ad 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -95,6 +95,7 @@ config ARM64 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PAGE_TABLE_CHECK ++ select ARCH_SUPPORTS_RT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_BPF_JIT + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT +@@ -207,6 +208,7 @@ config ARM64 + select HAVE_PERF_USER_STACK_DUMP + select 
HAVE_PREEMPT_DYNAMIC_KEY + select HAVE_REGS_AND_STACK_ACCESS_API ++ select HAVE_PREEMPT_LAZY + select HAVE_POSIX_CPU_TIMERS_TASK_WORK + select HAVE_FUNCTION_ARG_ACCESS_API + select MMU_GATHER_RCU_TABLE_FREE +diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h +index 0159b625cc7f0..a5486918e5eeb 100644 +--- a/arch/arm64/include/asm/preempt.h ++++ b/arch/arm64/include/asm/preempt.h +@@ -71,13 +71,36 @@ static inline bool __preempt_count_dec_and_test(void) + * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE + * pair. + */ +- return !pc || !READ_ONCE(ti->preempt_count); ++ if (!pc || !READ_ONCE(ti->preempt_count)) ++ return true; ++#ifdef CONFIG_PREEMPT_LAZY ++ if ((pc & ~PREEMPT_NEED_RESCHED)) ++ return false; ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else ++ return false; ++#endif + } + + static inline bool should_resched(int preempt_offset) + { ++#ifdef CONFIG_PREEMPT_LAZY ++ u64 pc = READ_ONCE(current_thread_info()->preempt_count); ++ if (pc == preempt_offset) ++ return true; ++ ++ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset) ++ return false; ++ ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else + u64 pc = READ_ONCE(current_thread_info()->preempt_count); + return pc == preempt_offset; ++#endif + } + + #ifdef CONFIG_PREEMPTION +diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h +index 848739c15de82..4b7148fd5551f 100644 +--- a/arch/arm64/include/asm/thread_info.h ++++ b/arch/arm64/include/asm/thread_info.h +@@ -26,6 +26,7 @@ struct thread_info { + #ifdef CONFIG_ARM64_SW_TTBR0_PAN + u64 ttbr0; /* saved TTBR0_EL1 */ + #endif ++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ + union { + u64 preempt_count; /* 0 => preemptible, <0 => bug */ + struct { +@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst, + #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ + #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ + #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 7 + #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ + #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ + #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ +@@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_struct *dst, + #define _TIF_SVE (1 << TIF_SVE) + #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ + _TIF_NOTIFY_SIGNAL) +@@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_struct *dst, + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_SYSCALL_EMU) + ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++ + #ifdef CONFIG_SHADOW_CALL_STACK + #define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ +diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c +index ae345b06e9f7e..da7e10ff11a81 100644 +--- a/arch/arm64/kernel/asm-offsets.c ++++ b/arch/arm64/kernel/asm-offsets.c +@@ -33,6 +33,7 @@ int main(void) + DEFINE(TSK_TI_CPU, offsetof(struct 
task_struct, thread_info.cpu)); + DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); ++ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); + #ifdef CONFIG_ARM64_SW_TTBR0_PAN + DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); + #endif +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index 06a02707f4882..e7f5284d5daf1 100644 +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -1278,7 +1278,7 @@ static void do_signal(struct pt_regs *regs) + void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) + { + do { +- if (thread_flags & _TIF_NEED_RESCHED) { ++ if (thread_flags & _TIF_NEED_RESCHED_MASK) { + /* Unmask Debug and SError for the next task */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index a6c4407d3ec83..25f98b854d32f 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -154,6 +154,7 @@ config PPC + select ARCH_STACKWALK + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_MEMTEST +@@ -247,8 +248,10 @@ config PPC + select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_RSEQ + select HAVE_SETUP_PER_CPU_AREA if PPC64 + select HAVE_SOFTIRQ_ON_OWN_STACK +diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h +index 283c346478565..4727f40052ddd 100644 +--- a/arch/powerpc/include/asm/stackprotector.h ++++ b/arch/powerpc/include/asm/stackprotector.h +@@ -19,8 +19,13 @@ + */ + static __always_inline void boot_init_stack_canary(void) + { +- unsigned long canary = get_random_canary(); ++ unsigned long canary; + ++#ifndef CONFIG_PREEMPT_RT ++ canary = get_random_canary(); ++#else ++ canary = ((unsigned long)&canary) & CANARY_MASK; ++#endif + current->stack_canary = canary; + #ifdef CONFIG_PPC64 + get_paca()->canary = canary; +diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h +index af58f1ed3952e..520864de8bb27 100644 +--- a/arch/powerpc/include/asm/thread_info.h ++++ b/arch/powerpc/include/asm/thread_info.h +@@ -53,6 +53,8 @@ + struct thread_info { + int preempt_count; /* 0 => preemptable, + <0 => BUG */ ++ int preempt_lazy_count; /* 0 => preemptable, ++ <0 => BUG */ + #ifdef CONFIG_SMP + unsigned int cpu; + #endif +@@ -77,6 +79,7 @@ struct thread_info { + #define INIT_THREAD_INFO(tsk) \ + { \ + .preempt_count = INIT_PREEMPT_COUNT, \ ++ .preempt_lazy_count = 0, \ + .flags = 0, \ + } + +@@ -102,6 +105,7 @@ void arch_setup_new_exec(void); + #define TIF_PATCH_PENDING 6 /* pending live patching update */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SINGLESTEP 8 /* singlestepping active */ ++#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ + #define TIF_SECCOMP 10 /* secure computing */ + #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ + #define TIF_NOERROR 12 /* Force successful syscall return */ +@@ -117,6 +121,7 @@ void arch_setup_new_exec(void); + #define 
TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ + #define TIF_32BIT 20 /* 32 bit binary */ + ++ + /* as above, but as bit values */ + #define _TIF_SYSCALL_TRACE (1<msr & MSR_EE)); + again: +- if (IS_ENABLED(CONFIG_PREEMPT)) { ++ if (IS_ENABLED(CONFIG_PREEMPTION)) { + /* Return to preemptible kernel context */ + if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { + if (preempt_count() == 0) + preempt_schedule_irq(); ++ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) { ++ if ((preempt_count() == 0) && ++ (current_thread_info()->preempt_lazy_count == 0)) ++ preempt_schedule_irq(); + } + } + +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 9bdd79aa51cfc..038f8355b29ca 100644 +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -261,12 +261,17 @@ static char *get_mmu_str(void) + + static int __die(const char *str, struct pt_regs *regs, long err) + { ++ const char *pr = ""; ++ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); + ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; ++ + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", + PAGE_SIZE / 1024, get_mmu_str(), +- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", ++ pr, + IS_ENABLED(CONFIG_SMP) ? " SMP" : "", + IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", + debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", +diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig +index a9f57dad6d916..a0b528d4bb7cd 100644 +--- a/arch/powerpc/kvm/Kconfig ++++ b/arch/powerpc/kvm/Kconfig +@@ -225,6 +225,7 @@ config KVM_E500MC + config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM && PPC_E500 ++ depends on !PREEMPT_RT + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD + select HAVE_KVM_IRQ_ROUTING +diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c +index 9d229ef7f86ef..ada817c49b722 100644 +--- a/arch/powerpc/perf/imc-pmu.c ++++ b/arch/powerpc/perf/imc-pmu.c +@@ -51,7 +51,7 @@ static int trace_imc_mem_size; + * core and trace-imc + */ + static struct imc_pmu_ref imc_global_refc = { +- .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), ++ .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock), + .id = 0, + .refc = 0, + }; +diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig +index 21b22bf16ce66..b506377a16a74 100644 +--- a/arch/powerpc/platforms/pseries/Kconfig ++++ b/arch/powerpc/platforms/pseries/Kconfig +@@ -2,6 +2,7 @@ + config PPC_PSERIES + depends on PPC64 && PPC_BOOK3S + bool "IBM pSeries & new (POWER5-based) iSeries" ++ select GENERIC_ALLOCATOR + select HAVE_PCSPKR_PLATFORM + select MPIC + select OF_DYNAMIC +diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c +index c74b71d4733d4..64ba14baabd30 100644 +--- a/arch/powerpc/platforms/pseries/iommu.c ++++ b/arch/powerpc/platforms/pseries/iommu.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + return ret; + } + +-static DEFINE_PER_CPU(__be64 *, tce_page); ++struct tce_page { ++ __be64 * page; ++ local_lock_t lock; ++}; ++static DEFINE_PER_CPU(struct tce_page, tce_page) = { ++ .lock = INIT_LOCAL_LOCK(lock), ++}; + + static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, 
+ long npages, unsigned long uaddr, +@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + direction, attrs); + } + +- local_irq_save(flags); /* to protect tcep and the page behind it */ ++ /* to protect tcep and the page behind it */ ++ local_lock_irqsave(&tce_page.lock, flags); + +- tcep = __this_cpu_read(tce_page); ++ tcep = __this_cpu_read(tce_page.page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() +@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tceshift, + npages, uaddr, direction, attrs); + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + rpn = __pa(uaddr) >> tceshift; +@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcenum += limit; + } while (npages > 0 && !rc); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; +@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + DMA_BIDIRECTIONAL, 0); + } + +- local_irq_disable(); /* to protect tcep and the page behind it */ +- tcep = __this_cpu_read(tce_page); ++ /* to protect tcep and the page behind it */ ++ local_lock_irq(&tce_page.lock); ++ tcep = __this_cpu_read(tce_page.page); + + if (!tcep) { + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + if (!tcep) { +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return -ENOMEM; + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; +@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + + /* error cleanup: caller will clear whole range */ + +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return rc; + } + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index a825bf031f495..dcbf3c08926b1 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -114,6 +114,7 @@ config X86 + select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG + select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_LTO_CLANG_THIN ++ select ARCH_SUPPORTS_RT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_MEMTEST + select ARCH_USE_QUEUED_RWLOCKS +@@ -252,6 +253,7 @@ config X86 + select HAVE_PCI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_MERGE_VMAS + select HAVE_POSIX_CPU_TIMERS_TASK_WORK +diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h +index 2d13f25b1bd8f..5b096893f6a21 100644 +--- a/arch/x86/include/asm/preempt.h ++++ b/arch/x86/include/asm/preempt.h +@@ -90,18 +90,49 @@ static __always_inline void __preempt_count_sub(int val) + * a decrement which hits zero means we have no preempt_count and should + * reschedule. 
+ */ +-static __always_inline bool __preempt_count_dec_and_test(void) ++static __always_inline bool ____preempt_count_dec_and_test(void) + { + return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, + __percpu_arg([var])); + } + ++static __always_inline bool __preempt_count_dec_and_test(void) ++{ ++ if (____preempt_count_dec_and_test()) ++ return true; ++#ifdef CONFIG_PREEMPT_LAZY ++ if (preempt_count()) ++ return false; ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else ++ return false; ++#endif ++} ++ + /* + * Returns true when we need to resched and can (barring IRQ state). + */ + static __always_inline bool should_resched(int preempt_offset) + { ++#ifdef CONFIG_PREEMPT_LAZY ++ u32 tmp; ++ tmp = raw_cpu_read_4(pcpu_hot.preempt_count); ++ if (tmp == preempt_offset) ++ return true; ++ ++ /* preempt count == 0 ? */ ++ tmp &= ~PREEMPT_NEED_RESCHED; ++ if (tmp != preempt_offset) ++ return false; ++ /* XXX PREEMPT_LOCK_OFFSET */ ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else + return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); ++#endif + } + + #ifdef CONFIG_PREEMPTION +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index f1cccba52eb97..c8697ca0378f4 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -57,6 +57,8 @@ struct thread_info { + unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ + u32 status; /* thread synchronous flags */ ++ int preempt_lazy_count; /* 0 => lazy preemptable ++ <0 => BUG */ + #ifdef CONFIG_SMP + u32 cpu; /* current CPU */ + #endif +@@ -65,6 +67,7 @@ struct thread_info { + #define INIT_THREAD_INFO(tsk) \ + { \ + .flags = 0, \ ++ .preempt_lazy_count = 0, \ + } + + #else /* !__ASSEMBLY__ */ +@@ -92,6 +95,7 @@ struct thread_info { + #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ + #define TIF_NOTSC 16 /* TSC is not accessible in userland */ + #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ + #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ + #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ + #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +@@ -115,6 +119,7 @@ struct thread_info { + #define _TIF_NOCPUID (1 << TIF_NOCPUID) + #define _TIF_NOTSC (1 << TIF_NOTSC) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) + #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) + #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index aa490da3cef23..d73b6d32bd827 100644 +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -57,6 +57,40 @@ static void zram_free_page(struct zram *zram, size_t index); + static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio); + ++#ifdef CONFIG_PREEMPT_RT ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) ++{ ++ size_t index; ++ ++ for (index = 0; index < num_pages; index++) ++ spin_lock_init(&zram->table[index].lock); ++} ++ ++static int zram_slot_trylock(struct zram *zram, u32 index) ++{ ++ int ret; ++ ++ ret = 
spin_trylock(&zram->table[index].lock); ++ if (ret) ++ __set_bit(ZRAM_LOCK, &zram->table[index].flags); ++ return ret; ++} ++ ++static void zram_slot_lock(struct zram *zram, u32 index) ++{ ++ spin_lock(&zram->table[index].lock); ++ __set_bit(ZRAM_LOCK, &zram->table[index].flags); ++} ++ ++static void zram_slot_unlock(struct zram *zram, u32 index) ++{ ++ __clear_bit(ZRAM_LOCK, &zram->table[index].flags); ++ spin_unlock(&zram->table[index].lock); ++} ++ ++#else ++ ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } + + static int zram_slot_trylock(struct zram *zram, u32 index) + { +@@ -72,6 +106,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) + { + bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); + } ++#endif + + static inline bool init_done(struct zram *zram) + { +@@ -1311,6 +1346,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) + + if (!huge_class_size) + huge_class_size = zs_huge_class_size(zram->mem_pool); ++ zram_meta_init_table_locks(zram, num_pages); + return true; + } + +diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h +index c5254626f051f..a6b24dfec95fb 100644 +--- a/drivers/block/zram/zram_drv.h ++++ b/drivers/block/zram/zram_drv.h +@@ -69,6 +69,9 @@ struct zram_table_entry { + unsigned long element; + }; + unsigned long flags; ++#ifdef CONFIG_PREEMPT_RT ++ spinlock_t lock; ++#endif + #ifdef CONFIG_ZRAM_MEMORY_TRACKING + ktime_t ac_time; + #endif +diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c +index ed5dabd3c72d6..450d7985ff346 100644 +--- a/drivers/char/tpm/tpm_tis.c ++++ b/drivers/char/tpm/tpm_tis.c +@@ -50,6 +50,45 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da + return container_of(data, struct tpm_tis_tcg_phy, priv); + } + ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * Flush previous write operations with a dummy read operation to the ++ * TPM MMIO base address. ++ */ ++static inline void tpm_tis_flush(void __iomem *iobase) ++{ ++ ioread8(iobase + TPM_ACCESS(0)); ++} ++#else ++#define tpm_tis_flush(iobase) do { } while (0) ++#endif ++ ++/* ++ * Write a byte word to the TPM MMIO address, and flush the write queue. ++ * The flush ensures that the data is sent immediately over the bus and not ++ * aggregated with further requests and transferred later in a batch. The large ++ * write requests can lead to unwanted latency spikes by blocking the CPU until ++ * the complete batch has been transferred. ++ */ ++static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite8(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ ++/* ++ * Write a 32-bit word to the TPM MMIO address, and flush the write queue. ++ * The flush ensures that the data is sent immediately over the bus and not ++ * aggregated with further requests and transferred later in a batch. The large ++ * write requests can lead to unwanted latency spikes by blocking the CPU until ++ * the complete batch has been transferred. 
++ */ ++static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite32(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ + static int interrupts = -1; + module_param(interrupts, int, 0444); + MODULE_PARM_DESC(interrupts, "Enable interrupts"); +@@ -186,12 +225,12 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, + switch (io_mode) { + case TPM_TIS_PHYS_8: + while (len--) +- iowrite8(*value++, phy->iobase + addr); ++ tpm_tis_iowrite8(*value++, phy->iobase, addr); + break; + case TPM_TIS_PHYS_16: + return -EINVAL; + case TPM_TIS_PHYS_32: +- iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr); ++ tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr); + break; + } + +diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig +index 98f4e44976e09..d4dba1f89fde5 100644 +--- a/drivers/gpu/drm/i915/Kconfig ++++ b/drivers/gpu/drm/i915/Kconfig +@@ -3,7 +3,6 @@ config DRM_I915 + tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" + depends on DRM + depends on X86 && PCI +- depends on !PREEMPT_RT + select INTEL_GTT if X86 + select INTERVAL_TREE + # we need shmfs for the swappable backing store, and in particular +diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c +index d5b5d40ed817f..710e51d2377fe 100644 +--- a/drivers/gpu/drm/i915/display/intel_crtc.c ++++ b/drivers/gpu/drm/i915/display/intel_crtc.c +@@ -520,7 +520,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + */ + intel_psr_wait_for_idle_locked(new_crtc_state); + +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + + crtc->debug.min_vbl = min; + crtc->debug.max_vbl = max; +@@ -545,11 +546,13 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + break; + } + +- local_irq_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_enable(); + + timeout = schedule_timeout(timeout); + +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + } + + finish_wait(wq, &wait); +@@ -582,7 +585,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + return; + + irq_disable: +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + } + + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) +@@ -691,7 +695,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) + if (new_crtc_state->seamless_m_n && intel_crtc_needs_fastset(new_crtc_state)) + intel_crtc_update_active_timings(new_crtc_state); + +- local_irq_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_enable(); + + if (intel_vgpu_active(dev_priv)) + return; +diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c +index 4c83e2320bcac..2dd4ac8b30266 100644 +--- a/drivers/gpu/drm/i915/display/intel_vblank.c ++++ b/drivers/gpu/drm/i915/display/intel_vblank.c +@@ -293,7 +293,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, + */ + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + +- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + + /* Get optional system timestamp before query. */ + if (stime) +@@ -358,7 +359,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, + if (etime) + *etime = ktime_get(); + +- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. 
*/ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + +diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +index ecc990ec1b952..8d04b10681f0d 100644 +--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c ++++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +@@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) + /* Kick the work once more to drain the signalers, and disarm the irq */ + irq_work_sync(&b->irq_work); + while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { +- local_irq_disable(); +- signal_irq_work(&b->irq_work); +- local_irq_enable(); ++ irq_work_queue(&b->irq_work); + cond_resched(); ++ irq_work_sync(&b->irq_work); + } + } + +diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +index 750326434677f..a2658a8ff7353 100644 +--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c ++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +@@ -1303,7 +1303,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + * and context switches) submission. + */ + +- spin_lock(&sched_engine->lock); ++ spin_lock_irq(&sched_engine->lock); + + /* + * If the queue is higher priority than the last +@@ -1403,7 +1403,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + return; + } + } +@@ -1429,7 +1429,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + + if (last && !can_merge_rq(last, rq)) { + spin_unlock(&ve->base.sched_engine->lock); +- spin_unlock(&engine->sched_engine->lock); ++ spin_unlock_irq(&engine->sched_engine->lock); + return; /* leave this for another sibling */ + } + +@@ -1591,7 +1591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + */ + sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + + /* + * We can skip poking the HW if we ended up with exactly the same set +@@ -1617,13 +1617,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + } + } + +-static void execlists_dequeue_irq(struct intel_engine_cs *engine) +-{ +- local_irq_disable(); /* Suspend interrupts across request submission */ +- execlists_dequeue(engine); +- local_irq_enable(); /* flush irq_work (e.g. 
breadcrumb enabling) */ +-} +- + static void clear_ports(struct i915_request **ports, int count) + { + memset_p((void **)ports, NULL, count); +@@ -2477,7 +2470,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) + } + + if (!engine->execlists.pending[0]) { +- execlists_dequeue_irq(engine); ++ execlists_dequeue(engine); + start_timeslice(engine); + } + +diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c +index 7503dcb9043bb..799fb8083470f 100644 +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -613,7 +613,6 @@ bool __i915_request_submit(struct i915_request *request) + + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +@@ -722,7 +721,6 @@ void __i915_request_unsubmit(struct i915_request *request) + */ + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h +index f6f9228a13518..0ff1b60be8382 100644 +--- a/drivers/gpu/drm/i915/i915_trace.h ++++ b/drivers/gpu/drm/i915/i915_trace.h +@@ -6,6 +6,10 @@ + #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) + #define _I915_TRACE_H_ + ++#ifdef CONFIG_PREEMPT_RT ++#define NOTRACE ++#endif ++ + #include + #include + #include +@@ -322,7 +326,7 @@ DEFINE_EVENT(i915_request, i915_request_add, + TP_ARGS(rq) + ); + +-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) ++#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) + DEFINE_EVENT(i915_request, i915_request_guc_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) +diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h +index 2c430c0c3badd..7ec828637d622 100644 +--- a/drivers/gpu/drm/i915/i915_utils.h ++++ b/drivers/gpu/drm/i915/i915_utils.h +@@ -288,7 +288,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) + #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + + /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ +-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) ++#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) + #else + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h +index 1e8fe44a7099f..b336d00d7988e 100644 +--- a/drivers/tty/serial/8250/8250.h ++++ b/drivers/tty/serial/8250/8250.h +@@ -177,12 +177,277 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) + up->dl_write(up, value); + } + ++static inline bool serial8250_is_console(struct uart_port *port) ++{ ++ return uart_console(port) && !hlist_unhashed_lockless(&port->cons->node); ++} ++ ++/** ++ * serial8250_init_wctxt - Initialize a write context for ++ * non-console-printing usage ++ * @wctxt: The write context to initialize ++ * @cons: The console to assign to the write context ++ * ++ * In order to mark an unsafe region, drivers must acquire the console. This ++ * requires providing an initialized write context (even if that driver will ++ * not be doing any printing). ++ * ++ * This function should not be used for console printing contexts. 
++ */ ++static inline void serial8250_init_wctxt(struct cons_write_context *wctxt, ++ struct console *cons) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ ++ memset(wctxt, 0, sizeof(*wctxt)); ++ ctxt->console = cons; ++ ctxt->prio = CONS_PRIO_NORMAL; ++} ++ ++/** ++ * __serial8250_console_acquire - Acquire a console for ++ * non-console-printing usage ++ * @wctxt: An uninitialized write context to use for acquiring ++ * @cons: The console to assign to the write context ++ * ++ * The caller is holding the port->lock. ++ * The caller is holding the console_srcu_read_lock. ++ * ++ * This function should not be used for console printing contexts. ++ */ ++static inline void __serial8250_console_acquire(struct cons_write_context *wctxt, ++ struct console *cons) ++{ ++ for (;;) { ++ serial8250_init_wctxt(wctxt, cons); ++ if (console_try_acquire(wctxt)) ++ break; ++ cpu_relax(); ++ } ++} ++ ++/** ++ * serial8250_enter_unsafe - Mark the beginning of an unsafe region for ++ * non-console-printing usage ++ * @up: The port that is entering the unsafe state ++ * ++ * The caller should ensure @up is a console before calling this function. ++ * ++ * The caller is holding the port->lock. ++ * This function takes the console_srcu_read_lock and becomes owner of the ++ * console associated with @up. ++ * ++ * This function should not be used for console printing contexts. ++ */ ++static inline void serial8250_enter_unsafe(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ ++ lockdep_assert_held_once(&port->lock); ++ ++ for (;;) { ++ up->cookie = console_srcu_read_lock(); ++ ++ __serial8250_console_acquire(&up->wctxt, port->cons); ++ ++ if (console_enter_unsafe(&up->wctxt)) ++ break; ++ ++ console_srcu_read_unlock(up->cookie); ++ cpu_relax(); ++ } ++} ++ ++/** ++ * serial8250_exit_unsafe - Mark the end of an unsafe region for ++ * non-console-printing usage ++ * @up: The port that is exiting the unsafe state ++ * ++ * The caller is holding the port->lock. ++ * This function releases ownership of the console associated with @up and ++ * releases the console_srcu_read_lock. ++ * ++ * This function should not be used for console printing contexts. ++ */ ++static inline void serial8250_exit_unsafe(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ ++ lockdep_assert_held_once(&port->lock); ++ ++ if (console_exit_unsafe(&up->wctxt)) ++ console_release(&up->wctxt); ++ ++ console_srcu_read_unlock(up->cookie); ++} ++ ++/** ++ * serial8250_in_IER - Read the IER register for ++ * non-console-printing usage ++ * @up: The port to work on ++ * ++ * Returns: The value read from IER ++ * ++ * The caller is holding the port->lock. ++ * ++ * This is the top-level function for non-console-printing contexts to ++ * read the IER register. The caller does not need to care if @up is a ++ * console before calling this function. ++ * ++ * This function should not be used for printing contexts. ++ */ ++static inline int serial8250_in_IER(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int ier; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ ++ ier = serial_in(up, UART_IER); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ ++ return ier; ++} ++ ++/** ++ * __serial8250_set_IER - Directly write to the IER register ++ * @up: The port to work on ++ * @wctxt: The current write context ++ * @ier: The value to write ++ * ++ * Returns: True if IER was written to. 
False otherwise ++ * ++ * The caller is holding the port->lock. ++ * The caller is holding the console_srcu_read_unlock. ++ * The caller is the owner of the console associated with @up. ++ * ++ * This function should only be directly called within console printing ++ * contexts. Other contexts should use serial8250_set_IER(). ++ */ ++static inline bool __serial8250_set_IER(struct uart_8250_port *up, ++ struct cons_write_context *wctxt, ++ int ier) ++{ ++ if (wctxt && !console_can_proceed(wctxt)) ++ return false; ++ serial_out(up, UART_IER, ier); ++ return true; ++} ++ ++/** ++ * serial8250_set_IER - Write a new value to the IER register for ++ * non-console-printing usage ++ * @up: The port to work on ++ * @ier: The value to write ++ * ++ * The caller is holding the port->lock. ++ * ++ * This is the top-level function for non-console-printing contexts to ++ * write to the IER register. The caller does not need to care if @up is a ++ * console before calling this function. ++ * ++ * This function should not be used for printing contexts. ++ */ ++static inline void serial8250_set_IER(struct uart_8250_port *up, int ier) ++{ ++ struct uart_port *port = &up->port; ++ bool is_console; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) { ++ serial8250_enter_unsafe(up); ++ while (!__serial8250_set_IER(up, &up->wctxt, ier)) { ++ console_srcu_read_unlock(up->cookie); ++ console_enter_unsafe(&up->wctxt); ++ } ++ serial8250_exit_unsafe(up); ++ } else { ++ __serial8250_set_IER(up, NULL, ier); ++ } ++} ++ ++/** ++ * __serial8250_clear_IER - Directly clear the IER register ++ * @up: The port to work on ++ * @wctxt: The current write context ++ * @prior: Gets set to the previous value of IER ++ * ++ * Returns: True if IER was cleared and @prior points to the previous ++ * value of IER. False otherwise and @prior is invalid ++ * ++ * The caller is holding the port->lock. ++ * The caller is holding the console_srcu_read_unlock. ++ * The caller is the owner of the console associated with @up. ++ * ++ * This function should only be directly called within console printing ++ * contexts. Other contexts should use serial8250_clear_IER(). ++ */ ++static inline bool __serial8250_clear_IER(struct uart_8250_port *up, ++ struct cons_write_context *wctxt, ++ int *prior) ++{ ++ unsigned int clearval = 0; ++ ++ if (up->capabilities & UART_CAP_UUE) ++ clearval = UART_IER_UUE; ++ ++ *prior = serial_in(up, UART_IER); ++ if (wctxt && !console_can_proceed(wctxt)) ++ return false; ++ serial_out(up, UART_IER, clearval); ++ return true; ++} ++ ++/** ++ * serial8250_clear_IER - Clear the IER register for ++ * non-console-printing usage ++ * @up: The port to work on ++ * ++ * Returns: The previous value of IER ++ * ++ * The caller is holding the port->lock. ++ * ++ * This is the top-level function for non-console-printing contexts to ++ * clear the IER register. The caller does not need to care if @up is a ++ * console before calling this function. ++ * ++ * This function should not be used for printing contexts. 
++ */ ++static inline int serial8250_clear_IER(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int prior; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) { ++ serial8250_enter_unsafe(up); ++ while (!__serial8250_clear_IER(up, &up->wctxt, &prior)) { ++ console_srcu_read_unlock(up->cookie); ++ console_enter_unsafe(&up->wctxt); ++ } ++ serial8250_exit_unsafe(up); ++ } else { ++ __serial8250_clear_IER(up, NULL, &prior); ++ } ++ ++ return prior; ++} ++ + static inline bool serial8250_set_THRI(struct uart_8250_port *up) + { + if (up->ier & UART_IER_THRI) + return false; + up->ier |= UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + return true; + } + +@@ -191,7 +456,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) + if (!(up->ier & UART_IER_THRI)) + return false; + up->ier &= ~UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + return true; + } + +diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c +index 9d2a7856784f7..7cc6b527c088b 100644 +--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c ++++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c +@@ -278,7 +278,7 @@ static void __aspeed_vuart_set_throttle(struct uart_8250_port *up, + up->ier &= ~irqs; + if (!throttle) + up->ier |= irqs; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle) + { +diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c +index f801b1f5b46c0..a29f5f45d22f2 100644 +--- a/drivers/tty/serial/8250/8250_bcm7271.c ++++ b/drivers/tty/serial/8250/8250_bcm7271.c +@@ -606,8 +606,10 @@ static int brcmuart_startup(struct uart_port *port) + * Disable the Receive Data Interrupt because the DMA engine + * will handle this. 
+ */ ++ spin_lock_irq(&port->lock); + up->ier &= ~UART_IER_RDI; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); ++ spin_unlock_irq(&port->lock); + + priv->tx_running = false; + priv->dma.rx_dma = NULL; +@@ -787,6 +789,12 @@ static int brcmuart_handle_irq(struct uart_port *p) + spin_lock_irqsave(&p->lock, flags); + status = serial_port_in(p, UART_LSR); + if ((status & UART_LSR_DR) == 0) { ++ bool is_console; ++ ++ is_console = serial8250_is_console(p); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); + + ier = serial_port_in(p, UART_IER); + /* +@@ -807,6 +815,9 @@ static int brcmuart_handle_irq(struct uart_port *p) + serial_port_in(p, UART_RX); + } + ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + handled = 1; + } + spin_unlock_irqrestore(&p->lock, flags); +@@ -844,12 +855,22 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t) + /* re-enable receive unless upper layer has disabled it */ + if ((up->ier & (UART_IER_RLSI | UART_IER_RDI)) == + (UART_IER_RLSI | UART_IER_RDI)) { ++ bool is_console; ++ ++ is_console = serial8250_is_console(p); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + status = serial_port_in(p, UART_IER); + status |= (UART_IER_RLSI | UART_IER_RDI); + serial_port_out(p, UART_IER, status); + status = serial_port_in(p, UART_MCR); + status |= UART_MCR_RTS; + serial_port_out(p, UART_MCR, status); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + spin_unlock_irqrestore(&p->lock, flags); + return HRTIMER_NORESTART; +diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c +index ab63c308be0a2..8e89bffa1f121 100644 +--- a/drivers/tty/serial/8250/8250_core.c ++++ b/drivers/tty/serial/8250/8250_core.c +@@ -256,6 +256,7 @@ static void serial8250_timeout(struct timer_list *t) + static void serial8250_backup_timeout(struct timer_list *t) + { + struct uart_8250_port *up = from_timer(up, t, timer); ++ struct uart_port *port = &up->port; + unsigned int iir, ier = 0, lsr; + unsigned long flags; + +@@ -266,8 +267,23 @@ static void serial8250_backup_timeout(struct timer_list *t) + * based handler. + */ + if (up->port.irq) { ++ bool is_console; ++ ++ /* ++ * Do not use serial8250_clear_IER() because this code ++ * ignores capabilties. 
++ */ ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + ier = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + + iir = serial_in(up, UART_IIR); +@@ -290,7 +306,7 @@ static void serial8250_backup_timeout(struct timer_list *t) + serial8250_tx_chars(up); + + if (up->port.irq) +- serial_out(up, UART_IER, ier); ++ serial8250_set_IER(up, ier); + + spin_unlock_irqrestore(&up->port.lock, flags); + +@@ -576,12 +592,30 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) + + #ifdef CONFIG_SERIAL_8250_CONSOLE + +-static void univ8250_console_write(struct console *co, const char *s, +- unsigned int count) ++static void univ8250_console_port_lock(struct console *con, bool do_lock, unsigned long *flags) ++{ ++ struct uart_8250_port *up = &serial8250_ports[con->index]; ++ ++ if (do_lock) ++ spin_lock_irqsave(&up->port.lock, *flags); ++ else ++ spin_unlock_irqrestore(&up->port.lock, *flags); ++} ++ ++static bool univ8250_console_write_atomic(struct console *co, ++ struct cons_write_context *wctxt) + { + struct uart_8250_port *up = &serial8250_ports[co->index]; + +- serial8250_console_write(up, s, count); ++ return serial8250_console_write_atomic(up, wctxt); ++} ++ ++static bool univ8250_console_write_thread(struct console *co, ++ struct cons_write_context *wctxt) ++{ ++ struct uart_8250_port *up = &serial8250_ports[co->index]; ++ ++ return serial8250_console_write_thread(up, wctxt); + } + + static int univ8250_console_setup(struct console *co, char *options) +@@ -669,12 +703,14 @@ static int univ8250_console_match(struct console *co, char *name, int idx, + + static struct console univ8250_console = { + .name = "ttyS", +- .write = univ8250_console_write, ++ .write_atomic = univ8250_console_write_atomic, ++ .write_thread = univ8250_console_write_thread, ++ .port_lock = univ8250_console_port_lock, + .device = uart_console_device, + .setup = univ8250_console_setup, + .exit = univ8250_console_exit, + .match = univ8250_console_match, +- .flags = CON_PRINTBUFFER | CON_ANYTIME, ++ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NO_BKL, + .index = -1, + .data = &serial8250_reg, + }; +@@ -962,7 +998,7 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work) + spin_lock_irqsave(&port->lock, flags); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + up->port.read_status_mask |= UART_LSR_DR; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + spin_unlock_irqrestore(&port->lock, flags); + } + +diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c +index 64770c62bbec5..ccb70b20b1f4f 100644 +--- a/drivers/tty/serial/8250/8250_exar.c ++++ b/drivers/tty/serial/8250/8250_exar.c +@@ -185,6 +185,10 @@ static void xr17v35x_set_divisor(struct uart_port *p, unsigned int baud, + + static int xr17v35x_startup(struct uart_port *port) + { ++ struct uart_8250_port *up = up_to_u8250p(port); ++ ++ spin_lock_irq(&port->lock); ++ + /* + * First enable access to IER [7:5], ISR [5:4], FCR [5:4], + * MCR [7:5] and MSR [7:0] +@@ -195,7 +199,9 @@ static int xr17v35x_startup(struct uart_port *port) + * Make sure all interrups are masked until initialization is + * complete and the FIFOs are cleared + */ +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); ++ ++ spin_unlock_irq(&port->lock); + + return serial8250_do_startup(port); + } +diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c 
+index 8adfaa183f778..eaf148245a10d 100644 +--- a/drivers/tty/serial/8250/8250_fsl.c ++++ b/drivers/tty/serial/8250/8250_fsl.c +@@ -58,7 +58,8 @@ int fsl8250_handle_irq(struct uart_port *port) + if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { + unsigned long delay; + +- up->ier = port->serial_in(port, UART_IER); ++ up->ier = serial8250_in_IER(up); ++ + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { +diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c +index fb1d5ec0940e6..bf7ab55c8923f 100644 +--- a/drivers/tty/serial/8250/8250_mtk.c ++++ b/drivers/tty/serial/8250/8250_mtk.c +@@ -222,12 +222,38 @@ static void mtk8250_shutdown(struct uart_port *port) + + static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int ier; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier & (~mask)); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + + static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); ++ struct uart_port *port = &up->port; ++ bool is_console; ++ int ier; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier | mask); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); + } + + static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) +diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c +index 734f092ef839a..bfa50a26349dd 100644 +--- a/drivers/tty/serial/8250/8250_omap.c ++++ b/drivers/tty/serial/8250/8250_omap.c +@@ -334,8 +334,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) + + /* drop TCR + TLR access, we setup XON/XOFF later */ + serial8250_out_MCR(up, mcr); +- +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_dl_write(up, priv->quot); +@@ -523,16 +522,21 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, + u8 efr; + + pm_runtime_get_sync(port->dev); ++ ++ spin_lock_irq(&port->lock); ++ + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + efr = serial_in(up, UART_EFR); + serial_out(up, UART_EFR, efr | UART_EFR_ECB); + serial_out(up, UART_LCR, 0); + +- serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0); ++ serial8250_set_IER(up, (state != 0) ? 
UART_IERX_SLEEP : 0); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(up, UART_EFR, efr); + serial_out(up, UART_LCR, 0); + ++ spin_unlock_irq(&port->lock); ++ + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); + } +@@ -649,7 +653,8 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) + if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) { + unsigned long delay; + +- up->ier = port->serial_in(port, UART_IER); ++ spin_lock(&port->lock); ++ up->ier = serial8250_in_IER(up); + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { +@@ -658,6 +663,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) + */ + cancel_delayed_work(&up->overrun_backoff); + } ++ spin_unlock(&port->lock); + + delay = msecs_to_jiffies(up->overrun_backoff_time_ms); + schedule_delayed_work(&up->overrun_backoff, delay); +@@ -707,8 +713,10 @@ static int omap_8250_startup(struct uart_port *port) + if (ret < 0) + goto err; + ++ spin_lock_irq(&port->lock); + up->ier = UART_IER_RLSI | UART_IER_RDI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); ++ spin_unlock_irq(&port->lock); + + #ifdef CONFIG_PM + up->capabilities |= UART_CAP_RPM; +@@ -748,8 +756,10 @@ static void omap_8250_shutdown(struct uart_port *port) + if (priv->habit & UART_HAS_EFR2) + serial_out(up, UART_OMAP_EFR2, 0x0); + ++ spin_lock_irq(&port->lock); + up->ier = 0; +- serial_out(up, UART_IER, 0); ++ serial8250_set_IER(up, 0); ++ spin_unlock_irq(&port->lock); + + if (up->dma) + serial8250_release_dma(up); +@@ -797,7 +807,7 @@ static void omap_8250_unthrottle(struct uart_port *port) + up->dma->rx_dma(up); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + port->read_status_mask |= UART_LSR_DR; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + spin_unlock_irqrestore(&port->lock, flags); + + pm_runtime_mark_last_busy(port->dev); +@@ -956,7 +966,7 @@ static void __dma_rx_complete(void *param) + __dma_rx_do_complete(p); + if (!priv->throttled) { + p->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_out(p, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + if (!(priv->habit & UART_HAS_EFR2)) + omap_8250_rx_dma(p); + } +@@ -1013,7 +1023,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p) + * callback to run. + */ + p->ier &= ~(UART_IER_RLSI | UART_IER_RDI); +- serial_out(p, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + } + goto out; + } +@@ -1226,12 +1236,12 @@ static void am654_8250_handle_rx_dma(struct uart_8250_port *up, u8 iir, + * periodic timeouts, re-enable interrupts. 
+ */ + up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + omap_8250_rx_dma_flush(up); + serial_in(up, UART_IIR); + serial_out(up, UART_OMAP_EFR2, 0x0); + up->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + } + +@@ -1717,12 +1727,16 @@ static int omap8250_runtime_resume(struct device *dev) + + up = serial8250_get_port(priv->line); + ++ spin_lock_irq(&up->port.lock); ++ + if (omap8250_lost_context(up)) + omap8250_restore_regs(up); + + if (up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2)) + omap_8250_rx_dma(up); + ++ spin_unlock_irq(&up->port.lock); ++ + priv->latency = priv->calc_latency; + schedule_work(&priv->qos_work); + return 0; +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c +index fe8d79c4ae95e..68f01f8bdf64b 100644 +--- a/drivers/tty/serial/8250/8250_port.c ++++ b/drivers/tty/serial/8250/8250_port.c +@@ -745,6 +745,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial8250_rpm_get(p); + + if (p->capabilities & UART_CAP_SLEEP) { ++ spin_lock_irq(&p->port.lock); + if (p->capabilities & UART_CAP_EFR) { + lcr = serial_in(p, UART_LCR); + efr = serial_in(p, UART_EFR); +@@ -752,25 +753,18 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial_out(p, UART_EFR, UART_EFR_ECB); + serial_out(p, UART_LCR, 0); + } +- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); ++ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0); + if (p->capabilities & UART_CAP_EFR) { + serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(p, UART_EFR, efr); + serial_out(p, UART_LCR, lcr); + } ++ spin_unlock_irq(&p->port.lock); + } + + serial8250_rpm_put(p); + } + +-static void serial8250_clear_IER(struct uart_8250_port *up) +-{ +- if (up->capabilities & UART_CAP_UUE) +- serial_out(up, UART_IER, UART_IER_UUE); +- else +- serial_out(up, UART_IER, 0); +-} +- + #ifdef CONFIG_SERIAL_8250_RSA + /* + * Attempts to turn on the RSA FIFO. Returns zero on failure. +@@ -1034,8 +1028,10 @@ static int broken_efr(struct uart_8250_port *up) + */ + static void autoconfig_16550a(struct uart_8250_port *up) + { ++ struct uart_port *port = &up->port; + unsigned char status1, status2; + unsigned int iersave; ++ bool is_console; + + up->port.type = PORT_16550A; + up->capabilities |= UART_CAP_FIFO; +@@ -1151,6 +1147,11 @@ static void autoconfig_16550a(struct uart_8250_port *up) + return; + } + ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + /* + * Try writing and reading the UART_IER_UUE bit (b6). + * If it works, this is probably one of the Xscale platform's +@@ -1186,6 +1187,9 @@ static void autoconfig_16550a(struct uart_8250_port *up) + } + serial_out(up, UART_IER, iersave); + ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + /* + * We distinguish between 16550A and U6 16550A by counting + * how many bytes are in the FIFO. +@@ -1227,6 +1231,13 @@ static void autoconfig(struct uart_8250_port *up) + up->bugs = 0; + + if (!(port->flags & UPF_BUGGY_UART)) { ++ bool is_console; ++ ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + /* + * Do a simple existence test first; if we fail this, + * there's no point trying anything else. 
+@@ -1256,6 +1267,10 @@ static void autoconfig(struct uart_8250_port *up) + #endif + scratch3 = serial_in(up, UART_IER) & UART_IER_ALL_INTR; + serial_out(up, UART_IER, scratch); ++ ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + if (scratch2 != 0 || scratch3 != UART_IER_ALL_INTR) { + /* + * We failed; there's nothing here +@@ -1377,6 +1392,7 @@ static void autoconfig_irq(struct uart_8250_port *up) + unsigned char save_ICP = 0; + unsigned int ICP = 0; + unsigned long irqs; ++ bool is_console; + int irq; + + if (port->flags & UPF_FOURPORT) { +@@ -1386,8 +1402,12 @@ static void autoconfig_irq(struct uart_8250_port *up) + inb_p(ICP); + } + +- if (uart_console(port)) ++ is_console = serial8250_is_console(port); ++ ++ if (is_console) { + console_lock(); ++ serial8250_enter_unsafe(up); ++ } + + /* forget possible initially masked and pending IRQ */ + probe_irq_off(probe_irq_on()); +@@ -1419,8 +1439,10 @@ static void autoconfig_irq(struct uart_8250_port *up) + if (port->flags & UPF_FOURPORT) + outb_p(save_ICP, ICP); + +- if (uart_console(port)) ++ if (is_console) { ++ serial8250_exit_unsafe(up); + console_unlock(); ++ } + + port->irq = (irq > 0) ? irq : 0; + } +@@ -1433,7 +1455,7 @@ static void serial8250_stop_rx(struct uart_port *port) + + up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); + up->port.read_status_mask &= ~UART_LSR_DR; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + serial8250_rpm_put(up); + } +@@ -1463,7 +1485,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p) + serial8250_clear_and_reinit_fifos(p); + + p->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_port_out(&p->port, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + } + } + EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); +@@ -1710,7 +1732,7 @@ static void serial8250_disable_ms(struct uart_port *port) + mctrl_gpio_disable_ms(up->gpios); + + up->ier &= ~UART_IER_MSI; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + + static void serial8250_enable_ms(struct uart_port *port) +@@ -1726,7 +1748,7 @@ static void serial8250_enable_ms(struct uart_port *port) + up->ier |= UART_IER_MSI; + + serial8250_rpm_get(up); +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + serial8250_rpm_put(up); + } + +@@ -2176,9 +2198,10 @@ static void serial8250_put_poll_char(struct uart_port *port, + serial8250_rpm_get(up); + /* + * First save the IER then disable the interrupts ++ * ++ * Best-effort IER access because other CPUs are quiesced. 
+ */ +- ier = serial_port_in(port, UART_IER); +- serial8250_clear_IER(up); ++ __serial8250_clear_IER(up, NULL, &ier); + + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); + /* +@@ -2191,7 +2214,7 @@ static void serial8250_put_poll_char(struct uart_port *port, + * and restore the IER + */ + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); +- serial_port_out(port, UART_IER, ier); ++ __serial8250_set_IER(up, NULL, ier); + serial8250_rpm_put(up); + } + +@@ -2202,6 +2225,7 @@ int serial8250_do_startup(struct uart_port *port) + struct uart_8250_port *up = up_to_u8250p(port); + unsigned long flags; + unsigned char iir; ++ bool is_console; + int retval; + u16 lsr; + +@@ -2219,21 +2243,25 @@ int serial8250_do_startup(struct uart_port *port) + serial8250_rpm_get(up); + if (port->type == PORT_16C950) { + /* Wake up and initialize UART */ ++ spin_lock_irqsave(&port->lock, flags); + up->acr = 0; + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + serial_port_out(port, UART_LCR, 0); + serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); + serial_port_out(port, UART_LCR, 0); ++ spin_unlock_irqrestore(&port->lock, flags); + } + + if (port->type == PORT_DA830) { + /* Reset the port */ +- serial_port_out(port, UART_IER, 0); ++ spin_lock_irqsave(&port->lock, flags); ++ serial8250_set_IER(up, 0); + serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); ++ spin_unlock_irqrestore(&port->lock, flags); + mdelay(10); + + /* Enable Tx, Rx and free run mode */ +@@ -2331,6 +2359,8 @@ int serial8250_do_startup(struct uart_port *port) + if (retval) + goto out; + ++ is_console = serial8250_is_console(port); ++ + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { + unsigned char iir1; + +@@ -2347,6 +2377,9 @@ int serial8250_do_startup(struct uart_port *port) + */ + spin_lock_irqsave(&port->lock, flags); + ++ if (is_console) ++ serial8250_enter_unsafe(up); ++ + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); + udelay(1); /* allow THRE to set */ +@@ -2357,6 +2390,9 @@ int serial8250_do_startup(struct uart_port *port) + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + ++ if (is_console) ++ serial8250_exit_unsafe(up); ++ + spin_unlock_irqrestore(&port->lock, flags); + + if (port->irqflags & IRQF_SHARED) +@@ -2411,10 +2447,14 @@ int serial8250_do_startup(struct uart_port *port) + * Do a quick test to see if we receive an interrupt when we enable + * the TX irq. 
+ */ ++ if (is_console) ++ serial8250_enter_unsafe(up); + serial_port_out(port, UART_IER, UART_IER_THRI); + lsr = serial_port_in(port, UART_LSR); + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); ++ if (is_console) ++ serial8250_exit_unsafe(up); + + if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { + if (!(up->bugs & UART_BUG_TXEN)) { +@@ -2446,7 +2486,7 @@ int serial8250_do_startup(struct uart_port *port) + if (up->dma) { + const char *msg = NULL; + +- if (uart_console(port)) ++ if (is_console) + msg = "forbid DMA for kernel console"; + else if (serial8250_request_dma(up)) + msg = "failed to request DMA"; +@@ -2497,7 +2537,7 @@ void serial8250_do_shutdown(struct uart_port *port) + */ + spin_lock_irqsave(&port->lock, flags); + up->ier = 0; +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + spin_unlock_irqrestore(&port->lock, flags); + + synchronize_irq(port->irq); +@@ -2863,7 +2903,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, + if (up->capabilities & UART_CAP_RTOIE) + up->ier |= UART_IER_RTOIE; + +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + if (up->capabilities & UART_CAP_EFR) { + unsigned char efr = 0; +@@ -3328,12 +3368,21 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); + + #ifdef CONFIG_SERIAL_8250_CONSOLE + +-static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) ++static bool serial8250_console_putchar(struct uart_port *port, unsigned char ch, ++ struct cons_write_context *wctxt) + { + struct uart_8250_port *up = up_to_u8250p(port); + + wait_for_xmitr(up, UART_LSR_THRE); ++ if (!console_can_proceed(wctxt)) ++ return false; + serial_port_out(port, UART_TX, ch); ++ if (ch == '\n') ++ up->console_newline_needed = false; ++ else ++ up->console_newline_needed = true; ++ ++ return true; + } + + /* +@@ -3362,33 +3411,119 @@ static void serial8250_console_restore(struct uart_8250_port *up) + serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); + } + +-/* +- * Print a string to the serial port using the device FIFO +- * +- * It sends fifosize bytes and then waits for the fifo +- * to get empty. 
+- */ +-static void serial8250_console_fifo_write(struct uart_8250_port *up, +- const char *s, unsigned int count) ++static bool __serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt, ++ const char *s, unsigned int count, ++ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *)) + { +- int i; +- const char *end = s + count; +- unsigned int fifosize = up->tx_loadsz; +- bool cr_sent = false; ++ bool finished = false; ++ unsigned int i; + +- while (s != end) { +- wait_for_lsr(up, UART_LSR_THRE); +- +- for (i = 0; i < fifosize && s != end; ++i) { +- if (*s == '\n' && !cr_sent) { +- serial_out(up, UART_TX, '\r'); +- cr_sent = true; +- } else { +- serial_out(up, UART_TX, *s++); +- cr_sent = false; +- } ++ for (i = 0; i < count; i++, s++) { ++ if (*s == '\n') { ++ if (!putchar(port, '\r', wctxt)) ++ goto out; + } ++ if (!putchar(port, *s, wctxt)) ++ goto out; + } ++ finished = true; ++out: ++ return finished; ++} ++ ++static bool serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt, ++ const char *s, unsigned int count, ++ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *)) ++{ ++ return __serial8250_console_write(port, wctxt, s, count, putchar); ++} ++ ++static bool atomic_print_line(struct uart_8250_port *up, ++ struct cons_write_context *wctxt) ++{ ++ struct uart_port *port = &up->port; ++ ++ if (up->console_newline_needed && ++ !__serial8250_console_write(port, wctxt, "\n", 1, serial8250_console_putchar)) { ++ return false; ++ } ++ ++ return __serial8250_console_write(port, wctxt, wctxt->outbuf, wctxt->len, ++ serial8250_console_putchar); ++} ++ ++static void atomic_console_reacquire(struct cons_write_context *wctxt, ++ struct cons_write_context *wctxt_init) ++{ ++ memcpy(wctxt, wctxt_init, sizeof(*wctxt)); ++ while (!console_try_acquire(wctxt)) { ++ cpu_relax(); ++ memcpy(wctxt, wctxt_init, sizeof(*wctxt)); ++ } ++} ++ ++bool serial8250_console_write_atomic(struct uart_8250_port *up, ++ struct cons_write_context *wctxt) ++{ ++ struct cons_write_context wctxt_init = { }; ++ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt); ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ bool finished = false; ++ unsigned int ier; ++ ++ touch_nmi_watchdog(); ++ ++ /* With write_atomic, another context may hold the port->lock. */ ++ ++ ctxt_init->console = ctxt->console; ++ ctxt_init->prio = ctxt->prio; ++ ctxt_init->thread = ctxt->thread; ++ ++ /* ++ * Enter unsafe in order to disable interrupts. If the console is ++ * lost before the interrupts are disabled, bail out because another ++ * context took over the printing. If the console is lost after the ++ * interrutps are disabled, the console must be reacquired in order ++ * to re-enable the interrupts. However in that case no printing is ++ * allowed because another context took over the printing. ++ */ ++ ++ if (!console_enter_unsafe(wctxt)) ++ return false; ++ ++ if (!__serial8250_clear_IER(up, wctxt, &ier)) ++ return false; ++ ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ ++ if (!atomic_print_line(up, wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ ++ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); ++ finished = true; ++enable_irq: ++ /* ++ * Enter unsafe in order to enable interrupts. 
If the console is ++ * lost before the interrupts are enabled, the console must be ++ * reacquired in order to re-enable the interrupts. ++ */ ++ for (;;) { ++ if (console_enter_unsafe(wctxt) && ++ __serial8250_set_IER(up, wctxt, ier)) { ++ break; ++ } ++ ++ /* HW-IRQs still disabled. Reacquire to enable them. */ ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ } ++ console_exit_unsafe(wctxt); ++ ++ return finished; + } + + /* +@@ -3400,78 +3535,116 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, + * Doing runtime PM is really a bad idea for the kernel console. + * Thus, we assume the function is called when device is powered up. + */ +-void serial8250_console_write(struct uart_8250_port *up, const char *s, +- unsigned int count) ++bool serial8250_console_write_thread(struct uart_8250_port *up, ++ struct cons_write_context *wctxt) + { ++ struct cons_write_context wctxt_init = { }; ++ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt); ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct uart_8250_em485 *em485 = up->em485; + struct uart_port *port = &up->port; +- unsigned long flags; +- unsigned int ier, use_fifo; +- int locked = 1; ++ unsigned int count = wctxt->len; ++ const char *s = wctxt->outbuf; ++ bool rs485_started = false; ++ bool finished = false; ++ unsigned int ier; + +- touch_nmi_watchdog(); +- +- if (oops_in_progress) +- locked = spin_trylock_irqsave(&port->lock, flags); +- else +- spin_lock_irqsave(&port->lock, flags); ++ ctxt_init->console = ctxt->console; ++ ctxt_init->prio = ctxt->prio; ++ ctxt_init->thread = ctxt->thread; + + /* +- * First save the IER then disable the interrupts ++ * Enter unsafe in order to disable interrupts. If the console is ++ * lost before the interrupts are disabled, bail out because another ++ * context took over the printing. If the console is lost after the ++ * interrutps are disabled, the console must be reacquired in order ++ * to re-enable the interrupts. However in that case no printing is ++ * allowed because another context took over the printing. 
+ */ +- ier = serial_port_in(port, UART_IER); +- serial8250_clear_IER(up); ++ ++ if (!console_enter_unsafe(wctxt)) ++ return false; ++ ++ if (!__serial8250_clear_IER(up, wctxt, &ier)) ++ return false; ++ ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + + /* check scratch reg to see if port powered off during system sleep */ + if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { ++ if (!console_enter_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + serial8250_console_restore(up); ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + up->canary = 0; + } + + if (em485) { +- if (em485->tx_stopped) ++ if (em485->tx_stopped) { ++ if (!console_enter_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + up->rs485_start_tx(up); +- mdelay(port->rs485.delay_rts_before_send); ++ rs485_started = true; ++ if (!console_exit_unsafe(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ } ++ if (port->rs485.delay_rts_before_send) { ++ mdelay(port->rs485.delay_rts_before_send); ++ if (!console_can_proceed(wctxt)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } ++ } + } + +- use_fifo = (up->capabilities & UART_CAP_FIFO) && +- /* +- * BCM283x requires to check the fifo +- * after each byte. +- */ +- !(up->capabilities & UART_CAP_MINI) && +- /* +- * tx_loadsz contains the transmit fifo size +- */ +- up->tx_loadsz > 1 && +- (up->fcr & UART_FCR_ENABLE_FIFO) && +- port->state && +- test_bit(TTY_PORT_INITIALIZED, &port->state->port.iflags) && +- /* +- * After we put a data in the fifo, the controller will send +- * it regardless of the CTS state. Therefore, only use fifo +- * if we don't use control flow. +- */ +- !(up->port.flags & UPF_CONS_FLOW); ++ if (!serial8250_console_write(port, wctxt, s, count, serial8250_console_putchar)) { ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ goto enable_irq; ++ } + +- if (likely(use_fifo)) +- serial8250_console_fifo_write(up, s, count); +- else +- uart_console_write(port, s, count, serial8250_console_putchar); +- +- /* +- * Finally, wait for transmitter to become empty +- * and restore the IER +- */ + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); +- ++ finished = true; ++enable_irq: ++ /* ++ * Enter unsafe in order to stop rs485_tx. If the console is ++ * lost before the rs485_tx is stopped, the console must be ++ * reacquired in order to stop rs485_tx. ++ */ + if (em485) { + mdelay(port->rs485.delay_rts_after_send); +- if (em485->tx_stopped) ++ if (em485->tx_stopped && rs485_started) { ++ while (!console_enter_unsafe(wctxt)) ++ atomic_console_reacquire(wctxt, &wctxt_init); + up->rs485_stop_tx(up); ++ if (!console_exit_unsafe(wctxt)) ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ } + } + +- serial_port_out(port, UART_IER, ier); ++ /* ++ * Enter unsafe in order to enable interrupts. If the console is ++ * lost before the interrupts are enabled, the console must be ++ * reacquired in order to re-enable the interrupts. 
++ */ ++ for (;;) { ++ if (console_enter_unsafe(wctxt) && ++ __serial8250_set_IER(up, wctxt, ier)) { ++ break; ++ } ++ atomic_console_reacquire(wctxt, &wctxt_init); ++ } + + /* + * The receive handling will happen properly because the +@@ -3483,8 +3656,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + if (up->msr_saved_flags) + serial8250_modem_status(up); + +- if (locked) +- spin_unlock_irqrestore(&port->lock, flags); ++ console_exit_unsafe(wctxt); ++ ++ return finished; + } + + static unsigned int probe_baud(struct uart_port *port) +@@ -3504,6 +3678,7 @@ static unsigned int probe_baud(struct uart_port *port) + + int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + { ++ struct uart_8250_port *up = up_to_u8250p(port); + int baud = 9600; + int bits = 8; + int parity = 'n'; +@@ -3513,6 +3688,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + if (!port->iobase && !port->membase) + return -ENODEV; + ++ up->console_newline_needed = false; ++ + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + else if (probe) +diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig +index 5313aa31930f4..16715f01bdb5a 100644 +--- a/drivers/tty/serial/8250/Kconfig ++++ b/drivers/tty/serial/8250/Kconfig +@@ -9,6 +9,7 @@ config SERIAL_8250 + depends on !S390 + select SERIAL_CORE + select SERIAL_MCTRL_GPIO if GPIOLIB ++ select HAVE_ATOMIC_CONSOLE + help + This selects whether you want to include the driver for the standard + serial ports. The standard answer is Y. People who might say N +diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c +index d8c2f3455eeba..a4e142ac6ec46 100644 +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -2319,18 +2319,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) + { + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0, new_cr; +- unsigned long flags; ++ unsigned long flags = 0; + int locked = 1; + + clk_enable(uap->clk); + +- local_irq_save(flags); ++ /* ++ * local_irq_save(flags); ++ * ++ * This local_irq_save() is nonsense. If we come in via sysrq ++ * handling then interrupts are already disabled. Aside of ++ * that the port.sysrq check is racy on SMP regardless. 
++ */ + if (uap->port.sysrq) + locked = 0; + else if (oops_in_progress) +- locked = spin_trylock(&uap->port.lock); ++ locked = spin_trylock_irqsave(&uap->port.lock, flags); + else +- spin_lock(&uap->port.lock); ++ spin_lock_irqsave(&uap->port.lock, flags); + + /* + * First save the CR then disable the interrupts +@@ -2356,8 +2362,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) + pl011_write(old_cr, uap, REG_CR); + + if (locked) +- spin_unlock(&uap->port.lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&uap->port.lock, flags); + + clk_disable(uap->clk); + } +diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c +index 82d35dbbfa6cb..511cf17d87958 100644 +--- a/drivers/tty/serial/omap-serial.c ++++ b/drivers/tty/serial/omap-serial.c +@@ -1219,13 +1219,10 @@ serial_omap_console_write(struct console *co, const char *s, + unsigned int ier; + int locked = 1; + +- local_irq_save(flags); +- if (up->port.sysrq) +- locked = 0; +- else if (oops_in_progress) +- locked = spin_trylock(&up->port.lock); ++ if (up->port.sysrq || oops_in_progress) ++ locked = spin_trylock_irqsave(&up->port.lock, flags); + else +- spin_lock(&up->port.lock); ++ spin_lock_irqsave(&up->port.lock, flags); + + /* + * First save the IER then disable the interrupts +@@ -1252,8 +1249,7 @@ serial_omap_console_write(struct console *co, const char *s, + check_modem_status(up); + + if (locked) +- spin_unlock(&up->port.lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&up->port.lock, flags); + } + + static int __init +diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c +index 728cb72be0666..409892b777d16 100644 +--- a/drivers/tty/serial/serial_core.c ++++ b/drivers/tty/serial/serial_core.c +@@ -2336,8 +2336,11 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) + * able to Re-start_rx later. 
+ */ + if (!console_suspend_enabled && uart_console(uport)) { +- if (uport->ops->start_rx) ++ if (uport->ops->start_rx) { ++ spin_lock_irq(&uport->lock); + uport->ops->stop_rx(uport); ++ spin_unlock_irq(&uport->lock); ++ } + goto unlock; + } + +@@ -2430,8 +2433,11 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) + if (console_suspend_enabled) + uart_change_pm(state, UART_PM_STATE_ON); + uport->ops->set_termios(uport, &termios, NULL); +- if (!console_suspend_enabled && uport->ops->start_rx) ++ if (!console_suspend_enabled && uport->ops->start_rx) { ++ spin_lock_irq(&uport->lock); + uport->ops->start_rx(uport); ++ spin_unlock_irq(&uport->lock); ++ } + if (console_suspend_enabled) + console_start(uport->cons); + } +diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c +index 8e3de07f103da..af933c7a61d98 100644 +--- a/drivers/tty/tty_io.c ++++ b/drivers/tty/tty_io.c +@@ -3543,8 +3543,13 @@ static ssize_t show_cons_active(struct device *dev, + for_each_console(c) { + if (!c->device) + continue; +- if (!c->write) +- continue; ++ if (c->flags & CON_NO_BKL) { ++ if (!(c->write_thread || c->write_atomic)) ++ continue; ++ } else { ++ if (!c->write) ++ continue; ++ } + if ((c->flags & CON_ENABLED) == 0) + continue; + cs[i++] = c; +diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c +index e0758fe7936dc..ab9f42d478c8c 100644 +--- a/fs/proc/consoles.c ++++ b/fs/proc/consoles.c +@@ -21,12 +21,14 @@ static int show_console_dev(struct seq_file *m, void *v) + { CON_ENABLED, 'E' }, + { CON_CONSDEV, 'C' }, + { CON_BOOT, 'B' }, ++ { CON_NO_BKL, 'N' }, + { CON_PRINTBUFFER, 'p' }, + { CON_BRL, 'b' }, + { CON_ANYTIME, 'a' }, + }; + char flags[ARRAY_SIZE(con_flags) + 1]; + struct console *con = v; ++ char con_write = '-'; + unsigned int a; + dev_t dev = 0; + +@@ -57,9 +59,15 @@ static int show_console_dev(struct seq_file *m, void *v) + seq_setwidth(m, 21 - 1); + seq_printf(m, "%s%d", con->name, con->index); + seq_pad(m, ' '); +- seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', +- con->write ? 'W' : '-', con->unblank ? 'U' : '-', +- flags); ++ if (con->flags & CON_NO_BKL) { ++ if (con->write_thread || con->write_atomic) ++ con_write = 'W'; ++ } else { ++ if (con->write) ++ con_write = 'W'; ++ } ++ seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write, ++ con->unblank ? 'U' : '-', flags); + if (dev) + seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); + +diff --git a/include/linux/console.h b/include/linux/console.h +index d3195664baa5a..1e9d5bc8fa76e 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -16,7 +16,9 @@ + + #include + #include ++#include + #include ++#include + #include + + struct vc_data; +@@ -154,6 +156,10 @@ static inline int con_debug_leave(void) + * receiving the printk spam for obvious reasons. + * @CON_EXTENDED: The console supports the extended output format of + * /dev/kmesg which requires a larger output buffer. ++ * @CON_SUSPENDED: Indicates if a console is suspended. If true, the ++ * printing callbacks must not be called. ++ * @CON_NO_BKL: Console can operate outside of the BKL style console_lock ++ * constraints. 
+ */ + enum cons_flags { + CON_PRINTBUFFER = BIT(0), +@@ -163,8 +169,133 @@ enum cons_flags { + CON_ANYTIME = BIT(4), + CON_BRL = BIT(5), + CON_EXTENDED = BIT(6), ++ CON_SUSPENDED = BIT(7), ++ CON_NO_BKL = BIT(8), + }; + ++/** ++ * struct cons_state - console state for NOBKL consoles ++ * @atom: Compound of the state fields for atomic operations ++ * @seq: Sequence for record tracking (64bit only) ++ * @bits: Compound of the state bits below ++ * ++ * @locked: Console is locked by a writer ++ * @unsafe: Console is busy in a non takeover region ++ * @thread: Current owner is the printk thread ++ * @cur_prio: The priority of the current output ++ * @req_prio: The priority of a handover request ++ * @cpu: The CPU on which the writer runs ++ * ++ * To be used for state read and preparation of atomic_long_cmpxchg() ++ * operations. ++ * ++ * The @req_prio field is particularly important to allow spin-waiting to ++ * timeout and give up without the risk of it being assigned the lock ++ * after giving up. The @req_prio field has a nice side-effect that it ++ * also makes it possible for a single read+cmpxchg in the common case of ++ * acquire and release. ++ */ ++struct cons_state { ++ union { ++ unsigned long atom; ++ struct { ++#ifdef CONFIG_64BIT ++ u32 seq; ++#endif ++ union { ++ u32 bits; ++ struct { ++ u32 locked : 1; ++ u32 unsafe : 1; ++ u32 thread : 1; ++ u32 cur_prio : 2; ++ u32 req_prio : 2; ++ u32 cpu : 18; ++ }; ++ }; ++ }; ++ }; ++}; ++ ++/** ++ * cons_prio - console writer priority for NOBKL consoles ++ * @CONS_PRIO_NONE: Unused ++ * @CONS_PRIO_NORMAL: Regular printk ++ * @CONS_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) ++ * @CONS_PRIO_PANIC: Panic output ++ * @CONS_PRIO_MAX: The number of priority levels ++ * ++ * Emergency output can carefully takeover the console even without consent ++ * of the owner, ideally only when @cons_state::unsafe is not set. Panic ++ * output can ignore the unsafe flag as a last resort. If panic output is ++ * active no takeover is possible until the panic output releases the ++ * console. ++ */ ++enum cons_prio { ++ CONS_PRIO_NONE = 0, ++ CONS_PRIO_NORMAL, ++ CONS_PRIO_EMERGENCY, ++ CONS_PRIO_PANIC, ++ CONS_PRIO_MAX, ++}; ++ ++struct console; ++struct printk_buffers; ++ ++/** ++ * struct cons_context - Context for console acquire/release ++ * @console: The associated console ++ * @state: The state at acquire time ++ * @old_state: The old state when try_acquire() failed for analysis ++ * by the caller ++ * @hov_state: The handover state for spin and cleanup ++ * @req_state: The request state for spin and cleanup ++ * @spinwait_max_us: Limit for spinwait acquire ++ * @oldseq: The sequence number at acquire() ++ * @newseq: The sequence number for progress ++ * @prio: Priority of the context ++ * @pbufs: Pointer to the text buffer for this context ++ * @dropped: Dropped counter for the current context ++ * @thread: The acquire is printk thread context ++ * @hostile: Hostile takeover requested. 
Cleared on normal ++ * acquire or friendly handover ++ * @spinwait: Spinwait on acquire if possible ++ * @backlog: Ringbuffer has pending records ++ */ ++struct cons_context { ++ struct console *console; ++ struct cons_state state; ++ struct cons_state old_state; ++ struct cons_state hov_state; ++ struct cons_state req_state; ++ u64 oldseq; ++ u64 newseq; ++ unsigned int spinwait_max_us; ++ enum cons_prio prio; ++ struct printk_buffers *pbufs; ++ unsigned long dropped; ++ unsigned int thread : 1; ++ unsigned int hostile : 1; ++ unsigned int spinwait : 1; ++ unsigned int backlog : 1; ++}; ++ ++/** ++ * struct cons_write_context - Context handed to the write callbacks ++ * @ctxt: The core console context ++ * @outbuf: Pointer to the text buffer for output ++ * @len: Length to write ++ * @unsafe: Invoked in unsafe state due to force takeover ++ */ ++struct cons_write_context { ++ struct cons_context __private ctxt; ++ char *outbuf; ++ unsigned int len; ++ bool unsafe; ++}; ++ ++struct cons_context_data; ++ + /** + * struct console - The console descriptor structure + * @name: The name of the console driver +@@ -184,6 +315,18 @@ enum cons_flags { + * @dropped: Number of unreported dropped ringbuffer records + * @data: Driver private data + * @node: hlist node for the console list ++ * ++ * @atomic_state: State array for NOBKL consoles; real and handover ++ * @atomic_seq: Sequence for record tracking (32bit only) ++ * @thread_pbufs: Pointer to thread private buffer ++ * @kthread: Pointer to kernel thread ++ * @rcuwait: RCU wait for the kernel thread ++ * @irq_work: IRQ work for thread wakeup ++ * @kthread_waiting: Indicator whether the kthread is waiting to be woken ++ * @write_atomic: Write callback for atomic context ++ * @write_thread: Write callback for printk threaded printing ++ * @port_lock: Callback to lock/unlock the port lock ++ * @pcpu_data: Pointer to percpu context data + */ + struct console { + char name[16]; +@@ -203,6 +346,23 @@ struct console { + unsigned long dropped; + void *data; + struct hlist_node node; ++ ++ /* NOBKL console specific members */ ++ atomic_long_t __private atomic_state[2]; ++#ifndef CONFIG_64BIT ++ atomic_t __private atomic_seq; ++#endif ++ struct printk_buffers *thread_pbufs; ++ struct task_struct *kthread; ++ struct rcuwait rcuwait; ++ struct irq_work irq_work; ++ atomic_t kthread_waiting; ++ ++ bool (*write_atomic)(struct console *con, struct cons_write_context *wctxt); ++ bool (*write_thread)(struct console *con, struct cons_write_context *wctxt); ++ void (*port_lock)(struct console *con, bool do_lock, unsigned long *flags); ++ ++ struct cons_context_data __percpu *pcpu_data; + }; + + #ifdef CONFIG_LOCKDEP +@@ -329,6 +489,24 @@ static inline bool console_is_registered(const struct console *con) + lockdep_assert_console_list_lock_held(); \ + hlist_for_each_entry(con, &console_list, node) + ++#ifdef CONFIG_PRINTK ++extern enum cons_prio cons_atomic_enter(enum cons_prio prio); ++extern void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio); ++extern bool console_can_proceed(struct cons_write_context *wctxt); ++extern bool console_enter_unsafe(struct cons_write_context *wctxt); ++extern bool console_exit_unsafe(struct cons_write_context *wctxt); ++extern bool console_try_acquire(struct cons_write_context *wctxt); ++extern bool console_release(struct cons_write_context *wctxt); ++#else ++static inline enum cons_prio cons_atomic_enter(enum cons_prio prio) { return CONS_PRIO_NONE; } ++static inline void cons_atomic_exit(enum cons_prio prio, enum 
cons_prio prev_prio) { } ++static inline bool console_can_proceed(struct cons_write_context *wctxt) { return false; } ++static inline bool console_enter_unsafe(struct cons_write_context *wctxt) { return false; } ++static inline bool console_exit_unsafe(struct cons_write_context *wctxt) { return false; } ++static inline bool console_try_acquire(struct cons_write_context *wctxt) { return false; } ++static inline bool console_release(struct cons_write_context *wctxt) { return false; } ++#endif ++ + extern int console_set_on_cmdline; + extern struct console *early_console; + +diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h +index d95ab85f96ba5..3dc3704a3cdbb 100644 +--- a/include/linux/entry-common.h ++++ b/include/linux/entry-common.h +@@ -57,9 +57,15 @@ + # define ARCH_EXIT_TO_USER_MODE_WORK (0) + #endif + ++#ifdef CONFIG_PREEMPT_LAZY ++# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++#else ++# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED) ++#endif ++ + #define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ +- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ++ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + ARCH_EXIT_TO_USER_MODE_WORK) + + /** +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index a92bce40b04b3..bf82980f569df 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -605,6 +605,35 @@ extern void __raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq(unsigned int nr); + ++#ifdef CONFIG_PREEMPT_RT ++DECLARE_PER_CPU(struct task_struct *, timersd); ++DECLARE_PER_CPU(unsigned long, pending_timer_softirq); ++ ++extern void raise_timer_softirq(void); ++extern void raise_hrtimer_softirq(void); ++ ++static inline unsigned int local_pending_timers(void) ++{ ++ return __this_cpu_read(pending_timer_softirq); ++} ++ ++#else ++static inline void raise_timer_softirq(void) ++{ ++ raise_softirq(TIMER_SOFTIRQ); ++} ++ ++static inline void raise_hrtimer_softirq(void) ++{ ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++} ++ ++static inline unsigned int local_pending_timers(void) ++{ ++ return local_softirq_pending(); ++} ++#endif ++ + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); + + static inline struct task_struct *this_cpu_ksoftirqd(void) +diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h +index 09d4f17c8d3b6..7376c1df9c901 100644 +--- a/include/linux/io-mapping.h ++++ b/include/linux/io-mapping.h +@@ -69,7 +69,10 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ migrate_disable(); + pagefault_disable(); + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); + } +@@ -79,7 +82,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) + { + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); +- preempt_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++ migrate_enable(); + } + + static inline void __iomem * +@@ -162,7 +168,10 @@ static inline void __iomem * + io_mapping_map_atomic_wc(struct io_mapping *mapping, + unsigned long offset) + { +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ migrate_disable(); + pagefault_disable(); + return io_mapping_map_wc(mapping, offset, 
PAGE_SIZE); + } +@@ -172,7 +181,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) + { + io_mapping_unmap(vaddr); + pagefault_enable(); +- preempt_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++ migrate_enable(); + } + + static inline void __iomem * +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index c35f04f636f15..0c014424b1312 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3202,7 +3202,11 @@ struct softnet_data { + int defer_count; + int defer_ipi_scheduled; + struct sk_buff *defer_list; ++#ifndef CONFIG_PREEMPT_RT + call_single_data_t defer_csd; ++#else ++ struct work_struct defer_work; ++#endif + }; + + static inline void input_queue_head_incr(struct softnet_data *sd) +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 0df425bf9bd75..05338f00a5907 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val); + #define preempt_count_inc() preempt_count_add(1) + #define preempt_count_dec() preempt_count_sub(1) + ++#ifdef CONFIG_PREEMPT_LAZY ++#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) ++#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) ++#define inc_preempt_lazy_count() add_preempt_lazy_count(1) ++#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) ++#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) ++#else ++#define add_preempt_lazy_count(val) do { } while (0) ++#define sub_preempt_lazy_count(val) do { } while (0) ++#define inc_preempt_lazy_count() do { } while (0) ++#define dec_preempt_lazy_count() do { } while (0) ++#define preempt_lazy_count() (0) ++#endif ++ + #ifdef CONFIG_PREEMPT_COUNT + + #define preempt_disable() \ +@@ -204,6 +218,12 @@ do { \ + barrier(); \ + } while (0) + ++#define preempt_lazy_disable() \ ++do { \ ++ inc_preempt_lazy_count(); \ ++ barrier(); \ ++} while (0) ++ + #define sched_preempt_enable_no_resched() \ + do { \ + barrier(); \ +@@ -218,15 +238,21 @@ do { \ + #define preempt_enable() \ + do { \ + barrier(); \ +- if (unlikely(preempt_count_dec_and_test())) \ ++ if (unlikely(preempt_count_dec_and_test())) { \ ++ instrumentation_begin(); \ + __preempt_schedule(); \ ++ instrumentation_end(); \ ++ } \ + } while (0) + + #define preempt_enable_notrace() \ + do { \ + barrier(); \ +- if (unlikely(__preempt_count_dec_and_test())) \ ++ if (unlikely(__preempt_count_dec_and_test())) { \ ++ instrumentation_begin(); \ + __preempt_schedule_notrace(); \ ++ instrumentation_end(); \ ++ } \ + } while (0) + + #define preempt_check_resched() \ +@@ -235,6 +261,18 @@ do { \ + __preempt_schedule(); \ + } while (0) + ++/* ++ * open code preempt_check_resched() because it is not exported to modules and ++ * used by local_unlock() or bpf_enable_instrumentation(). 
++ */ ++#define preempt_lazy_enable() \ ++do { \ ++ dec_preempt_lazy_count(); \ ++ barrier(); \ ++ if (should_resched(0)) \ ++ __preempt_schedule(); \ ++} while (0) ++ + #else /* !CONFIG_PREEMPTION */ + #define preempt_enable() \ + do { \ +@@ -242,6 +280,12 @@ do { \ + preempt_count_dec(); \ + } while (0) + ++#define preempt_lazy_enable() \ ++do { \ ++ dec_preempt_lazy_count(); \ ++ barrier(); \ ++} while (0) ++ + #define preempt_enable_notrace() \ + do { \ + barrier(); \ +@@ -282,6 +326,9 @@ do { \ + #define preempt_enable_notrace() barrier() + #define preemptible() 0 + ++#define preempt_lazy_disable() barrier() ++#define preempt_lazy_enable() barrier() ++ + #endif /* CONFIG_PREEMPT_COUNT */ + + #ifdef MODULE +@@ -300,7 +347,7 @@ do { \ + } while (0) + #define preempt_fold_need_resched() \ + do { \ +- if (tif_need_resched()) \ ++ if (tif_need_resched_now()) \ + set_preempt_need_resched(); \ + } while (0) + +@@ -416,8 +463,15 @@ extern void migrate_enable(void); + + #else + +-static inline void migrate_disable(void) { } +-static inline void migrate_enable(void) { } ++static inline void migrate_disable(void) ++{ ++ preempt_lazy_disable(); ++} ++ ++static inline void migrate_enable(void) ++{ ++ preempt_lazy_enable(); ++} + + #endif /* CONFIG_SMP */ + +diff --git a/include/linux/printk.h b/include/linux/printk.h +index 8ef499ab3c1ed..b55662624ff87 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -139,6 +139,7 @@ void early_printk(const char *s, ...) { } + #endif + + struct dev_printk_info; ++struct cons_write_context; + + #ifdef CONFIG_PRINTK + asmlinkage __printf(4, 0) +@@ -157,15 +158,17 @@ int _printk(const char *fmt, ...); + */ + __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); + +-extern void __printk_safe_enter(void); +-extern void __printk_safe_exit(void); ++extern void __printk_safe_enter(unsigned long *flags); ++extern void __printk_safe_exit(unsigned long *flags); ++extern void __printk_deferred_enter(void); ++extern void __printk_deferred_exit(void); + /* + * The printk_deferred_enter/exit macros are available only as a hack for + * some code paths that need to defer all printk console printing. Interrupts + * must be disabled for the deferred duration. 
+ */ +-#define printk_deferred_enter __printk_safe_enter +-#define printk_deferred_exit __printk_safe_exit ++#define printk_deferred_enter() __printk_deferred_enter() ++#define printk_deferred_exit() __printk_deferred_exit() + + /* + * Please don't use printk_ratelimit(), because it shares ratelimiting state +@@ -192,6 +195,8 @@ void show_regs_print_info(const char *log_lvl); + extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; + extern asmlinkage void dump_stack(void) __cold; + void printk_trigger_flush(void); ++extern void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, ++ bool skip_unsafe); + #else + static inline __printf(1, 0) + int vprintk(const char *s, va_list args) +@@ -271,6 +276,12 @@ static inline void dump_stack(void) + static inline void printk_trigger_flush(void) + { + } ++ ++static inline void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, ++ bool skip_unsafe) ++{ ++} ++ + #endif + + #ifdef CONFIG_SMP +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 63d242164b1a9..593d7e7c2e4c7 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -303,6 +303,11 @@ extern long schedule_timeout_idle(long timeout); + asmlinkage void schedule(void); + extern void schedule_preempt_disabled(void); + asmlinkage void preempt_schedule_irq(void); ++ ++extern void sched_submit_work(void); ++extern void sched_resume_work(void); ++extern void schedule_rtmutex(void); ++ + #ifdef CONFIG_PREEMPT_RT + extern void schedule_rtlock(void); + #endif +@@ -2059,6 +2064,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); ++} ++ ++static inline int need_resched_lazy(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline int need_resched_now(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED); ++} ++ ++#else ++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } ++static inline int need_resched_lazy(void) { return 0; } ++ ++static inline int need_resched_now(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED); ++} ++ ++#endif ++ + /* + * cond_resched() and cond_resched_lock(): latency reduction via + * explicit rescheduling in places that are safe. 
The return +diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h +index 20099268fa257..b38ce53576000 100644 +--- a/include/linux/sched/signal.h ++++ b/include/linux/sched/signal.h +@@ -22,6 +22,7 @@ struct sighand_struct { + refcount_t count; + wait_queue_head_t signalfd_wqh; + struct k_sigaction action[_NSIG]; ++ struct sigqueue *sigqueue_cache; + }; + + /* +@@ -135,7 +136,7 @@ struct signal_struct { + #ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ +- int posix_timer_id; ++ unsigned int next_posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ +@@ -349,6 +350,7 @@ extern int send_sig(int, struct task_struct *, int); + extern int zap_other_threads(struct task_struct *p); + extern struct sigqueue *sigqueue_alloc(void); + extern void sigqueue_free(struct sigqueue *); ++extern void sigqueue_free_cached_entry(struct sigqueue *q); + extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); + extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h +index 19376bee96676..9055a22992edc 100644 +--- a/include/linux/serial_8250.h ++++ b/include/linux/serial_8250.h +@@ -125,6 +125,8 @@ struct uart_8250_port { + #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; + ++ bool console_newline_needed; ++ + struct uart_8250_dma *dma; + const struct uart_8250_ops *ops; + +@@ -139,6 +141,9 @@ struct uart_8250_port { + /* Serial port overrun backoff */ + struct delayed_work overrun_backoff; + u32 overrun_backoff_time_ms; ++ ++ struct cons_write_context wctxt; ++ int cookie; + }; + + static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) +@@ -178,8 +183,10 @@ void serial8250_tx_chars(struct uart_8250_port *up); + unsigned int serial8250_modem_status(struct uart_8250_port *up); + void serial8250_init_port(struct uart_8250_port *up); + void serial8250_set_defaults(struct uart_8250_port *up); +-void serial8250_console_write(struct uart_8250_port *up, const char *s, +- unsigned int count); ++bool serial8250_console_write_atomic(struct uart_8250_port *up, ++ struct cons_write_context *wctxt); ++bool serial8250_console_write_thread(struct uart_8250_port *up, ++ struct cons_write_context *wctxt); + int serial8250_console_setup(struct uart_port *port, char *options, bool probe); + int serial8250_console_exit(struct uart_port *port); + +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index c02646884fa83..c1acb81b81948 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -178,6 +178,26 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti + #endif /* !CONFIG_GENERIC_ENTRY */ + + #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H ++# ifdef CONFIG_PREEMPT_LAZY ++ ++static __always_inline bool tif_need_resched(void) ++{ ++ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY); ++} ++ ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return arch_test_bit(TIF_NEED_RESCHED, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return arch_test_bit(TIF_NEED_RESCHED_LAZY, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++# else /* !CONFIG_PREEMPT_LAZY */ + + static __always_inline bool tif_need_resched(void) + { +@@ -185,7 +205,38 @@ static __always_inline bool tif_need_resched(void) + (unsigned long 
*)(¤t_thread_info()->flags)); + } + +-#else ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return tif_need_resched(); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return false; ++} ++ ++# endif /* CONFIG_PREEMPT_LAZY */ ++#else /* !_ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ ++# ifdef CONFIG_PREEMPT_LAZY ++ ++static __always_inline bool tif_need_resched(void) ++{ ++ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY); ++} ++ ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return test_bit(TIF_NEED_RESCHED, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return test_bit(TIF_NEED_RESCHED_LAZY, ++ (unsigned long *)(¤t_thread_info()->flags)); ++} ++ ++# else /* !CONFIG_PREEMPT_LAZY */ + + static __always_inline bool tif_need_resched(void) + { +@@ -193,6 +244,17 @@ static __always_inline bool tif_need_resched(void) + (unsigned long *)(¤t_thread_info()->flags)); + } + ++static __always_inline bool tif_need_resched_now(void) ++{ ++ return tif_need_resched(); ++} ++ ++static __always_inline bool tif_need_resched_lazy(void) ++{ ++ return false; ++} ++ ++# endif /* !CONFIG_PREEMPT_LAZY */ + #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ + + #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index 0e373222a6df8..47017fcf5481f 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -70,6 +70,7 @@ struct trace_entry { + unsigned char flags; + unsigned char preempt_count; + int pid; ++ unsigned char preempt_lazy_count; + }; + + #define TRACE_EVENT_TYPE_MAX \ +@@ -158,9 +159,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, + unsigned int trace_ctx) + { + entry->preempt_count = trace_ctx & 0xff; ++ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; + entry->pid = current->pid; + entry->type = type; +- entry->flags = trace_ctx >> 16; ++ entry->flags = trace_ctx >> 24; + } + + unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); +@@ -171,7 +173,13 @@ enum trace_flag_type { + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, ++#ifdef CONFIG_PREEMPT_LAZY ++ TRACE_FLAG_PREEMPT_RESCHED = 0x00, ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20, ++#else ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x00, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, ++#endif + TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_BH_OFF = 0x80, + }; +diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h +index 3e8619c72f774..b4bc2828fa09f 100644 +--- a/include/trace/events/timer.h ++++ b/include/trace/events/timer.h +@@ -158,7 +158,11 @@ DEFINE_EVENT(timer_class, timer_cancel, + { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ + { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ + { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ +- { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) ++ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \ ++ { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \ ++ { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \ ++ { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \ ++ { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) + + /** + * hrtimer_init - called when the hrtimer is initialized +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index c2f1fd95a8214..260c08efeb486 100644 +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -1,5 +1,11 @@ + # SPDX-License-Identifier: GPL-2.0-only + 
++config HAVE_PREEMPT_LAZY ++ bool ++ ++config PREEMPT_LAZY ++ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT ++ + config PREEMPT_NONE_BUILD + bool + +diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c +index 5c7e9ba7cd6b2..e9139dfc1f0a8 100644 +--- a/kernel/debug/kdb/kdb_io.c ++++ b/kernel/debug/kdb/kdb_io.c +@@ -576,6 +576,8 @@ static void kdb_msg_write(const char *msg, int msg_len) + continue; + if (c == dbg_io_ops->cons) + continue; ++ if (!c->write) ++ continue; + /* + * Set oops_in_progress to encourage the console drivers to + * disregard their internal spin locks: in the current calling +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index be61332c66b54..c6301e520d47b 100644 +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -155,7 +155,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + + local_irq_enable_exit_to_user(ti_work); + +- if (ti_work & _TIF_NEED_RESCHED) ++ if (ti_work & _TIF_NEED_RESCHED_MASK) + schedule(); + + if (ti_work & _TIF_UPROBE) +@@ -386,7 +386,7 @@ void raw_irqentry_exit_cond_resched(void) + rcu_irq_exit_check_preempt(); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + WARN_ON_ONCE(!on_thread_stack()); +- if (need_resched()) ++ if (should_resched(0)) + preempt_schedule_irq(); + } + } +diff --git a/kernel/fork.c b/kernel/fork.c +index ea332319dffea..d78954a3834cc 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1665,6 +1665,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) + RCU_INIT_POINTER(tsk->sighand, sig); + if (!sig) + return -ENOMEM; ++ sig->sigqueue_cache = NULL; + + refcount_set(&sig->count, 1); + spin_lock_irq(¤t->sighand->siglock); +@@ -1681,7 +1682,17 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) + void __cleanup_sighand(struct sighand_struct *sighand) + { + if (refcount_dec_and_test(&sighand->count)) { ++ struct sigqueue *sigqueue = NULL; ++ + signalfd_cleanup(sighand); ++ spin_lock_irq(&sighand->siglock); ++ if (sighand->sigqueue_cache) { ++ sigqueue = sighand->sigqueue_cache; ++ sighand->sigqueue_cache = NULL; ++ } ++ spin_unlock_irq(&sighand->siglock); ++ ++ sigqueue_free_cached_entry(sigqueue); + /* + * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it + * without an RCU grace period, see __lock_task_sighand(). 
+diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c +index 0408aab80941b..ee063d28f94f8 100644 +--- a/kernel/ksysfs.c ++++ b/kernel/ksysfs.c +@@ -167,6 +167,15 @@ KERNEL_ATTR_RO(vmcoreinfo); + + #endif /* CONFIG_CRASH_CORE */ + ++#if defined(CONFIG_PREEMPT_RT) ++static ssize_t realtime_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%d\n", 1); ++} ++KERNEL_ATTR_RO(realtime); ++#endif ++ + /* whether file capabilities are enabled */ + static ssize_t fscaps_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +@@ -259,6 +268,9 @@ static struct attribute * kernel_attrs[] = { + #ifndef CONFIG_TINY_RCU + &rcu_expedited_attr.attr, + &rcu_normal_attr.attr, ++#endif ++#ifdef CONFIG_PREEMPT_RT ++ &realtime_attr.attr, + #endif + NULL + }; +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 728f434de2bbf..479a9487edcc2 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, + return try_cmpxchg_acquire(&lock->owner, &old, new); + } + ++static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) ++{ ++ return rt_mutex_cmpxchg_acquire(lock, NULL, current); ++} ++ + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, + struct task_struct *old, + struct task_struct *new) +@@ -297,6 +302,24 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, + + } + ++static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); ++ ++static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) ++{ ++ /* ++ * With debug enabled rt_mutex_cmpxchg trylock() will always fail, ++ * which will unconditionally invoke sched_submit/resume_work() in ++ * the slow path of __rt_mutex_lock() and __ww_rt_mutex_lock() even ++ * in the non-contended case. ++ * ++ * Avoid that by using rt_mutex_slow_trylock() which is covered by ++ * the debug code and can acquire a non-contended rtmutex. On ++ * success the callsite avoids the sched_submit/resume_work() ++ * dance. ++ */ ++ return rt_mutex_slowtrylock(lock); ++} ++ + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, + struct task_struct *old, + struct task_struct *new) +@@ -1555,7 +1578,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, + raw_spin_unlock_irq(&lock->wait_lock); + + if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) +- schedule(); ++ schedule_rtmutex(); + + raw_spin_lock_irq(&lock->wait_lock); + set_current_state(state); +@@ -1584,7 +1607,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, + WARN(1, "rtmutex deadlock detected\n"); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); +- schedule(); ++ schedule_rtmutex(); + } + } + +@@ -1679,6 +1702,12 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + unsigned long flags; + int ret; + ++ /* ++ * The task is about to sleep. Invoke sched_submit_work() before ++ * blocking as that might take locks and corrupt tsk::pi_blocked_on. 
++ */ ++ sched_submit_work(); ++ + /* + * Technically we could use raw_spin_[un]lock_irq() here, but this can + * be called in early boot if the cmpxchg() fast path is disabled +@@ -1691,13 +1720,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, + ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + ++ sched_resume_work(); + return ret; + } + + static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, + unsigned int state) + { +- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) ++ lockdep_assert(!current->pi_blocked_on); ++ ++ if (likely(rt_mutex_try_acquire(lock))) + return 0; + + return rt_mutex_slowlock(lock, NULL, state); +diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c +index c201aadb93017..5be92ca5afabc 100644 +--- a/kernel/locking/rwbase_rt.c ++++ b/kernel/locking/rwbase_rt.c +@@ -72,15 +72,6 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + int ret; + + raw_spin_lock_irq(&rtm->wait_lock); +- /* +- * Allow readers, as long as the writer has not completely +- * acquired the semaphore for write. +- */ +- if (atomic_read(&rwb->readers) != WRITER_BIAS) { +- atomic_inc(&rwb->readers); +- raw_spin_unlock_irq(&rtm->wait_lock); +- return 0; +- } + + /* + * Call into the slow lock path with the rtmutex->wait_lock +@@ -140,10 +131,23 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, + unsigned int state) + { ++ int ret; ++ ++ lockdep_assert(!current->pi_blocked_on); ++ + if (rwbase_read_trylock(rwb)) + return 0; + +- return __rwbase_read_lock(rwb, state); ++ /* ++ * The task is about to sleep. For rwsems this submits work as that ++ * might take locks and corrupt tsk::pi_blocked_on. Must be ++ * explicit here because __rwbase_read_lock() cannot invoke ++ * rt_mutex_slowlock(). NOP for rwlocks. ++ */ ++ rwbase_sched_submit_work(); ++ ret = __rwbase_read_lock(rwb, state); ++ rwbase_sched_resume_work(); ++ return ret; + } + + static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, +@@ -239,7 +243,10 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, + struct rt_mutex_base *rtm = &rwb->rtmutex; + unsigned long flags; + +- /* Take the rtmutex as a first step */ ++ /* ++ * Take the rtmutex as a first step. For rwsem this will also ++ * invoke sched_submit_work() to flush IO and workers. 
++ */ + if (rwbase_rtmutex_lock_state(rtm, state)) + return -EINTR; + +diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c +index 9eabd585ce7af..e304db9ebfd95 100644 +--- a/kernel/locking/rwsem.c ++++ b/kernel/locking/rwsem.c +@@ -1415,6 +1415,12 @@ static inline void __downgrade_write(struct rw_semaphore *sem) + #define rwbase_rtmutex_lock_state(rtm, state) \ + __rt_mutex_lock(rtm, state) + ++#define rwbase_sched_submit_work() \ ++ sched_submit_work() ++ ++#define rwbase_sched_resume_work() \ ++ sched_resume_work() ++ + #define rwbase_rtmutex_slowlock_locked(rtm, state) \ + __rt_mutex_slowlock_locked(rtm, NULL, state) + +diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c +index 48a19ed8486d8..9fe282cd145d9 100644 +--- a/kernel/locking/spinlock_rt.c ++++ b/kernel/locking/spinlock_rt.c +@@ -37,6 +37,8 @@ + + static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) + { ++ lockdep_assert(!current->pi_blocked_on); ++ + if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) + rtlock_slowlock(rtm); + } +@@ -159,6 +161,9 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state) + return 0; + } + ++static __always_inline void rwbase_sched_submit_work(void) { } ++static __always_inline void rwbase_sched_resume_work(void) { } ++ + static __always_inline int + rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state) + { +diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c +index d1473c624105c..c7196de838edc 100644 +--- a/kernel/locking/ww_rt_mutex.c ++++ b/kernel/locking/ww_rt_mutex.c +@@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, + } + mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip); + +- if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) { ++ if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) { + if (ww_ctx) + ww_mutex_set_context_fastpath(lock, ww_ctx); + return 0; +diff --git a/kernel/panic.c b/kernel/panic.c +index 5cfea8302d23a..190f7f2bc6cfd 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -275,6 +275,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec) + */ + void panic(const char *fmt, ...) + { ++ enum cons_prio prev_prio; + static char buf[1024]; + va_list args; + long i, i_next = 0, len; +@@ -322,6 +323,8 @@ void panic(const char *fmt, ...) + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) + panic_smp_self_stop(); + ++ prev_prio = cons_atomic_enter(CONS_PRIO_PANIC); ++ + console_verbose(); + bust_spinlocks(1); + va_start(args, fmt); +@@ -382,6 +385,8 @@ void panic(const char *fmt, ...) + if (_crash_kexec_post_notifiers) + __crash_kexec(NULL); + ++ cons_atomic_flush(NULL, true); ++ + console_unblank(); + + /* +@@ -406,6 +411,7 @@ void panic(const char *fmt, ...) + * We can't use the "normal" timers since we just panicked. + */ + pr_emerg("Rebooting in %d seconds..\n", panic_timeout); ++ cons_atomic_flush(NULL, true); + + for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { + touch_nmi_watchdog(); +@@ -424,6 +430,7 @@ void panic(const char *fmt, ...) + */ + if (panic_reboot_mode != REBOOT_UNDEFINED) + reboot_mode = panic_reboot_mode; ++ cons_atomic_flush(NULL, true); + emergency_restart(); + } + #ifdef __sparc__ +@@ -436,12 +443,16 @@ void panic(const char *fmt, ...) 
+ } + #endif + #if defined(CONFIG_S390) ++ cons_atomic_flush(NULL, true); + disabled_wait(); + #endif + pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); + + /* Do not scroll important messages printed above */ + suppress_printk = 1; ++ ++ cons_atomic_exit(CONS_PRIO_PANIC, prev_prio); ++ + local_irq_enable(); + for (i = 0; ; i += PANIC_TIMER_STEP) { + touch_softlockup_watchdog(); +@@ -652,6 +663,10 @@ struct warn_args { + void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args) + { ++ enum cons_prio prev_prio; ++ ++ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY); ++ + disable_trace_on_warning(); + + if (file) +@@ -682,6 +697,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + + /* Just a warning, don't kill lockdep. */ + add_taint(taint, LOCKDEP_STILL_OK); ++ ++ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio); + } + + #ifndef __WARN_FLAGS +diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile +index f5b388e810b9f..b36683bd2f821 100644 +--- a/kernel/printk/Makefile ++++ b/kernel/printk/Makefile +@@ -1,6 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0-only + obj-y = printk.o +-obj-$(CONFIG_PRINTK) += printk_safe.o ++obj-$(CONFIG_PRINTK) += printk_safe.o printk_nobkl.o + obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o + obj-$(CONFIG_PRINTK_INDEX) += index.o + +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index 2a17704136f1d..6631fd70542f9 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -3,6 +3,8 @@ + * internal.h - printk internal definitions + */ + #include ++#include ++#include "printk_ringbuffer.h" + + #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) + void __init printk_sysctl_init(void); +@@ -12,8 +14,13 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + #define printk_sysctl_init() do { } while (0) + #endif + +-#ifdef CONFIG_PRINTK ++#define con_printk(lvl, con, fmt, ...) \ ++ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ ++ (con->flags & CON_NO_BKL) ? "" : "legacy ", \ ++ (con->flags & CON_BOOT) ? "boot" : "", \ ++ con->name, con->index, ##__VA_ARGS__) + ++#ifdef CONFIG_PRINTK + #ifdef CONFIG_PRINTK_CALLER + #define PRINTK_PREFIX_MAX 48 + #else +@@ -35,6 +42,12 @@ enum printk_info_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; + ++extern struct printk_ringbuffer *prb; ++extern bool have_bkl_console; ++extern bool printk_threads_enabled; ++ ++extern bool have_boot_console; ++ + __printf(4, 0) + int vprintk_store(int facility, int level, + const struct dev_printk_info *dev_info, +@@ -45,28 +58,98 @@ __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); + + bool printk_percpu_data_ready(void); + ++/* ++ * The printk_safe_enter()/_exit() macros mark code blocks using locks that ++ * would lead to deadlock if an interrupting context were to call printk() ++ * while the interrupted context was within such code blocks. ++ * ++ * When a CPU is in such a code block, an interrupting context calling ++ * printk() will only log the new message to the lockless ringbuffer and ++ * then trigger console printing using irqwork. 
++ */ ++ + #define printk_safe_enter_irqsave(flags) \ + do { \ +- local_irq_save(flags); \ +- __printk_safe_enter(); \ ++ __printk_safe_enter(&flags); \ + } while (0) + + #define printk_safe_exit_irqrestore(flags) \ + do { \ +- __printk_safe_exit(); \ +- local_irq_restore(flags); \ ++ __printk_safe_exit(&flags); \ + } while (0) + + void defer_console_output(void); + + u16 printk_parse_prefix(const char *text, int *level, + enum printk_info_flags *flags); ++ ++u64 cons_read_seq(struct console *con); ++void cons_nobkl_cleanup(struct console *con); ++bool cons_nobkl_init(struct console *con); ++bool cons_alloc_percpu_data(struct console *con); ++void cons_kthread_create(struct console *con); ++void cons_wake_threads(void); ++void cons_force_seq(struct console *con, u64 seq); ++void console_bkl_kthread_create(void); ++ ++/* ++ * Check if the given console is currently capable and allowed to print ++ * records. If the caller only works with certain types of consoles, the ++ * caller is responsible for checking the console type before calling ++ * this function. ++ */ ++static inline bool console_is_usable(struct console *con, short flags) ++{ ++ if (!(flags & CON_ENABLED)) ++ return false; ++ ++ if ((flags & CON_SUSPENDED)) ++ return false; ++ ++ /* ++ * The usability of a console varies depending on whether ++ * it is a NOBKL console or not. ++ */ ++ ++ if (flags & CON_NO_BKL) { ++ if (have_boot_console) ++ return false; ++ ++ } else { ++ if (!con->write) ++ return false; ++ /* ++ * Console drivers may assume that per-cpu resources have ++ * been allocated. So unless they're explicitly marked as ++ * being able to cope (CON_ANYTIME) don't call them until ++ * this CPU is officially up. ++ */ ++ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) ++ return false; ++ } ++ ++ return true; ++} ++ ++/** ++ * cons_kthread_wake - Wake up a printk thread ++ * @con: Console to operate on ++ */ ++static inline void cons_kthread_wake(struct console *con) ++{ ++ rcuwait_wake_up(&con->rcuwait); ++} ++ + #else + + #define PRINTK_PREFIX_MAX 0 + #define PRINTK_MESSAGE_MAX 0 + #define PRINTKRB_RECORD_MAX 0 + ++static inline void cons_kthread_wake(struct console *con) { } ++static inline void cons_kthread_create(struct console *con) { } ++#define printk_threads_enabled (false) ++ + /* + * In !PRINTK builds we still export console_sem + * semaphore and some of console functions (console_unlock()/etc.), so +@@ -76,8 +159,15 @@ u16 printk_parse_prefix(const char *text, int *level, + #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) + + static inline bool printk_percpu_data_ready(void) { return false; } ++static inline bool cons_nobkl_init(struct console *con) { return true; } ++static inline void cons_nobkl_cleanup(struct console *con) { } ++static inline bool console_is_usable(struct console *con, short flags) { return false; } ++static inline void cons_force_seq(struct console *con, u64 seq) { } ++ + #endif /* CONFIG_PRINTK */ + ++extern bool have_boot_console; ++ + /** + * struct printk_buffers - Buffers to read/format/output printk messages. + * @outbuf: After formatting, contains text to output. +@@ -103,3 +193,28 @@ struct printk_message { + u64 seq; + unsigned long dropped; + }; ++ ++/** ++ * struct cons_context_data - console context data ++ * @wctxt: Write context per priority level ++ * @pbufs: Buffer for storing the text ++ * ++ * Used for early boot and for per CPU data. ++ * ++ * The write contexts are allocated to avoid having them on stack, e.g. 
in ++ * warn() or panic(). ++ */ ++struct cons_context_data { ++ struct cons_write_context wctxt[CONS_PRIO_MAX]; ++ struct printk_buffers pbufs; ++}; ++ ++bool printk_get_next_message(struct printk_message *pmsg, u64 seq, ++ bool is_extended, bool may_supress); ++ ++#ifdef CONFIG_PRINTK ++ ++void console_prepend_dropped(struct printk_message *pmsg, ++ unsigned long dropped); ++ ++#endif +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index fd0c9f913940a..e2466366d4f84 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -442,6 +442,21 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + /* syslog_lock protects syslog_* variables and write access to clear_seq. */ + static DEFINE_MUTEX(syslog_lock); + ++/* ++ * Specifies if a BKL console was ever registered. Used to determine if the ++ * console lock/unlock dance is needed for console printing. ++ */ ++bool have_bkl_console; ++ ++/* ++ * Specifies if a boot console is registered. Used to determine if NOBKL ++ * consoles may be used since NOBKL consoles cannot synchronize with boot ++ * consoles. ++ */ ++bool have_boot_console; ++ ++static int unregister_console_locked(struct console *console); ++ + #ifdef CONFIG_PRINTK + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ +@@ -492,7 +507,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, + + static struct printk_ringbuffer printk_rb_dynamic; + +-static struct printk_ringbuffer *prb = &printk_rb_static; ++struct printk_ringbuffer *prb = &printk_rb_static; + + /* + * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before +@@ -696,9 +711,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, + return len; + } + +-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, +- bool is_extended, bool may_supress); +- + /* /dev/kmsg - userspace message inject/listen interface */ + struct devkmsg_user { + atomic64_t seq; +@@ -1100,7 +1112,19 @@ static inline void log_buf_add_cpu(void) {} + + static void __init set_percpu_data_ready(void) + { ++ struct hlist_node *tmp; ++ struct console *con; ++ ++ console_list_lock(); ++ ++ hlist_for_each_entry_safe(con, tmp, &console_list, node) { ++ if (!cons_alloc_percpu_data(con)) ++ unregister_console_locked(con); ++ } ++ + __printk_percpu_data_ready = true; ++ ++ console_list_unlock(); + } + + static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, +@@ -2276,6 +2300,7 @@ asmlinkage int vprintk_emit(int facility, int level, + const struct dev_printk_info *dev_info, + const char *fmt, va_list args) + { ++ struct cons_write_context wctxt = { }; + int printed_len; + bool in_sched = false; + +@@ -2296,16 +2321,25 @@ asmlinkage int vprintk_emit(int facility, int level, + + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + ++ /* ++ * The caller may be holding system-critical or ++ * timing-sensitive locks. Disable preemption during ++ * printing of all remaining records to all consoles so that ++ * this context can return as soon as possible. Hopefully ++ * another printk() caller will take over the printing. ++ */ ++ preempt_disable(); ++ ++ /* ++ * Flush the non-BKL consoles. This only leads to direct atomic ++ * printing for non-BKL consoles that do not have a printer ++ * thread available. Otherwise the printer thread will perform ++ * the printing. ++ */ ++ cons_atomic_flush(&wctxt, true); ++ + /* If called from the scheduler, we can not call up(). 
*/ +- if (!in_sched) { +- /* +- * The caller may be holding system-critical or +- * timing-sensitive locks. Disable preemption during +- * printing of all remaining records to all consoles so that +- * this context can return as soon as possible. Hopefully +- * another printk() caller will take over the printing. +- */ +- preempt_disable(); ++ if (!in_sched && have_bkl_console && !IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* + * Try to acquire and then immediately release the console + * semaphore. The release will print out buffers. With the +@@ -2314,10 +2348,15 @@ asmlinkage int vprintk_emit(int facility, int level, + */ + if (console_trylock_spinning()) + console_unlock(); +- preempt_enable(); + } + +- wake_up_klogd(); ++ preempt_enable(); ++ ++ cons_wake_threads(); ++ if (in_sched) ++ defer_console_output(); ++ else ++ wake_up_klogd(); + return printed_len; + } + EXPORT_SYMBOL(vprintk_emit); +@@ -2556,10 +2595,26 @@ MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to hig + */ + void suspend_console(void) + { ++ struct console *con; ++ + if (!console_suspend_enabled) + return; + pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); + pr_flush(1000, true); ++ ++ console_list_lock(); ++ for_each_console(con) ++ console_srcu_write_flags(con, con->flags | CON_SUSPENDED); ++ console_list_unlock(); ++ ++ /* ++ * Ensure that all SRCU list walks have completed. All printing ++ * contexts must be able to see that they are suspended so that it ++ * is guaranteed that all printing has stopped when this function ++ * completes. ++ */ ++ synchronize_srcu(&console_srcu); ++ + console_lock(); + console_suspended = 1; + up_console_sem(); +@@ -2567,11 +2622,39 @@ void suspend_console(void) + + void resume_console(void) + { ++ struct console *con; ++ short flags; ++ int cookie; ++ + if (!console_suspend_enabled) + return; + down_console_sem(); + console_suspended = 0; + console_unlock(); ++ ++ console_list_lock(); ++ for_each_console(con) ++ console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); ++ console_list_unlock(); ++ ++ /* ++ * Ensure that all SRCU list walks have completed. All printing ++ * contexts must be able to see they are no longer suspended so ++ * that they are guaranteed to wake up and resume printing. ++ */ ++ synchronize_srcu(&console_srcu); ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ flags = console_srcu_read_flags(con); ++ if (flags & CON_NO_BKL) ++ cons_kthread_wake(con); ++ } ++ console_srcu_read_unlock(cookie); ++ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && have_bkl_console) ++ wake_up_interruptible(&log_wait); ++ + pr_flush(1000, true); + } + +@@ -2586,7 +2669,7 @@ void resume_console(void) + */ + static int console_cpu_notify(unsigned int cpu) + { +- if (!cpuhp_tasks_frozen) { ++ if (!cpuhp_tasks_frozen && have_bkl_console) { + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); +@@ -2661,33 +2744,6 @@ static bool abandon_console_lock_in_panic(void) + return atomic_read(&panic_cpu) != raw_smp_processor_id(); + } + +-/* +- * Check if the given console is currently capable and allowed to print +- * records. +- * +- * Requires the console_srcu_read_lock. +- */ +-static inline bool console_is_usable(struct console *con) +-{ +- short flags = console_srcu_read_flags(con); +- +- if (!(flags & CON_ENABLED)) +- return false; +- +- if (!con->write) +- return false; +- +- /* +- * Console drivers may assume that per-cpu resources have been +- * allocated. 
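/*
 * Illustration only, not part of the patch: after vprintk_store() the
 * emitting context either flushes the legacy consoles itself via a
 * console-lock trylock or leaves them to whichever context currently
 * owns the lock; that owner keeps re-checking for newly stored records
 * before it finally drops the lock. The user-space sketch below models
 * just that hand-off, with a pthread mutex standing in for the console
 * semaphore; the kernel additionally defers to irq_work, the NOBKL
 * atomic flush and the printer threads introduced by this patch.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define N_RECORDS 10000

static atomic_long head;	/* next sequence to store ("ringbuffer") */
static atomic_long done;	/* next sequence to print ("console") */
static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with console_lock held; prints everything and drops the lock. */
static void flush_and_unlock(void)
{
	do {
		long limit = atomic_load(&head);

		while (atomic_load(&done) < limit)
			atomic_fetch_add(&done, 1);	/* "print" one record */

		pthread_mutex_unlock(&console_lock);
		/* Records stored after the flush? Retake the lock if possible,
		 * otherwise their emitter (or the new owner) handles them. */
	} while (atomic_load(&done) < atomic_load(&head) &&
		 pthread_mutex_trylock(&console_lock) == 0);
}

static void emit(void)
{
	atomic_fetch_add(&head, 1);	/* vprintk_store() */
	if (pthread_mutex_trylock(&console_lock) == 0)
		flush_and_unlock();
	/* trylock failed: the current owner re-checks before returning */
}

static void *producer(void *unused)
{
	for (int i = 0; i < N_RECORDS; i++)
		emit();
	return unused;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, producer, NULL);
	pthread_create(&b, NULL, producer, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("stored %ld, printed %ld\n", atomic_load(&head), atomic_load(&done));
	return 0;
}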
So unless they're explicitly marked as being able to +- * cope (CON_ANYTIME) don't call them until this CPU is officially up. +- */ +- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) +- return false; +- +- return true; +-} +- + static void __console_unlock(void) + { + console_locked = 0; +@@ -2709,7 +2765,7 @@ static void __console_unlock(void) + * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. + */ + #ifdef CONFIG_PRINTK +-static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) ++void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) + { + struct printk_buffers *pbufs = pmsg->pbufs; + const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); +@@ -2741,7 +2797,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d + pmsg->outbuf_len += len; + } + #else +-#define console_prepend_dropped(pmsg, dropped) ++static inline void console_prepend_dropped(struct printk_message *pmsg, ++ unsigned long dropped) { } + #endif /* CONFIG_PRINTK */ + + /* +@@ -2763,8 +2820,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d + * of @pmsg are valid. (See the documentation of struct printk_message + * for information about the @pmsg fields.) + */ +-static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, +- bool is_extended, bool may_suppress) ++bool printk_get_next_message(struct printk_message *pmsg, u64 seq, ++ bool is_extended, bool may_suppress) + { + static int panic_console_dropped; + +@@ -2933,9 +2990,14 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + + cookie = console_srcu_read_lock(); + for_each_console_srcu(con) { ++ short flags = console_srcu_read_flags(con); + bool progress; + +- if (!console_is_usable(con)) ++ /* console_flush_all() is only for legacy consoles. */ ++ if (flags & CON_NO_BKL) ++ continue; ++ ++ if (!console_is_usable(con, flags)) + continue; + any_usable = true; + +@@ -2973,30 +3035,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + return false; + } + +-/** +- * console_unlock - unblock the console subsystem from printing +- * +- * Releases the console_lock which the caller holds to block printing of +- * the console subsystem. +- * +- * While the console_lock was held, console output may have been buffered +- * by printk(). If this is the case, console_unlock(); emits +- * the output prior to releasing the lock. +- * +- * console_unlock(); may be called from any context. +- */ +-void console_unlock(void) ++static u64 console_flush_and_unlock(void) + { + bool do_cond_resched; + bool handover; + bool flushed; + u64 next_seq; + +- if (console_suspended) { +- up_console_sem(); +- return; +- } +- + /* + * Console drivers are called with interrupts disabled, so + * @console_may_schedule should be cleared before; however, we may +@@ -3033,6 +3078,39 @@ void console_unlock(void) + * fails, another context is already handling the printing. + */ + } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); ++ ++ return next_seq; ++} ++ ++/** ++ * console_unlock - unblock the console subsystem from printing ++ * ++ * Releases the console_lock which the caller holds to block printing of ++ * the console subsystem. ++ * ++ * While the console_lock was held, console output may have been buffered ++ * by printk(). If this is the case, console_unlock(); emits ++ * the output prior to releasing the lock. 
++ * ++ * console_unlock(); may be called from any context. ++ */ ++void console_unlock(void) ++{ ++ if (console_suspended) { ++ up_console_sem(); ++ return; ++ } ++ ++ /* ++ * PREEMPT_RT relies on kthread and atomic consoles for printing. ++ * It never attempts to print from console_unlock(). ++ */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ __console_unlock(); ++ return; ++ } ++ ++ console_flush_and_unlock(); + } + EXPORT_SYMBOL(console_unlock); + +@@ -3057,6 +3135,9 @@ void console_unblank(void) + struct console *c; + int cookie; + ++ if (!have_bkl_console) ++ return; ++ + /* + * Stop console printing because the unblank() callback may + * assume the console is not within its write() callback. +@@ -3065,6 +3146,10 @@ void console_unblank(void) + * In that case, attempt a trylock as best-effort. + */ + if (oops_in_progress) { ++ /* Semaphores are not NMI-safe. */ ++ if (in_nmi()) ++ return; ++ + if (down_trylock_console_sem() != 0) + return; + } else +@@ -3094,23 +3179,46 @@ void console_unblank(void) + */ + void console_flush_on_panic(enum con_flush_mode mode) + { ++ struct console *c; ++ short flags; ++ int cookie; ++ u64 seq; ++ ++ seq = prb_first_valid_seq(prb); ++ ++ /* ++ * Safely flush the atomic consoles before trying to flush any ++ * BKL/legacy consoles. ++ */ ++ if (mode == CONSOLE_REPLAY_ALL) { ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(c) { ++ flags = console_srcu_read_flags(c); ++ if (flags & CON_NO_BKL) ++ cons_force_seq(c, seq); ++ } ++ console_srcu_read_unlock(cookie); ++ } ++ cons_atomic_flush(NULL, true); ++ ++ if (!have_bkl_console) ++ return; ++ + /* + * If someone else is holding the console lock, trylock will fail + * and may_schedule may be set. Ignore and proceed to unlock so + * that messages are flushed out. As this can be called from any + * context and we don't want to get preempted while flushing, + * ensure may_schedule is cleared. ++ * ++ * Since semaphores are not NMI-safe, the console lock must be ++ * ignored if the panic is in NMI context. + */ +- console_trylock(); ++ if (!in_nmi()) ++ console_trylock(); + console_may_schedule = 0; + + if (mode == CONSOLE_REPLAY_ALL) { +- struct console *c; +- int cookie; +- u64 seq; +- +- seq = prb_first_valid_seq(prb); +- + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { + /* +@@ -3122,7 +3230,8 @@ void console_flush_on_panic(enum con_flush_mode mode) + } + console_srcu_read_unlock(cookie); + } +- console_unlock(); ++ if (!in_nmi()) ++ console_unlock(); + } + + /* +@@ -3179,13 +3288,118 @@ EXPORT_SYMBOL(console_stop); + + void console_start(struct console *console) + { ++ short flags; ++ + console_list_lock(); + console_srcu_write_flags(console, console->flags | CON_ENABLED); ++ flags = console->flags; + console_list_unlock(); ++ ++ /* ++ * Ensure that all SRCU list walks have completed. The related ++ * printing context must be able to see it is enabled so that ++ * it is guaranteed to wake up and resume printing. 
++ */ ++ synchronize_srcu(&console_srcu); ++ ++ if (flags & CON_NO_BKL) ++ cons_kthread_wake(console); ++ else if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ wake_up_interruptible(&log_wait); ++ + __pr_flush(console, 1000, true); + } + EXPORT_SYMBOL(console_start); + ++static struct task_struct *console_bkl_kthread; ++ ++static bool printer_should_wake(u64 seq) ++{ ++ bool available = false; ++ struct console *con; ++ int cookie; ++ ++ if (kthread_should_stop()) ++ return true; ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ short flags = console_srcu_read_flags(con); ++ ++ if (flags & CON_NO_BKL) ++ continue; ++ if (!console_is_usable(con, flags)) ++ continue; ++ /* ++ * It is safe to read @seq because only this ++ * thread context updates @seq. ++ */ ++ if (prb_read_valid(prb, con->seq, NULL)) { ++ available = true; ++ break; ++ } ++ } ++ console_srcu_read_unlock(cookie); ++ ++ return available; ++} ++ ++static int console_bkl_kthread_func(void *unused) ++{ ++ u64 seq = 0; ++ int error; ++ ++ for (;;) { ++ error = wait_event_interruptible(log_wait, printer_should_wake(seq)); ++ ++ if (kthread_should_stop()) ++ break; ++ ++ if (error) ++ continue; ++ ++ console_lock(); ++ if (console_suspended) ++ up_console_sem(); ++ else ++ seq = console_flush_and_unlock(); ++ } ++ return 0; ++} ++ ++void console_bkl_kthread_create(void) ++{ ++ struct task_struct *kt; ++ struct console *c; ++ ++ lockdep_assert_held(&console_mutex); ++ ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ return; ++ ++ if (!printk_threads_enabled || console_bkl_kthread) ++ return; ++ ++ for_each_console(c) { ++ if (c->flags & CON_BOOT) ++ return; ++ } ++ ++ kt = kthread_run(console_bkl_kthread_func, NULL, "pr/bkl"); ++ if (IS_ERR(kt)) { ++ pr_err("unable to start BKL printing thread\n"); ++ return; ++ } ++ ++ console_bkl_kthread = kt; ++ ++ /* ++ * It is important that console printing threads are scheduled ++ * shortly after a printk call and with generous runtime budgets. ++ */ ++ sched_set_normal(console_bkl_kthread, -20); ++} ++ + static int __read_mostly keep_bootcon; + + static int __init keep_bootcon_setup(char *str) +@@ -3269,11 +3483,6 @@ static void try_enable_default_console(struct console *newcon) + newcon->flags |= CON_CONSDEV; + } + +-#define con_printk(lvl, con, fmt, ...) \ +- printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ +- (con->flags & CON_BOOT) ? "boot" : "", \ +- con->name, con->index, ##__VA_ARGS__) +- + static void console_init_seq(struct console *newcon, bool bootcon_registered) + { + struct console *con; +@@ -3338,8 +3547,6 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) + #define console_first() \ + hlist_entry(console_list.first, struct console, node) + +-static int unregister_console_locked(struct console *console); +- + /* + * The console driver calls this routine during kernel initialization + * to register the console printing procedure with printk() and to +@@ -3431,6 +3638,16 @@ void register_console(struct console *newcon) + newcon->dropped = 0; + console_init_seq(newcon, bootcon_registered); + ++ if (!(newcon->flags & CON_NO_BKL)) { ++ have_bkl_console = true; ++ console_bkl_kthread_create(); ++ } else if (!cons_nobkl_init(newcon)) { ++ goto unlock; ++ } ++ ++ if (newcon->flags & CON_BOOT) ++ have_boot_console = true; ++ + /* + * Put this console in the list - keep the + * preferred driver at the head of the list. 
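/*
 * Illustration only, not part of the patch: on PREEMPT_RT the legacy
 * consoles are flushed by the dedicated "pr/bkl" kthread created above,
 * which sleeps on log_wait until printer_should_wake() reports pending
 * records and then prints under the console lock. A condensed user-space
 * analogue of that wait/flush loop, with a pthread condition variable
 * standing in for log_wait (control flow only, none of the kernel's
 * console locking):
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static long stored, printed;
static bool stop;

/* Rough analogue of printer_should_wake(). */
static bool should_wake(void)
{
	return stop || printed < stored;
}

static void *printer_thread(void *unused)
{
	pthread_mutex_lock(&lock);
	for (;;) {
		while (!should_wake())
			pthread_cond_wait(&wake, &lock);  /* wait_event_interruptible() */
		if (stop && printed == stored)
			break;
		while (printed < stored) {
			long seq = printed++;

			pthread_mutex_unlock(&lock);
			printf("printing record %ld\n", seq);  /* console_flush_and_unlock() */
			pthread_mutex_lock(&lock);
		}
	}
	pthread_mutex_unlock(&lock);
	return unused;
}

static void emit_record(void)
{
	pthread_mutex_lock(&lock);
	stored++;			/* vprintk_store() */
	pthread_cond_signal(&wake);	/* wake_up_interruptible(&log_wait) */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t kt;

	pthread_create(&kt, NULL, printer_thread, NULL);
	for (int i = 0; i < 5; i++)
		emit_record();

	pthread_mutex_lock(&lock);
	stop = true;			/* kthread_should_stop() */
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);
	pthread_join(kt, NULL);
	return 0;
}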
+@@ -3474,6 +3691,9 @@ void register_console(struct console *newcon) + if (con->flags & CON_BOOT) + unregister_console_locked(con); + } ++ ++ /* All boot consoles have been unregistered. */ ++ have_boot_console = false; + } + unlock: + console_list_unlock(); +@@ -3483,11 +3703,13 @@ EXPORT_SYMBOL(register_console); + /* Must be called under console_list_lock(). */ + static int unregister_console_locked(struct console *console) + { ++ struct console *c; ++ bool is_boot_con; + int res; + + lockdep_assert_console_list_lock_held(); + +- con_printk(KERN_INFO, console, "disabled\n"); ++ is_boot_con = console->flags & CON_BOOT; + + res = _braille_unregister_console(console); + if (res < 0) +@@ -3495,12 +3717,13 @@ static int unregister_console_locked(struct console *console) + if (res > 0) + return 0; + +- /* Disable it unconditionally */ +- console_srcu_write_flags(console, console->flags & ~CON_ENABLED); +- + if (!console_is_registered_locked(console)) + return -ENODEV; + ++ console_srcu_write_flags(console, console->flags & ~CON_ENABLED); ++ ++ con_printk(KERN_INFO, console, "disabled\n"); ++ + hlist_del_init_rcu(&console->node); + + /* +@@ -3522,11 +3745,23 @@ static int unregister_console_locked(struct console *console) + */ + synchronize_srcu(&console_srcu); + ++ if (console->flags & CON_NO_BKL) ++ cons_nobkl_cleanup(console); ++ + console_sysfs_notify(); + + if (console->exit) + res = console->exit(console); + ++ /* ++ * Each time a boot console unregisters, try to start up the printing ++ * threads. They will only start if this was the last boot console. ++ */ ++ if (is_boot_con) { ++ for_each_console(c) ++ cons_kthread_create(c); ++ } ++ + return res; + } + +@@ -3688,31 +3923,36 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + + /* + * Hold the console_lock to guarantee safe access to +- * console->seq and to prevent changes to @console_suspended +- * until all consoles have been processed. ++ * console->seq. + */ + console_lock(); + + cookie = console_srcu_read_lock(); + for_each_console_srcu(c) { ++ short flags; ++ + if (con && con != c) + continue; +- if (!console_is_usable(c)) ++ ++ flags = console_srcu_read_flags(c); ++ ++ if (!console_is_usable(c, flags)) + continue; ++ ++ /* ++ * Since the console is locked, use this opportunity ++ * to update console->seq for NOBKL consoles. ++ */ ++ if (flags & CON_NO_BKL) ++ c->seq = cons_read_seq(c); ++ + printk_seq = c->seq; + if (printk_seq < seq) + diff += seq - printk_seq; + } + console_srcu_read_unlock(cookie); + +- /* +- * If consoles are suspended, it cannot be expected that they +- * make forward progress, so timeout immediately. @diff is +- * still used to return a valid flush status. +- */ +- if (console_suspended) +- remaining = 0; +- else if (diff != last_diff && reset_on_progress) ++ if (diff != last_diff && reset_on_progress) + remaining = timeout_ms; + + console_unlock(); +@@ -3770,9 +4010,17 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) + int pending = this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_OUTPUT) { +- /* If trylock fails, someone else is doing the printing */ +- if (console_trylock()) +- console_unlock(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ /* The BKL thread waits on @log_wait. */ ++ pending |= PRINTK_PENDING_WAKEUP; ++ } else { ++ /* ++ * If trylock fails, some other context ++ * will do the printing. 
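/*
 * Illustration only, not part of the patch: the deferred-printing hook
 * around this point records what needs doing as bits in a per-CPU
 * printk_pending word and lets the irq_work handler consume all of them
 * with one xchg. A stand-alone model of that set-then-consume pattern
 * (a single global word here, with the handler called directly instead
 * of being queued as irq_work):
 */
#include <stdatomic.h>
#include <stdio.h>

#define DEMO_PENDING_WAKEUP	0x01
#define DEMO_PENDING_OUTPUT	0x02

static atomic_int demo_pending;

/* Rough analogue of __wake_up_klogd(): note the work, kick the worker. */
static void demo_request(int val)
{
	atomic_fetch_or(&demo_pending, val);
	/* the kernel queues irq_work here; the demo runs the handler inline */
}

/* Rough analogue of wake_up_klogd_work_func(): consume everything at once. */
static void demo_handler(void)
{
	int pending = atomic_exchange(&demo_pending, 0);

	if (pending & DEMO_PENDING_OUTPUT)
		printf("flush consoles (or wake the BKL thread on PREEMPT_RT)\n");
	if (pending & DEMO_PENDING_WAKEUP)
		printf("wake up klogd/log_wait waiters\n");
}

int main(void)
{
	demo_request(DEMO_PENDING_WAKEUP);
	demo_request(DEMO_PENDING_WAKEUP | DEMO_PENDING_OUTPUT);
	demo_handler();		/* both requests are handled in one pass */
	return 0;
}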
++ */ ++ if (console_trylock()) ++ console_unlock(); ++ } + } + + if (pending & PRINTK_PENDING_WAKEUP) +@@ -3807,33 +4055,58 @@ static void __wake_up_klogd(int val) + preempt_enable(); + } + ++/** ++ * wake_up_klogd - Wake kernel logging daemon ++ * ++ * Use this function when new records have been added to the ringbuffer ++ * and the console printing for those records is handled elsewhere. In ++ * this case only the logging daemon needs to be woken. ++ * ++ * Context: Any context. ++ */ + void wake_up_klogd(void) + { + __wake_up_klogd(PRINTK_PENDING_WAKEUP); + } + ++/** ++ * defer_console_output - Wake kernel logging daemon and trigger ++ * console printing in a deferred context ++ * ++ * Use this function when new records have been added to the ringbuffer ++ * but the current context is unable to perform the console printing. ++ * This function also wakes the logging daemon. ++ * ++ * Context: Any context. ++ */ + void defer_console_output(void) + { ++ int val = PRINTK_PENDING_WAKEUP; ++ + /* + * New messages may have been added directly to the ringbuffer + * using vprintk_store(), so wake any waiters as well. + */ +- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); ++ if (have_bkl_console) ++ val |= PRINTK_PENDING_OUTPUT; ++ __wake_up_klogd(val); + } + + void printk_trigger_flush(void) + { ++ struct cons_write_context wctxt = { }; ++ ++ preempt_disable(); ++ cons_atomic_flush(&wctxt, true); ++ preempt_enable(); ++ ++ cons_wake_threads(); + defer_console_output(); + } + + int vprintk_deferred(const char *fmt, va_list args) + { +- int r; +- +- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); +- defer_console_output(); +- +- return r; ++ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); + } + + int _printk_deferred(const char *fmt, ...) +diff --git a/kernel/printk/printk_nobkl.c b/kernel/printk/printk_nobkl.c +new file mode 100644 +index 0000000000000..e0b818a4f8b38 +--- /dev/null ++++ b/kernel/printk/printk_nobkl.c +@@ -0,0 +1,1825 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++// Copyright (C) 2022 Linutronix GmbH, John Ogness ++// Copyright (C) 2022 Intel, Thomas Gleixner ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "printk_ringbuffer.h" ++#include "internal.h" ++/* ++ * Printk implementation for consoles that do not depend on the BKL style ++ * console_lock mechanism. ++ * ++ * Console is locked on a CPU when state::locked is set and state:cpu == ++ * current CPU. This is valid for the current execution context. ++ * ++ * Nesting execution contexts on the same CPU can carefully take over ++ * if the driver allows reentrancy via state::unsafe = false. When the ++ * interrupted context resumes it checks the state before entering ++ * an unsafe region and aborts the operation if it detects a takeover. ++ * ++ * In case of panic or emergency the nesting context can take over the ++ * console forcefully. The write callback is then invoked with the unsafe ++ * flag set in the write context data, which allows the driver side to avoid ++ * locks and to evaluate the driver state so it can use an emergency path ++ * or repair the state instead of blindly assuming that it works. ++ * ++ * If the interrupted context touches the assigned record buffer after ++ * takeover, it does not cause harm because at the same execution level ++ * there is no concurrency on the same CPU. A threaded printer always has ++ * its own record buffer so it can never interfere with any of the per CPU ++ * record buffers. 
++ * ++ * A concurrent writer on a different CPU can request to take over the ++ * console by: ++ * ++ * 1) Carefully writing the desired state into state[REQ] ++ * if there is no same or higher priority request pending. ++ * This locks state[REQ] except for higher priority ++ * waiters. ++ * ++ * 2) Setting state[CUR].req_prio unless a same or higher ++ * priority waiter won the race. ++ * ++ * 3) Carefully spin on state[CUR] until that is locked with the ++ * expected state. When the state is not the expected one then it ++ * has to verify that state[REQ] is still the same and that ++ * state[CUR] has not been taken over or unlocked. ++ * ++ * The unlocker hands over to state[REQ], but only if state[CUR] ++ * matches. ++ * ++ * In case that the owner does not react on the request and does not make ++ * observable progress, the waiter will timeout and can then decide to do ++ * a hostile takeover. ++ */ ++ ++#define copy_full_state(_dst, _src) do { _dst = _src; } while (0) ++#define copy_bit_state(_dst, _src) do { _dst.bits = _src.bits; } while (0) ++ ++#ifdef CONFIG_64BIT ++#define copy_seq_state64(_dst, _src) do { _dst.seq = _src.seq; } while (0) ++#else ++#define copy_seq_state64(_dst, _src) do { } while (0) ++#endif ++ ++enum state_selector { ++ CON_STATE_CUR, ++ CON_STATE_REQ, ++}; ++ ++/** ++ * cons_state_set - Helper function to set the console state ++ * @con: Console to update ++ * @which: Selects real state or handover state ++ * @new: The new state to write ++ * ++ * Only to be used when the console is not yet or no longer visible in the ++ * system. Otherwise use cons_state_try_cmpxchg(). ++ */ ++static inline void cons_state_set(struct console *con, enum state_selector which, ++ struct cons_state *new) ++{ ++ atomic_long_set(&ACCESS_PRIVATE(con, atomic_state[which]), new->atom); ++} ++ ++/** ++ * cons_state_read - Helper function to read the console state ++ * @con: Console to update ++ * @which: Selects real state or handover state ++ * @state: The state to store the result ++ */ ++static inline void cons_state_read(struct console *con, enum state_selector which, ++ struct cons_state *state) ++{ ++ state->atom = atomic_long_read(&ACCESS_PRIVATE(con, atomic_state[which])); ++} ++ ++/** ++ * cons_state_try_cmpxchg() - Helper function for atomic_long_try_cmpxchg() on console state ++ * @con: Console to update ++ * @which: Selects real state or handover state ++ * @old: Old/expected state ++ * @new: New state ++ * ++ * Returns: True on success, false on fail ++ */ ++static inline bool cons_state_try_cmpxchg(struct console *con, ++ enum state_selector which, ++ struct cons_state *old, ++ struct cons_state *new) ++{ ++ return atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_state[which]), ++ &old->atom, new->atom); ++} ++ ++/** ++ * cons_state_full_match - Check whether the full state matches ++ * @cur: The state to check ++ * @prev: The previous state ++ * ++ * Returns: True if matching, false otherwise. ++ * ++ * Check the full state including state::seq on 64bit. For take over ++ * detection. ++ */ ++static inline bool cons_state_full_match(struct cons_state cur, ++ struct cons_state prev) ++{ ++ /* ++ * req_prio can be set by a concurrent writer for friendly ++ * handover. Ignore it in the comparison. ++ */ ++ cur.req_prio = prev.req_prio; ++ return cur.atom == prev.atom; ++} ++ ++/** ++ * cons_state_bits_match - Check for matching state bits ++ * @cur: The state to check ++ * @prev: The previous state ++ * ++ * Returns: True if state matches, false otherwise. 
++ * ++ * Contrary to cons_state_full_match this checks only the bits and ignores ++ * a sequence change on 64bits. On 32bit the two functions are identical. ++ */ ++static inline bool cons_state_bits_match(struct cons_state cur, struct cons_state prev) ++{ ++ /* ++ * req_prio can be set by a concurrent writer for friendly ++ * handover. Ignore it in the comparison. ++ */ ++ cur.req_prio = prev.req_prio; ++ return cur.bits == prev.bits; ++} ++ ++/** ++ * cons_check_panic - Check whether a remote CPU is in panic ++ * ++ * Returns: True if a remote CPU is in panic, false otherwise. ++ */ ++static inline bool cons_check_panic(void) ++{ ++ unsigned int pcpu = atomic_read(&panic_cpu); ++ ++ return pcpu != PANIC_CPU_INVALID && pcpu != smp_processor_id(); ++} ++ ++static struct cons_context_data early_cons_ctxt_data __initdata; ++ ++/** ++ * cons_context_set_pbufs - Set the output text buffer for the current context ++ * @ctxt: Pointer to the acquire context ++ * ++ * Buffer selection: ++ * 1) Early boot uses the global (initdata) buffer ++ * 2) Printer threads use the dynamically allocated per-console buffers ++ * 3) All other contexts use the per CPU buffers ++ * ++ * This guarantees that there is no concurrency on the output records ever. ++ * Early boot and per CPU nesting is not a problem. The takeover logic ++ * tells the interrupted context that the buffer has been overwritten. ++ * ++ * There are two critical regions that matter: ++ * ++ * 1) Context is filling the buffer with a record. After interruption ++ * it continues to sprintf() the record and before it goes to ++ * write it out, it checks the state, notices the takeover, discards ++ * the content and backs out. ++ * ++ * 2) Context is in a unsafe critical region in the driver. After ++ * interruption it might read overwritten data from the output ++ * buffer. When it leaves the critical region it notices and backs ++ * out. Hostile takeovers in driver critical regions are best effort ++ * and there is not much that can be done about that. ++ */ ++static __ref void cons_context_set_pbufs(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ ++ /* Thread context or early boot? */ ++ if (ctxt->thread) ++ ctxt->pbufs = con->thread_pbufs; ++ else if (!con->pcpu_data) ++ ctxt->pbufs = &early_cons_ctxt_data.pbufs; ++ else ++ ctxt->pbufs = &(this_cpu_ptr(con->pcpu_data)->pbufs); ++} ++ ++/** ++ * cons_seq_init - Helper function to initialize the console sequence ++ * @con: Console to work on ++ * ++ * Set @con->atomic_seq to the starting record, or if that record no ++ * longer exists, the oldest available record. For init only. Do not ++ * use for runtime updates. ++ */ ++static void cons_seq_init(struct console *con) ++{ ++ u32 seq = (u32)max_t(u64, con->seq, prb_first_valid_seq(prb)); ++#ifdef CONFIG_64BIT ++ struct cons_state state; ++ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ state.seq = seq; ++ cons_state_set(con, CON_STATE_CUR, &state); ++#else ++ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq); ++#endif ++} ++ ++/** ++ * cons_force_seq - Force a specified sequence number for a console ++ * @con: Console to work on ++ * @seq: Sequence number to force ++ * ++ * This function is only intended to be used in emergency situations. 
In ++ * particular: console_flush_on_panic(CONSOLE_REPLAY_ALL) ++ */ ++void cons_force_seq(struct console *con, u64 seq) ++{ ++#ifdef CONFIG_64BIT ++ struct cons_state old; ++ struct cons_state new; ++ ++ do { ++ cons_state_read(con, CON_STATE_CUR, &old); ++ copy_bit_state(new, old); ++ new.seq = seq; ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)); ++#else ++ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq); ++#endif ++} ++ ++static inline u64 cons_expand_seq(u64 seq) ++{ ++ u64 rbseq; ++ ++ /* ++ * The provided sequence is only the lower 32bits of the ringbuffer ++ * sequence. It needs to be expanded to 64bit. Get the next sequence ++ * number from the ringbuffer and fold it. ++ */ ++ rbseq = prb_next_seq(prb); ++ seq = rbseq - ((u32)rbseq - (u32)seq); ++ ++ return seq; ++} ++ ++/** ++ * cons_read_seq - Read the current console sequence ++ * @con: Console to read the sequence of ++ * ++ * Returns: Sequence number of the next record to print on @con. ++ */ ++u64 cons_read_seq(struct console *con) ++{ ++ u64 seq; ++#ifdef CONFIG_64BIT ++ struct cons_state state; ++ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ seq = state.seq; ++#else ++ seq = atomic_read(&ACCESS_PRIVATE(con, atomic_seq)); ++#endif ++ return cons_expand_seq(seq); ++} ++ ++/** ++ * cons_context_set_seq - Setup the context with the next sequence to print ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * On return the retrieved sequence number is stored in ctxt->oldseq. ++ * ++ * The sequence number is safe in forceful takeover situations. ++ * ++ * Either the writer succeeded to update before it got interrupted ++ * or it failed. In the latter case the takeover will print the ++ * same line again. ++ * ++ * The sequence is only the lower 32bits of the ringbuffer sequence. The ++ * ringbuffer must be 2^31 records ahead to get out of sync. This needs ++ * some care when starting a console, i.e setting the sequence to 0 is ++ * wrong. It has to be set to the oldest valid sequence in the ringbuffer ++ * as that cannot be more than 2^31 records away ++ * ++ * On 64bit the 32bit sequence is part of console::state, which is saved ++ * in @ctxt->state. This prevents the 32bit update race. ++ */ ++static void cons_context_set_seq(struct cons_context *ctxt) ++{ ++#ifdef CONFIG_64BIT ++ ctxt->oldseq = ctxt->state.seq; ++#else ++ ctxt->oldseq = atomic_read(&ACCESS_PRIVATE(ctxt->console, atomic_seq)); ++#endif ++ ctxt->oldseq = cons_expand_seq(ctxt->oldseq); ++ ctxt->newseq = ctxt->oldseq; ++} ++ ++/** ++ * cons_seq_try_update - Try to update the console sequence number ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * Returns: True if the console sequence was updated, false otherwise. ++ * ++ * Internal helper as the logic is different on 32bit and 64bit. ++ * ++ * On 32 bit the sequence is separate from state and therefore ++ * subject to a subtle race in the case of hostile takeovers. ++ * ++ * On 64 bit the sequence is part of the state and therefore safe ++ * vs. hostile takeovers. ++ * ++ * In case of fail the console has been taken over and @ctxt is ++ * invalid. Caller has to reacquire the console. ++ */ ++#ifdef CONFIG_64BIT ++static bool cons_seq_try_update(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state old; ++ struct cons_state new; ++ ++ cons_state_read(con, CON_STATE_CUR, &old); ++ do { ++ /* Make sure this context is still the owner. 
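/*
 * Illustration only, not part of the patch: only the low 32 bits of the
 * per-console sequence are kept in the atomic state, so cons_expand_seq()
 * re-anchors them against the ringbuffer's full 64-bit sequence. The
 * stand-alone example below applies the same fold to made-up values and
 * shows why a console must never start more than 2^31 records behind:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t demo_expand_seq(uint64_t rbseq, uint32_t seq32)
{
	/* Same arithmetic as cons_expand_seq(): fold in 32-bit space. */
	return rbseq - (uint32_t)((uint32_t)rbseq - seq32);
}

int main(void)
{
	uint64_t rbseq = 0x100000005ULL;  /* ringbuffer already wrapped: 2^32 + 5 */
	uint32_t seq32 = 0xfffffffe;      /* console stored a pre-wrap position  */

	/* The console is 7 records behind, i.e. 0xfffffffe. */
	printf("expanded: 0x%" PRIx64 "\n", demo_expand_seq(rbseq, seq32));
	return 0;
}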
*/ ++ if (!cons_state_bits_match(old, ctxt->state)) ++ return false; ++ ++ /* Preserve bit state */ ++ copy_bit_state(new, old); ++ new.seq = ctxt->newseq; ++ ++ /* ++ * Can race with hostile takeover or with a handover ++ * request. ++ */ ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)); ++ ++ copy_full_state(ctxt->state, new); ++ ctxt->oldseq = ctxt->newseq; ++ ++ return true; ++} ++#else ++static bool cons_release(struct cons_context *ctxt); ++static bool cons_seq_try_update(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state state; ++ int pcpu; ++ u32 old; ++ u32 new; ++ ++ /* ++ * There is a corner case that needs to be considered here: ++ * ++ * CPU0 CPU1 ++ * printk() ++ * acquire() -> emergency ++ * write() acquire() ++ * update_seq() ++ * state == OK ++ * --> NMI ++ * takeover() ++ * <--- write() ++ * cmpxchg() succeeds update_seq() ++ * cmpxchg() fails ++ * ++ * There is nothing that can be done about this other than having ++ * yet another state bit that needs to be tracked and analyzed, ++ * but fails to cover the problem completely. ++ * ++ * No other scenarios expose such a problem. On same CPU takeovers ++ * the cmpxchg() always fails on the interrupted context after the ++ * interrupting context finished printing, but that's fine as it ++ * does not own the console anymore. The state check after the ++ * failed cmpxchg prevents that. ++ */ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ /* Make sure this context is still the owner. */ ++ if (!cons_state_bits_match(state, ctxt->state)) ++ return false; ++ ++ /* ++ * Get the original sequence number that was retrieved ++ * from @con->atomic_seq. @con->atomic_seq should be still ++ * the same. 32bit truncates. See cons_context_set_seq(). ++ */ ++ old = (u32)ctxt->oldseq; ++ new = (u32)ctxt->newseq; ++ if (atomic_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_seq), &old, new)) { ++ ctxt->oldseq = ctxt->newseq; ++ return true; ++ } ++ ++ /* ++ * Reread the state. If this context does not own the console anymore ++ * then it cannot touch the sequence again. ++ */ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ if (!cons_state_bits_match(state, ctxt->state)) ++ return false; ++ ++ pcpu = atomic_read(&panic_cpu); ++ if (pcpu == smp_processor_id()) { ++ /* ++ * This is the panic CPU. Emitting a warning here does not ++ * help at all. The callchain is clear and the priority is ++ * to get the messages out. In the worst case duplicated ++ * ones. That's a job for postprocessing. ++ */ ++ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), new); ++ ctxt->oldseq = ctxt->newseq; ++ return true; ++ } ++ ++ /* ++ * Only emit a warning when this happens outside of a panic ++ * situation as on panic it's neither useful nor helping to let the ++ * panic CPU get the important stuff out. ++ */ ++ WARN_ON_ONCE(pcpu == PANIC_CPU_INVALID); ++ ++ cons_release(ctxt); ++ return false; ++} ++#endif ++ ++/** ++ * cons_cleanup_handover - Cleanup a handover request ++ * @ctxt: Pointer to acquire context ++ * ++ * @ctxt->hov_state contains the state to clean up ++ */ ++static void cons_cleanup_handover(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state new; ++ ++ /* ++ * No loop required. Either hov_state is still the same or ++ * not. 
++ */ ++ new.atom = 0; ++ cons_state_try_cmpxchg(con, CON_STATE_REQ, &ctxt->hov_state, &new); ++} ++ ++/** ++ * cons_setup_handover - Setup a handover request ++ * @ctxt: Pointer to acquire context ++ * ++ * Returns: True if a handover request was setup, false otherwise. ++ * ++ * On success @ctxt->hov_state contains the requested handover state ++ * ++ * On failure this context is not allowed to request a handover from the ++ * current owner. Reasons would be priority too low or a remote CPU in panic. ++ * In both cases this context should give up trying to acquire the console. ++ */ ++static bool cons_setup_handover(struct cons_context *ctxt) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ struct cons_state old; ++ struct cons_state hstate = { ++ .locked = 1, ++ .cur_prio = ctxt->prio, ++ .cpu = cpu, ++ }; ++ ++ /* ++ * Try to store hstate in @con->atomic_state[REQ]. This might ++ * race with a higher priority waiter. ++ */ ++ cons_state_read(con, CON_STATE_REQ, &old); ++ do { ++ if (cons_check_panic()) ++ return false; ++ ++ /* Same or higher priority waiter exists? */ ++ if (old.cur_prio >= ctxt->prio) ++ return false; ++ ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_REQ, &old, &hstate)); ++ ++ /* Save that state for comparison in spinwait */ ++ copy_full_state(ctxt->hov_state, hstate); ++ return true; ++} ++ ++/** ++ * cons_setup_request - Setup a handover request in state[CUR] ++ * @ctxt: Pointer to acquire context ++ * @old: The state that was used to make the decision to spin wait ++ * ++ * Returns: True if a handover request was setup in state[CUR], false ++ * otherwise. ++ * ++ * On success @ctxt->req_state contains the request state that was set in ++ * state[CUR] ++ * ++ * On failure this context encountered unexpected state values. This ++ * context should retry the full handover request setup process (the ++ * handover request setup by cons_setup_handover() is now invalidated ++ * and must be performed again). ++ */ ++static bool cons_setup_request(struct cons_context *ctxt, struct cons_state old) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state cur; ++ struct cons_state new; ++ ++ /* Now set the request in state[CUR] */ ++ cons_state_read(con, CON_STATE_CUR, &cur); ++ do { ++ if (cons_check_panic()) ++ goto cleanup; ++ ++ /* Bit state changed vs. the decision to spinwait? */ ++ if (!cons_state_bits_match(cur, old)) ++ goto cleanup; ++ ++ /* ++ * A higher or equal priority context already setup a ++ * request? ++ */ ++ if (cur.req_prio >= ctxt->prio) ++ goto cleanup; ++ ++ /* Setup a request for handover. */ ++ copy_full_state(new, cur); ++ new.req_prio = ctxt->prio; ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new)); ++ ++ /* Save that state for comparison in spinwait */ ++ copy_bit_state(ctxt->req_state, new); ++ return true; ++ ++cleanup: ++ cons_cleanup_handover(ctxt); ++ return false; ++} ++ ++/** ++ * cons_try_acquire_spin - Complete the spinwait attempt ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * @ctxt->hov_state contains the handover state that was set in ++ * state[REQ] ++ * @ctxt->req_state contains the request state that was set in ++ * state[CUR] ++ * ++ * Returns: 0 if successfully locked. -EBUSY on timeout. -EAGAIN on ++ * unexpected state values. ++ * ++ * On success @ctxt->state contains the new state that was set in ++ * state[CUR] ++ * ++ * On -EBUSY failure this context timed out. 
This context should either ++ * give up or attempt a hostile takeover. ++ * ++ * On -EAGAIN failure this context encountered unexpected state values. ++ * This context should retry the full handover request setup process (the ++ * handover request setup by cons_setup_handover() is now invalidated and ++ * must be performed again). ++ */ ++static int cons_try_acquire_spin(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ struct cons_state cur; ++ struct cons_state new; ++ int err = -EAGAIN; ++ int timeout; ++ ++ /* Now wait for the other side to hand over */ ++ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { ++ /* Timeout immediately if a remote panic is detected. */ ++ if (cons_check_panic()) ++ break; ++ ++ cons_state_read(con, CON_STATE_CUR, &cur); ++ ++ /* ++ * If the real state of the console matches the handover state ++ * that this context setup, then the handover was a success ++ * and this context is now the owner. ++ * ++ * Note that this might have raced with a new higher priority ++ * requester coming in after the lock was handed over. ++ * However, that requester will see that the owner changes and ++ * setup a new request for the current owner (this context). ++ */ ++ if (cons_state_bits_match(cur, ctxt->hov_state)) ++ goto success; ++ ++ /* ++ * If state changed since the request was made, give up as ++ * it is no longer consistent. This must include ++ * state::req_prio since there could be a higher priority ++ * request available. ++ */ ++ if (cur.bits != ctxt->req_state.bits) ++ goto cleanup; ++ ++ /* ++ * Finally check whether the handover state is still ++ * the same. ++ */ ++ cons_state_read(con, CON_STATE_REQ, &cur); ++ if (cur.atom != ctxt->hov_state.atom) ++ goto cleanup; ++ ++ /* Account time */ ++ if (timeout > 0) ++ udelay(1); ++ } ++ ++ /* ++ * Timeout. Cleanup the handover state and carefully try to reset ++ * req_prio in the real state. The reset is important to ensure ++ * that the owner does not hand over the lock after this context ++ * has given up waiting. ++ */ ++ cons_cleanup_handover(ctxt); ++ ++ cons_state_read(con, CON_STATE_CUR, &cur); ++ do { ++ /* ++ * The timeout might have raced with the owner coming late ++ * and handing it over gracefully. ++ */ ++ if (cons_state_bits_match(cur, ctxt->hov_state)) ++ goto success; ++ ++ /* ++ * Validate that the state matches with the state at request ++ * time. If this check fails, there is already a higher ++ * priority context waiting or the owner has changed (either ++ * by higher priority or by hostile takeover). In all fail ++ * cases this context is no longer in line for a handover to ++ * take place, so no reset is necessary. ++ */ ++ if (cur.bits != ctxt->req_state.bits) ++ goto cleanup; ++ ++ copy_full_state(new, cur); ++ new.req_prio = 0; ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new)); ++ /* Reset worked. Report timeout. */ ++ return -EBUSY; ++ ++success: ++ /* Store the real state */ ++ copy_full_state(ctxt->state, cur); ++ ctxt->hostile = false; ++ err = 0; ++ ++cleanup: ++ cons_cleanup_handover(ctxt); ++ return err; ++} ++ ++/** ++ * __cons_try_acquire - Try to acquire the console for printk output ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * Returns: True if the acquire was successful. False on fail. ++ * ++ * In case of success @ctxt->state contains the acquisition ++ * state. 
++ * ++ * In case of fail @ctxt->old_state contains the state ++ * that was read from @con->state for analysis by the caller. ++ */ ++static bool __cons_try_acquire(struct cons_context *ctxt) ++{ ++ unsigned int cpu = smp_processor_id(); ++ struct console *con = ctxt->console; ++ short flags = console_srcu_read_flags(con); ++ struct cons_state old; ++ struct cons_state new; ++ int err; ++ ++ if (WARN_ON_ONCE(!(flags & CON_NO_BKL))) ++ return false; ++again: ++ cons_state_read(con, CON_STATE_CUR, &old); ++ ++ /* Preserve it for the caller and for spinwait */ ++ copy_full_state(ctxt->old_state, old); ++ ++ if (cons_check_panic()) ++ return false; ++ ++ /* Set up the new state for takeover */ ++ copy_full_state(new, old); ++ new.locked = 1; ++ new.thread = ctxt->thread; ++ new.cur_prio = ctxt->prio; ++ new.req_prio = CONS_PRIO_NONE; ++ new.cpu = cpu; ++ ++ /* Attempt to acquire it directly if unlocked */ ++ if (!old.locked) { ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ ctxt->hostile = false; ++ copy_full_state(ctxt->state, new); ++ goto success; ++ } ++ ++ /* ++ * A threaded printer context will never spin or perform a ++ * hostile takeover. The atomic writer will wake the thread ++ * when it is done with the important output. ++ */ ++ if (ctxt->thread) ++ return false; ++ ++ /* ++ * If the active context is on the same CPU then there is ++ * obviously no handshake possible. ++ */ ++ if (old.cpu == cpu) ++ goto check_hostile; ++ ++ /* ++ * If a handover request with same or higher priority is already ++ * pending then this context cannot setup a handover request. ++ */ ++ if (old.req_prio >= ctxt->prio) ++ goto check_hostile; ++ ++ /* ++ * If the caller did not request spin-waiting then performing a ++ * handover is not an option. ++ */ ++ if (!ctxt->spinwait) ++ goto check_hostile; ++ ++ /* ++ * Setup the request in state[REQ]. If this fails then this ++ * context is not allowed to setup a handover request. ++ */ ++ if (!cons_setup_handover(ctxt)) ++ goto check_hostile; ++ ++ /* ++ * Setup the request in state[CUR]. Hand in the state that was ++ * used to make the decision to spinwait above, for comparison. If ++ * this fails then unexpected state values were encountered and the ++ * full request setup process is retried. ++ */ ++ if (!cons_setup_request(ctxt, old)) ++ goto again; ++ ++ /* ++ * Spin-wait to acquire the console. If this fails then unexpected ++ * state values were encountered (for example, a hostile takeover by ++ * another context) and the full request setup process is retried. ++ */ ++ err = cons_try_acquire_spin(ctxt); ++ if (err) { ++ if (err == -EAGAIN) ++ goto again; ++ goto check_hostile; ++ } ++success: ++ /* Common updates on success */ ++ cons_context_set_seq(ctxt); ++ cons_context_set_pbufs(ctxt); ++ return true; ++ ++check_hostile: ++ if (!ctxt->hostile) ++ return false; ++ ++ if (cons_check_panic()) ++ return false; ++ ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ copy_full_state(ctxt->state, new); ++ goto success; ++} ++ ++/** ++ * cons_try_acquire - Try to acquire the console for printk output ++ * @ctxt: Pointer to an acquire context that contains ++ * all information about the acquire mode ++ * ++ * Returns: True if the acquire was successful. False on fail. ++ * ++ * In case of success @ctxt->state contains the acquisition ++ * state. ++ * ++ * In case of fail @ctxt->old_state contains the state ++ * that was read from @con->state for analysis by the caller. 
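/*
 * Illustration only, not part of the patch: a deliberately simplified,
 * user-space model of the handover scheme implemented above. It collapses
 * the two state words into one 64-bit atomic, drops priorities, sequence
 * tracking, the unsafe bit and hostile takeovers, and keeps only the
 * core: publish a request, spin with a timeout, and on timeout withdraw
 * the request while racing against a late, friendly handover.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic uint64_t demo_state;	/* low 32 bits: owner, high 32 bits: waiter */

static uint64_t pack(uint32_t owner, uint32_t waiter)
{
	return ((uint64_t)waiter << 32) | owner;
}

static uint32_t owner_of(uint64_t s)  { return (uint32_t)s; }
static uint32_t waiter_of(uint64_t s) { return (uint32_t)(s >> 32); }

static bool demo_try_acquire(uint32_t me, unsigned int spins)
{
	uint64_t s = atomic_load(&demo_state);

	for (;;) {
		if (owner_of(s) == 0) {
			/* Unowned: take it directly (the !old.locked path). */
			if (atomic_compare_exchange_weak(&demo_state, &s, pack(me, 0)))
				return true;
			continue;
		}
		if (waiter_of(s) != 0)
			return false;	/* a request is already pending, give up */
		/* Publish the handover request (state[REQ] / req_prio). */
		if (atomic_compare_exchange_weak(&demo_state, &s, pack(owner_of(s), me)))
			break;
	}

	/* Spin-wait for a friendly handover (cons_try_acquire_spin()). */
	for (unsigned int t = 0; t < spins; t++) {
		s = atomic_load(&demo_state);
		if (owner_of(s) == me)
			return true;
	}

	/* Timeout: withdraw the request, racing against a late handover. */
	for (;;) {
		s = atomic_load(&demo_state);
		if (owner_of(s) == me)
			return true;	/* the owner handed over after all */
		if (waiter_of(s) != me)
			return false;	/* request already gone */
		if (atomic_compare_exchange_weak(&demo_state, &s, pack(owner_of(s), 0)))
			return false;	/* withdrawal succeeded, caller backs out */
	}
}

static void demo_release(uint32_t me)
{
	uint64_t s = atomic_load(&demo_state);

	assert(owner_of(s) == me);
	/* Hand over to a pending waiter in one step, otherwise unlock. */
	while (!atomic_compare_exchange_weak(&demo_state, &s, pack(waiter_of(s), 0)))
		;
}

int main(void)
{
	assert(demo_try_acquire(1, 100));	/* unowned: direct acquire     */
	assert(!demo_try_acquire(2, 100));	/* owner never yields: timeout */
	demo_release(1);			/* no waiter left: unlock      */
	assert(demo_try_acquire(2, 100));
	demo_release(2);
	return 0;
}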
++ */ ++static bool cons_try_acquire(struct cons_context *ctxt) ++{ ++ if (__cons_try_acquire(ctxt)) ++ return true; ++ ++ ctxt->state.atom = 0; ++ return false; ++} ++ ++/** ++ * __cons_release - Release the console after output is done ++ * @ctxt: The acquire context that contains the state ++ * at cons_try_acquire() ++ * ++ * Returns: True if the release was regular ++ * ++ * False if the console is in unusable state or was handed over ++ * with handshake or taken over hostile without handshake. ++ * ++ * The return value tells the caller whether it needs to evaluate further ++ * printing. ++ */ ++static bool __cons_release(struct cons_context *ctxt) ++{ ++ struct console *con = ctxt->console; ++ short flags = console_srcu_read_flags(con); ++ struct cons_state hstate; ++ struct cons_state old; ++ struct cons_state new; ++ ++ if (WARN_ON_ONCE(!(flags & CON_NO_BKL))) ++ return false; ++ ++ cons_state_read(con, CON_STATE_CUR, &old); ++again: ++ if (!cons_state_bits_match(old, ctxt->state)) ++ return false; ++ ++ /* Release it directly when no handover request is pending. */ ++ if (!old.req_prio) ++ goto unlock; ++ ++ /* Read the handover target state */ ++ cons_state_read(con, CON_STATE_REQ, &hstate); ++ ++ /* If the waiter gave up hstate is 0 */ ++ if (!hstate.atom) ++ goto unlock; ++ ++ /* ++ * If a higher priority waiter raced against a lower priority ++ * waiter then unlock instead of handing over to either. The ++ * higher priority waiter will notice the updated state and ++ * retry. ++ */ ++ if (hstate.cur_prio != old.req_prio) ++ goto unlock; ++ ++ /* Switch the state and preserve the sequence on 64bit */ ++ copy_bit_state(new, hstate); ++ copy_seq_state64(new, old); ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ return true; ++ ++unlock: ++ /* Clear the state and preserve the sequence on 64bit */ ++ new.atom = 0; ++ copy_seq_state64(new, old); ++ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) ++ goto again; ++ ++ return true; ++} ++ ++bool printk_threads_enabled __ro_after_init; ++static bool printk_force_atomic __initdata; ++ ++/** ++ * cons_release - Release the console after output is done ++ * @ctxt: The acquire context that contains the state ++ * at cons_try_acquire() ++ * ++ * Returns: True if the release was regular ++ * ++ * False if the console is in unusable state or was handed over ++ * with handshake or taken over hostile without handshake. ++ * ++ * The return value tells the caller whether it needs to evaluate further ++ * printing. ++ */ ++static bool cons_release(struct cons_context *ctxt) ++{ ++ bool ret = __cons_release(ctxt); ++ ++ /* Invalidate the buffer pointer. It is no longer valid. */ ++ ctxt->pbufs = NULL; ++ ++ ctxt->state.atom = 0; ++ return ret; ++} ++ ++bool console_try_acquire(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ ++ return cons_try_acquire(ctxt); ++} ++EXPORT_SYMBOL_GPL(console_try_acquire); ++ ++bool console_release(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ ++ return cons_release(ctxt); ++} ++EXPORT_SYMBOL_GPL(console_release); ++ ++/** ++ * cons_alloc_percpu_data - Allocate percpu data for a console ++ * @con: Console to allocate for ++ * ++ * Returns: True on success. False otherwise and the console cannot be used. ++ * ++ * If it is not yet possible to allocate per CPU data, success is returned. 
++ * When per CPU data becomes possible, set_percpu_data_ready() will call ++ * this function again for all registered consoles. ++ */ ++bool cons_alloc_percpu_data(struct console *con) ++{ ++ if (!printk_percpu_data_ready()) ++ return true; ++ ++ con->pcpu_data = alloc_percpu(typeof(*con->pcpu_data)); ++ if (con->pcpu_data) ++ return true; ++ ++ con_printk(KERN_WARNING, con, "failed to allocate percpu buffers\n"); ++ return false; ++} ++ ++/** ++ * cons_free_percpu_data - Free percpu data of a console on unregister ++ * @con: Console to clean up ++ */ ++static void cons_free_percpu_data(struct console *con) ++{ ++ if (!con->pcpu_data) ++ return; ++ ++ free_percpu(con->pcpu_data); ++ con->pcpu_data = NULL; ++} ++ ++/** ++ * console_can_proceed - Check whether printing can proceed ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked after the record was dumped into the assigned record ++ * buffer and at appropriate safe places in the driver. For unsafe driver ++ * sections see console_enter_unsafe(). ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. ++ */ ++bool console_can_proceed(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ struct cons_state state; ++ ++ cons_state_read(con, CON_STATE_CUR, &state); ++ /* Store it for analysis or reuse */ ++ copy_full_state(ctxt->old_state, state); ++ ++ /* Make sure this context is still the owner. */ ++ if (!cons_state_full_match(state, ctxt->state)) ++ return false; ++ ++ /* ++ * Having a safe point for take over and eventually a few ++ * duplicated characters or a full line is way better than a ++ * hostile takeover. Post processing can take care of the garbage. ++ * Continue if the requested priority is not sufficient. ++ */ ++ if (state.req_prio <= state.cur_prio) ++ return true; ++ ++ /* ++ * A console printer within an unsafe region is allowed to continue. ++ * It can perform the handover when exiting the safe region. Otherwise ++ * a hostile takeover will be necessary. ++ */ ++ if (state.unsafe) ++ return true; ++ ++ /* Release and hand over */ ++ cons_release(ctxt); ++ /* ++ * This does not check whether the handover succeeded. The ++ * outermost callsite has to make the final decision whether printing ++ * should continue or not (via reacquire, possibly hostile). The ++ * console is unlocked already so go back all the way instead of ++ * trying to implement heuristics in tons of places. ++ */ ++ return false; ++} ++EXPORT_SYMBOL_GPL(console_can_proceed); ++ ++/** ++ * __console_update_unsafe - Update the unsafe bit in @con->atomic_state ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked before an unsafe driver section is entered. ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. 
++ * ++ * Internal helper to avoid duplicated code ++ */ ++static bool __console_update_unsafe(struct cons_write_context *wctxt, bool unsafe) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ struct cons_state new; ++ ++ do { ++ if (!console_can_proceed(wctxt)) ++ return false; ++ /* ++ * console_can_proceed() saved the real state in ++ * ctxt->old_state ++ */ ++ copy_full_state(new, ctxt->old_state); ++ new.unsafe = unsafe; ++ ++ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &ctxt->old_state, &new)); ++ ++ copy_full_state(ctxt->state, new); ++ return true; ++} ++ ++/** ++ * console_enter_unsafe - Enter an unsafe region in the driver ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked before an unsafe driver section is entered. ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. ++ */ ++bool console_enter_unsafe(struct cons_write_context *wctxt) ++{ ++ return __console_update_unsafe(wctxt, true); ++} ++EXPORT_SYMBOL_GPL(console_enter_unsafe); ++ ++/** ++ * console_exit_unsafe - Exit an unsafe region in the driver ++ * @wctxt: The write context that was handed to the write function ++ * ++ * Returns: True if the state is correct. False if a handover ++ * has been requested or if the console was taken ++ * over. ++ * ++ * Must be invoked before an unsafe driver section is exited. ++ * ++ * When this function returns false then the calling context is not allowed ++ * to go forward and has to back out immediately and carefully. The buffer ++ * content is no longer trusted either and the console lock is no longer ++ * held. ++ */ ++bool console_exit_unsafe(struct cons_write_context *wctxt) ++{ ++ return __console_update_unsafe(wctxt, false); ++} ++EXPORT_SYMBOL_GPL(console_exit_unsafe); ++ ++/** ++ * cons_get_record - Fill the buffer with the next pending ringbuffer record ++ * @wctxt: The write context which will be handed to the write function ++ * ++ * Returns: True if there are records available. If the next record should ++ * be printed, the output buffer is filled and @wctxt->outbuf ++ * points to the text to print. If @wctxt->outbuf is NULL after ++ * the call, the record should not be printed but the caller must ++ * still update the console sequence number. ++ * ++ * False means that there are no pending records anymore and the ++ * printing can stop. 
++ */ ++static bool cons_get_record(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; ++ struct printk_message pmsg = { ++ .pbufs = ctxt->pbufs, ++ }; ++ ++ if (!printk_get_next_message(&pmsg, ctxt->newseq, is_extended, true)) ++ return false; ++ ++ ctxt->newseq = pmsg.seq; ++ ctxt->dropped += pmsg.dropped; ++ ++ if (pmsg.outbuf_len == 0) { ++ wctxt->outbuf = NULL; ++ } else { ++ if (ctxt->dropped && !is_extended) ++ console_prepend_dropped(&pmsg, ctxt->dropped); ++ wctxt->outbuf = &pmsg.pbufs->outbuf[0]; ++ } ++ ++ wctxt->len = pmsg.outbuf_len; ++ ++ return true; ++} ++ ++/** ++ * cons_emit_record - Emit record in the acquired context ++ * @wctxt: The write context that will be handed to the write function ++ * ++ * Returns: False if the operation was aborted (takeover or handover). ++ * True otherwise ++ * ++ * When false is returned, the caller is not allowed to touch console state. ++ * The console is owned by someone else. If the caller wants to print more ++ * it has to reacquire the console first. ++ * ++ * When true is returned, @wctxt->ctxt.backlog indicates whether there are ++ * still records pending in the ringbuffer, ++ */ ++static bool cons_emit_record(struct cons_write_context *wctxt) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ struct console *con = ctxt->console; ++ bool done = false; ++ ++ /* ++ * @con->dropped is not protected in case of hostile takeovers so ++ * the update below is racy. Annotate it accordingly. ++ */ ++ ctxt->dropped = data_race(READ_ONCE(con->dropped)); ++ ++ /* Fill the output buffer with the next record */ ++ ctxt->backlog = cons_get_record(wctxt); ++ if (!ctxt->backlog) ++ return true; ++ ++ /* Safety point. Don't touch state in case of takeover */ ++ if (!console_can_proceed(wctxt)) ++ return false; ++ ++ /* Counterpart to the read above */ ++ WRITE_ONCE(con->dropped, ctxt->dropped); ++ ++ /* ++ * In case of skipped records, Update sequence state in @con. ++ */ ++ if (!wctxt->outbuf) ++ goto update; ++ ++ /* Tell the driver about potential unsafe state */ ++ wctxt->unsafe = ctxt->state.unsafe; ++ ++ if (!ctxt->thread && con->write_atomic) { ++ done = con->write_atomic(con, wctxt); ++ } else if (ctxt->thread && con->write_thread) { ++ done = con->write_thread(con, wctxt); ++ } else { ++ cons_release(ctxt); ++ WARN_ON_ONCE(1); ++ return false; ++ } ++ ++ /* If not done, the write was aborted due to takeover */ ++ if (!done) ++ return false; ++ ++ /* If there was a dropped message, it has now been output. */ ++ if (ctxt->dropped) { ++ ctxt->dropped = 0; ++ /* Counterpart to the read above */ ++ WRITE_ONCE(con->dropped, ctxt->dropped); ++ } ++update: ++ ctxt->newseq++; ++ /* ++ * The sequence update attempt is not part of console_release() ++ * because in panic situations the console is not released by ++ * the panic CPU until all records are written. On 32bit the ++ * sequence is separate from state anyway. ++ */ ++ return cons_seq_try_update(ctxt); ++} ++ ++/** ++ * cons_kthread_should_wakeup - Check whether the printk thread should wakeup ++ * @con: Console to operate on ++ * @ctxt: The acquire context that contains the state ++ * at console_acquire() ++ * ++ * Returns: True if the thread should shutdown or if the console is allowed to ++ * print and a record is available. 
False otherwise ++ * ++ * After the thread wakes up, it must first check if it should shutdown before ++ * attempting any printing. ++ */ ++static bool cons_kthread_should_wakeup(struct console *con, struct cons_context *ctxt) ++{ ++ bool is_usable; ++ short flags; ++ int cookie; ++ ++ if (kthread_should_stop()) ++ return true; ++ ++ cookie = console_srcu_read_lock(); ++ flags = console_srcu_read_flags(con); ++ is_usable = console_is_usable(con, flags); ++ console_srcu_read_unlock(cookie); ++ ++ if (!is_usable) ++ return false; ++ ++ /* This reads state and sequence on 64bit. On 32bit only state */ ++ cons_state_read(con, CON_STATE_CUR, &ctxt->state); ++ ++ /* ++ * Atomic printing is running on some other CPU. The owner ++ * will wake the console thread on unlock if necessary. ++ */ ++ if (ctxt->state.locked) ++ return false; ++ ++ /* Bring the sequence in @ctxt up to date */ ++ cons_context_set_seq(ctxt); ++ ++ return prb_read_valid(prb, ctxt->oldseq, NULL); ++} ++ ++/** ++ * cons_kthread_func - The printk thread function ++ * @__console: Console to operate on ++ */ ++static int cons_kthread_func(void *__console) ++{ ++ struct console *con = __console; ++ struct cons_write_context wctxt = { ++ .ctxt.console = con, ++ .ctxt.prio = CONS_PRIO_NORMAL, ++ .ctxt.thread = 1, ++ }; ++ struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); ++ unsigned long flags; ++ short con_flags; ++ bool backlog; ++ int cookie; ++ int ret; ++ ++ for (;;) { ++ atomic_inc(&con->kthread_waiting); ++ ++ /* ++ * Provides a full memory barrier vs. cons_kthread_wake(). ++ */ ++ ret = rcuwait_wait_event(&con->rcuwait, ++ cons_kthread_should_wakeup(con, ctxt), ++ TASK_INTERRUPTIBLE); ++ ++ atomic_dec(&con->kthread_waiting); ++ ++ if (kthread_should_stop()) ++ break; ++ ++ /* Wait was interrupted by a spurious signal, go back to sleep */ ++ if (ret) ++ continue; ++ ++ for (;;) { ++ cookie = console_srcu_read_lock(); ++ ++ /* ++ * Ensure this stays on the CPU to make handover and ++ * takeover possible. ++ */ ++ if (con->port_lock) ++ con->port_lock(con, true, &flags); ++ else ++ migrate_disable(); ++ ++ /* ++ * Try to acquire the console without attempting to ++ * take over. If an atomic printer wants to hand ++ * back to the thread it simply wakes it up. ++ */ ++ if (!cons_try_acquire(ctxt)) ++ break; ++ ++ con_flags = console_srcu_read_flags(con); ++ ++ if (console_is_usable(con, con_flags)) { ++ /* ++ * If the emit fails, this context is no ++ * longer the owner. Abort the processing and ++ * wait for new records to print. ++ */ ++ if (!cons_emit_record(&wctxt)) ++ break; ++ backlog = ctxt->backlog; ++ } else { ++ backlog = false; ++ } ++ ++ /* ++ * If the release fails, this context was not the ++ * owner. Abort the processing and wait for new ++ * records to print. ++ */ ++ if (!cons_release(ctxt)) ++ break; ++ ++ /* Backlog done? 
*/ ++ if (!backlog) ++ break; ++ ++ if (con->port_lock) ++ con->port_lock(con, false, &flags); ++ else ++ migrate_enable(); ++ ++ console_srcu_read_unlock(cookie); ++ ++ cond_resched(); ++ } ++ if (con->port_lock) ++ con->port_lock(con, false, &flags); ++ else ++ migrate_enable(); ++ ++ console_srcu_read_unlock(cookie); ++ } ++ return 0; ++} ++ ++/** ++ * cons_irq_work - irq work to wake printk thread ++ * @irq_work: The irq work to operate on ++ */ ++static void cons_irq_work(struct irq_work *irq_work) ++{ ++ struct console *con = container_of(irq_work, struct console, irq_work); ++ ++ cons_kthread_wake(con); ++} ++ ++/** ++ * cons_wake_threads - Wake up printing threads ++ * ++ * A printing thread is only woken if it is within the @kthread_waiting ++ * block. If it is not within the block (or enters the block later), it ++ * will see any new records and continue printing on its own. ++ */ ++void cons_wake_threads(void) ++{ ++ struct console *con; ++ int cookie; ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ if (con->kthread && atomic_read(&con->kthread_waiting)) ++ irq_work_queue(&con->irq_work); ++ } ++ console_srcu_read_unlock(cookie); ++} ++ ++/** ++ * struct cons_cpu_state - Per CPU printk context state ++ * @prio: The current context priority level ++ * @nesting: Per priority nest counter ++ */ ++struct cons_cpu_state { ++ enum cons_prio prio; ++ int nesting[CONS_PRIO_MAX]; ++}; ++ ++static DEFINE_PER_CPU(struct cons_cpu_state, cons_pcpu_state); ++static struct cons_cpu_state early_cons_pcpu_state __initdata; ++ ++/** ++ * cons_get_cpu_state - Get the per CPU console state pointer ++ * ++ * Returns either a pointer to the per CPU state of the current CPU or to ++ * the init data state during early boot. ++ */ ++static __ref struct cons_cpu_state *cons_get_cpu_state(void) ++{ ++ if (!printk_percpu_data_ready()) ++ return &early_cons_pcpu_state; ++ ++ return this_cpu_ptr(&cons_pcpu_state); ++} ++ ++/** ++ * cons_get_wctxt - Get the write context for atomic printing ++ * @con: Console to operate on ++ * @prio: Priority of the context ++ * ++ * Returns either the per CPU context or the builtin context for ++ * early boot. 
++ */ ++static __ref struct cons_write_context *cons_get_wctxt(struct console *con, ++ enum cons_prio prio) ++{ ++ if (!con->pcpu_data) ++ return &early_cons_ctxt_data.wctxt[prio]; ++ ++ return &this_cpu_ptr(con->pcpu_data)->wctxt[prio]; ++} ++ ++/** ++ * cons_atomic_try_acquire - Try to acquire the console for atomic printing ++ * @con: The console to acquire ++ * @ctxt: The console context instance to work on ++ * @prio: The priority of the current context ++ */ ++static bool cons_atomic_try_acquire(struct console *con, struct cons_context *ctxt, ++ enum cons_prio prio, bool skip_unsafe) ++{ ++ memset(ctxt, 0, sizeof(*ctxt)); ++ ctxt->console = con; ++ ctxt->spinwait_max_us = 2000; ++ ctxt->prio = prio; ++ ctxt->spinwait = 1; ++ ++ /* Try to acquire it directly or via a friendly handover */ ++ if (cons_try_acquire(ctxt)) ++ return true; ++ ++ /* Investigate whether a hostile takeover is due */ ++ if (ctxt->old_state.cur_prio >= prio) ++ return false; ++ ++ if (!ctxt->old_state.unsafe || !skip_unsafe) ++ ctxt->hostile = 1; ++ return cons_try_acquire(ctxt); ++} ++ ++/** ++ * cons_atomic_flush_con - Flush one console in atomic mode ++ * @wctxt: The write context struct to use for this context ++ * @con: The console to flush ++ * @prio: The priority of the current context ++ * @skip_unsafe: True, to avoid unsafe hostile takeovers ++ */ ++static void cons_atomic_flush_con(struct cons_write_context *wctxt, struct console *con, ++ enum cons_prio prio, bool skip_unsafe) ++{ ++ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); ++ bool wake_thread = false; ++ short flags; ++ ++ if (!cons_atomic_try_acquire(con, ctxt, prio, skip_unsafe)) ++ return; ++ ++ do { ++ flags = console_srcu_read_flags(con); ++ ++ if (!console_is_usable(con, flags)) ++ break; ++ ++ /* ++ * For normal prio messages let the printer thread handle ++ * the printing if it is available. ++ */ ++ if (prio <= CONS_PRIO_NORMAL && con->kthread) { ++ wake_thread = true; ++ break; ++ } ++ ++ /* ++ * cons_emit_record() returns false when the console was ++ * handed over or taken over. In both cases the context is ++ * no longer valid. ++ */ ++ if (!cons_emit_record(wctxt)) ++ return; ++ } while (ctxt->backlog); ++ ++ cons_release(ctxt); ++ ++ if (wake_thread && atomic_read(&con->kthread_waiting)) ++ irq_work_queue(&con->irq_work); ++} ++ ++/** ++ * cons_atomic_flush - Flush consoles in atomic mode if required ++ * @printk_caller_wctxt: The write context struct to use for this ++ * context (for printk() context only) ++ * @skip_unsafe: True, to avoid unsafe hostile takeovers ++ */ ++void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, bool skip_unsafe) ++{ ++ struct cons_write_context *wctxt; ++ struct cons_cpu_state *cpu_state; ++ struct console *con; ++ short flags; ++ int cookie; ++ ++ cpu_state = cons_get_cpu_state(); ++ ++ /* ++ * When in an elevated priority, the printk() calls are not ++ * individually flushed. This is to allow the full output to ++ * be dumped to the ringbuffer before starting with printing ++ * the backlog. ++ */ ++ if (cpu_state->prio > CONS_PRIO_NORMAL && printk_caller_wctxt) ++ return; ++ ++ /* ++ * Let the outermost write of this priority print. This avoids ++ * nasty hackery for nested WARN() where the printing itself ++ * generates one. ++ * ++ * cpu_state->prio <= CONS_PRIO_NORMAL is not subject to nesting ++ * and can proceed in order to allow atomic printing when consoles ++ * do not have a printer thread. 
++ */ ++ if (cpu_state->prio > CONS_PRIO_NORMAL && ++ cpu_state->nesting[cpu_state->prio] != 1) ++ return; ++ ++ cookie = console_srcu_read_lock(); ++ for_each_console_srcu(con) { ++ if (!con->write_atomic) ++ continue; ++ ++ flags = console_srcu_read_flags(con); ++ ++ if (!console_is_usable(con, flags)) ++ continue; ++ ++ if (cpu_state->prio > CONS_PRIO_NORMAL || !con->kthread) { ++ if (printk_caller_wctxt) ++ wctxt = printk_caller_wctxt; ++ else ++ wctxt = cons_get_wctxt(con, cpu_state->prio); ++ cons_atomic_flush_con(wctxt, con, cpu_state->prio, skip_unsafe); ++ } ++ } ++ console_srcu_read_unlock(cookie); ++} ++ ++/** ++ * cons_atomic_enter - Enter a context that enforces atomic printing ++ * @prio: Priority of the context ++ * ++ * Returns: The previous priority that needs to be fed into ++ * the corresponding cons_atomic_exit() ++ */ ++enum cons_prio cons_atomic_enter(enum cons_prio prio) ++{ ++ struct cons_cpu_state *cpu_state; ++ enum cons_prio prev_prio; ++ ++ migrate_disable(); ++ cpu_state = cons_get_cpu_state(); ++ ++ prev_prio = cpu_state->prio; ++ if (prev_prio < prio) ++ cpu_state->prio = prio; ++ ++ /* ++ * Increment the nesting on @cpu_state->prio so a WARN() ++ * nested into a panic printout does not attempt to ++ * scribble state. ++ */ ++ cpu_state->nesting[cpu_state->prio]++; ++ ++ return prev_prio; ++} ++ ++/** ++ * cons_atomic_exit - Exit a context that enforces atomic printing ++ * @prio: Priority of the context to leave ++ * @prev_prio: Priority of the previous context for restore ++ * ++ * @prev_prio is the priority returned by the corresponding cons_atomic_enter(). ++ */ ++void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio) ++{ ++ struct cons_cpu_state *cpu_state; ++ ++ cons_atomic_flush(NULL, true); ++ ++ cpu_state = cons_get_cpu_state(); ++ ++ if (cpu_state->prio == CONS_PRIO_PANIC) ++ cons_atomic_flush(NULL, false); ++ ++ /* ++ * Undo the nesting of cons_atomic_enter() at the CPU state ++ * priority. ++ */ ++ cpu_state->nesting[cpu_state->prio]--; ++ ++ /* ++ * Restore the previous priority, which was returned by ++ * cons_atomic_enter(). ++ */ ++ cpu_state->prio = prev_prio; ++ ++ migrate_enable(); ++} ++ ++/** ++ * cons_kthread_stop - Stop a printk thread ++ * @con: Console to operate on ++ */ ++static void cons_kthread_stop(struct console *con) ++{ ++ lockdep_assert_console_list_lock_held(); ++ ++ if (!con->kthread) ++ return; ++ ++ kthread_stop(con->kthread); ++ con->kthread = NULL; ++ ++ kfree(con->thread_pbufs); ++ con->thread_pbufs = NULL; ++} ++ ++/** ++ * cons_kthread_create - Create a printk thread ++ * @con: Console to operate on ++ * ++ * If it fails, let the console proceed. The atomic part might ++ * be usable and useful. ++ */ ++void cons_kthread_create(struct console *con) ++{ ++ struct task_struct *kt; ++ struct console *c; ++ ++ lockdep_assert_console_list_lock_held(); ++ ++ if (!(con->flags & CON_NO_BKL) || !con->write_thread) ++ return; ++ ++ if (!printk_threads_enabled || con->kthread) ++ return; ++ ++ /* ++ * Printer threads cannot be started as long as any boot console is ++ * registered because there is no way to synchronize the hardware ++ * registers between boot console code and regular console code. 
++ */ ++ for_each_console(c) { ++ if (c->flags & CON_BOOT) ++ return; ++ } ++ have_boot_console = false; ++ ++ con->thread_pbufs = kmalloc(sizeof(*con->thread_pbufs), GFP_KERNEL); ++ if (!con->thread_pbufs) { ++ con_printk(KERN_ERR, con, "failed to allocate printing thread buffers\n"); ++ return; ++ } ++ ++ kt = kthread_run(cons_kthread_func, con, "pr/%s%d", con->name, con->index); ++ if (IS_ERR(kt)) { ++ con_printk(KERN_ERR, con, "failed to start printing thread\n"); ++ kfree(con->thread_pbufs); ++ con->thread_pbufs = NULL; ++ return; ++ } ++ ++ con->kthread = kt; ++ ++ /* ++ * It is important that console printing threads are scheduled ++ * shortly after a printk call and with generous runtime budgets. ++ */ ++ sched_set_normal(con->kthread, -20); ++} ++ ++static int __init printk_setup_threads(void) ++{ ++ struct console *con; ++ ++ if (printk_force_atomic) ++ return 0; ++ ++ console_list_lock(); ++ printk_threads_enabled = true; ++ for_each_console(con) ++ cons_kthread_create(con); ++ if (have_bkl_console) ++ console_bkl_kthread_create(); ++ console_list_unlock(); ++ return 0; ++} ++early_initcall(printk_setup_threads); ++ ++/** ++ * cons_nobkl_init - Initialize the NOBKL console specific data ++ * @con: Console to initialize ++ * ++ * Returns: True on success. False otherwise and the console cannot be used. ++ */ ++bool cons_nobkl_init(struct console *con) ++{ ++ struct cons_state state = { }; ++ ++ if (!cons_alloc_percpu_data(con)) ++ return false; ++ ++ rcuwait_init(&con->rcuwait); ++ atomic_set(&con->kthread_waiting, 0); ++ init_irq_work(&con->irq_work, cons_irq_work); ++ cons_state_set(con, CON_STATE_CUR, &state); ++ cons_state_set(con, CON_STATE_REQ, &state); ++ cons_seq_init(con); ++ cons_kthread_create(con); ++ return true; ++} ++ ++/** ++ * cons_nobkl_cleanup - Cleanup the NOBKL console specific data ++ * @con: Console to cleanup ++ */ ++void cons_nobkl_cleanup(struct console *con) ++{ ++ struct cons_state state = { }; ++ ++ cons_kthread_stop(con); ++ cons_state_set(con, CON_STATE_CUR, &state); ++ cons_state_set(con, CON_STATE_REQ, &state); ++ cons_free_percpu_data(con); ++} ++ ++/** ++ * printk_kthread_shutdown - shutdown all threaded printers ++ * ++ * On system shutdown all threaded printers are stopped. This allows printk ++ * to transition back to atomic printing, thus providing a robust mechanism ++ * for the final shutdown/reboot messages to be output. ++ */ ++static void printk_kthread_shutdown(void) ++{ ++ struct console *con; ++ ++ console_list_lock(); ++ for_each_console(con) { ++ if (con->flags & CON_NO_BKL) ++ cons_kthread_stop(con); ++ } ++ console_list_unlock(); ++} ++ ++static struct syscore_ops printk_syscore_ops = { ++ .shutdown = printk_kthread_shutdown, ++}; ++ ++static int __init printk_init_ops(void) ++{ ++ register_syscore_ops(&printk_syscore_ops); ++ return 0; ++} ++device_initcall(printk_init_ops); +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index ef0f9a2044da1..5c1470bd60bcb 100644 +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -12,18 +12,41 @@ + + #include "internal.h" + +-static DEFINE_PER_CPU(int, printk_context); ++struct printk_context { ++ local_lock_t cpu; ++ int recursion; ++}; ++ ++static DEFINE_PER_CPU(struct printk_context, printk_context) = { ++ .cpu = INIT_LOCAL_LOCK(cpu), ++}; + + /* Can be preempted by NMI. 
*/ +-void __printk_safe_enter(void) ++void __printk_safe_enter(unsigned long *flags) + { +- this_cpu_inc(printk_context); ++ WARN_ON_ONCE(in_nmi()); ++ local_lock_irqsave(&printk_context.cpu, *flags); ++ this_cpu_inc(printk_context.recursion); + } + + /* Can be preempted by NMI. */ +-void __printk_safe_exit(void) ++void __printk_safe_exit(unsigned long *flags) + { +- this_cpu_dec(printk_context); ++ WARN_ON_ONCE(in_nmi()); ++ this_cpu_dec(printk_context.recursion); ++ local_unlock_irqrestore(&printk_context.cpu, *flags); ++} ++ ++void __printk_deferred_enter(void) ++{ ++ WARN_ON_ONCE(!in_atomic()); ++ this_cpu_inc(printk_context.recursion); ++} ++ ++void __printk_deferred_exit(void) ++{ ++ WARN_ON_ONCE(!in_atomic()); ++ this_cpu_dec(printk_context.recursion); + } + + asmlinkage int vprintk(const char *fmt, va_list args) +@@ -38,13 +61,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) + * Use the main logbuf even in NMI. But avoid calling console + * drivers that might have their own locks. + */ +- if (this_cpu_read(printk_context) || in_nmi()) { +- int len; +- +- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); +- defer_console_output(); +- return len; +- } ++ if (this_cpu_read(printk_context.recursion) || in_nmi()) ++ return vprintk_deferred(fmt, args); + + /* No obstacles. */ + return vprintk_default(fmt, args); +diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c +index 8e6c023212cb3..34f8adf7c0a39 100644 +--- a/kernel/rcu/rcutorture.c ++++ b/kernel/rcu/rcutorture.c +@@ -2407,6 +2407,12 @@ static int rcutorture_booster_init(unsigned int cpu) + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#ifdef CONFIG_PREEMPT_RT ++ t = per_cpu(timersd, cpu); ++ WARN_ON_ONCE(!t); ++ sp.sched_priority = 2; ++ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#endif + } + + /* Don't allow time recalculation while creating a new task. */ +diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h +index b10b8349bb2a4..804306204d0d0 100644 +--- a/kernel/rcu/tree_stall.h ++++ b/kernel/rcu/tree_stall.h +@@ -8,6 +8,7 @@ + */ + + #include ++#include + + ////////////////////////////////////////////////////////////////////////////// + // +@@ -582,6 +583,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void) + + static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) + { ++ enum cons_prio prev_prio; + int cpu; + unsigned long flags; + unsigned long gpa; +@@ -597,6 +599,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) + if (rcu_stall_is_suppressed()) + return; + ++ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY); ++ + /* + * OK, time to rat on our buddy... + * See Documentation/RCU/stallwarn.rst for info on how to debug +@@ -651,6 +655,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) + panic_on_rcu_stall(); + + rcu_force_quiescent_state(); /* Kick them all. 
*/ ++ ++ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio); + } + + static void print_cpu_stall(unsigned long gps) +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 0d18c3969f904..a57a1a3beeba1 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1042,6 +1042,46 @@ void resched_curr(struct rq *rq) + trace_sched_wake_idle_without_ipi(cpu); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++ ++static int tsk_is_polling(struct task_struct *p) ++{ ++#ifdef TIF_POLLING_NRFLAG ++ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); ++#else ++ return 0; ++#endif ++} ++ ++void resched_curr_lazy(struct rq *rq) ++{ ++ struct task_struct *curr = rq->curr; ++ int cpu; ++ ++ if (!sched_feat(PREEMPT_LAZY)) { ++ resched_curr(rq); ++ return; ++ } ++ ++ if (test_tsk_need_resched(curr)) ++ return; ++ ++ if (test_tsk_need_resched_lazy(curr)) ++ return; ++ ++ set_tsk_need_resched_lazy(curr); ++ ++ cpu = cpu_of(rq); ++ if (cpu == smp_processor_id()) ++ return; ++ ++ /* NEED_RESCHED_LAZY must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(curr)) ++ smp_send_reschedule(cpu); ++} ++#endif ++ + void resched_cpu(int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -2230,6 +2270,7 @@ void migrate_disable(void) + preempt_disable(); + this_rq()->nr_pinned++; + p->migration_disabled = 1; ++ preempt_lazy_disable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_disable); +@@ -2265,6 +2306,7 @@ void migrate_enable(void) + barrier(); + p->migration_disabled = 0; + this_rq()->nr_pinned--; ++ preempt_lazy_enable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_enable); +@@ -3318,6 +3360,76 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, + } + #endif /* CONFIG_NUMA_BALANCING */ + ++#ifdef CONFIG_PREEMPT_RT ++ ++/* ++ * Consider: ++ * ++ * set_special_state(X); ++ * ++ * do_things() ++ * // Somewhere in there is an rtlock that can be contended: ++ * current_save_and_set_rtlock_wait_state(); ++ * [...] ++ * schedule_rtlock(); (A) ++ * [...] ++ * current_restore_rtlock_saved_state(); ++ * ++ * schedule(); (B) ++ * ++ * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an ++ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its ++ * state to X. For things like ptrace (X=TASK_TRACED), the task could have more ++ * work to do upon acquiring the lock in do_things() before whoever called ++ * wait_task_inactive() should return. IOW, we have to wait for: ++ * ++ * p.saved_state = TASK_RUNNING ++ * p.__state = X ++ * ++ * which implies the task isn't blocked on an RT lock and got to schedule() (B). ++ * ++ * Also see comments in ttwu_state_match(). 
++ */ ++ ++static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) ++{ ++ unsigned long flags; ++ bool mismatch; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ if (READ_ONCE(p->__state) & match_state) ++ mismatch = false; ++ else if (READ_ONCE(p->saved_state) & match_state) ++ mismatch = false; ++ else ++ mismatch = true; ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ return mismatch; ++} ++static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, ++ bool *wait) ++{ ++ if (READ_ONCE(p->__state) & match_state) ++ return true; ++ if (READ_ONCE(p->saved_state) & match_state) { ++ *wait = true; ++ return true; ++ } ++ return false; ++} ++#else ++static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) ++{ ++ return !(READ_ONCE(p->__state) & match_state); ++} ++static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, ++ bool *wait) ++{ ++ return (READ_ONCE(p->__state) & match_state); ++} ++#endif ++ + /* + * wait_task_inactive - wait for a thread to unschedule. + * +@@ -3336,7 +3448,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, + */ + unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) + { +- int running, queued; ++ bool running, wait; + struct rq_flags rf; + unsigned long ncsw; + struct rq *rq; +@@ -3362,7 +3474,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + * is actually now running somewhere else! + */ + while (task_on_cpu(rq, p)) { +- if (!(READ_ONCE(p->__state) & match_state)) ++ if (state_mismatch(p, match_state)) + return 0; + cpu_relax(); + } +@@ -3375,9 +3487,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + rq = task_rq_lock(p, &rf); + trace_sched_wait_task(p); + running = task_on_cpu(rq, p); +- queued = task_on_rq_queued(p); ++ wait = task_on_rq_queued(p); + ncsw = 0; +- if (READ_ONCE(p->__state) & match_state) ++ ++ if (state_match(p, match_state, &wait)) + ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + task_rq_unlock(rq, p, &rf); + +@@ -3407,7 +3520,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + * running right now), it's preempted, and we should + * yield - it could be a while. 
+ */ +- if (unlikely(queued)) { ++ if (unlikely(wait)) { + ktime_t to = NSEC_PER_SEC / HZ; + + set_current_state(TASK_UNINTERRUPTIBLE); +@@ -4712,6 +4825,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + p->on_cpu = 0; + #endif + init_task_preempt_count(p); ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(p)->preempt_lazy_count = 0; ++#endif + #ifdef CONFIG_SMP + plist_node_init(&p->pushable_tasks, MAX_PRIO); + RB_CLEAR_NODE(&p->pushable_dl_tasks); +@@ -6588,6 +6704,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + + next = pick_next_task(rq, prev, &rf); + clear_tsk_need_resched(prev); ++ clear_tsk_need_resched_lazy(prev); + clear_preempt_need_resched(); + #ifdef CONFIG_SCHED_DEBUG + rq->last_seen_need_resched_ns = 0; +@@ -6648,14 +6765,11 @@ void __noreturn do_task_dead(void) + cpu_relax(); + } + +-static inline void sched_submit_work(struct task_struct *tsk) ++void sched_submit_work(void) + { +- unsigned int task_flags; ++ struct task_struct *tsk = current; ++ unsigned int task_flags = tsk->flags; + +- if (task_is_running(tsk)) +- return; +- +- task_flags = tsk->flags; + /* + * If a worker goes to sleep, notify and ask workqueue whether it + * wants to wake up a task to maintain concurrency. +@@ -6681,8 +6795,10 @@ static inline void sched_submit_work(struct task_struct *tsk) + blk_flush_plug(tsk->plug, true); + } + +-static void sched_update_worker(struct task_struct *tsk) ++void sched_resume_work(void) + { ++ struct task_struct *tsk = current; ++ + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { + if (tsk->flags & PF_WQ_WORKER) + wq_worker_running(tsk); +@@ -6691,20 +6807,29 @@ static void sched_update_worker(struct task_struct *tsk) + } + } + +-asmlinkage __visible void __sched schedule(void) ++static void schedule_loop(unsigned int sched_mode) + { +- struct task_struct *tsk = current; +- +- sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(SM_NONE); ++ __schedule(sched_mode); + sched_preempt_enable_no_resched(); + } while (need_resched()); +- sched_update_worker(tsk); ++} ++ ++asmlinkage __visible void __sched schedule(void) ++{ ++ if (!task_is_running(current)) ++ sched_submit_work(); ++ schedule_loop(SM_NONE); ++ sched_resume_work(); + } + EXPORT_SYMBOL(schedule); + ++void schedule_rtmutex(void) ++{ ++ schedule_loop(SM_NONE); ++} ++ + /* + * synchronize_rcu_tasks() makes sure that no task is stuck in preempted + * state (have scheduled out non-voluntarily) by making sure that all +@@ -6764,11 +6889,7 @@ void __sched schedule_preempt_disabled(void) + #ifdef CONFIG_PREEMPT_RT + void __sched notrace schedule_rtlock(void) + { +- do { +- preempt_disable(); +- __schedule(SM_RTLOCK_WAIT); +- sched_preempt_enable_no_resched(); +- } while (need_resched()); ++ schedule_loop(SM_RTLOCK_WAIT); + } + NOKPROBE_SYMBOL(schedule_rtlock); + #endif +@@ -6802,6 +6923,30 @@ static void __sched notrace preempt_schedule_common(void) + } while (need_resched()); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++/* ++ * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is ++ * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as ++ * preempt_lazy_count counter >0. 
++ */ ++static __always_inline int preemptible_lazy(void) ++{ ++ if (test_thread_flag(TIF_NEED_RESCHED)) ++ return 1; ++ if (current_thread_info()->preempt_lazy_count) ++ return 0; ++ return 1; ++} ++ ++#else ++ ++static inline int preemptible_lazy(void) ++{ ++ return 1; ++} ++ ++#endif ++ + #ifdef CONFIG_PREEMPTION + /* + * This is the entry point to schedule() from in-kernel preemption +@@ -6815,6 +6960,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) + */ + if (likely(!preemptible())) + return; ++ if (!preemptible_lazy()) ++ return; + preempt_schedule_common(); + } + NOKPROBE_SYMBOL(preempt_schedule); +@@ -6862,6 +7009,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + if (likely(!preemptible())) + return; + ++ if (!preemptible_lazy()) ++ return; ++ + do { + /* + * Because the function tracer can trace preempt_count_sub() +@@ -9167,7 +9317,9 @@ void __init init_idle(struct task_struct *idle, int cpu) + + /* Set the preempt count _outside_ the spinlocks! */ + init_idle_preempt_count(idle, cpu); +- ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(idle)->preempt_lazy_count = 0; ++#endif + /* + * The idle tasks have their own, simple scheduling class: + */ +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index ed89be0aa6503..46ffbbfde97b0 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4948,7 +4948,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) + + delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; + if (delta_exec > ideal_runtime) { +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + /* + * The current task ran long enough, ensure it doesn't get + * re-elected due to buddy favours. +@@ -4972,7 +4972,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) + return; + + if (delta > ideal_runtime) +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + } + + static void +@@ -5118,7 +5118,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) + * validating it and just reschedule. + */ + if (queued) { +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + return; + } + /* +@@ -5267,7 +5267,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) + * hierarchy can be throttled + */ + if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + } + + static __always_inline +@@ -6142,7 +6142,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) + + if (delta < 0) { + if (task_current(rq, p)) +- resched_curr(rq); ++ resched_curr_lazy(rq); + return; + } + hrtick_start(rq, delta); +@@ -7871,7 +7871,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + return; + + preempt: +- resched_curr(rq); ++ resched_curr_lazy(rq); + /* + * Only set the backward buddy when the current task is still + * on the rq. This can happen when a wakeup gets interleaved +@@ -12036,7 +12036,7 @@ static void task_fork_fair(struct task_struct *p) + * 'current' within the tree based on its new key value. 
+ */ + swap(curr->vruntime, se->vruntime); +- resched_curr(rq); ++ resched_curr_lazy(rq); + } + + se->vruntime -= cfs_rq->min_vruntime; +@@ -12063,7 +12063,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) + */ + if (task_current(rq, p)) { + if (p->prio > oldprio) +- resched_curr(rq); ++ resched_curr_lazy(rq); + } else + check_preempt_curr(rq, p, 0); + } +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index ee7f23c76bd33..e13090e33f3c4 100644 +--- a/kernel/sched/features.h ++++ b/kernel/sched/features.h +@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) + + #ifdef CONFIG_PREEMPT_RT + SCHED_FEAT(TTWU_QUEUE, false) ++# ifdef CONFIG_PREEMPT_LAZY ++SCHED_FEAT(PREEMPT_LAZY, true) ++# endif + #else + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 3e8df6d31c1e3..6f272ef973675 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2370,6 +2370,15 @@ extern void reweight_task(struct task_struct *p, int prio); + extern void resched_curr(struct rq *rq); + extern void resched_cpu(int cpu); + ++#ifdef CONFIG_PREEMPT_LAZY ++extern void resched_curr_lazy(struct rq *rq); ++#else ++static inline void resched_curr_lazy(struct rq *rq) ++{ ++ resched_curr(rq); ++} ++#endif ++ + extern struct rt_bandwidth def_rt_bandwidth; + extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); + extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); +diff --git a/kernel/signal.c b/kernel/signal.c +index 8cb28f1df2941..138d68cfc204d 100644 +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -432,7 +432,18 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, + return NULL; + + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { +- q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); ++ ++ if (!sigqueue_flags) { ++ struct sighand_struct *sighand = t->sighand; ++ ++ lockdep_assert_held(&sighand->siglock); ++ if (sighand->sigqueue_cache) { ++ q = sighand->sigqueue_cache; ++ sighand->sigqueue_cache = NULL; ++ } ++ } ++ if (!q) ++ q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); + } else { + print_dropped_signal(sig); + } +@@ -447,14 +458,43 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, + return q; + } + +-static void __sigqueue_free(struct sigqueue *q) ++static bool sigqueue_cleanup_accounting(struct sigqueue *q) + { + if (q->flags & SIGQUEUE_PREALLOC) +- return; ++ return false; + if (q->ucounts) { + dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); + q->ucounts = NULL; + } ++ return true; ++} ++ ++static void __sigqueue_free(struct sigqueue *q) ++{ ++ if (!sigqueue_cleanup_accounting(q)) ++ return; ++ kmem_cache_free(sigqueue_cachep, q); ++} ++ ++void sigqueue_free_cached_entry(struct sigqueue *q) ++{ ++ if (!q) ++ return; ++ kmem_cache_free(sigqueue_cachep, q); ++} ++ ++static void sigqueue_cache_or_free(struct sigqueue *q) ++{ ++ struct sighand_struct *sighand = current->sighand; ++ ++ if (!sigqueue_cleanup_accounting(q)) ++ return; ++ ++ lockdep_assert_held(&sighand->siglock); ++ if (!sighand->sigqueue_cache) { ++ sighand->sigqueue_cache = q; ++ return; ++ } + kmem_cache_free(sigqueue_cachep, q); + } + +@@ -594,7 +634,7 @@ static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *i + (info->si_code == SI_TIMER) && + (info->si_sys_private); + +- __sigqueue_free(first); ++ sigqueue_cache_or_free(first); + } else { + /* + * Ok, it wasn't in the queue. 
This must be +@@ -2296,15 +2336,31 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, + do_notify_parent_cldstop(current, false, why); + + /* +- * Don't want to allow preemption here, because +- * sys_ptrace() needs this task to be inactive. ++ * The previous do_notify_parent_cldstop() invocation woke ptracer. ++ * One a PREEMPTION kernel this can result in preemption requirement ++ * which will be fulfilled after read_unlock() and the ptracer will be ++ * put on the CPU. ++ * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for ++ * this task wait in schedule(). If this task gets preempted then it ++ * remains enqueued on the runqueue. The ptracer will observe this and ++ * then sleep for a delay of one HZ tick. In the meantime this task ++ * gets scheduled, enters schedule() and will wait for the ptracer. + * +- * XXX: implement read_unlock_no_resched(). ++ * This preemption point is not bad from correctness point of view but ++ * extends the runtime by one HZ tick time due to the ptracer's sleep. ++ * The preempt-disable section ensures that there will be no preemption ++ * between unlock and schedule() and so improving the performance since ++ * the ptracer has no reason to sleep. ++ * ++ * This optimisation is not doable on PREEMPT_RT due to the spinlock_t ++ * within the preempt-disable section. + */ +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + read_unlock(&tasklist_lock); + cgroup_enter_frozen(); +- preempt_enable_no_resched(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable_no_resched(); + schedule(); + cgroup_leave_frozen(true); + +diff --git a/kernel/softirq.c b/kernel/softirq.c +index c8a6913c067d9..af9e879bbbf75 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -80,21 +80,6 @@ static void wakeup_softirqd(void) + wake_up_process(tsk); + } + +-/* +- * If ksoftirqd is scheduled, we do not want to process pending softirqs +- * right now. Let ksoftirqd handle this at its own rate, to get fairness, +- * unless we're doing some of the synchronous softirqs. 
+- */ +-#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ)) +-static bool ksoftirqd_running(unsigned long pending) +-{ +- struct task_struct *tsk = __this_cpu_read(ksoftirqd); +- +- if (pending & SOFTIRQ_NOW_MASK) +- return false; +- return tsk && task_is_running(tsk) && !__kthread_should_park(tsk); +-} +- + #ifdef CONFIG_TRACE_IRQFLAGS + DEFINE_PER_CPU(int, hardirqs_enabled); + DEFINE_PER_CPU(int, hardirq_context); +@@ -236,7 +221,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) + goto out; + + pending = local_softirq_pending(); +- if (!pending || ksoftirqd_running(pending)) ++ if (!pending) + goto out; + + /* +@@ -432,9 +417,6 @@ static inline bool should_wake_ksoftirqd(void) + + static inline void invoke_softirq(void) + { +- if (ksoftirqd_running(local_softirq_pending())) +- return; +- + if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) { + #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK + /* +@@ -468,7 +450,7 @@ asmlinkage __visible void do_softirq(void) + + pending = local_softirq_pending(); + +- if (pending && !ksoftirqd_running(pending)) ++ if (pending) + do_softirq_own_stack(); + + local_irq_restore(flags); +@@ -637,6 +619,24 @@ static inline void tick_irq_exit(void) + #endif + } + ++#ifdef CONFIG_PREEMPT_RT ++DEFINE_PER_CPU(struct task_struct *, timersd); ++DEFINE_PER_CPU(unsigned long, pending_timer_softirq); ++ ++static void wake_timersd(void) ++{ ++ struct task_struct *tsk = __this_cpu_read(timersd); ++ ++ if (tsk) ++ wake_up_process(tsk); ++} ++ ++#else ++ ++static inline void wake_timersd(void) { } ++ ++#endif ++ + static inline void __irq_exit_rcu(void) + { + #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED +@@ -649,6 +649,10 @@ static inline void __irq_exit_rcu(void) + if (!in_interrupt() && local_softirq_pending()) + invoke_softirq(); + ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() && ++ !(in_nmi() | in_hardirq())) ++ wake_timersd(); ++ + tick_irq_exit(); + } + +@@ -976,12 +980,70 @@ static struct smp_hotplug_thread softirq_threads = { + .thread_comm = "ksoftirqd/%u", + }; + ++#ifdef CONFIG_PREEMPT_RT ++static void timersd_setup(unsigned int cpu) ++{ ++ sched_set_fifo_low(current); ++} ++ ++static int timersd_should_run(unsigned int cpu) ++{ ++ return local_pending_timers(); ++} ++ ++static void run_timersd(unsigned int cpu) ++{ ++ unsigned int timer_si; ++ ++ ksoftirqd_run_begin(); ++ ++ timer_si = local_pending_timers(); ++ __this_cpu_write(pending_timer_softirq, 0); ++ or_softirq_pending(timer_si); ++ ++ __do_softirq(); ++ ++ ksoftirqd_run_end(); ++} ++ ++static void raise_ktimers_thread(unsigned int nr) ++{ ++ trace_softirq_raise(nr); ++ __this_cpu_or(pending_timer_softirq, 1 << nr); ++} ++ ++void raise_hrtimer_softirq(void) ++{ ++ raise_ktimers_thread(HRTIMER_SOFTIRQ); ++} ++ ++void raise_timer_softirq(void) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ raise_ktimers_thread(TIMER_SOFTIRQ); ++ wake_timersd(); ++ local_irq_restore(flags); ++} ++ ++static struct smp_hotplug_thread timer_threads = { ++ .store = &timersd, ++ .setup = timersd_setup, ++ .thread_should_run = timersd_should_run, ++ .thread_fn = run_timersd, ++ .thread_comm = "ktimers/%u", ++}; ++#endif ++ + static __init int spawn_ksoftirqd(void) + { + cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, + takeover_tasklets); + BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); +- ++#ifdef CONFIG_PREEMPT_RT ++ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); ++#endif + return 0; + } + early_initcall(spawn_ksoftirqd); +diff 
--git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index e8c08292defcb..10c1246cdba76 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); + } + + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +@@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); + } + + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c +index 808a247205a9a..c0a32aab8f276 100644 +--- a/kernel/time/posix-timers.c ++++ b/kernel/time/posix-timers.c +@@ -140,25 +140,29 @@ static struct k_itimer *posix_timer_by_id(timer_t id) + static int posix_timer_add(struct k_itimer *timer) + { + struct signal_struct *sig = current->signal; +- int first_free_id = sig->posix_timer_id; + struct hlist_head *head; +- int ret = -ENOENT; ++ unsigned int start, id; + +- do { ++ /* Can be written by a different task concurrently in the loop below */ ++ start = READ_ONCE(sig->next_posix_timer_id); ++ ++ for (id = ~start; start != id; id++) { + spin_lock(&hash_lock); +- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; +- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { ++ id = sig->next_posix_timer_id; ++ ++ /* Write the next ID back. Clamp it to the positive space */ ++ WRITE_ONCE(sig->next_posix_timer_id, (id + 1) & INT_MAX); ++ ++ head = &posix_timers_hashtable[hash(sig, id)]; ++ if (!__posix_timers_find(head, sig, id)) { + hlist_add_head_rcu(&timer->t_hash, head); +- ret = sig->posix_timer_id; ++ spin_unlock(&hash_lock); ++ return id; + } +- if (++sig->posix_timer_id < 0) +- sig->posix_timer_id = 0; +- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) +- /* Loop over all possible ids completed */ +- ret = -EAGAIN; + spin_unlock(&hash_lock); +- } while (ret == -ENOENT); +- return ret; ++ } ++ /* POSIX return code when no timer ID could be allocated */ ++ return -EAGAIN; + } + + static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) +@@ -1037,27 +1041,59 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) + } + + /* +- * return timer owned by the process, used by exit_itimers ++ * Delete a timer if it is armed, remove it from the hash and schedule it ++ * for RCU freeing. + */ + static void itimer_delete(struct k_itimer *timer) + { +-retry_delete: +- spin_lock_irq(&timer->it_lock); ++ unsigned long flags; + ++retry_delete: ++ /* ++ * irqsave is required to make timer_wait_running() work. ++ */ ++ spin_lock_irqsave(&timer->it_lock, flags); ++ ++ /* ++ * Even if the timer is not longer accessible from other tasks ++ * it still might be armed and queued in the underlying timer ++ * mechanism. Worse, that timer mechanism might run the expiry ++ * function concurrently. ++ */ + if (timer_delete_hook(timer) == TIMER_RETRY) { +- spin_unlock_irq(&timer->it_lock); ++ /* ++ * Timer is expired concurrently, prevent livelocks ++ * and pointless spinning on RT. 
++ * ++ * The CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y case is ++ * irrelevant here because obviously the exiting task ++ * cannot be expiring timer in task work concurrently. ++ * Ditto for CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n as the ++ * tick interrupt cannot run on this CPU because the above ++ * spin_lock disabled interrupts. ++ * ++ * timer_wait_running() drops timer::it_lock, which opens ++ * the possibility for another task to delete the timer. ++ * ++ * That's not possible here because this is invoked from ++ * do_exit() only for the last thread of the thread group. ++ * So no other task can access that timer. ++ */ ++ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) ++ return; ++ + goto retry_delete; + } + list_del(&timer->list); + +- spin_unlock_irq(&timer->it_lock); ++ spin_unlock_irqrestore(&timer->it_lock, flags); + release_posix_timer(timer, IT_ID_SET); + } + + /* +- * This is called by do_exit or de_thread, only when nobody else can +- * modify the signal->posix_timers list. Yet we need sighand->siglock +- * to prevent the race with /proc/pid/timers. ++ * Invoked from do_exit() when the last thread of a thread group exits. ++ * At that point no other task can access the timers of the dying ++ * task anymore. + */ + void exit_itimers(struct task_struct *tsk) + { +@@ -1067,10 +1103,12 @@ void exit_itimers(struct task_struct *tsk) + if (list_empty(&tsk->signal->posix_timers)) + return; + ++ /* Protect against concurrent read via /proc/$PID/timers */ + spin_lock_irq(&tsk->sighand->siglock); + list_replace_init(&tsk->signal->posix_timers, &timers); + spin_unlock_irq(&tsk->sighand->siglock); + ++ /* The timers are not longer accessible via tsk::signal */ + while (!list_empty(&timers)) { + tmr = list_first_entry(&timers, struct k_itimer, list); + itimer_delete(tmr); +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index a46506f7ec6d0..1ae9e4e8a0715 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -789,7 +789,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) + + static inline bool local_timer_softirq_pending(void) + { +- return local_softirq_pending() & BIT(TIMER_SOFTIRQ); ++ return local_pending_timers() & BIT(TIMER_SOFTIRQ); + } + + static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 63a8ce7177dd4..7cad6fe3c035c 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -2054,7 +2054,7 @@ static void run_local_timers(void) + if (time_before(jiffies, base->next_expiry)) + return; + } +- raise_softirq(TIMER_SOFTIRQ); ++ raise_timer_softirq(); + } + + /* +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 5909aaf2f4c08..2867def70f441 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2694,11 +2694,19 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) + if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) + trace_flags |= TRACE_FLAG_BH_OFF; + +- if (tif_need_resched()) ++ if (tif_need_resched_now()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; ++#ifdef CONFIG_PREEMPT_LAZY ++ /* Run out of bits. 
Share the LAZY and PREEMPT_RESCHED */ ++ if (need_resched_lazy()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; ++#else + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; +- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | ++#endif ++ ++ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | ++ (preempt_lazy_count() & 0xff) << 16 | + (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; + } + +@@ -4287,15 +4295,17 @@ unsigned long trace_total_entries(struct trace_array *tr) + + static void print_lat_help_header(struct seq_file *m) + { +- seq_puts(m, "# _------=> CPU# \n" +- "# / _-----=> irqs-off/BH-disabled\n" +- "# | / _----=> need-resched \n" +- "# || / _---=> hardirq/softirq \n" +- "# ||| / _--=> preempt-depth \n" +- "# |||| / _-=> migrate-disable \n" +- "# ||||| / delay \n" +- "# cmd pid |||||| time | caller \n" +- "# \\ / |||||| \\ | / \n"); ++ seq_puts(m, "# _--------=> CPU# \n" ++ "# / _-------=> irqs-off/BH-disabled\n" ++ "# | / _------=> need-resched \n" ++ "# || / _-----=> need-resched-lazy\n" ++ "# ||| / _----=> hardirq/softirq \n" ++ "# |||| / _---=> preempt-depth \n" ++ "# ||||| / _--=> preempt-lazy-depth\n" ++ "# |||||| / _-=> migrate-disable \n" ++ "# ||||||| / delay \n" ++ "# cmd pid |||||||| time | caller \n" ++ "# \\ / |||||||| \\ | / \n"); + } + + static void print_event_info(struct array_buffer *buf, struct seq_file *m) +@@ -4329,14 +4339,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file + + print_event_info(buf, m); + +- seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); +- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); +- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); +- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); +- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); +- seq_printf(m, "# %.*s|||| / delay\n", prec, space); +- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); +- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); ++ seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space); ++ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); ++ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space); ++ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); ++ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); ++ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); ++ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); ++ seq_printf(m, "# %.*s|||||| / delay\n", prec, space); ++ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); ++ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); + } + + void +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 654ffa40457aa..b2d52f8355b70 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -208,6 +208,7 @@ static int trace_define_common_fields(void) + /* Holds both preempt_count and migrate_disable */ + __common_field(unsigned char, preempt_count); + __common_field(int, pid); ++ __common_field(unsigned char, preempt_lazy_count); + + return ret; + } +diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c +index bd475a00f96d1..89d4a3bfdc6d5 100644 +--- a/kernel/trace/trace_output.c ++++ b/kernel/trace/trace_output.c +@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct 
trace_entry *entry) + { + char hardsoft_irq; + char need_resched; ++ char need_resched_lazy; + char irqs_off; + int hardirq; + int softirq; +@@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + + switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | + TRACE_FLAG_PREEMPT_RESCHED)) { ++#ifndef CONFIG_PREEMPT_LAZY + case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'N'; + break; ++#endif + case TRACE_FLAG_NEED_RESCHED: + need_resched = 'n'; + break; ++#ifndef CONFIG_PREEMPT_LAZY + case TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'p'; + break; ++#endif + default: + need_resched = '.'; + break; + } + ++ need_resched_lazy = ++ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; ++ + hardsoft_irq = + (nmi && hardirq) ? 'Z' : + nmi ? 'z' : +@@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + softirq ? 's' : + '.' ; + +- trace_seq_printf(s, "%c%c%c", +- irqs_off, need_resched, hardsoft_irq); ++ trace_seq_printf(s, "%c%c%c%c", ++ irqs_off, need_resched, need_resched_lazy, ++ hardsoft_irq); + + if (entry->preempt_count & 0xf) + trace_seq_printf(s, "%x", entry->preempt_count & 0xf); + else + trace_seq_putc(s, '.'); + ++ if (entry->preempt_lazy_count) ++ trace_seq_printf(s, "%x", entry->preempt_lazy_count); ++ else ++ trace_seq_putc(s, '.'); ++ + if (entry->preempt_count & 0xf0) + trace_seq_printf(s, "%x", entry->preempt_count >> 4); + else +diff --git a/localversion-rt b/localversion-rt +new file mode 100644 +index 0000000000000..18777ec0c27d4 +--- /dev/null ++++ b/localversion-rt +@@ -0,0 +1 @@ ++-rt15 +diff --git a/net/core/dev.c b/net/core/dev.c +index 1488f700bf819..8c3b70160be8c 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4573,15 +4573,6 @@ static void rps_trigger_softirq(void *data) + + #endif /* CONFIG_RPS */ + +-/* Called from hardirq (IPI) context */ +-static void trigger_rx_softirq(void *data) +-{ +- struct softnet_data *sd = data; +- +- __raise_softirq_irqoff(NET_RX_SOFTIRQ); +- smp_store_release(&sd->defer_ipi_scheduled, 0); +-} +- + /* + * Check if this softnet_data structure is another cpu one + * If yes, queue it to our IPI list and return 1 +@@ -6632,6 +6623,30 @@ static void skb_defer_free_flush(struct softnet_data *sd) + } + } + ++#ifndef CONFIG_PREEMPT_RT ++/* Called from hardirq (IPI) context */ ++static void trigger_rx_softirq(void *data) ++{ ++ struct softnet_data *sd = data; ++ ++ __raise_softirq_irqoff(NET_RX_SOFTIRQ); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++} ++ ++#else ++ ++static void trigger_rx_softirq(struct work_struct *defer_work) ++{ ++ struct softnet_data *sd; ++ ++ sd = container_of(defer_work, struct softnet_data, defer_work); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++ local_bh_disable(); ++ skb_defer_free_flush(sd); ++ local_bh_enable(); ++} ++#endif ++ + static __latent_entropy void net_rx_action(struct softirq_action *h) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); +@@ -11409,7 +11424,11 @@ static int __init net_dev_init(void) + INIT_CSD(&sd->csd, rps_trigger_softirq, sd); + sd->cpu = i; + #endif ++#ifndef CONFIG_PREEMPT_RT + INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); ++#else ++ INIT_WORK(&sd->defer_work, trigger_rx_softirq); ++#endif + spin_lock_init(&sd->defer_lock); + + init_gro_hash(&sd->backlog); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 14bb41aafee30..3f8dac23205c6 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -6856,6 +6856,11 @@ nodefer: 
__kfree_skb(skb); + /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU + * if we are unlucky enough (this seems very unlikely). + */ +- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) ++ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { ++#ifndef CONFIG_PREEMPT_RT + smp_call_function_single_async(cpu, &sd->defer_csd); ++#else ++ schedule_work_on(cpu, &sd->defer_work); ++#endif ++ } + } diff --git a/packages/sysutils/busybox/scripts/fs-resize b/packages/sysutils/busybox/scripts/fs-resize index add0f47cf..0906073c4 100755 --- a/packages/sysutils/busybox/scripts/fs-resize +++ b/packages/sysutils/busybox/scripts/fs-resize @@ -59,9 +59,10 @@ if [ -e /storage/.please_resize_me ] ; then fi fi +# Remove all of the modules that may be loaded to see if /flash will cleanly unmount. for module in $(lsmod | awk '{print $1}') do - rmmod ${module} + rmmod ${module} 2>/dev/null done shutdown -r now &>/dev/null diff --git a/projects/PC/devices/AMD64/linux/linux.x86_64.conf b/projects/PC/devices/AMD64/linux/linux.x86_64.conf index 9c280d779..1ca00ada4 100644 --- a/projects/PC/devices/AMD64/linux/linux.x86_64.conf +++ b/projects/PC/devices/AMD64/linux/linux.x86_64.conf @@ -117,10 +117,12 @@ CONFIG_BPF_SYSCALL=y # CONFIG_BPF_PRELOAD is not set # end of BPF subsystem +CONFIG_HAVE_PREEMPT_LAZY=y CONFIG_PREEMPT_BUILD=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set +CONFIG_PREEMPT_RT=y CONFIG_PREEMPT_COUNT=y CONFIG_PREEMPTION=y CONFIG_PREEMPT_DYNAMIC=y @@ -314,6 +316,7 @@ CONFIG_HAVE_INTEL_TXT=y CONFIG_X86_64_SMP=y CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_FIX_EARLYCON_MEM=y +CONFIG_DYNAMIC_PHYSICAL_MASK=y CONFIG_PGTABLE_LEVELS=4 CONFIG_CC_HAS_SANE_STACKPROTECTOR=y @@ -387,12 +390,12 @@ CONFIG_X86_IO_APIC=y # # Performance monitoring # -CONFIG_PERF_EVENTS_INTEL_UNCORE=y -CONFIG_PERF_EVENTS_INTEL_RAPL=y -CONFIG_PERF_EVENTS_INTEL_CSTATE=y -CONFIG_PERF_EVENTS_AMD_POWER=y -CONFIG_PERF_EVENTS_AMD_UNCORE=y -# CONFIG_PERF_EVENTS_AMD_BRS is not set +CONFIG_PERF_EVENTS_INTEL_UNCORE=m +CONFIG_PERF_EVENTS_INTEL_RAPL=m +CONFIG_PERF_EVENTS_INTEL_CSTATE=m +CONFIG_PERF_EVENTS_AMD_POWER=m +CONFIG_PERF_EVENTS_AMD_UNCORE=m +CONFIG_PERF_EVENTS_AMD_BRS=y # end of Performance monitoring CONFIG_X86_16BIT=y @@ -408,9 +411,11 @@ CONFIG_X86_CPUID=y # CONFIG_X86_5LEVEL is not set CONFIG_X86_DIRECT_GBPAGES=y # CONFIG_X86_CPA_STATISTICS is not set -# CONFIG_AMD_MEM_ENCRYPT is not set +CONFIG_X86_MEM_ENCRYPT=y +CONFIG_AMD_MEM_ENCRYPT=y +# CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT is not set CONFIG_NUMA=y -# CONFIG_AMD_NUMA is not set +CONFIG_AMD_NUMA=y CONFIG_X86_64_ACPI_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_NODES_SHIFT=6 @@ -431,9 +436,9 @@ CONFIG_X86_UMIP=y CONFIG_CC_HAS_IBT=y # CONFIG_X86_KERNEL_IBT is not set CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y -CONFIG_X86_INTEL_TSX_MODE_OFF=y +# CONFIG_X86_INTEL_TSX_MODE_OFF is not set # CONFIG_X86_INTEL_TSX_MODE_ON is not set -# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set +CONFIG_X86_INTEL_TSX_MODE_AUTO=y CONFIG_EFI=y CONFIG_EFI_STUB=y CONFIG_EFI_HANDOVER_PROTOCOL=y @@ -541,7 +546,7 @@ CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_PROCESSOR_AGGREGATOR=y CONFIG_ACPI_THERMAL=y -CONFIG_ACPI_PLATFORM_PROFILE=y +CONFIG_ACPI_PLATFORM_PROFILE=m CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y CONFIG_ACPI_TABLE_UPGRADE=y # CONFIG_ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD is not set @@ -566,6 +571,7 @@ CONFIG_HAVE_ACPI_APEI_NMI=y CONFIG_ACPI_PCC=y # CONFIG_ACPI_FFH is not set # CONFIG_PMIC_OPREGION is not set +# 
CONFIG_TPS68470_PMIC_OPREGION is not set CONFIG_ACPI_PRMT=y CONFIG_X86_PM_TIMER=y @@ -593,7 +599,7 @@ CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_X86_INTEL_PSTATE=y CONFIG_X86_PCC_CPUFREQ=m CONFIG_X86_AMD_PSTATE=y -CONFIG_X86_AMD_PSTATE_UT=y +CONFIG_X86_AMD_PSTATE_UT=m CONFIG_X86_ACPI_CPUFREQ=y CONFIG_X86_ACPI_CPUFREQ_CPB=y CONFIG_X86_POWERNOW_K8=y @@ -760,6 +766,7 @@ CONFIG_HAVE_RELIABLE_STACKTRACE=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_COMPAT_OLD_SIGACTION=y CONFIG_COMPAT_32BIT_TIME=y +CONFIG_ARCH_SUPPORTS_RT=y CONFIG_HAVE_ARCH_VMAP_STACK=y CONFIG_VMAP_STACK=y CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y @@ -773,6 +780,7 @@ CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y CONFIG_ARCH_USE_MEMREMAP_PROT=y # CONFIG_LOCK_EVENT_COUNTS is not set CONFIG_ARCH_HAS_MEM_ENCRYPT=y +CONFIG_ARCH_HAS_CC_PLATFORM=y CONFIG_HAVE_STATIC_CALL=y CONFIG_HAVE_STATIC_CALL_INLINE=y CONFIG_HAVE_PREEMPT_DYNAMIC=y @@ -1778,11 +1786,12 @@ CONFIG_EEPROM_EE1004=y # CONFIG_SENSORS_LIS3_I2C is not set CONFIG_ALTERA_STAPL=m -# CONFIG_INTEL_MEI is not set -# CONFIG_INTEL_MEI_ME is not set -# CONFIG_INTEL_MEI_TXE is not set -# CONFIG_INTEL_MEI_HDCP is not set -# CONFIG_INTEL_MEI_PXP is not set +CONFIG_INTEL_MEI=m +CONFIG_INTEL_MEI_ME=m +CONFIG_INTEL_MEI_TXE=m +CONFIG_INTEL_MEI_GSC=m +CONFIG_INTEL_MEI_HDCP=m +CONFIG_INTEL_MEI_PXP=m # CONFIG_VMWARE_VMCI is not set # CONFIG_GENWQE is not set # CONFIG_ECHO is not set @@ -1936,7 +1945,7 @@ CONFIG_SATA_VIA=y # PATA SFF controllers with BMDMA # CONFIG_PATA_ALI=y -CONFIG_PATA_AMD=y +CONFIG_PATA_AMD=m # CONFIG_PATA_ARTOP is not set CONFIG_PATA_ATIIXP=y # CONFIG_PATA_ATP867X is not set @@ -2038,9 +2047,10 @@ CONFIG_VORTEX=y CONFIG_NET_VENDOR_AMAZON=y # CONFIG_ENA_ETHERNET is not set CONFIG_NET_VENDOR_AMD=y -# CONFIG_AMD8111_ETH is not set +CONFIG_AMD8111_ETH=m CONFIG_PCNET32=y -# CONFIG_AMD_XGBE is not set +CONFIG_AMD_XGBE=m +CONFIG_AMD_XGBE_HAVE_ECC=y CONFIG_NET_VENDOR_AQUANTIA=y CONFIG_AQTION=y CONFIG_NET_VENDOR_ARC=y @@ -2201,7 +2211,7 @@ CONFIG_FIXED_PHY=y # # MII PHY device drivers # -CONFIG_AMD_PHY=y +CONFIG_AMD_PHY=m # CONFIG_ADIN_PHY is not set # CONFIG_ADIN1100_PHY is not set # CONFIG_AQUANTIA_PHY is not set @@ -2217,7 +2227,7 @@ CONFIG_BCM_NET_PHYLIB=y # CONFIG_DAVICOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_LXT_PHY is not set -# CONFIG_INTEL_XWAY_PHY is not set +CONFIG_INTEL_XWAY_PHY=m # CONFIG_LSI_ET1011C_PHY is not set CONFIG_MARVELL_PHY=y # CONFIG_MARVELL_10G_PHY is not set @@ -2588,7 +2598,7 @@ CONFIG_NET_FAILOVER=y CONFIG_INPUT=y CONFIG_INPUT_LEDS=y CONFIG_INPUT_FF_MEMLESS=y -# CONFIG_INPUT_SPARSEKMAP is not set +CONFIG_INPUT_SPARSEKMAP=m # CONFIG_INPUT_MATRIXKMAP is not set CONFIG_INPUT_VIVALDIFMAP=y @@ -2978,13 +2988,15 @@ CONFIG_I2C_CCGX_UCSI=y # CONFIG_I2C_ALI1535 is not set # CONFIG_I2C_ALI1563 is not set # CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_AMD8111 is not set -# CONFIG_I2C_AMD_MP2 is not set +CONFIG_I2C_AMD756=m +CONFIG_I2C_AMD756_S4882=m +CONFIG_I2C_AMD8111=m +CONFIG_I2C_AMD_MP2=m CONFIG_I2C_I801=y CONFIG_I2C_ISCH=y # CONFIG_I2C_ISMT is not set CONFIG_I2C_PIIX4=y +# CONFIG_I2C_CHT_WC is not set CONFIG_I2C_NFORCE2=y # CONFIG_I2C_NFORCE2_S4985 is not set CONFIG_I2C_NVIDIA_GPU=m @@ -3062,8 +3074,9 @@ CONFIG_SPI_DW_PCI=m CONFIG_SPI_DW_MMIO=m # CONFIG_SPI_NXP_FLEXSPI is not set CONFIG_SPI_GPIO=m -# CONFIG_SPI_INTEL_PCI is not set -# CONFIG_SPI_INTEL_PLATFORM is not set +CONFIG_SPI_INTEL=m +CONFIG_SPI_INTEL_PCI=m +CONFIG_SPI_INTEL_PLATFORM=m # CONFIG_SPI_MICROCHIP_CORE is not set # CONFIG_SPI_MICROCHIP_CORE_QSPI is not set # 
CONFIG_SPI_LANTIQ_SSC is not set @@ -3184,17 +3197,18 @@ CONFIG_GPIOLIB_IRQCHIP=y CONFIG_GPIO_SYSFS=y CONFIG_GPIO_CDEV=y CONFIG_GPIO_CDEV_V1=y +CONFIG_GPIO_GENERIC=m # # Memory mapped GPIO drivers # -# CONFIG_GPIO_AMDPT is not set +CONFIG_GPIO_AMDPT=m # CONFIG_GPIO_DWAPB is not set # CONFIG_GPIO_EXAR is not set # CONFIG_GPIO_GENERIC_PLATFORM is not set # CONFIG_GPIO_MB86S7X is not set # CONFIG_GPIO_VX855 is not set -# CONFIG_GPIO_AMD_FCH is not set +CONFIG_GPIO_AMD_FCH=m # end of Memory mapped GPIO drivers # @@ -3222,12 +3236,15 @@ CONFIG_GPIO_CDEV_V1=y # # MFD GPIO expanders # +# CONFIG_GPIO_CRYSTAL_COVE is not set +# CONFIG_GPIO_TPS68470 is not set +# CONFIG_GPIO_WHISKEY_COVE is not set # end of MFD GPIO expanders # # PCI GPIO expanders # -# CONFIG_GPIO_AMD8111 is not set +CONFIG_GPIO_AMD8111=m # CONFIG_GPIO_ML_IOH is not set # CONFIG_GPIO_PCI_IDIO_16 is not set # CONFIG_GPIO_PCIE_IDIO_24 is not set @@ -3484,6 +3501,7 @@ CONFIG_THERMAL_NETLINK=y CONFIG_THERMAL_STATISTICS=y CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_HWMON=y +CONFIG_THERMAL_ACPI=y CONFIG_THERMAL_WRITABLE_TRIPS=y CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y # CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set @@ -3510,9 +3528,10 @@ CONFIG_INTEL_SOC_DTS_THERMAL=m # CONFIG_INT340X_THERMAL is not set # end of ACPI INT340X thermal drivers -# CONFIG_INTEL_PCH_THERMAL is not set -# CONFIG_INTEL_TCC_COOLING is not set -# CONFIG_INTEL_MENLOW is not set +CONFIG_INTEL_BXT_PMIC_THERMAL=m +CONFIG_INTEL_PCH_THERMAL=m +CONFIG_INTEL_TCC_COOLING=m +CONFIG_INTEL_MENLOW=m CONFIG_INTEL_HFI_THERMAL=y # end of Intel thermal drivers @@ -3560,16 +3579,18 @@ CONFIG_MFD_CORE=y # CONFIG_MFD_MC13XXX_SPI is not set # CONFIG_MFD_MC13XXX_I2C is not set # CONFIG_MFD_MP2629 is not set -# CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set +CONFIG_MFD_INTEL_QUARK_I2C_GPIO=m # CONFIG_LPC_ICH is not set CONFIG_LPC_SCH=y -# CONFIG_INTEL_SOC_PMIC is not set -# CONFIG_INTEL_SOC_PMIC_CHTWC is not set -# CONFIG_INTEL_SOC_PMIC_CHTDC_TI is not set -CONFIG_MFD_INTEL_LPSS=y -CONFIG_MFD_INTEL_LPSS_ACPI=y -CONFIG_MFD_INTEL_LPSS_PCI=y -# CONFIG_MFD_INTEL_PMC_BXT is not set +CONFIG_INTEL_SOC_PMIC=y +CONFIG_INTEL_SOC_PMIC_BXTWC=m +CONFIG_INTEL_SOC_PMIC_CHTWC=y +CONFIG_INTEL_SOC_PMIC_CHTDC_TI=m +CONFIG_INTEL_SOC_PMIC_MRFLD=m +CONFIG_MFD_INTEL_LPSS=m +CONFIG_MFD_INTEL_LPSS_ACPI=m +CONFIG_MFD_INTEL_LPSS_PCI=m +CONFIG_MFD_INTEL_PMC_BXT=m # CONFIG_MFD_IQS62X is not set # CONFIG_MFD_JANZ_CMODIO is not set # CONFIG_MFD_KEMPLD is not set @@ -3688,6 +3709,7 @@ CONFIG_REGULATOR=y # CONFIG_REGULATOR_TPS6507X is not set # CONFIG_REGULATOR_TPS65132 is not set # CONFIG_REGULATOR_TPS6524X is not set +# CONFIG_REGULATOR_TPS68470 is not set # CONFIG_REGULATOR_QCOM_LABIBB is not set CONFIG_RC_CORE=y CONFIG_BPF_LIRC_MODE2=y @@ -4548,7 +4570,7 @@ CONFIG_APERTURE_HELPERS=y CONFIG_VIDEO_NOMODESET=y CONFIG_AGP=y # CONFIG_AGP_AMD64 is not set -CONFIG_AGP_INTEL=y +CONFIG_AGP_INTEL=m # CONFIG_AGP_SIS is not set CONFIG_AGP_VIA=y CONFIG_INTEL_GTT=y @@ -4619,6 +4641,7 @@ CONFIG_DRM_I915_FORCE_PROBE="" CONFIG_DRM_I915_CAPTURE_ERROR=y CONFIG_DRM_I915_COMPRESS_ERROR=y CONFIG_DRM_I915_USERPTR=y +# CONFIG_DRM_I915_PXP is not set # # drm/i915 Debugging @@ -4896,7 +4919,7 @@ CONFIG_SND_EMU10K1X=m CONFIG_SND_ICE1712=m CONFIG_SND_ICE1724=m CONFIG_SND_INTEL8X0=m -# CONFIG_SND_INTEL8X0M is not set +CONFIG_SND_INTEL8X0M=m # CONFIG_SND_KORG1212 is not set # CONFIG_SND_LOLA is not set # CONFIG_SND_LX6464ES is not set @@ -4946,7 +4969,7 @@ CONFIG_SND_HDA_CODEC_CMEDIA=m CONFIG_SND_HDA_CODEC_SI3054=m 
CONFIG_SND_HDA_GENERIC=m CONFIG_SND_HDA_POWER_SAVE_DEFAULT=1 -# CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM is not set +CONFIG_SND_HDA_INTEL_HDMI_SILENT_STREAM=y # CONFIG_SND_HDA_CTL_DEV_ID is not set # end of HD-Audio @@ -4994,23 +5017,28 @@ CONFIG_SND_SOC_ACPI=m CONFIG_SND_SOC_AMD_ACP=m CONFIG_SND_SOC_AMD_CZ_DA7219MX98357_MACH=m CONFIG_SND_SOC_AMD_CZ_RT5645_MACH=m -# CONFIG_SND_SOC_AMD_ST_ES8336_MACH is not set +CONFIG_SND_SOC_AMD_ST_ES8336_MACH=m CONFIG_SND_SOC_AMD_ACP3x=m CONFIG_SND_SOC_AMD_RENOIR=m CONFIG_SND_SOC_AMD_RENOIR_MACH=m CONFIG_SND_SOC_AMD_ACP5x=m CONFIG_SND_SOC_AMD_VANGOGH_MACH=m CONFIG_SND_SOC_AMD_ACP6x=m -# CONFIG_SND_SOC_AMD_YC_MACH is not set +CONFIG_SND_SOC_AMD_YC_MACH=m CONFIG_SND_AMD_ACP_CONFIG=m CONFIG_SND_SOC_AMD_ACP_COMMON=m -# CONFIG_SND_SOC_AMD_ACP_PCI is not set -# CONFIG_SND_AMD_ASOC_RENOIR is not set -# CONFIG_SND_AMD_ASOC_REMBRANDT is not set -# CONFIG_SND_SOC_AMD_LEGACY_MACH is not set -# CONFIG_SND_SOC_AMD_SOF_MACH is not set -# CONFIG_SND_SOC_AMD_RPL_ACP6x is not set -# CONFIG_SND_SOC_AMD_PS is not set +CONFIG_SND_SOC_AMD_ACP_PDM=m +CONFIG_SND_SOC_AMD_ACP_I2S=m +CONFIG_SND_SOC_AMD_ACP_PCM=m +CONFIG_SND_SOC_AMD_ACP_PCI=m +CONFIG_SND_AMD_ASOC_RENOIR=m +CONFIG_SND_AMD_ASOC_REMBRANDT=m +CONFIG_SND_SOC_AMD_MACH_COMMON=m +CONFIG_SND_SOC_AMD_LEGACY_MACH=m +CONFIG_SND_SOC_AMD_SOF_MACH=m +CONFIG_SND_SOC_AMD_RPL_ACP6x=m +CONFIG_SND_SOC_AMD_PS=m +CONFIG_SND_SOC_AMD_PS_MACH=m # CONFIG_SND_ATMEL_SOC is not set # CONFIG_SND_BCM63XX_I2S_WHISTLER is not set # CONFIG_SND_DESIGNWARE_I2S is not set @@ -5037,7 +5065,7 @@ CONFIG_SND_SOC_AMD_ACP_COMMON=m # CONFIG_SND_SOC_IMG is not set CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y CONFIG_SND_SOC_INTEL_SST=m -# CONFIG_SND_SOC_INTEL_CATPT is not set +CONFIG_SND_SOC_INTEL_CATPT=m CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_PCI=m CONFIG_SND_SST_ATOM_HIFI2_PLATFORM_ACPI=m @@ -5051,6 +5079,7 @@ CONFIG_SND_SOC_INTEL_CFL=m CONFIG_SND_SOC_INTEL_CML_H=m CONFIG_SND_SOC_INTEL_CML_LP=m CONFIG_SND_SOC_INTEL_SKYLAKE_FAMILY=m +CONFIG_SND_SOC_INTEL_SKYLAKE_SSP_CLK=m CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC=y CONFIG_SND_SOC_INTEL_SKYLAKE_COMMON=m CONFIG_SND_SOC_ACPI_INTEL_MATCH=m @@ -5063,25 +5092,29 @@ CONFIG_SND_SOC_INTEL_AVS=m # # Available DSP configurations # -# CONFIG_SND_SOC_INTEL_AVS_MACH_DA7219 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_DMIC is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_HDAUDIO is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_I2S_TEST is not set +CONFIG_SND_SOC_INTEL_AVS_MACH_DA7219=m +CONFIG_SND_SOC_INTEL_AVS_MACH_DMIC=m +CONFIG_SND_SOC_INTEL_AVS_MACH_HDAUDIO=m +CONFIG_SND_SOC_INTEL_AVS_MACH_I2S_TEST=m # CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98927 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98357A is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98373 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_NAU8825 is not set +CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98357A=m +CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98373=m +CONFIG_SND_SOC_INTEL_AVS_MACH_NAU8825=m # CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT274 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT286 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT298 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682 is not set -# CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567 is not set +CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT298=m +CONFIG_SND_SOC_INTEL_AVS_MACH_RT5682=m +CONFIG_SND_SOC_INTEL_AVS_MACH_SSM4567=m # end of Intel AVS Machine drivers CONFIG_SND_SOC_INTEL_MACH=y -# 
CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES is not set +CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES=y CONFIG_SND_SOC_INTEL_HDA_DSP_COMMON=m +CONFIG_SND_SOC_INTEL_HASWELL_MACH=m +CONFIG_SND_SOC_INTEL_BDW_RT5650_MACH=m +CONFIG_SND_SOC_INTEL_BDW_RT5677_MACH=m +CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m @@ -5091,17 +5124,19 @@ CONFIG_SND_SOC_INTEL_CHT_BSW_NAU8824_MACH=m CONFIG_SND_SOC_INTEL_BYT_CHT_CX2072X_MACH=m CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH=m CONFIG_SND_SOC_INTEL_BYT_CHT_ES8316_MACH=m -CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH=m -# CONFIG_SND_SOC_INTEL_SKL_RT286_MACH is not set -# CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH is not set -# CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_BXT_RT298_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH is not set -# CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH is not set +# CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH is not set +CONFIG_SND_SOC_INTEL_SKL_RT286_MACH=m +CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH=m +CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m +CONFIG_SND_SOC_INTEL_DA7219_MAX98357A_GENERIC=m +CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_COMMON=m +CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m +CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m +CONFIG_SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH=m +CONFIG_SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH=m +CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98357A_MACH=m +CONFIG_SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH=m +CONFIG_SND_SOC_INTEL_KBL_RT5660_MACH=m CONFIG_SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH=m # CONFIG_SND_SOC_MTK_BTCVSD is not set # CONFIG_SND_SOC_SOF_TOPLEVEL is not set @@ -5200,8 +5235,9 @@ CONFIG_SND_SOC_MAX98357A=m # CONFIG_SND_SOC_MAX9867 is not set CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_MAX98520 is not set -# CONFIG_SND_SOC_MAX98373_I2C is not set -# CONFIG_SND_SOC_MAX98390 is not set +CONFIG_SND_SOC_MAX98373=m +CONFIG_SND_SOC_MAX98373_I2C=m +CONFIG_SND_SOC_MAX98390=m # CONFIG_SND_SOC_MAX98396 is not set # CONFIG_SND_SOC_MAX9860 is not set # CONFIG_SND_SOC_MSM8916_WCD_ANALOG is not set @@ -5222,15 +5258,27 @@ CONFIG_SND_SOC_MAX98927=m # CONFIG_SND_SOC_PEB2466 is not set # CONFIG_SND_SOC_RK3328 is not set CONFIG_SND_SOC_RL6231=m +CONFIG_SND_SOC_RL6347A=m +CONFIG_SND_SOC_RT274=m +CONFIG_SND_SOC_RT286=m +CONFIG_SND_SOC_RT298=m +CONFIG_SND_SOC_RT1019=m +CONFIG_SND_SOC_RT5514=m +CONFIG_SND_SOC_RT5514_SPI=m # CONFIG_SND_SOC_RT5616 is not set # CONFIG_SND_SOC_RT5631 is not set CONFIG_SND_SOC_RT5640=m CONFIG_SND_SOC_RT5645=m CONFIG_SND_SOC_RT5651=m # CONFIG_SND_SOC_RT5659 is not set +CONFIG_SND_SOC_RT5660=m +CONFIG_SND_SOC_RT5663=m CONFIG_SND_SOC_RT5670=m +CONFIG_SND_SOC_RT5677=m +CONFIG_SND_SOC_RT5677_SPI=m CONFIG_SND_SOC_RT5682=m CONFIG_SND_SOC_RT5682_I2C=m +CONFIG_SND_SOC_RT5682S=m # CONFIG_SND_SOC_RT9120 is not set # CONFIG_SND_SOC_SGTL5000 is not set # CONFIG_SND_SOC_SIMPLE_AMPLIFIER is not set @@ -5242,7 +5290,7 @@ CONFIG_SND_SOC_SPDIF=m # CONFIG_SND_SOC_SSM2518 is not set # CONFIG_SND_SOC_SSM2602_SPI is not set # CONFIG_SND_SOC_SSM2602_I2C is not set -# CONFIG_SND_SOC_SSM4567 is not set +CONFIG_SND_SOC_SSM4567=m # CONFIG_SND_SOC_STA32X is not set # CONFIG_SND_SOC_STA350 is 
not set # CONFIG_SND_SOC_STI_SAS is not set @@ -5309,6 +5357,7 @@ CONFIG_SND_SOC_TS3A227E=m CONFIG_SND_SOC_NAU8821=m # CONFIG_SND_SOC_NAU8822 is not set CONFIG_SND_SOC_NAU8824=m +CONFIG_SND_SOC_NAU8825=m # CONFIG_SND_SOC_TPA6130A2 is not set # CONFIG_SND_SOC_LPASS_WSA_MACRO is not set # CONFIG_SND_SOC_LPASS_VA_MACRO is not set @@ -5478,13 +5527,14 @@ CONFIG_I2C_HID_CORE=m # # Intel ISH HID support # -# CONFIG_INTEL_ISH_HID is not set +CONFIG_INTEL_ISH_HID=m +CONFIG_INTEL_ISH_FIRMWARE_DOWNLOADER=m # end of Intel ISH HID support # # AMD SFH HID Support # -# CONFIG_AMD_SFH_HID is not set +CONFIG_AMD_SFH_HID=m # end of AMD SFH HID Support CONFIG_USB_OHCI_LITTLE_ENDIAN=y @@ -5689,7 +5739,9 @@ CONFIG_TYPEC=m # CONFIG_TYPEC_TCPM is not set # CONFIG_TYPEC_UCSI is not set # CONFIG_TYPEC_TPS6598X is not set +# CONFIG_TYPEC_ANX7411 is not set # CONFIG_TYPEC_RT1719 is not set +# CONFIG_TYPEC_HD3SS3220 is not set # CONFIG_TYPEC_STUSB160X is not set # CONFIG_TYPEC_WUSB3801 is not set @@ -5699,6 +5751,7 @@ CONFIG_TYPEC=m # CONFIG_TYPEC_MUX_FSA4480 is not set # CONFIG_TYPEC_MUX_GPIO_SBU is not set # CONFIG_TYPEC_MUX_PI3USB30532 is not set +CONFIG_TYPEC_MUX_INTEL_PMC=m # end of USB Type-C Multiplexer/DeMultiplexer Switch support # @@ -5707,7 +5760,8 @@ CONFIG_TYPEC=m # CONFIG_TYPEC_DP_ALTMODE is not set # end of USB Type-C Alternate Mode drivers -# CONFIG_USB_ROLE_SWITCH is not set +CONFIG_USB_ROLE_SWITCH=m +CONFIG_USB_ROLES_INTEL_XHCI=m CONFIG_MMC=y CONFIG_MMC_BLOCK=y CONFIG_MMC_BLOCK_MINORS=32 @@ -5765,7 +5819,7 @@ CONFIG_LEDS_CLASS_FLASH=y # CONFIG_LEDS_DAC124S085 is not set # CONFIG_LEDS_REGULATOR is not set # CONFIG_LEDS_BD2802 is not set -# CONFIG_LEDS_INTEL_SS4200 is not set +CONFIG_LEDS_INTEL_SS4200=m # CONFIG_LEDS_LT3593 is not set # CONFIG_LEDS_TCA6507 is not set # CONFIG_LEDS_TLC591XX is not set @@ -5943,12 +5997,14 @@ CONFIG_DMA_VIRTUAL_CHANNELS=y CONFIG_DMA_ACPI=y # CONFIG_ALTERA_MSGDMA is not set CONFIG_INTEL_IDMA64=m -# CONFIG_INTEL_IDXD is not set +CONFIG_INTEL_IDXD_BUS=m +CONFIG_INTEL_IDXD=m # CONFIG_INTEL_IDXD_COMPAT is not set +CONFIG_INTEL_IDXD_PERFMON=y CONFIG_INTEL_IOATDMA=m # CONFIG_PLX_DMA is not set # CONFIG_XILINX_XDMA is not set -# CONFIG_AMD_PTDMA is not set +CONFIG_AMD_PTDMA=m # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set CONFIG_DW_DMAC_CORE=m @@ -5957,7 +6013,7 @@ CONFIG_DW_DMAC_PCI=m # CONFIG_DW_EDMA is not set CONFIG_HSU_DMA=y # CONFIG_SF_PDMA is not set -# CONFIG_INTEL_LDMA is not set +CONFIG_INTEL_LDMA=y # # DMA Clients @@ -6029,7 +6085,7 @@ CONFIG_RTS5208=y CONFIG_VT6656=m # CONFIG_FB_SM750 is not set CONFIG_STAGING_MEDIA=y -# CONFIG_INTEL_ATOMISP is not set +CONFIG_INTEL_ATOMISP=y # CONFIG_DVB_AV7110 is not set CONFIG_VIDEO_IPU3_IMGU=m # CONFIG_STAGING_MEDIA_DEPRECATED is not set @@ -6062,9 +6118,9 @@ CONFIG_WMI_BMOF=y # CONFIG_ACERHDF is not set # CONFIG_ACER_WIRELESS is not set # CONFIG_ACER_WMI is not set -CONFIG_AMD_PMF=y -CONFIG_AMD_PMC=y -CONFIG_AMD_HSMP=y +CONFIG_AMD_PMF=m +CONFIG_AMD_PMC=m +CONFIG_AMD_HSMP=m # CONFIG_ADV_SWBUTTON is not set # CONFIG_APPLE_GMUX is not set # CONFIG_ASUS_LAPTOP is not set @@ -6082,37 +6138,46 @@ CONFIG_AMD_HSMP=y # CONFIG_SENSORS_HDAPS is not set # CONFIG_THINKPAD_ACPI is not set # CONFIG_THINKPAD_LMI is not set -CONFIG_INTEL_ATOMISP2_PDX86=y -CONFIG_INTEL_ATOMISP2_PM=y # CONFIG_INTEL_IFS is not set -# CONFIG_INTEL_SAR_INT1092 is not set -# CONFIG_INTEL_SKL_INT3472 is not set -# CONFIG_INTEL_PMC_CORE is not set +CONFIG_INTEL_SAR_INT1092=m +CONFIG_INTEL_SKL_INT3472=m +CONFIG_INTEL_PMC_CORE=y 
+CONFIG_INTEL_PMT_CLASS=m +CONFIG_INTEL_PMT_TELEMETRY=m +CONFIG_INTEL_PMT_CRASHLOG=m # # Intel Speed Select Technology interface support # -# CONFIG_INTEL_SPEED_SELECT_INTERFACE is not set +CONFIG_INTEL_SPEED_SELECT_INTERFACE=m # end of Intel Speed Select Technology interface support -# CONFIG_INTEL_WMI_SBL_FW_UPDATE is not set -# CONFIG_INTEL_WMI_THUNDERBOLT is not set +CONFIG_INTEL_TELEMETRY=m +CONFIG_INTEL_WMI=y +CONFIG_INTEL_WMI_SBL_FW_UPDATE=m +CONFIG_INTEL_WMI_THUNDERBOLT=m # # Intel Uncore Frequency Control # -# CONFIG_INTEL_UNCORE_FREQ_CONTROL is not set +CONFIG_INTEL_UNCORE_FREQ_CONTROL=m # end of Intel Uncore Frequency Control -# CONFIG_INTEL_HID_EVENT is not set -# CONFIG_INTEL_VBTN is not set -# CONFIG_INTEL_INT0002_VGPIO is not set -# CONFIG_INTEL_OAKTRAIL is not set -# CONFIG_INTEL_PUNIT_IPC is not set -# CONFIG_INTEL_RST is not set -# CONFIG_INTEL_SMARTCONNECT is not set -# CONFIG_INTEL_TURBO_MAX_3 is not set -# CONFIG_INTEL_VSEC is not set +CONFIG_INTEL_HID_EVENT=m +CONFIG_INTEL_VBTN=m +CONFIG_INTEL_INT0002_VGPIO=m +CONFIG_INTEL_OAKTRAIL=m +CONFIG_INTEL_BXTWC_PMIC_TMU=m +CONFIG_INTEL_CHTDC_TI_PWRBTN=m +CONFIG_INTEL_ISHTP_ECLITE=m +CONFIG_INTEL_MRFLD_PWRBTN=m +CONFIG_INTEL_PUNIT_IPC=m +CONFIG_INTEL_RST=m +CONFIG_INTEL_SDSI=m +CONFIG_INTEL_SMARTCONNECT=m +# CONFIG_INTEL_TPMI is not set +CONFIG_INTEL_TURBO_MAX_3=y +CONFIG_INTEL_VSEC=m # CONFIG_MSI_LAPTOP is not set # CONFIG_MSI_WMI is not set # CONFIG_PCENGINES_APU2 is not set @@ -6132,9 +6197,12 @@ CONFIG_INTEL_ATOMISP2_PM=y # CONFIG_SERIAL_MULTI_INSTANTIATE is not set # CONFIG_MLX_PLATFORM is not set # CONFIG_X86_ANDROID_TABLETS is not set -# CONFIG_INTEL_IPS is not set -# CONFIG_INTEL_SCU_PCI is not set -# CONFIG_INTEL_SCU_PLATFORM is not set +CONFIG_INTEL_IPS=m +CONFIG_INTEL_SCU_IPC=y +CONFIG_INTEL_SCU=y +CONFIG_INTEL_SCU_PCI=y +CONFIG_INTEL_SCU_PLATFORM=m +CONFIG_INTEL_SCU_IPC_UTIL=m # CONFIG_SIEMENS_SIMATIC_IPC is not set # CONFIG_WINMATE_FM07_KEYS is not set # CONFIG_STEAMDECK is not set @@ -6148,6 +6216,7 @@ CONFIG_COMMON_CLK=y # CONFIG_COMMON_CLK_SI5351 is not set # CONFIG_COMMON_CLK_SI544 is not set # CONFIG_COMMON_CLK_CDCE706 is not set +# CONFIG_COMMON_CLK_TPS68470 is not set # CONFIG_COMMON_CLK_CS2000_CP is not set # CONFIG_XILINX_VCU is not set # CONFIG_HWSPINLOCK is not set @@ -6186,7 +6255,7 @@ CONFIG_INTEL_IOMMU=y CONFIG_INTEL_IOMMU_SVM=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_INTEL_IOMMU_FLOPPY_WA=y -# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set +CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON=y CONFIG_INTEL_IOMMU_PERF_EVENTS=y # CONFIG_IOMMUFD is not set CONFIG_IRQ_REMAP=y @@ -6277,7 +6346,7 @@ CONFIG_RESET_CONTROLLER=y # # PHY Subsystem # -# CONFIG_GENERIC_PHY is not set +CONFIG_GENERIC_PHY=y # CONFIG_USB_LGM_PHY is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set @@ -6289,7 +6358,7 @@ CONFIG_RESET_CONTROLLER=y # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set -# CONFIG_PHY_INTEL_LGM_EMMC is not set +CONFIG_PHY_INTEL_LGM_EMMC=m # end of PHY Subsystem CONFIG_POWERCAP=y @@ -6323,7 +6392,13 @@ CONFIG_NVMEM_SYSFS=y # HW tracing support # # CONFIG_STM is not set -# CONFIG_INTEL_TH is not set +CONFIG_INTEL_TH=m +CONFIG_INTEL_TH_PCI=m +CONFIG_INTEL_TH_ACPI=m +CONFIG_INTEL_TH_GTH=m +CONFIG_INTEL_TH_MSU=m +CONFIG_INTEL_TH_PTI=m +# CONFIG_INTEL_TH_DEBUG is not set # end of HW tracing support # CONFIG_FPGA is not set @@ -6523,28 +6598,28 @@ CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y -# CONFIG_NLS_CODEPAGE_737 is not set -# 
CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m # CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_2 is not set @@ -6658,7 +6733,7 @@ CONFIG_CRYPTO_NULL2=y CONFIG_CRYPTO_CRYPTD=y # CONFIG_CRYPTO_AUTHENC is not set # CONFIG_CRYPTO_TEST is not set -CONFIG_CRYPTO_SIMD=y +CONFIG_CRYPTO_SIMD=m # end of Crypto core or helper # @@ -6800,7 +6875,7 @@ CONFIG_CRYPTO_HASH_INFO=y # Accelerated Cryptographic Algorithms for CPU (x86) # CONFIG_CRYPTO_CURVE25519_X86=m -CONFIG_CRYPTO_AES_NI_INTEL=y +CONFIG_CRYPTO_AES_NI_INTEL=m # CONFIG_CRYPTO_BLOWFISH_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set @@ -6830,8 +6905,8 @@ CONFIG_CRYPTO_SHA1_SSSE3=y CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_SHA512_SSSE3=y # CONFIG_CRYPTO_SM3_AVX_X86_64 is not set -# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set -# CONFIG_CRYPTO_CRC32C_INTEL is not set +CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m +CONFIG_CRYPTO_CRC32C_INTEL=m # CONFIG_CRYPTO_CRC32_PCLMUL is not set # end of Accelerated Cryptographic Algorithms for CPU (x86) @@ -6949,7 +7024,9 @@ CONFIG_DMA_OPS=y CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y +CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y CONFIG_SWIOTLB=y +CONFIG_DMA_COHERENT_POOL=y CONFIG_DMA_CMA=y # CONFIG_DMA_PERNUMA_CMA is not set