diff --git a/packages/emulators/libretro/dosbox-pure-lr/patches/01-revert-svga-memory-core-option.patch b/packages/emulators/libretro/dosbox-pure-lr/patches/01-revert-svga-memory-core-option.patch deleted file mode 100644 index da3213f2b..000000000 --- a/packages/emulators/libretro/dosbox-pure-lr/patches/01-revert-svga-memory-core-option.patch +++ /dev/null @@ -1,62 +0,0 @@ -diff --git a/core_options.h b/core_options.h -index a00b36a..7e6b4db 100644 ---- a/core_options.h -+++ b/core_options.h -@@ -409,21 +409,6 @@ static retro_core_option_v2_definition option_defs[] = - }, - "svga_s3" - }, -- { -- "dosbox_pure_svgamem", -- "SVGA Memory (restart required)", NULL, -- "The amount of memory available to the emulated SVGA card.", NULL, -- "Video", -- { -- { "0", "512KB" }, -- { "1", "1MB" }, -- { "2", "2MB (default)" }, -- { "3", "3MB" }, -- { "4", "4MB" }, -- { "8", "8MB (not always recognized)" }, -- }, -- "2" -- }, - { - "dosbox_pure_voodoo", - "3dfx Voodoo Emulation", NULL, -diff --git a/dosbox_pure_libretro.cpp b/dosbox_pure_libretro.cpp -index c51753a..9c4f405 100644 ---- a/dosbox_pure_libretro.cpp -+++ b/dosbox_pure_libretro.cpp -@@ -3556,7 +3556,6 @@ static bool check_variables(bool is_startup = false) - case 'p': dbmachine = "pcjr"; break; - } - Variables::DosBoxSet("dosbox", "machine", dbmachine, false, true); -- Variables::DosBoxSet("dosbox", "vmemsize", retro_get_variable("dosbox_pure_svgamem", "2"), false, true); - if (dbp_reboot_machine) - control->GetSection("dosbox")->GetProp("machine")->OnChangedByConfigProgram(), dbp_reboot_machine = 0; - -@@ -3640,7 +3639,6 @@ static bool check_variables(bool is_startup = false) - } - - Variables::RetroVisibility("dosbox_pure_svga", machine_is_svga); -- Variables::RetroVisibility("dosbox_pure_svgamem", machine_is_svga); - Variables::RetroVisibility("dosbox_pure_voodoo", machine_is_svga); - Variables::RetroVisibility("dosbox_pure_voodoo_perf", machine_is_svga); - if (machine_is_svga) -diff --git a/src/hardware/vga_s3.cpp b/src/hardware/vga_s3.cpp -index 06eafe7..ac82258 100644 ---- a/src/hardware/vga_s3.cpp -+++ b/src/hardware/vga_s3.cpp -@@ -546,11 +546,6 @@ void SVGA_Setup_S3Trio(void) { - } else if (vga.vmemsize < 4096*1024) { - vga.vmemsize = 3072*1024; - vga.s3.reg_36 = 0x5a; // 3mb fast page mode --#ifdef C_DBP_LIBRETRO -- } else if (vga.vmemsize == 8192*1024) { // hack more mem than supported by real Trio64 card -- vga.vmemsize = 8192*1024; -- vga.s3.reg_36 = 0x7a; // 8mb fast page mode --#endif - } else { // Trio64 supported only up to 4M - vga.vmemsize = 4096*1024; - vga.s3.reg_36 = 0x1a; // 4mb fast page mode diff --git a/packages/hardware/quirks/devices/ATARI VCS 800 Onyx/001-deviceconfig b/packages/hardware/quirks/devices/ATARI VCS 800 Onyx/001-deviceconfig index 522b772b7..21e5915a3 100644 --- a/packages/hardware/quirks/devices/ATARI VCS 800 Onyx/001-deviceconfig +++ b/packages/hardware/quirks/devices/ATARI VCS 800 Onyx/001-deviceconfig @@ -7,7 +7,8 @@ DEVICE_VOLUMECTL=true DEVICE_AUDIO_MIXER="Master" DEVICE_PLAYBACK_PATH_SPK="SPK" DEVICE_PLAYBACK_PATH_HP="HP" -DEVICE_VOL_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYA_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYB_MODIFIER="ABS_Z" UI_SERVICE="weston.service" DEVICE_BRIGHTNESS="hardware" diff --git a/packages/hardware/quirks/devices/AYANEO AIR Plus/001-deviceconfig b/packages/hardware/quirks/devices/AYANEO AIR Plus/001-deviceconfig index 522b772b7..21e5915a3 100644 --- a/packages/hardware/quirks/devices/AYANEO AIR Plus/001-deviceconfig +++ b/packages/hardware/quirks/devices/AYANEO AIR Plus/001-deviceconfig @@ -7,7 +7,8 @@ DEVICE_VOLUMECTL=true DEVICE_AUDIO_MIXER="Master" DEVICE_PLAYBACK_PATH_SPK="SPK" DEVICE_PLAYBACK_PATH_HP="HP" -DEVICE_VOL_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYA_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYB_MODIFIER="ABS_Z" UI_SERVICE="weston.service" DEVICE_BRIGHTNESS="hardware" diff --git a/packages/hardware/quirks/devices/AYANEO AIR/001-deviceconfig b/packages/hardware/quirks/devices/AYANEO AIR/001-deviceconfig index 522b772b7..21e5915a3 100644 --- a/packages/hardware/quirks/devices/AYANEO AIR/001-deviceconfig +++ b/packages/hardware/quirks/devices/AYANEO AIR/001-deviceconfig @@ -7,7 +7,8 @@ DEVICE_VOLUMECTL=true DEVICE_AUDIO_MIXER="Master" DEVICE_PLAYBACK_PATH_SPK="SPK" DEVICE_PLAYBACK_PATH_HP="HP" -DEVICE_VOL_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYA_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYB_MODIFIER="ABS_Z" UI_SERVICE="weston.service" DEVICE_BRIGHTNESS="hardware" diff --git a/packages/hardware/quirks/devices/Anbernic RG353P/001-deviceconfig b/packages/hardware/quirks/devices/Anbernic RG353P/001-deviceconfig index 3bf2a1056..db49c550b 100755 --- a/packages/hardware/quirks/devices/Anbernic RG353P/001-deviceconfig +++ b/packages/hardware/quirks/devices/Anbernic RG353P/001-deviceconfig @@ -34,5 +34,6 @@ DMC_FREQ="/sys/devices/platform/dmc/devfreq/dmc" # Volume Keys DEVICE_KEY_VOLUMEDOWN=114 DEVICE_KEY_VOLUMEUP=115 -DEVICE_VOL_MODIFIER="BTN_MODE" +DEVICE_FUNC_KEYA_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYB_MODIFIER="BTN_TL2" EOF diff --git a/packages/hardware/quirks/devices/Anbernic RG353V/001-deviceconfig b/packages/hardware/quirks/devices/Anbernic RG353V/001-deviceconfig index 21b37bc30..fff8f8545 100755 --- a/packages/hardware/quirks/devices/Anbernic RG353V/001-deviceconfig +++ b/packages/hardware/quirks/devices/Anbernic RG353V/001-deviceconfig @@ -35,5 +35,6 @@ DMC_FREQ="/sys/devices/platform/dmc/devfreq/dmc" # Volume Keys #DEVICE_KEY_VOLUMEDOWN= #DEVICE_KEY_VOLUMEUP= -DEVICE_VOL_MODIFIER="BTN_MODE" +DEVICE_FUNC_KEYA_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYB_MODIFIER="BTN_TL2" EOF diff --git a/packages/hardware/quirks/devices/Anbernic RG503/001-deviceconfig b/packages/hardware/quirks/devices/Anbernic RG503/001-deviceconfig index 8c3094d5a..4b63f34c7 100755 --- a/packages/hardware/quirks/devices/Anbernic RG503/001-deviceconfig +++ b/packages/hardware/quirks/devices/Anbernic RG503/001-deviceconfig @@ -36,5 +36,6 @@ DMC_FREQ="/sys/devices/platform/dmc/devfreq/dmc" # Volume Keys DEVICE_KEY_VOLUMEDOWN=114 DEVICE_KEY_VOLUMEUP=115 -DEVICE_VOL_MODIFIER="BTN_MODE" +DEVICE_FUNC_KEYA_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYB_MODIFIER="BTN_TL2" EOF diff --git a/packages/hardware/quirks/devices/GPD G1619-04/001-deviceconfig b/packages/hardware/quirks/devices/GPD G1619-04/001-deviceconfig index 522b772b7..06b034fb7 100644 --- a/packages/hardware/quirks/devices/GPD G1619-04/001-deviceconfig +++ b/packages/hardware/quirks/devices/GPD G1619-04/001-deviceconfig @@ -7,7 +7,8 @@ DEVICE_VOLUMECTL=true DEVICE_AUDIO_MIXER="Master" DEVICE_PLAYBACK_PATH_SPK="SPK" DEVICE_PLAYBACK_PATH_HP="HP" -DEVICE_VOL_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYA_MODIFIER=="ABS_RZ" +DEVICE_FUNC_KEYB_MODIFIER=="ABS_Z" UI_SERVICE="weston.service" DEVICE_BRIGHTNESS="hardware" diff --git a/packages/hardware/quirks/devices/Hardkernel ODROID-GO-Ultra/001-deviceconfig b/packages/hardware/quirks/devices/Hardkernel ODROID-GO-Ultra/001-deviceconfig index 2b810bbab..db1dce170 100755 --- a/packages/hardware/quirks/devices/Hardkernel ODROID-GO-Ultra/001-deviceconfig +++ b/packages/hardware/quirks/devices/Hardkernel ODROID-GO-Ultra/001-deviceconfig @@ -27,5 +27,6 @@ FAST_CORES="taskset -c 2-5" # Volume Keys DEVICE_KEY_VOLUMEDOWN=114 DEVICE_KEY_VOLUMEUP=115 -DEVICE_VOL_MODIFIER="BTN_MODE" +DEVICE_FUNC_KEYA_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYB_MODIFIER="BTN_TL2" EOF diff --git a/packages/hardware/quirks/devices/LENOVO 81TC/001-deviceconfig b/packages/hardware/quirks/devices/LENOVO 81TC/001-deviceconfig index 522b772b7..06b034fb7 100644 --- a/packages/hardware/quirks/devices/LENOVO 81TC/001-deviceconfig +++ b/packages/hardware/quirks/devices/LENOVO 81TC/001-deviceconfig @@ -7,7 +7,8 @@ DEVICE_VOLUMECTL=true DEVICE_AUDIO_MIXER="Master" DEVICE_PLAYBACK_PATH_SPK="SPK" DEVICE_PLAYBACK_PATH_HP="HP" -DEVICE_VOL_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYA_MODIFIER=="ABS_RZ" +DEVICE_FUNC_KEYB_MODIFIER=="ABS_Z" UI_SERVICE="weston.service" DEVICE_BRIGHTNESS="hardware" diff --git a/packages/hardware/quirks/devices/Powkiddy RK2023/001-deviceconfig b/packages/hardware/quirks/devices/Powkiddy RK2023/001-deviceconfig index fdba17456..db49c550b 100755 --- a/packages/hardware/quirks/devices/Powkiddy RK2023/001-deviceconfig +++ b/packages/hardware/quirks/devices/Powkiddy RK2023/001-deviceconfig @@ -34,5 +34,6 @@ DMC_FREQ="/sys/devices/platform/dmc/devfreq/dmc" # Volume Keys DEVICE_KEY_VOLUMEDOWN=114 DEVICE_KEY_VOLUMEUP=115 -DEVICE_VOL_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYA_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYB_MODIFIER="BTN_TL2" EOF diff --git a/packages/hardware/quirks/devices/Powkiddy x55/001-deviceconfig b/packages/hardware/quirks/devices/Powkiddy x55/001-deviceconfig index d97de4ef8..e9517f34e 100755 --- a/packages/hardware/quirks/devices/Powkiddy x55/001-deviceconfig +++ b/packages/hardware/quirks/devices/Powkiddy x55/001-deviceconfig @@ -34,5 +34,6 @@ DMC_FREQ="/sys/devices/platform/dmc/devfreq/dmc" # Volume Keys DEVICE_KEY_VOLUMEDOWN=114 DEVICE_KEY_VOLUMEUP=115 -DEVICE_VOL_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYA_MODIFIER="BTN_TR2" +DEVICE_FUNC_KEYB_MODIFIER="BTN_TL2" EOF diff --git a/packages/hardware/quirks/devices/Valve Jupiter/001-deviceconfig b/packages/hardware/quirks/devices/Valve Jupiter/001-deviceconfig index 522b772b7..06b034fb7 100644 --- a/packages/hardware/quirks/devices/Valve Jupiter/001-deviceconfig +++ b/packages/hardware/quirks/devices/Valve Jupiter/001-deviceconfig @@ -7,7 +7,8 @@ DEVICE_VOLUMECTL=true DEVICE_AUDIO_MIXER="Master" DEVICE_PLAYBACK_PATH_SPK="SPK" DEVICE_PLAYBACK_PATH_HP="HP" -DEVICE_VOL_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYA_MODIFIER=="ABS_RZ" +DEVICE_FUNC_KEYB_MODIFIER=="ABS_Z" UI_SERVICE="weston.service" DEVICE_BRIGHTNESS="hardware" diff --git a/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch b/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch deleted file mode 100644 index 1228bf9fd..000000000 --- a/packages/kernel/linux/patches/AMD64/001-patch-6.3.3-rt15.patch +++ /dev/null @@ -1,7875 +0,0 @@ -diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig -index e24a9820e12fa..ba2cf1cec3d9c 100644 ---- a/arch/arm/Kconfig -+++ b/arch/arm/Kconfig -@@ -34,6 +34,7 @@ config ARM - select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 - select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE -+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF - select ARCH_USE_MEMTEST -@@ -72,7 +73,7 @@ config ARM - select HARDIRQS_SW_RESEND - select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT - select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 -- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU -+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT - select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL - select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU - select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL -@@ -117,6 +118,8 @@ config ARM - select HAVE_PERF_EVENTS - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP -+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM -+ select HAVE_PREEMPT_LAZY - select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RSEQ -diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h -index 7f092cb55a417..ffcbf8ebed4bf 100644 ---- a/arch/arm/include/asm/thread_info.h -+++ b/arch/arm/include/asm/thread_info.h -@@ -62,6 +62,7 @@ struct cpu_context_save { - struct thread_info { - unsigned long flags; /* low level flags */ - int preempt_count; /* 0 => preemptable, <0 => bug */ -+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ - __u32 cpu; /* cpu */ - __u32 cpu_domain; /* cpu domain */ - struct cpu_context_save cpu_context; /* cpu context */ -@@ -129,6 +130,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, - #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ - #define TIF_UPROBE 3 /* breakpointed or singlestepping */ - #define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ -+#define TIF_NEED_RESCHED_LAZY 5 - - #define TIF_USING_IWMMXT 17 - #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, - #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) - #define _TIF_SECCOMP (1 << TIF_SECCOMP) - #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) -+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) - - /* Checks for any syscall work in entry-common.S */ -@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, - /* - * Change these and you break ASM code in entry-common.S - */ --#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ -+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ -+ _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NOTIFY_SIGNAL) - -diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c -index 38121c59cbc26..c6fafd53d5bea 100644 ---- a/arch/arm/kernel/asm-offsets.c -+++ b/arch/arm/kernel/asm-offsets.c -@@ -43,6 +43,7 @@ int main(void) - BLANK(); - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); -+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); - DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); -diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S -index c39303e5c2347..cfb4660e9feab 100644 ---- a/arch/arm/kernel/entry-armv.S -+++ b/arch/arm/kernel/entry-armv.S -@@ -222,11 +222,18 @@ ENDPROC(__dabt_svc) - - #ifdef CONFIG_PREEMPTION - ldr r8, [tsk, #TI_PREEMPT] @ get preempt count -- ldr r0, [tsk, #TI_FLAGS] @ get flags - teq r8, #0 @ if preempt count != 0 -+ bne 1f @ return from exeption -+ ldr r0, [tsk, #TI_FLAGS] @ get flags -+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set -+ blne svc_preempt @ preempt! -+ -+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count -+ teq r8, #0 @ if preempt lazy count != 0 - movne r0, #0 @ force flags to 0 -- tst r0, #_TIF_NEED_RESCHED -+ tst r0, #_TIF_NEED_RESCHED_LAZY - blne svc_preempt -+1: - #endif - - svc_exit r5, irq = 1 @ return from exception -@@ -241,8 +248,14 @@ ENDPROC(__irq_svc) - 1: bl preempt_schedule_irq @ irq en/disable is done inside - ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED -+ bne 1b -+ tst r0, #_TIF_NEED_RESCHED_LAZY - reteq r8 @ go again -- b 1b -+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count -+ teq r0, #0 @ if preempt lazy count != 0 -+ beq 1b -+ ret r8 @ go again -+ - #endif - - __und_fault: -diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c -index e07f359254c3c..b50a3248e79f3 100644 ---- a/arch/arm/kernel/signal.c -+++ b/arch/arm/kernel/signal.c -@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) - */ - trace_hardirqs_off(); - do { -- if (likely(thread_flags & _TIF_NEED_RESCHED)) { -+ if (likely(thread_flags & (_TIF_NEED_RESCHED | -+ _TIF_NEED_RESCHED_LAZY))) { - schedule(); - } else { - if (unlikely(!user_mode(regs))) -diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c -index 2418f1efabd87..79ab2138ab0a8 100644 ---- a/arch/arm/mm/fault.c -+++ b/arch/arm/mm/fault.c -@@ -436,6 +436,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, - if (addr < TASK_SIZE) - return do_page_fault(addr, fsr, regs); - -+ if (interrupts_enabled(regs)) -+ local_irq_enable(); -+ - if (user_mode(regs)) - goto bad_area; - -@@ -506,6 +509,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, - static int - do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) - { -+ if (interrupts_enabled(regs)) -+ local_irq_enable(); -+ - do_bad_area(addr, fsr, regs); - return 0; - } -diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 1023e896d46b8..29fcf54cf68ad 100644 ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -95,6 +95,7 @@ config ARM64 - select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 - select ARCH_SUPPORTS_NUMA_BALANCING - select ARCH_SUPPORTS_PAGE_TABLE_CHECK -+ select ARCH_SUPPORTS_RT - select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT - select ARCH_WANT_DEFAULT_BPF_JIT - select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -@@ -207,6 +208,7 @@ config ARM64 - select HAVE_PERF_USER_STACK_DUMP - select HAVE_PREEMPT_DYNAMIC_KEY - select HAVE_REGS_AND_STACK_ACCESS_API -+ select HAVE_PREEMPT_LAZY - select HAVE_POSIX_CPU_TIMERS_TASK_WORK - select HAVE_FUNCTION_ARG_ACCESS_API - select MMU_GATHER_RCU_TABLE_FREE -diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h -index 0159b625cc7f0..a5486918e5eeb 100644 ---- a/arch/arm64/include/asm/preempt.h -+++ b/arch/arm64/include/asm/preempt.h -@@ -71,13 +71,36 @@ static inline bool __preempt_count_dec_and_test(void) - * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE - * pair. - */ -- return !pc || !READ_ONCE(ti->preempt_count); -+ if (!pc || !READ_ONCE(ti->preempt_count)) -+ return true; -+#ifdef CONFIG_PREEMPT_LAZY -+ if ((pc & ~PREEMPT_NEED_RESCHED)) -+ return false; -+ if (current_thread_info()->preempt_lazy_count) -+ return false; -+ return test_thread_flag(TIF_NEED_RESCHED_LAZY); -+#else -+ return false; -+#endif - } - - static inline bool should_resched(int preempt_offset) - { -+#ifdef CONFIG_PREEMPT_LAZY -+ u64 pc = READ_ONCE(current_thread_info()->preempt_count); -+ if (pc == preempt_offset) -+ return true; -+ -+ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset) -+ return false; -+ -+ if (current_thread_info()->preempt_lazy_count) -+ return false; -+ return test_thread_flag(TIF_NEED_RESCHED_LAZY); -+#else - u64 pc = READ_ONCE(current_thread_info()->preempt_count); - return pc == preempt_offset; -+#endif - } - - #ifdef CONFIG_PREEMPTION -diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h -index 848739c15de82..4b7148fd5551f 100644 ---- a/arch/arm64/include/asm/thread_info.h -+++ b/arch/arm64/include/asm/thread_info.h -@@ -26,6 +26,7 @@ struct thread_info { - #ifdef CONFIG_ARM64_SW_TTBR0_PAN - u64 ttbr0; /* saved TTBR0_EL1 */ - #endif -+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ - union { - u64 preempt_count; /* 0 => preemptible, <0 => bug */ - struct { -@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst, - #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ - #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ - #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ -+#define TIF_NEED_RESCHED_LAZY 7 - #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ - #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ - #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ -@@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_struct *dst, - #define _TIF_SVE (1 << TIF_SVE) - #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) - #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) -+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - --#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ -+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ -+ _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ - _TIF_NOTIFY_SIGNAL) -@@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_struct *dst, - _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ - _TIF_SYSCALL_EMU) - -+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) -+ - #ifdef CONFIG_SHADOW_CALL_STACK - #define INIT_SCS \ - .scs_base = init_shadow_call_stack, \ -diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c -index ae345b06e9f7e..da7e10ff11a81 100644 ---- a/arch/arm64/kernel/asm-offsets.c -+++ b/arch/arm64/kernel/asm-offsets.c -@@ -33,6 +33,7 @@ int main(void) - DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); - DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); - DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); -+ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN - DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); - #endif -diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c -index 06a02707f4882..e7f5284d5daf1 100644 ---- a/arch/arm64/kernel/signal.c -+++ b/arch/arm64/kernel/signal.c -@@ -1278,7 +1278,7 @@ static void do_signal(struct pt_regs *regs) - void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) - { - do { -- if (thread_flags & _TIF_NEED_RESCHED) { -+ if (thread_flags & _TIF_NEED_RESCHED_MASK) { - /* Unmask Debug and SError for the next task */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index a6c4407d3ec83..25f98b854d32f 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -154,6 +154,7 @@ config PPC - select ARCH_STACKWALK - select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x -+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select ARCH_USE_MEMTEST -@@ -247,8 +248,10 @@ config PPC - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP -+ select HAVE_PREEMPT_LAZY - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RELIABLE_STACKTRACE -+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM - select HAVE_RSEQ - select HAVE_SETUP_PER_CPU_AREA if PPC64 - select HAVE_SOFTIRQ_ON_OWN_STACK -diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h -index 283c346478565..4727f40052ddd 100644 ---- a/arch/powerpc/include/asm/stackprotector.h -+++ b/arch/powerpc/include/asm/stackprotector.h -@@ -19,8 +19,13 @@ - */ - static __always_inline void boot_init_stack_canary(void) - { -- unsigned long canary = get_random_canary(); -+ unsigned long canary; - -+#ifndef CONFIG_PREEMPT_RT -+ canary = get_random_canary(); -+#else -+ canary = ((unsigned long)&canary) & CANARY_MASK; -+#endif - current->stack_canary = canary; - #ifdef CONFIG_PPC64 - get_paca()->canary = canary; -diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h -index af58f1ed3952e..520864de8bb27 100644 ---- a/arch/powerpc/include/asm/thread_info.h -+++ b/arch/powerpc/include/asm/thread_info.h -@@ -53,6 +53,8 @@ - struct thread_info { - int preempt_count; /* 0 => preemptable, - <0 => BUG */ -+ int preempt_lazy_count; /* 0 => preemptable, -+ <0 => BUG */ - #ifdef CONFIG_SMP - unsigned int cpu; - #endif -@@ -77,6 +79,7 @@ struct thread_info { - #define INIT_THREAD_INFO(tsk) \ - { \ - .preempt_count = INIT_PREEMPT_COUNT, \ -+ .preempt_lazy_count = 0, \ - .flags = 0, \ - } - -@@ -102,6 +105,7 @@ void arch_setup_new_exec(void); - #define TIF_PATCH_PENDING 6 /* pending live patching update */ - #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ - #define TIF_SINGLESTEP 8 /* singlestepping active */ -+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ - #define TIF_SECCOMP 10 /* secure computing */ - #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ - #define TIF_NOERROR 12 /* Force successful syscall return */ -@@ -117,6 +121,7 @@ void arch_setup_new_exec(void); - #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ - #define TIF_32BIT 20 /* 32 bit binary */ - -+ - /* as above, but as bit values */ - #define _TIF_SYSCALL_TRACE (1<msr & MSR_EE)); - again: -- if (IS_ENABLED(CONFIG_PREEMPT)) { -+ if (IS_ENABLED(CONFIG_PREEMPTION)) { - /* Return to preemptible kernel context */ - if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { - if (preempt_count() == 0) - preempt_schedule_irq(); -+ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) { -+ if ((preempt_count() == 0) && -+ (current_thread_info()->preempt_lazy_count == 0)) -+ preempt_schedule_irq(); - } - } - -diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c -index 9bdd79aa51cfc..038f8355b29ca 100644 ---- a/arch/powerpc/kernel/traps.c -+++ b/arch/powerpc/kernel/traps.c -@@ -261,12 +261,17 @@ static char *get_mmu_str(void) - - static int __die(const char *str, struct pt_regs *regs, long err) - { -+ const char *pr = ""; -+ - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - -+ if (IS_ENABLED(CONFIG_PREEMPTION)) -+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; -+ - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", - IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", - PAGE_SIZE / 1024, get_mmu_str(), -- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", -+ pr, - IS_ENABLED(CONFIG_SMP) ? " SMP" : "", - IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", - debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", -diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig -index a9f57dad6d916..a0b528d4bb7cd 100644 ---- a/arch/powerpc/kvm/Kconfig -+++ b/arch/powerpc/kvm/Kconfig -@@ -225,6 +225,7 @@ config KVM_E500MC - config KVM_MPIC - bool "KVM in-kernel MPIC emulation" - depends on KVM && PPC_E500 -+ depends on !PREEMPT_RT - select HAVE_KVM_IRQCHIP - select HAVE_KVM_IRQFD - select HAVE_KVM_IRQ_ROUTING -diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c -index 9d229ef7f86ef..ada817c49b722 100644 ---- a/arch/powerpc/perf/imc-pmu.c -+++ b/arch/powerpc/perf/imc-pmu.c -@@ -51,7 +51,7 @@ static int trace_imc_mem_size; - * core and trace-imc - */ - static struct imc_pmu_ref imc_global_refc = { -- .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), -+ .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock), - .id = 0, - .refc = 0, - }; -diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig -index 21b22bf16ce66..b506377a16a74 100644 ---- a/arch/powerpc/platforms/pseries/Kconfig -+++ b/arch/powerpc/platforms/pseries/Kconfig -@@ -2,6 +2,7 @@ - config PPC_PSERIES - depends on PPC64 && PPC_BOOK3S - bool "IBM pSeries & new (POWER5-based) iSeries" -+ select GENERIC_ALLOCATOR - select HAVE_PCSPKR_PLATFORM - select MPIC - select OF_DYNAMIC -diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c -index c74b71d4733d4..64ba14baabd30 100644 ---- a/arch/powerpc/platforms/pseries/iommu.c -+++ b/arch/powerpc/platforms/pseries/iommu.c -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, - return ret; - } - --static DEFINE_PER_CPU(__be64 *, tce_page); -+struct tce_page { -+ __be64 * page; -+ local_lock_t lock; -+}; -+static DEFINE_PER_CPU(struct tce_page, tce_page) = { -+ .lock = INIT_LOCAL_LOCK(lock), -+}; - - static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - long npages, unsigned long uaddr, -@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - direction, attrs); - } - -- local_irq_save(flags); /* to protect tcep and the page behind it */ -+ /* to protect tcep and the page behind it */ -+ local_lock_irqsave(&tce_page.lock, flags); - -- tcep = __this_cpu_read(tce_page); -+ tcep = __this_cpu_read(tce_page.page); - - /* This is safe to do since interrupts are off when we're called - * from iommu_alloc{,_sg}() -@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - tcep = (__be64 *)__get_free_page(GFP_ATOMIC); - /* If allocation fails, fall back to the loop implementation */ - if (!tcep) { -- local_irq_restore(flags); -+ local_unlock_irqrestore(&tce_page.lock, flags); - return tce_build_pSeriesLP(tbl->it_index, tcenum, - tceshift, - npages, uaddr, direction, attrs); - } -- __this_cpu_write(tce_page, tcep); -+ __this_cpu_write(tce_page.page, tcep); - } - - rpn = __pa(uaddr) >> tceshift; -@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, - tcenum += limit; - } while (npages > 0 && !rc); - -- local_irq_restore(flags); -+ local_unlock_irqrestore(&tce_page.lock, flags); - - if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { - ret = (int)rc; -@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, - DMA_BIDIRECTIONAL, 0); - } - -- local_irq_disable(); /* to protect tcep and the page behind it */ -- tcep = __this_cpu_read(tce_page); -+ /* to protect tcep and the page behind it */ -+ local_lock_irq(&tce_page.lock); -+ tcep = __this_cpu_read(tce_page.page); - - if (!tcep) { - tcep = (__be64 *)__get_free_page(GFP_ATOMIC); - if (!tcep) { -- local_irq_enable(); -+ local_unlock_irq(&tce_page.lock); - return -ENOMEM; - } -- __this_cpu_write(tce_page, tcep); -+ __this_cpu_write(tce_page.page, tcep); - } - - proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; -@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, - - /* error cleanup: caller will clear whole range */ - -- local_irq_enable(); -+ local_unlock_irq(&tce_page.lock); - return rc; - } - -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index a825bf031f495..dcbf3c08926b1 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -114,6 +114,7 @@ config X86 - select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG - select ARCH_SUPPORTS_LTO_CLANG - select ARCH_SUPPORTS_LTO_CLANG_THIN -+ select ARCH_SUPPORTS_RT - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_MEMTEST - select ARCH_USE_QUEUED_RWLOCKS -@@ -252,6 +253,7 @@ config X86 - select HAVE_PCI - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP -+ select HAVE_PREEMPT_LAZY - select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT - select MMU_GATHER_MERGE_VMAS - select HAVE_POSIX_CPU_TIMERS_TASK_WORK -diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h -index 2d13f25b1bd8f..5b096893f6a21 100644 ---- a/arch/x86/include/asm/preempt.h -+++ b/arch/x86/include/asm/preempt.h -@@ -90,18 +90,49 @@ static __always_inline void __preempt_count_sub(int val) - * a decrement which hits zero means we have no preempt_count and should - * reschedule. - */ --static __always_inline bool __preempt_count_dec_and_test(void) -+static __always_inline bool ____preempt_count_dec_and_test(void) - { - return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, - __percpu_arg([var])); - } - -+static __always_inline bool __preempt_count_dec_and_test(void) -+{ -+ if (____preempt_count_dec_and_test()) -+ return true; -+#ifdef CONFIG_PREEMPT_LAZY -+ if (preempt_count()) -+ return false; -+ if (current_thread_info()->preempt_lazy_count) -+ return false; -+ return test_thread_flag(TIF_NEED_RESCHED_LAZY); -+#else -+ return false; -+#endif -+} -+ - /* - * Returns true when we need to resched and can (barring IRQ state). - */ - static __always_inline bool should_resched(int preempt_offset) - { -+#ifdef CONFIG_PREEMPT_LAZY -+ u32 tmp; -+ tmp = raw_cpu_read_4(pcpu_hot.preempt_count); -+ if (tmp == preempt_offset) -+ return true; -+ -+ /* preempt count == 0 ? */ -+ tmp &= ~PREEMPT_NEED_RESCHED; -+ if (tmp != preempt_offset) -+ return false; -+ /* XXX PREEMPT_LOCK_OFFSET */ -+ if (current_thread_info()->preempt_lazy_count) -+ return false; -+ return test_thread_flag(TIF_NEED_RESCHED_LAZY); -+#else - return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); -+#endif - } - - #ifdef CONFIG_PREEMPTION -diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index f1cccba52eb97..c8697ca0378f4 100644 ---- a/arch/x86/include/asm/thread_info.h -+++ b/arch/x86/include/asm/thread_info.h -@@ -57,6 +57,8 @@ struct thread_info { - unsigned long flags; /* low level flags */ - unsigned long syscall_work; /* SYSCALL_WORK_ flags */ - u32 status; /* thread synchronous flags */ -+ int preempt_lazy_count; /* 0 => lazy preemptable -+ <0 => BUG */ - #ifdef CONFIG_SMP - u32 cpu; /* current CPU */ - #endif -@@ -65,6 +67,7 @@ struct thread_info { - #define INIT_THREAD_INFO(tsk) \ - { \ - .flags = 0, \ -+ .preempt_lazy_count = 0, \ - } - - #else /* !__ASSEMBLY__ */ -@@ -92,6 +95,7 @@ struct thread_info { - #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ - #define TIF_NOTSC 16 /* TSC is not accessible in userland */ - #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ -+#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ - #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ - #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ - #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -@@ -115,6 +119,7 @@ struct thread_info { - #define _TIF_NOCPUID (1 << TIF_NOCPUID) - #define _TIF_NOTSC (1 << TIF_NOTSC) - #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) -+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) - #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) - #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) -diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c -index aa490da3cef23..d73b6d32bd827 100644 ---- a/drivers/block/zram/zram_drv.c -+++ b/drivers/block/zram/zram_drv.c -@@ -57,6 +57,40 @@ static void zram_free_page(struct zram *zram, size_t index); - static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio); - -+#ifdef CONFIG_PREEMPT_RT -+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) -+{ -+ size_t index; -+ -+ for (index = 0; index < num_pages; index++) -+ spin_lock_init(&zram->table[index].lock); -+} -+ -+static int zram_slot_trylock(struct zram *zram, u32 index) -+{ -+ int ret; -+ -+ ret = spin_trylock(&zram->table[index].lock); -+ if (ret) -+ __set_bit(ZRAM_LOCK, &zram->table[index].flags); -+ return ret; -+} -+ -+static void zram_slot_lock(struct zram *zram, u32 index) -+{ -+ spin_lock(&zram->table[index].lock); -+ __set_bit(ZRAM_LOCK, &zram->table[index].flags); -+} -+ -+static void zram_slot_unlock(struct zram *zram, u32 index) -+{ -+ __clear_bit(ZRAM_LOCK, &zram->table[index].flags); -+ spin_unlock(&zram->table[index].lock); -+} -+ -+#else -+ -+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } - - static int zram_slot_trylock(struct zram *zram, u32 index) - { -@@ -72,6 +106,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) - { - bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); - } -+#endif - - static inline bool init_done(struct zram *zram) - { -@@ -1311,6 +1346,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) - - if (!huge_class_size) - huge_class_size = zs_huge_class_size(zram->mem_pool); -+ zram_meta_init_table_locks(zram, num_pages); - return true; - } - -diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h -index c5254626f051f..a6b24dfec95fb 100644 ---- a/drivers/block/zram/zram_drv.h -+++ b/drivers/block/zram/zram_drv.h -@@ -69,6 +69,9 @@ struct zram_table_entry { - unsigned long element; - }; - unsigned long flags; -+#ifdef CONFIG_PREEMPT_RT -+ spinlock_t lock; -+#endif - #ifdef CONFIG_ZRAM_MEMORY_TRACKING - ktime_t ac_time; - #endif -diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c -index ed5dabd3c72d6..450d7985ff346 100644 ---- a/drivers/char/tpm/tpm_tis.c -+++ b/drivers/char/tpm/tpm_tis.c -@@ -50,6 +50,45 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da - return container_of(data, struct tpm_tis_tcg_phy, priv); - } - -+#ifdef CONFIG_PREEMPT_RT -+/* -+ * Flush previous write operations with a dummy read operation to the -+ * TPM MMIO base address. -+ */ -+static inline void tpm_tis_flush(void __iomem *iobase) -+{ -+ ioread8(iobase + TPM_ACCESS(0)); -+} -+#else -+#define tpm_tis_flush(iobase) do { } while (0) -+#endif -+ -+/* -+ * Write a byte word to the TPM MMIO address, and flush the write queue. -+ * The flush ensures that the data is sent immediately over the bus and not -+ * aggregated with further requests and transferred later in a batch. The large -+ * write requests can lead to unwanted latency spikes by blocking the CPU until -+ * the complete batch has been transferred. -+ */ -+static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) -+{ -+ iowrite8(b, iobase + addr); -+ tpm_tis_flush(iobase); -+} -+ -+/* -+ * Write a 32-bit word to the TPM MMIO address, and flush the write queue. -+ * The flush ensures that the data is sent immediately over the bus and not -+ * aggregated with further requests and transferred later in a batch. The large -+ * write requests can lead to unwanted latency spikes by blocking the CPU until -+ * the complete batch has been transferred. -+ */ -+static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) -+{ -+ iowrite32(b, iobase + addr); -+ tpm_tis_flush(iobase); -+} -+ - static int interrupts = -1; - module_param(interrupts, int, 0444); - MODULE_PARM_DESC(interrupts, "Enable interrupts"); -@@ -186,12 +225,12 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, - switch (io_mode) { - case TPM_TIS_PHYS_8: - while (len--) -- iowrite8(*value++, phy->iobase + addr); -+ tpm_tis_iowrite8(*value++, phy->iobase, addr); - break; - case TPM_TIS_PHYS_16: - return -EINVAL; - case TPM_TIS_PHYS_32: -- iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr); -+ tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr); - break; - } - -diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig -index 98f4e44976e09..d4dba1f89fde5 100644 ---- a/drivers/gpu/drm/i915/Kconfig -+++ b/drivers/gpu/drm/i915/Kconfig -@@ -3,7 +3,6 @@ config DRM_I915 - tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" - depends on DRM - depends on X86 && PCI -- depends on !PREEMPT_RT - select INTEL_GTT if X86 - select INTERVAL_TREE - # we need shmfs for the swappable backing store, and in particular -diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c -index d5b5d40ed817f..710e51d2377fe 100644 ---- a/drivers/gpu/drm/i915/display/intel_crtc.c -+++ b/drivers/gpu/drm/i915/display/intel_crtc.c -@@ -520,7 +520,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) - */ - intel_psr_wait_for_idle_locked(new_crtc_state); - -- local_irq_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_disable(); - - crtc->debug.min_vbl = min; - crtc->debug.max_vbl = max; -@@ -545,11 +546,13 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) - break; - } - -- local_irq_enable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_enable(); - - timeout = schedule_timeout(timeout); - -- local_irq_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_disable(); - } - - finish_wait(wq, &wait); -@@ -582,7 +585,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) - return; - - irq_disable: -- local_irq_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_disable(); - } - - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) -@@ -691,7 +695,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) - if (new_crtc_state->seamless_m_n && intel_crtc_needs_fastset(new_crtc_state)) - intel_crtc_update_active_timings(new_crtc_state); - -- local_irq_enable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_irq_enable(); - - if (intel_vgpu_active(dev_priv)) - return; -diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c -index 4c83e2320bcac..2dd4ac8b30266 100644 ---- a/drivers/gpu/drm/i915/display/intel_vblank.c -+++ b/drivers/gpu/drm/i915/display/intel_vblank.c -@@ -293,7 +293,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, - */ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - -- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_disable(); - - /* Get optional system timestamp before query. */ - if (stime) -@@ -358,7 +359,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, - if (etime) - *etime = ktime_get(); - -- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_enable(); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); - -diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c -index ecc990ec1b952..8d04b10681f0d 100644 ---- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c -+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c -@@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) - /* Kick the work once more to drain the signalers, and disarm the irq */ - irq_work_sync(&b->irq_work); - while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { -- local_irq_disable(); -- signal_irq_work(&b->irq_work); -- local_irq_enable(); -+ irq_work_queue(&b->irq_work); - cond_resched(); -+ irq_work_sync(&b->irq_work); - } - } - -diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c -index 750326434677f..a2658a8ff7353 100644 ---- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c -+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c -@@ -1303,7 +1303,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - * and context switches) submission. - */ - -- spin_lock(&sched_engine->lock); -+ spin_lock_irq(&sched_engine->lock); - - /* - * If the queue is higher priority than the last -@@ -1403,7 +1403,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - * Even if ELSP[1] is occupied and not worthy - * of timeslices, our queue might be. - */ -- spin_unlock(&sched_engine->lock); -+ spin_unlock_irq(&sched_engine->lock); - return; - } - } -@@ -1429,7 +1429,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - - if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.sched_engine->lock); -- spin_unlock(&engine->sched_engine->lock); -+ spin_unlock_irq(&engine->sched_engine->lock); - return; /* leave this for another sibling */ - } - -@@ -1591,7 +1591,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - */ - sched_engine->queue_priority_hint = queue_prio(sched_engine); - i915_sched_engine_reset_on_empty(sched_engine); -- spin_unlock(&sched_engine->lock); -+ spin_unlock_irq(&sched_engine->lock); - - /* - * We can skip poking the HW if we ended up with exactly the same set -@@ -1617,13 +1617,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) - } - } - --static void execlists_dequeue_irq(struct intel_engine_cs *engine) --{ -- local_irq_disable(); /* Suspend interrupts across request submission */ -- execlists_dequeue(engine); -- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ --} -- - static void clear_ports(struct i915_request **ports, int count) - { - memset_p((void **)ports, NULL, count); -@@ -2477,7 +2470,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) - } - - if (!engine->execlists.pending[0]) { -- execlists_dequeue_irq(engine); -+ execlists_dequeue(engine); - start_timeslice(engine); - } - -diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c -index 7503dcb9043bb..799fb8083470f 100644 ---- a/drivers/gpu/drm/i915/i915_request.c -+++ b/drivers/gpu/drm/i915/i915_request.c -@@ -613,7 +613,6 @@ bool __i915_request_submit(struct i915_request *request) - - RQ_TRACE(request, "\n"); - -- GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->sched_engine->lock); - - /* -@@ -722,7 +721,6 @@ void __i915_request_unsubmit(struct i915_request *request) - */ - RQ_TRACE(request, "\n"); - -- GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->sched_engine->lock); - - /* -diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h -index f6f9228a13518..0ff1b60be8382 100644 ---- a/drivers/gpu/drm/i915/i915_trace.h -+++ b/drivers/gpu/drm/i915/i915_trace.h -@@ -6,6 +6,10 @@ - #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) - #define _I915_TRACE_H_ - -+#ifdef CONFIG_PREEMPT_RT -+#define NOTRACE -+#endif -+ - #include - #include - #include -@@ -322,7 +326,7 @@ DEFINE_EVENT(i915_request, i915_request_add, - TP_ARGS(rq) - ); - --#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) -+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) - DEFINE_EVENT(i915_request, i915_request_guc_submit, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) -diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h -index 2c430c0c3badd..7ec828637d622 100644 ---- a/drivers/gpu/drm/i915/i915_utils.h -+++ b/drivers/gpu/drm/i915/i915_utils.h -@@ -288,7 +288,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) - #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) - - /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ --#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) -+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) - # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) - #else - # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) -diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h -index 1e8fe44a7099f..b336d00d7988e 100644 ---- a/drivers/tty/serial/8250/8250.h -+++ b/drivers/tty/serial/8250/8250.h -@@ -177,12 +177,277 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) - up->dl_write(up, value); - } - -+static inline bool serial8250_is_console(struct uart_port *port) -+{ -+ return uart_console(port) && !hlist_unhashed_lockless(&port->cons->node); -+} -+ -+/** -+ * serial8250_init_wctxt - Initialize a write context for -+ * non-console-printing usage -+ * @wctxt: The write context to initialize -+ * @cons: The console to assign to the write context -+ * -+ * In order to mark an unsafe region, drivers must acquire the console. This -+ * requires providing an initialized write context (even if that driver will -+ * not be doing any printing). -+ * -+ * This function should not be used for console printing contexts. -+ */ -+static inline void serial8250_init_wctxt(struct cons_write_context *wctxt, -+ struct console *cons) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ -+ memset(wctxt, 0, sizeof(*wctxt)); -+ ctxt->console = cons; -+ ctxt->prio = CONS_PRIO_NORMAL; -+} -+ -+/** -+ * __serial8250_console_acquire - Acquire a console for -+ * non-console-printing usage -+ * @wctxt: An uninitialized write context to use for acquiring -+ * @cons: The console to assign to the write context -+ * -+ * The caller is holding the port->lock. -+ * The caller is holding the console_srcu_read_lock. -+ * -+ * This function should not be used for console printing contexts. -+ */ -+static inline void __serial8250_console_acquire(struct cons_write_context *wctxt, -+ struct console *cons) -+{ -+ for (;;) { -+ serial8250_init_wctxt(wctxt, cons); -+ if (console_try_acquire(wctxt)) -+ break; -+ cpu_relax(); -+ } -+} -+ -+/** -+ * serial8250_enter_unsafe - Mark the beginning of an unsafe region for -+ * non-console-printing usage -+ * @up: The port that is entering the unsafe state -+ * -+ * The caller should ensure @up is a console before calling this function. -+ * -+ * The caller is holding the port->lock. -+ * This function takes the console_srcu_read_lock and becomes owner of the -+ * console associated with @up. -+ * -+ * This function should not be used for console printing contexts. -+ */ -+static inline void serial8250_enter_unsafe(struct uart_8250_port *up) -+{ -+ struct uart_port *port = &up->port; -+ -+ lockdep_assert_held_once(&port->lock); -+ -+ for (;;) { -+ up->cookie = console_srcu_read_lock(); -+ -+ __serial8250_console_acquire(&up->wctxt, port->cons); -+ -+ if (console_enter_unsafe(&up->wctxt)) -+ break; -+ -+ console_srcu_read_unlock(up->cookie); -+ cpu_relax(); -+ } -+} -+ -+/** -+ * serial8250_exit_unsafe - Mark the end of an unsafe region for -+ * non-console-printing usage -+ * @up: The port that is exiting the unsafe state -+ * -+ * The caller is holding the port->lock. -+ * This function releases ownership of the console associated with @up and -+ * releases the console_srcu_read_lock. -+ * -+ * This function should not be used for console printing contexts. -+ */ -+static inline void serial8250_exit_unsafe(struct uart_8250_port *up) -+{ -+ struct uart_port *port = &up->port; -+ -+ lockdep_assert_held_once(&port->lock); -+ -+ if (console_exit_unsafe(&up->wctxt)) -+ console_release(&up->wctxt); -+ -+ console_srcu_read_unlock(up->cookie); -+} -+ -+/** -+ * serial8250_in_IER - Read the IER register for -+ * non-console-printing usage -+ * @up: The port to work on -+ * -+ * Returns: The value read from IER -+ * -+ * The caller is holding the port->lock. -+ * -+ * This is the top-level function for non-console-printing contexts to -+ * read the IER register. The caller does not need to care if @up is a -+ * console before calling this function. -+ * -+ * This function should not be used for printing contexts. -+ */ -+static inline int serial8250_in_IER(struct uart_8250_port *up) -+{ -+ struct uart_port *port = &up->port; -+ bool is_console; -+ int ier; -+ -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ -+ ier = serial_in(up, UART_IER); -+ -+ if (is_console) -+ serial8250_exit_unsafe(up); -+ -+ return ier; -+} -+ -+/** -+ * __serial8250_set_IER - Directly write to the IER register -+ * @up: The port to work on -+ * @wctxt: The current write context -+ * @ier: The value to write -+ * -+ * Returns: True if IER was written to. False otherwise -+ * -+ * The caller is holding the port->lock. -+ * The caller is holding the console_srcu_read_unlock. -+ * The caller is the owner of the console associated with @up. -+ * -+ * This function should only be directly called within console printing -+ * contexts. Other contexts should use serial8250_set_IER(). -+ */ -+static inline bool __serial8250_set_IER(struct uart_8250_port *up, -+ struct cons_write_context *wctxt, -+ int ier) -+{ -+ if (wctxt && !console_can_proceed(wctxt)) -+ return false; -+ serial_out(up, UART_IER, ier); -+ return true; -+} -+ -+/** -+ * serial8250_set_IER - Write a new value to the IER register for -+ * non-console-printing usage -+ * @up: The port to work on -+ * @ier: The value to write -+ * -+ * The caller is holding the port->lock. -+ * -+ * This is the top-level function for non-console-printing contexts to -+ * write to the IER register. The caller does not need to care if @up is a -+ * console before calling this function. -+ * -+ * This function should not be used for printing contexts. -+ */ -+static inline void serial8250_set_IER(struct uart_8250_port *up, int ier) -+{ -+ struct uart_port *port = &up->port; -+ bool is_console; -+ -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) { -+ serial8250_enter_unsafe(up); -+ while (!__serial8250_set_IER(up, &up->wctxt, ier)) { -+ console_srcu_read_unlock(up->cookie); -+ console_enter_unsafe(&up->wctxt); -+ } -+ serial8250_exit_unsafe(up); -+ } else { -+ __serial8250_set_IER(up, NULL, ier); -+ } -+} -+ -+/** -+ * __serial8250_clear_IER - Directly clear the IER register -+ * @up: The port to work on -+ * @wctxt: The current write context -+ * @prior: Gets set to the previous value of IER -+ * -+ * Returns: True if IER was cleared and @prior points to the previous -+ * value of IER. False otherwise and @prior is invalid -+ * -+ * The caller is holding the port->lock. -+ * The caller is holding the console_srcu_read_unlock. -+ * The caller is the owner of the console associated with @up. -+ * -+ * This function should only be directly called within console printing -+ * contexts. Other contexts should use serial8250_clear_IER(). -+ */ -+static inline bool __serial8250_clear_IER(struct uart_8250_port *up, -+ struct cons_write_context *wctxt, -+ int *prior) -+{ -+ unsigned int clearval = 0; -+ -+ if (up->capabilities & UART_CAP_UUE) -+ clearval = UART_IER_UUE; -+ -+ *prior = serial_in(up, UART_IER); -+ if (wctxt && !console_can_proceed(wctxt)) -+ return false; -+ serial_out(up, UART_IER, clearval); -+ return true; -+} -+ -+/** -+ * serial8250_clear_IER - Clear the IER register for -+ * non-console-printing usage -+ * @up: The port to work on -+ * -+ * Returns: The previous value of IER -+ * -+ * The caller is holding the port->lock. -+ * -+ * This is the top-level function for non-console-printing contexts to -+ * clear the IER register. The caller does not need to care if @up is a -+ * console before calling this function. -+ * -+ * This function should not be used for printing contexts. -+ */ -+static inline int serial8250_clear_IER(struct uart_8250_port *up) -+{ -+ struct uart_port *port = &up->port; -+ bool is_console; -+ int prior; -+ -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) { -+ serial8250_enter_unsafe(up); -+ while (!__serial8250_clear_IER(up, &up->wctxt, &prior)) { -+ console_srcu_read_unlock(up->cookie); -+ console_enter_unsafe(&up->wctxt); -+ } -+ serial8250_exit_unsafe(up); -+ } else { -+ __serial8250_clear_IER(up, NULL, &prior); -+ } -+ -+ return prior; -+} -+ - static inline bool serial8250_set_THRI(struct uart_8250_port *up) - { - if (up->ier & UART_IER_THRI) - return false; - up->ier |= UART_IER_THRI; -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - return true; - } - -@@ -191,7 +456,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) - if (!(up->ier & UART_IER_THRI)) - return false; - up->ier &= ~UART_IER_THRI; -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - return true; - } - -diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c -index 9d2a7856784f7..7cc6b527c088b 100644 ---- a/drivers/tty/serial/8250/8250_aspeed_vuart.c -+++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c -@@ -278,7 +278,7 @@ static void __aspeed_vuart_set_throttle(struct uart_8250_port *up, - up->ier &= ~irqs; - if (!throttle) - up->ier |= irqs; -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - } - static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle) - { -diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c -index f801b1f5b46c0..a29f5f45d22f2 100644 ---- a/drivers/tty/serial/8250/8250_bcm7271.c -+++ b/drivers/tty/serial/8250/8250_bcm7271.c -@@ -606,8 +606,10 @@ static int brcmuart_startup(struct uart_port *port) - * Disable the Receive Data Interrupt because the DMA engine - * will handle this. - */ -+ spin_lock_irq(&port->lock); - up->ier &= ~UART_IER_RDI; -- serial_port_out(port, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); -+ spin_unlock_irq(&port->lock); - - priv->tx_running = false; - priv->dma.rx_dma = NULL; -@@ -787,6 +789,12 @@ static int brcmuart_handle_irq(struct uart_port *p) - spin_lock_irqsave(&p->lock, flags); - status = serial_port_in(p, UART_LSR); - if ((status & UART_LSR_DR) == 0) { -+ bool is_console; -+ -+ is_console = serial8250_is_console(p); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); - - ier = serial_port_in(p, UART_IER); - /* -@@ -807,6 +815,9 @@ static int brcmuart_handle_irq(struct uart_port *p) - serial_port_in(p, UART_RX); - } - -+ if (is_console) -+ serial8250_exit_unsafe(up); -+ - handled = 1; - } - spin_unlock_irqrestore(&p->lock, flags); -@@ -844,12 +855,22 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t) - /* re-enable receive unless upper layer has disabled it */ - if ((up->ier & (UART_IER_RLSI | UART_IER_RDI)) == - (UART_IER_RLSI | UART_IER_RDI)) { -+ bool is_console; -+ -+ is_console = serial8250_is_console(p); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ - status = serial_port_in(p, UART_IER); - status |= (UART_IER_RLSI | UART_IER_RDI); - serial_port_out(p, UART_IER, status); - status = serial_port_in(p, UART_MCR); - status |= UART_MCR_RTS; - serial_port_out(p, UART_MCR, status); -+ -+ if (is_console) -+ serial8250_exit_unsafe(up); - } - spin_unlock_irqrestore(&p->lock, flags); - return HRTIMER_NORESTART; -diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c -index ab63c308be0a2..8e89bffa1f121 100644 ---- a/drivers/tty/serial/8250/8250_core.c -+++ b/drivers/tty/serial/8250/8250_core.c -@@ -256,6 +256,7 @@ static void serial8250_timeout(struct timer_list *t) - static void serial8250_backup_timeout(struct timer_list *t) - { - struct uart_8250_port *up = from_timer(up, t, timer); -+ struct uart_port *port = &up->port; - unsigned int iir, ier = 0, lsr; - unsigned long flags; - -@@ -266,8 +267,23 @@ static void serial8250_backup_timeout(struct timer_list *t) - * based handler. - */ - if (up->port.irq) { -+ bool is_console; -+ -+ /* -+ * Do not use serial8250_clear_IER() because this code -+ * ignores capabilties. -+ */ -+ -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ - ier = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); -+ -+ if (is_console) -+ serial8250_exit_unsafe(up); - } - - iir = serial_in(up, UART_IIR); -@@ -290,7 +306,7 @@ static void serial8250_backup_timeout(struct timer_list *t) - serial8250_tx_chars(up); - - if (up->port.irq) -- serial_out(up, UART_IER, ier); -+ serial8250_set_IER(up, ier); - - spin_unlock_irqrestore(&up->port.lock, flags); - -@@ -576,12 +592,30 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) - - #ifdef CONFIG_SERIAL_8250_CONSOLE - --static void univ8250_console_write(struct console *co, const char *s, -- unsigned int count) -+static void univ8250_console_port_lock(struct console *con, bool do_lock, unsigned long *flags) -+{ -+ struct uart_8250_port *up = &serial8250_ports[con->index]; -+ -+ if (do_lock) -+ spin_lock_irqsave(&up->port.lock, *flags); -+ else -+ spin_unlock_irqrestore(&up->port.lock, *flags); -+} -+ -+static bool univ8250_console_write_atomic(struct console *co, -+ struct cons_write_context *wctxt) - { - struct uart_8250_port *up = &serial8250_ports[co->index]; - -- serial8250_console_write(up, s, count); -+ return serial8250_console_write_atomic(up, wctxt); -+} -+ -+static bool univ8250_console_write_thread(struct console *co, -+ struct cons_write_context *wctxt) -+{ -+ struct uart_8250_port *up = &serial8250_ports[co->index]; -+ -+ return serial8250_console_write_thread(up, wctxt); - } - - static int univ8250_console_setup(struct console *co, char *options) -@@ -669,12 +703,14 @@ static int univ8250_console_match(struct console *co, char *name, int idx, - - static struct console univ8250_console = { - .name = "ttyS", -- .write = univ8250_console_write, -+ .write_atomic = univ8250_console_write_atomic, -+ .write_thread = univ8250_console_write_thread, -+ .port_lock = univ8250_console_port_lock, - .device = uart_console_device, - .setup = univ8250_console_setup, - .exit = univ8250_console_exit, - .match = univ8250_console_match, -- .flags = CON_PRINTBUFFER | CON_ANYTIME, -+ .flags = CON_PRINTBUFFER | CON_ANYTIME | CON_NO_BKL, - .index = -1, - .data = &serial8250_reg, - }; -@@ -962,7 +998,7 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work) - spin_lock_irqsave(&port->lock, flags); - up->ier |= UART_IER_RLSI | UART_IER_RDI; - up->port.read_status_mask |= UART_LSR_DR; -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - spin_unlock_irqrestore(&port->lock, flags); - } - -diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c -index 64770c62bbec5..ccb70b20b1f4f 100644 ---- a/drivers/tty/serial/8250/8250_exar.c -+++ b/drivers/tty/serial/8250/8250_exar.c -@@ -185,6 +185,10 @@ static void xr17v35x_set_divisor(struct uart_port *p, unsigned int baud, - - static int xr17v35x_startup(struct uart_port *port) - { -+ struct uart_8250_port *up = up_to_u8250p(port); -+ -+ spin_lock_irq(&port->lock); -+ - /* - * First enable access to IER [7:5], ISR [5:4], FCR [5:4], - * MCR [7:5] and MSR [7:0] -@@ -195,7 +199,9 @@ static int xr17v35x_startup(struct uart_port *port) - * Make sure all interrups are masked until initialization is - * complete and the FIFOs are cleared - */ -- serial_port_out(port, UART_IER, 0); -+ serial8250_set_IER(up, 0); -+ -+ spin_unlock_irq(&port->lock); - - return serial8250_do_startup(port); - } -diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c -index 8adfaa183f778..eaf148245a10d 100644 ---- a/drivers/tty/serial/8250/8250_fsl.c -+++ b/drivers/tty/serial/8250/8250_fsl.c -@@ -58,7 +58,8 @@ int fsl8250_handle_irq(struct uart_port *port) - if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { - unsigned long delay; - -- up->ier = port->serial_in(port, UART_IER); -+ up->ier = serial8250_in_IER(up); -+ - if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { - port->ops->stop_rx(port); - } else { -diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c -index fb1d5ec0940e6..bf7ab55c8923f 100644 ---- a/drivers/tty/serial/8250/8250_mtk.c -+++ b/drivers/tty/serial/8250/8250_mtk.c -@@ -222,12 +222,38 @@ static void mtk8250_shutdown(struct uart_port *port) - - static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) - { -- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); -+ struct uart_port *port = &up->port; -+ bool is_console; -+ int ier; -+ -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ -+ ier = serial_in(up, UART_IER); -+ serial_out(up, UART_IER, ier & (~mask)); -+ -+ if (is_console) -+ serial8250_exit_unsafe(up); - } - - static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) - { -- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); -+ struct uart_port *port = &up->port; -+ bool is_console; -+ int ier; -+ -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ -+ ier = serial_in(up, UART_IER); -+ serial_out(up, UART_IER, ier | mask); -+ -+ if (is_console) -+ serial8250_exit_unsafe(up); - } - - static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) -diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c -index 734f092ef839a..bfa50a26349dd 100644 ---- a/drivers/tty/serial/8250/8250_omap.c -+++ b/drivers/tty/serial/8250/8250_omap.c -@@ -334,8 +334,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) - - /* drop TCR + TLR access, we setup XON/XOFF later */ - serial8250_out_MCR(up, mcr); -- -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_dl_write(up, priv->quot); -@@ -523,16 +522,21 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, - u8 efr; - - pm_runtime_get_sync(port->dev); -+ -+ spin_lock_irq(&port->lock); -+ - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - efr = serial_in(up, UART_EFR); - serial_out(up, UART_EFR, efr | UART_EFR_ECB); - serial_out(up, UART_LCR, 0); - -- serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0); -+ serial8250_set_IER(up, (state != 0) ? UART_IERX_SLEEP : 0); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, efr); - serial_out(up, UART_LCR, 0); - -+ spin_unlock_irq(&port->lock); -+ - pm_runtime_mark_last_busy(port->dev); - pm_runtime_put_autosuspend(port->dev); - } -@@ -649,7 +653,8 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) - if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) { - unsigned long delay; - -- up->ier = port->serial_in(port, UART_IER); -+ spin_lock(&port->lock); -+ up->ier = serial8250_in_IER(up); - if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { - port->ops->stop_rx(port); - } else { -@@ -658,6 +663,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) - */ - cancel_delayed_work(&up->overrun_backoff); - } -+ spin_unlock(&port->lock); - - delay = msecs_to_jiffies(up->overrun_backoff_time_ms); - schedule_delayed_work(&up->overrun_backoff, delay); -@@ -707,8 +713,10 @@ static int omap_8250_startup(struct uart_port *port) - if (ret < 0) - goto err; - -+ spin_lock_irq(&port->lock); - up->ier = UART_IER_RLSI | UART_IER_RDI; -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); -+ spin_unlock_irq(&port->lock); - - #ifdef CONFIG_PM - up->capabilities |= UART_CAP_RPM; -@@ -748,8 +756,10 @@ static void omap_8250_shutdown(struct uart_port *port) - if (priv->habit & UART_HAS_EFR2) - serial_out(up, UART_OMAP_EFR2, 0x0); - -+ spin_lock_irq(&port->lock); - up->ier = 0; -- serial_out(up, UART_IER, 0); -+ serial8250_set_IER(up, 0); -+ spin_unlock_irq(&port->lock); - - if (up->dma) - serial8250_release_dma(up); -@@ -797,7 +807,7 @@ static void omap_8250_unthrottle(struct uart_port *port) - up->dma->rx_dma(up); - up->ier |= UART_IER_RLSI | UART_IER_RDI; - port->read_status_mask |= UART_LSR_DR; -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - spin_unlock_irqrestore(&port->lock, flags); - - pm_runtime_mark_last_busy(port->dev); -@@ -956,7 +966,7 @@ static void __dma_rx_complete(void *param) - __dma_rx_do_complete(p); - if (!priv->throttled) { - p->ier |= UART_IER_RLSI | UART_IER_RDI; -- serial_out(p, UART_IER, p->ier); -+ serial8250_set_IER(p, p->ier); - if (!(priv->habit & UART_HAS_EFR2)) - omap_8250_rx_dma(p); - } -@@ -1013,7 +1023,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p) - * callback to run. - */ - p->ier &= ~(UART_IER_RLSI | UART_IER_RDI); -- serial_out(p, UART_IER, p->ier); -+ serial8250_set_IER(p, p->ier); - } - goto out; - } -@@ -1226,12 +1236,12 @@ static void am654_8250_handle_rx_dma(struct uart_8250_port *up, u8 iir, - * periodic timeouts, re-enable interrupts. - */ - up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - omap_8250_rx_dma_flush(up); - serial_in(up, UART_IIR); - serial_out(up, UART_OMAP_EFR2, 0x0); - up->ier |= UART_IER_RLSI | UART_IER_RDI; -- serial_out(up, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - } - } - -@@ -1717,12 +1727,16 @@ static int omap8250_runtime_resume(struct device *dev) - - up = serial8250_get_port(priv->line); - -+ spin_lock_irq(&up->port.lock); -+ - if (omap8250_lost_context(up)) - omap8250_restore_regs(up); - - if (up->dma && up->dma->rxchan && !(priv->habit & UART_HAS_EFR2)) - omap_8250_rx_dma(up); - -+ spin_unlock_irq(&up->port.lock); -+ - priv->latency = priv->calc_latency; - schedule_work(&priv->qos_work); - return 0; -diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c -index fe8d79c4ae95e..68f01f8bdf64b 100644 ---- a/drivers/tty/serial/8250/8250_port.c -+++ b/drivers/tty/serial/8250/8250_port.c -@@ -745,6 +745,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) - serial8250_rpm_get(p); - - if (p->capabilities & UART_CAP_SLEEP) { -+ spin_lock_irq(&p->port.lock); - if (p->capabilities & UART_CAP_EFR) { - lcr = serial_in(p, UART_LCR); - efr = serial_in(p, UART_EFR); -@@ -752,25 +753,18 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) - serial_out(p, UART_EFR, UART_EFR_ECB); - serial_out(p, UART_LCR, 0); - } -- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); -+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0); - if (p->capabilities & UART_CAP_EFR) { - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(p, UART_EFR, efr); - serial_out(p, UART_LCR, lcr); - } -+ spin_unlock_irq(&p->port.lock); - } - - serial8250_rpm_put(p); - } - --static void serial8250_clear_IER(struct uart_8250_port *up) --{ -- if (up->capabilities & UART_CAP_UUE) -- serial_out(up, UART_IER, UART_IER_UUE); -- else -- serial_out(up, UART_IER, 0); --} -- - #ifdef CONFIG_SERIAL_8250_RSA - /* - * Attempts to turn on the RSA FIFO. Returns zero on failure. -@@ -1034,8 +1028,10 @@ static int broken_efr(struct uart_8250_port *up) - */ - static void autoconfig_16550a(struct uart_8250_port *up) - { -+ struct uart_port *port = &up->port; - unsigned char status1, status2; - unsigned int iersave; -+ bool is_console; - - up->port.type = PORT_16550A; - up->capabilities |= UART_CAP_FIFO; -@@ -1151,6 +1147,11 @@ static void autoconfig_16550a(struct uart_8250_port *up) - return; - } - -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ - /* - * Try writing and reading the UART_IER_UUE bit (b6). - * If it works, this is probably one of the Xscale platform's -@@ -1186,6 +1187,9 @@ static void autoconfig_16550a(struct uart_8250_port *up) - } - serial_out(up, UART_IER, iersave); - -+ if (is_console) -+ serial8250_exit_unsafe(up); -+ - /* - * We distinguish between 16550A and U6 16550A by counting - * how many bytes are in the FIFO. -@@ -1227,6 +1231,13 @@ static void autoconfig(struct uart_8250_port *up) - up->bugs = 0; - - if (!(port->flags & UPF_BUGGY_UART)) { -+ bool is_console; -+ -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ - /* - * Do a simple existence test first; if we fail this, - * there's no point trying anything else. -@@ -1256,6 +1267,10 @@ static void autoconfig(struct uart_8250_port *up) - #endif - scratch3 = serial_in(up, UART_IER) & UART_IER_ALL_INTR; - serial_out(up, UART_IER, scratch); -+ -+ if (is_console) -+ serial8250_exit_unsafe(up); -+ - if (scratch2 != 0 || scratch3 != UART_IER_ALL_INTR) { - /* - * We failed; there's nothing here -@@ -1377,6 +1392,7 @@ static void autoconfig_irq(struct uart_8250_port *up) - unsigned char save_ICP = 0; - unsigned int ICP = 0; - unsigned long irqs; -+ bool is_console; - int irq; - - if (port->flags & UPF_FOURPORT) { -@@ -1386,8 +1402,12 @@ static void autoconfig_irq(struct uart_8250_port *up) - inb_p(ICP); - } - -- if (uart_console(port)) -+ is_console = serial8250_is_console(port); -+ -+ if (is_console) { - console_lock(); -+ serial8250_enter_unsafe(up); -+ } - - /* forget possible initially masked and pending IRQ */ - probe_irq_off(probe_irq_on()); -@@ -1419,8 +1439,10 @@ static void autoconfig_irq(struct uart_8250_port *up) - if (port->flags & UPF_FOURPORT) - outb_p(save_ICP, ICP); - -- if (uart_console(port)) -+ if (is_console) { -+ serial8250_exit_unsafe(up); - console_unlock(); -+ } - - port->irq = (irq > 0) ? irq : 0; - } -@@ -1433,7 +1455,7 @@ static void serial8250_stop_rx(struct uart_port *port) - - up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); - up->port.read_status_mask &= ~UART_LSR_DR; -- serial_port_out(port, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - - serial8250_rpm_put(up); - } -@@ -1463,7 +1485,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p) - serial8250_clear_and_reinit_fifos(p); - - p->ier |= UART_IER_RLSI | UART_IER_RDI; -- serial_port_out(&p->port, UART_IER, p->ier); -+ serial8250_set_IER(p, p->ier); - } - } - EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); -@@ -1710,7 +1732,7 @@ static void serial8250_disable_ms(struct uart_port *port) - mctrl_gpio_disable_ms(up->gpios); - - up->ier &= ~UART_IER_MSI; -- serial_port_out(port, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - } - - static void serial8250_enable_ms(struct uart_port *port) -@@ -1726,7 +1748,7 @@ static void serial8250_enable_ms(struct uart_port *port) - up->ier |= UART_IER_MSI; - - serial8250_rpm_get(up); -- serial_port_out(port, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - serial8250_rpm_put(up); - } - -@@ -2176,9 +2198,10 @@ static void serial8250_put_poll_char(struct uart_port *port, - serial8250_rpm_get(up); - /* - * First save the IER then disable the interrupts -+ * -+ * Best-effort IER access because other CPUs are quiesced. - */ -- ier = serial_port_in(port, UART_IER); -- serial8250_clear_IER(up); -+ __serial8250_clear_IER(up, NULL, &ier); - - wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); - /* -@@ -2191,7 +2214,7 @@ static void serial8250_put_poll_char(struct uart_port *port, - * and restore the IER - */ - wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); -- serial_port_out(port, UART_IER, ier); -+ __serial8250_set_IER(up, NULL, ier); - serial8250_rpm_put(up); - } - -@@ -2202,6 +2225,7 @@ int serial8250_do_startup(struct uart_port *port) - struct uart_8250_port *up = up_to_u8250p(port); - unsigned long flags; - unsigned char iir; -+ bool is_console; - int retval; - u16 lsr; - -@@ -2219,21 +2243,25 @@ int serial8250_do_startup(struct uart_port *port) - serial8250_rpm_get(up); - if (port->type == PORT_16C950) { - /* Wake up and initialize UART */ -+ spin_lock_irqsave(&port->lock, flags); - up->acr = 0; - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); -- serial_port_out(port, UART_IER, 0); -+ serial8250_set_IER(up, 0); - serial_port_out(port, UART_LCR, 0); - serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); - serial_port_out(port, UART_LCR, 0); -+ spin_unlock_irqrestore(&port->lock, flags); - } - - if (port->type == PORT_DA830) { - /* Reset the port */ -- serial_port_out(port, UART_IER, 0); -+ spin_lock_irqsave(&port->lock, flags); -+ serial8250_set_IER(up, 0); - serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); -+ spin_unlock_irqrestore(&port->lock, flags); - mdelay(10); - - /* Enable Tx, Rx and free run mode */ -@@ -2331,6 +2359,8 @@ int serial8250_do_startup(struct uart_port *port) - if (retval) - goto out; - -+ is_console = serial8250_is_console(port); -+ - if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { - unsigned char iir1; - -@@ -2347,6 +2377,9 @@ int serial8250_do_startup(struct uart_port *port) - */ - spin_lock_irqsave(&port->lock, flags); - -+ if (is_console) -+ serial8250_enter_unsafe(up); -+ - wait_for_xmitr(up, UART_LSR_THRE); - serial_port_out_sync(port, UART_IER, UART_IER_THRI); - udelay(1); /* allow THRE to set */ -@@ -2357,6 +2390,9 @@ int serial8250_do_startup(struct uart_port *port) - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - -+ if (is_console) -+ serial8250_exit_unsafe(up); -+ - spin_unlock_irqrestore(&port->lock, flags); - - if (port->irqflags & IRQF_SHARED) -@@ -2411,10 +2447,14 @@ int serial8250_do_startup(struct uart_port *port) - * Do a quick test to see if we receive an interrupt when we enable - * the TX irq. - */ -+ if (is_console) -+ serial8250_enter_unsafe(up); - serial_port_out(port, UART_IER, UART_IER_THRI); - lsr = serial_port_in(port, UART_LSR); - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); -+ if (is_console) -+ serial8250_exit_unsafe(up); - - if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { - if (!(up->bugs & UART_BUG_TXEN)) { -@@ -2446,7 +2486,7 @@ int serial8250_do_startup(struct uart_port *port) - if (up->dma) { - const char *msg = NULL; - -- if (uart_console(port)) -+ if (is_console) - msg = "forbid DMA for kernel console"; - else if (serial8250_request_dma(up)) - msg = "failed to request DMA"; -@@ -2497,7 +2537,7 @@ void serial8250_do_shutdown(struct uart_port *port) - */ - spin_lock_irqsave(&port->lock, flags); - up->ier = 0; -- serial_port_out(port, UART_IER, 0); -+ serial8250_set_IER(up, 0); - spin_unlock_irqrestore(&port->lock, flags); - - synchronize_irq(port->irq); -@@ -2863,7 +2903,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, - if (up->capabilities & UART_CAP_RTOIE) - up->ier |= UART_IER_RTOIE; - -- serial_port_out(port, UART_IER, up->ier); -+ serial8250_set_IER(up, up->ier); - - if (up->capabilities & UART_CAP_EFR) { - unsigned char efr = 0; -@@ -3328,12 +3368,21 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); - - #ifdef CONFIG_SERIAL_8250_CONSOLE - --static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) -+static bool serial8250_console_putchar(struct uart_port *port, unsigned char ch, -+ struct cons_write_context *wctxt) - { - struct uart_8250_port *up = up_to_u8250p(port); - - wait_for_xmitr(up, UART_LSR_THRE); -+ if (!console_can_proceed(wctxt)) -+ return false; - serial_port_out(port, UART_TX, ch); -+ if (ch == '\n') -+ up->console_newline_needed = false; -+ else -+ up->console_newline_needed = true; -+ -+ return true; - } - - /* -@@ -3362,33 +3411,119 @@ static void serial8250_console_restore(struct uart_8250_port *up) - serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); - } - --/* -- * Print a string to the serial port using the device FIFO -- * -- * It sends fifosize bytes and then waits for the fifo -- * to get empty. -- */ --static void serial8250_console_fifo_write(struct uart_8250_port *up, -- const char *s, unsigned int count) -+static bool __serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt, -+ const char *s, unsigned int count, -+ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *)) - { -- int i; -- const char *end = s + count; -- unsigned int fifosize = up->tx_loadsz; -- bool cr_sent = false; -+ bool finished = false; -+ unsigned int i; - -- while (s != end) { -- wait_for_lsr(up, UART_LSR_THRE); -- -- for (i = 0; i < fifosize && s != end; ++i) { -- if (*s == '\n' && !cr_sent) { -- serial_out(up, UART_TX, '\r'); -- cr_sent = true; -- } else { -- serial_out(up, UART_TX, *s++); -- cr_sent = false; -- } -+ for (i = 0; i < count; i++, s++) { -+ if (*s == '\n') { -+ if (!putchar(port, '\r', wctxt)) -+ goto out; - } -+ if (!putchar(port, *s, wctxt)) -+ goto out; - } -+ finished = true; -+out: -+ return finished; -+} -+ -+static bool serial8250_console_write(struct uart_port *port, struct cons_write_context *wctxt, -+ const char *s, unsigned int count, -+ bool (*putchar)(struct uart_port *, unsigned char, struct cons_write_context *)) -+{ -+ return __serial8250_console_write(port, wctxt, s, count, putchar); -+} -+ -+static bool atomic_print_line(struct uart_8250_port *up, -+ struct cons_write_context *wctxt) -+{ -+ struct uart_port *port = &up->port; -+ -+ if (up->console_newline_needed && -+ !__serial8250_console_write(port, wctxt, "\n", 1, serial8250_console_putchar)) { -+ return false; -+ } -+ -+ return __serial8250_console_write(port, wctxt, wctxt->outbuf, wctxt->len, -+ serial8250_console_putchar); -+} -+ -+static void atomic_console_reacquire(struct cons_write_context *wctxt, -+ struct cons_write_context *wctxt_init) -+{ -+ memcpy(wctxt, wctxt_init, sizeof(*wctxt)); -+ while (!console_try_acquire(wctxt)) { -+ cpu_relax(); -+ memcpy(wctxt, wctxt_init, sizeof(*wctxt)); -+ } -+} -+ -+bool serial8250_console_write_atomic(struct uart_8250_port *up, -+ struct cons_write_context *wctxt) -+{ -+ struct cons_write_context wctxt_init = { }; -+ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt); -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ bool finished = false; -+ unsigned int ier; -+ -+ touch_nmi_watchdog(); -+ -+ /* With write_atomic, another context may hold the port->lock. */ -+ -+ ctxt_init->console = ctxt->console; -+ ctxt_init->prio = ctxt->prio; -+ ctxt_init->thread = ctxt->thread; -+ -+ /* -+ * Enter unsafe in order to disable interrupts. If the console is -+ * lost before the interrupts are disabled, bail out because another -+ * context took over the printing. If the console is lost after the -+ * interrutps are disabled, the console must be reacquired in order -+ * to re-enable the interrupts. However in that case no printing is -+ * allowed because another context took over the printing. -+ */ -+ -+ if (!console_enter_unsafe(wctxt)) -+ return false; -+ -+ if (!__serial8250_clear_IER(up, wctxt, &ier)) -+ return false; -+ -+ if (!console_exit_unsafe(wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } -+ -+ if (!atomic_print_line(up, wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } -+ -+ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); -+ finished = true; -+enable_irq: -+ /* -+ * Enter unsafe in order to enable interrupts. If the console is -+ * lost before the interrupts are enabled, the console must be -+ * reacquired in order to re-enable the interrupts. -+ */ -+ for (;;) { -+ if (console_enter_unsafe(wctxt) && -+ __serial8250_set_IER(up, wctxt, ier)) { -+ break; -+ } -+ -+ /* HW-IRQs still disabled. Reacquire to enable them. */ -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ } -+ console_exit_unsafe(wctxt); -+ -+ return finished; - } - - /* -@@ -3400,78 +3535,116 @@ static void serial8250_console_fifo_write(struct uart_8250_port *up, - * Doing runtime PM is really a bad idea for the kernel console. - * Thus, we assume the function is called when device is powered up. - */ --void serial8250_console_write(struct uart_8250_port *up, const char *s, -- unsigned int count) -+bool serial8250_console_write_thread(struct uart_8250_port *up, -+ struct cons_write_context *wctxt) - { -+ struct cons_write_context wctxt_init = { }; -+ struct cons_context *ctxt_init = &ACCESS_PRIVATE(&wctxt_init, ctxt); -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); - struct uart_8250_em485 *em485 = up->em485; - struct uart_port *port = &up->port; -- unsigned long flags; -- unsigned int ier, use_fifo; -- int locked = 1; -+ unsigned int count = wctxt->len; -+ const char *s = wctxt->outbuf; -+ bool rs485_started = false; -+ bool finished = false; -+ unsigned int ier; - -- touch_nmi_watchdog(); -- -- if (oops_in_progress) -- locked = spin_trylock_irqsave(&port->lock, flags); -- else -- spin_lock_irqsave(&port->lock, flags); -+ ctxt_init->console = ctxt->console; -+ ctxt_init->prio = ctxt->prio; -+ ctxt_init->thread = ctxt->thread; - - /* -- * First save the IER then disable the interrupts -+ * Enter unsafe in order to disable interrupts. If the console is -+ * lost before the interrupts are disabled, bail out because another -+ * context took over the printing. If the console is lost after the -+ * interrutps are disabled, the console must be reacquired in order -+ * to re-enable the interrupts. However in that case no printing is -+ * allowed because another context took over the printing. - */ -- ier = serial_port_in(port, UART_IER); -- serial8250_clear_IER(up); -+ -+ if (!console_enter_unsafe(wctxt)) -+ return false; -+ -+ if (!__serial8250_clear_IER(up, wctxt, &ier)) -+ return false; -+ -+ if (!console_exit_unsafe(wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } - - /* check scratch reg to see if port powered off during system sleep */ - if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -+ if (!console_enter_unsafe(wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } - serial8250_console_restore(up); -+ if (!console_exit_unsafe(wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } - up->canary = 0; - } - - if (em485) { -- if (em485->tx_stopped) -+ if (em485->tx_stopped) { -+ if (!console_enter_unsafe(wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } - up->rs485_start_tx(up); -- mdelay(port->rs485.delay_rts_before_send); -+ rs485_started = true; -+ if (!console_exit_unsafe(wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } -+ } -+ if (port->rs485.delay_rts_before_send) { -+ mdelay(port->rs485.delay_rts_before_send); -+ if (!console_can_proceed(wctxt)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } -+ } - } - -- use_fifo = (up->capabilities & UART_CAP_FIFO) && -- /* -- * BCM283x requires to check the fifo -- * after each byte. -- */ -- !(up->capabilities & UART_CAP_MINI) && -- /* -- * tx_loadsz contains the transmit fifo size -- */ -- up->tx_loadsz > 1 && -- (up->fcr & UART_FCR_ENABLE_FIFO) && -- port->state && -- test_bit(TTY_PORT_INITIALIZED, &port->state->port.iflags) && -- /* -- * After we put a data in the fifo, the controller will send -- * it regardless of the CTS state. Therefore, only use fifo -- * if we don't use control flow. -- */ -- !(up->port.flags & UPF_CONS_FLOW); -+ if (!serial8250_console_write(port, wctxt, s, count, serial8250_console_putchar)) { -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ goto enable_irq; -+ } - -- if (likely(use_fifo)) -- serial8250_console_fifo_write(up, s, count); -- else -- uart_console_write(port, s, count, serial8250_console_putchar); -- -- /* -- * Finally, wait for transmitter to become empty -- * and restore the IER -- */ - wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); -- -+ finished = true; -+enable_irq: -+ /* -+ * Enter unsafe in order to stop rs485_tx. If the console is -+ * lost before the rs485_tx is stopped, the console must be -+ * reacquired in order to stop rs485_tx. -+ */ - if (em485) { - mdelay(port->rs485.delay_rts_after_send); -- if (em485->tx_stopped) -+ if (em485->tx_stopped && rs485_started) { -+ while (!console_enter_unsafe(wctxt)) -+ atomic_console_reacquire(wctxt, &wctxt_init); - up->rs485_stop_tx(up); -+ if (!console_exit_unsafe(wctxt)) -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ } - } - -- serial_port_out(port, UART_IER, ier); -+ /* -+ * Enter unsafe in order to enable interrupts. If the console is -+ * lost before the interrupts are enabled, the console must be -+ * reacquired in order to re-enable the interrupts. -+ */ -+ for (;;) { -+ if (console_enter_unsafe(wctxt) && -+ __serial8250_set_IER(up, wctxt, ier)) { -+ break; -+ } -+ atomic_console_reacquire(wctxt, &wctxt_init); -+ } - - /* - * The receive handling will happen properly because the -@@ -3483,8 +3656,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, - if (up->msr_saved_flags) - serial8250_modem_status(up); - -- if (locked) -- spin_unlock_irqrestore(&port->lock, flags); -+ console_exit_unsafe(wctxt); -+ -+ return finished; - } - - static unsigned int probe_baud(struct uart_port *port) -@@ -3504,6 +3678,7 @@ static unsigned int probe_baud(struct uart_port *port) - - int serial8250_console_setup(struct uart_port *port, char *options, bool probe) - { -+ struct uart_8250_port *up = up_to_u8250p(port); - int baud = 9600; - int bits = 8; - int parity = 'n'; -@@ -3513,6 +3688,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) - if (!port->iobase && !port->membase) - return -ENODEV; - -+ up->console_newline_needed = false; -+ - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - else if (probe) -diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig -index 5313aa31930f4..16715f01bdb5a 100644 ---- a/drivers/tty/serial/8250/Kconfig -+++ b/drivers/tty/serial/8250/Kconfig -@@ -9,6 +9,7 @@ config SERIAL_8250 - depends on !S390 - select SERIAL_CORE - select SERIAL_MCTRL_GPIO if GPIOLIB -+ select HAVE_ATOMIC_CONSOLE - help - This selects whether you want to include the driver for the standard - serial ports. The standard answer is Y. People who might say N -diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c -index d8c2f3455eeba..a4e142ac6ec46 100644 ---- a/drivers/tty/serial/amba-pl011.c -+++ b/drivers/tty/serial/amba-pl011.c -@@ -2319,18 +2319,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) - { - struct uart_amba_port *uap = amba_ports[co->index]; - unsigned int old_cr = 0, new_cr; -- unsigned long flags; -+ unsigned long flags = 0; - int locked = 1; - - clk_enable(uap->clk); - -- local_irq_save(flags); -+ /* -+ * local_irq_save(flags); -+ * -+ * This local_irq_save() is nonsense. If we come in via sysrq -+ * handling then interrupts are already disabled. Aside of -+ * that the port.sysrq check is racy on SMP regardless. -+ */ - if (uap->port.sysrq) - locked = 0; - else if (oops_in_progress) -- locked = spin_trylock(&uap->port.lock); -+ locked = spin_trylock_irqsave(&uap->port.lock, flags); - else -- spin_lock(&uap->port.lock); -+ spin_lock_irqsave(&uap->port.lock, flags); - - /* - * First save the CR then disable the interrupts -@@ -2356,8 +2362,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) - pl011_write(old_cr, uap, REG_CR); - - if (locked) -- spin_unlock(&uap->port.lock); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&uap->port.lock, flags); - - clk_disable(uap->clk); - } -diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c -index 82d35dbbfa6cb..511cf17d87958 100644 ---- a/drivers/tty/serial/omap-serial.c -+++ b/drivers/tty/serial/omap-serial.c -@@ -1219,13 +1219,10 @@ serial_omap_console_write(struct console *co, const char *s, - unsigned int ier; - int locked = 1; - -- local_irq_save(flags); -- if (up->port.sysrq) -- locked = 0; -- else if (oops_in_progress) -- locked = spin_trylock(&up->port.lock); -+ if (up->port.sysrq || oops_in_progress) -+ locked = spin_trylock_irqsave(&up->port.lock, flags); - else -- spin_lock(&up->port.lock); -+ spin_lock_irqsave(&up->port.lock, flags); - - /* - * First save the IER then disable the interrupts -@@ -1252,8 +1249,7 @@ serial_omap_console_write(struct console *co, const char *s, - check_modem_status(up); - - if (locked) -- spin_unlock(&up->port.lock); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&up->port.lock, flags); - } - - static int __init -diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c -index 728cb72be0666..409892b777d16 100644 ---- a/drivers/tty/serial/serial_core.c -+++ b/drivers/tty/serial/serial_core.c -@@ -2336,8 +2336,11 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) - * able to Re-start_rx later. - */ - if (!console_suspend_enabled && uart_console(uport)) { -- if (uport->ops->start_rx) -+ if (uport->ops->start_rx) { -+ spin_lock_irq(&uport->lock); - uport->ops->stop_rx(uport); -+ spin_unlock_irq(&uport->lock); -+ } - goto unlock; - } - -@@ -2430,8 +2433,11 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) - if (console_suspend_enabled) - uart_change_pm(state, UART_PM_STATE_ON); - uport->ops->set_termios(uport, &termios, NULL); -- if (!console_suspend_enabled && uport->ops->start_rx) -+ if (!console_suspend_enabled && uport->ops->start_rx) { -+ spin_lock_irq(&uport->lock); - uport->ops->start_rx(uport); -+ spin_unlock_irq(&uport->lock); -+ } - if (console_suspend_enabled) - console_start(uport->cons); - } -diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c -index 8e3de07f103da..af933c7a61d98 100644 ---- a/drivers/tty/tty_io.c -+++ b/drivers/tty/tty_io.c -@@ -3543,8 +3543,13 @@ static ssize_t show_cons_active(struct device *dev, - for_each_console(c) { - if (!c->device) - continue; -- if (!c->write) -- continue; -+ if (c->flags & CON_NO_BKL) { -+ if (!(c->write_thread || c->write_atomic)) -+ continue; -+ } else { -+ if (!c->write) -+ continue; -+ } - if ((c->flags & CON_ENABLED) == 0) - continue; - cs[i++] = c; -diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c -index e0758fe7936dc..ab9f42d478c8c 100644 ---- a/fs/proc/consoles.c -+++ b/fs/proc/consoles.c -@@ -21,12 +21,14 @@ static int show_console_dev(struct seq_file *m, void *v) - { CON_ENABLED, 'E' }, - { CON_CONSDEV, 'C' }, - { CON_BOOT, 'B' }, -+ { CON_NO_BKL, 'N' }, - { CON_PRINTBUFFER, 'p' }, - { CON_BRL, 'b' }, - { CON_ANYTIME, 'a' }, - }; - char flags[ARRAY_SIZE(con_flags) + 1]; - struct console *con = v; -+ char con_write = '-'; - unsigned int a; - dev_t dev = 0; - -@@ -57,9 +59,15 @@ static int show_console_dev(struct seq_file *m, void *v) - seq_setwidth(m, 21 - 1); - seq_printf(m, "%s%d", con->name, con->index); - seq_pad(m, ' '); -- seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', -- con->write ? 'W' : '-', con->unblank ? 'U' : '-', -- flags); -+ if (con->flags & CON_NO_BKL) { -+ if (con->write_thread || con->write_atomic) -+ con_write = 'W'; -+ } else { -+ if (con->write) -+ con_write = 'W'; -+ } -+ seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write, -+ con->unblank ? 'U' : '-', flags); - if (dev) - seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); - -diff --git a/include/linux/console.h b/include/linux/console.h -index d3195664baa5a..1e9d5bc8fa76e 100644 ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -16,7 +16,9 @@ - - #include - #include -+#include - #include -+#include - #include - - struct vc_data; -@@ -154,6 +156,10 @@ static inline int con_debug_leave(void) - * receiving the printk spam for obvious reasons. - * @CON_EXTENDED: The console supports the extended output format of - * /dev/kmesg which requires a larger output buffer. -+ * @CON_SUSPENDED: Indicates if a console is suspended. If true, the -+ * printing callbacks must not be called. -+ * @CON_NO_BKL: Console can operate outside of the BKL style console_lock -+ * constraints. - */ - enum cons_flags { - CON_PRINTBUFFER = BIT(0), -@@ -163,8 +169,133 @@ enum cons_flags { - CON_ANYTIME = BIT(4), - CON_BRL = BIT(5), - CON_EXTENDED = BIT(6), -+ CON_SUSPENDED = BIT(7), -+ CON_NO_BKL = BIT(8), - }; - -+/** -+ * struct cons_state - console state for NOBKL consoles -+ * @atom: Compound of the state fields for atomic operations -+ * @seq: Sequence for record tracking (64bit only) -+ * @bits: Compound of the state bits below -+ * -+ * @locked: Console is locked by a writer -+ * @unsafe: Console is busy in a non takeover region -+ * @thread: Current owner is the printk thread -+ * @cur_prio: The priority of the current output -+ * @req_prio: The priority of a handover request -+ * @cpu: The CPU on which the writer runs -+ * -+ * To be used for state read and preparation of atomic_long_cmpxchg() -+ * operations. -+ * -+ * The @req_prio field is particularly important to allow spin-waiting to -+ * timeout and give up without the risk of it being assigned the lock -+ * after giving up. The @req_prio field has a nice side-effect that it -+ * also makes it possible for a single read+cmpxchg in the common case of -+ * acquire and release. -+ */ -+struct cons_state { -+ union { -+ unsigned long atom; -+ struct { -+#ifdef CONFIG_64BIT -+ u32 seq; -+#endif -+ union { -+ u32 bits; -+ struct { -+ u32 locked : 1; -+ u32 unsafe : 1; -+ u32 thread : 1; -+ u32 cur_prio : 2; -+ u32 req_prio : 2; -+ u32 cpu : 18; -+ }; -+ }; -+ }; -+ }; -+}; -+ -+/** -+ * cons_prio - console writer priority for NOBKL consoles -+ * @CONS_PRIO_NONE: Unused -+ * @CONS_PRIO_NORMAL: Regular printk -+ * @CONS_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) -+ * @CONS_PRIO_PANIC: Panic output -+ * @CONS_PRIO_MAX: The number of priority levels -+ * -+ * Emergency output can carefully takeover the console even without consent -+ * of the owner, ideally only when @cons_state::unsafe is not set. Panic -+ * output can ignore the unsafe flag as a last resort. If panic output is -+ * active no takeover is possible until the panic output releases the -+ * console. -+ */ -+enum cons_prio { -+ CONS_PRIO_NONE = 0, -+ CONS_PRIO_NORMAL, -+ CONS_PRIO_EMERGENCY, -+ CONS_PRIO_PANIC, -+ CONS_PRIO_MAX, -+}; -+ -+struct console; -+struct printk_buffers; -+ -+/** -+ * struct cons_context - Context for console acquire/release -+ * @console: The associated console -+ * @state: The state at acquire time -+ * @old_state: The old state when try_acquire() failed for analysis -+ * by the caller -+ * @hov_state: The handover state for spin and cleanup -+ * @req_state: The request state for spin and cleanup -+ * @spinwait_max_us: Limit for spinwait acquire -+ * @oldseq: The sequence number at acquire() -+ * @newseq: The sequence number for progress -+ * @prio: Priority of the context -+ * @pbufs: Pointer to the text buffer for this context -+ * @dropped: Dropped counter for the current context -+ * @thread: The acquire is printk thread context -+ * @hostile: Hostile takeover requested. Cleared on normal -+ * acquire or friendly handover -+ * @spinwait: Spinwait on acquire if possible -+ * @backlog: Ringbuffer has pending records -+ */ -+struct cons_context { -+ struct console *console; -+ struct cons_state state; -+ struct cons_state old_state; -+ struct cons_state hov_state; -+ struct cons_state req_state; -+ u64 oldseq; -+ u64 newseq; -+ unsigned int spinwait_max_us; -+ enum cons_prio prio; -+ struct printk_buffers *pbufs; -+ unsigned long dropped; -+ unsigned int thread : 1; -+ unsigned int hostile : 1; -+ unsigned int spinwait : 1; -+ unsigned int backlog : 1; -+}; -+ -+/** -+ * struct cons_write_context - Context handed to the write callbacks -+ * @ctxt: The core console context -+ * @outbuf: Pointer to the text buffer for output -+ * @len: Length to write -+ * @unsafe: Invoked in unsafe state due to force takeover -+ */ -+struct cons_write_context { -+ struct cons_context __private ctxt; -+ char *outbuf; -+ unsigned int len; -+ bool unsafe; -+}; -+ -+struct cons_context_data; -+ - /** - * struct console - The console descriptor structure - * @name: The name of the console driver -@@ -184,6 +315,18 @@ enum cons_flags { - * @dropped: Number of unreported dropped ringbuffer records - * @data: Driver private data - * @node: hlist node for the console list -+ * -+ * @atomic_state: State array for NOBKL consoles; real and handover -+ * @atomic_seq: Sequence for record tracking (32bit only) -+ * @thread_pbufs: Pointer to thread private buffer -+ * @kthread: Pointer to kernel thread -+ * @rcuwait: RCU wait for the kernel thread -+ * @irq_work: IRQ work for thread wakeup -+ * @kthread_waiting: Indicator whether the kthread is waiting to be woken -+ * @write_atomic: Write callback for atomic context -+ * @write_thread: Write callback for printk threaded printing -+ * @port_lock: Callback to lock/unlock the port lock -+ * @pcpu_data: Pointer to percpu context data - */ - struct console { - char name[16]; -@@ -203,6 +346,23 @@ struct console { - unsigned long dropped; - void *data; - struct hlist_node node; -+ -+ /* NOBKL console specific members */ -+ atomic_long_t __private atomic_state[2]; -+#ifndef CONFIG_64BIT -+ atomic_t __private atomic_seq; -+#endif -+ struct printk_buffers *thread_pbufs; -+ struct task_struct *kthread; -+ struct rcuwait rcuwait; -+ struct irq_work irq_work; -+ atomic_t kthread_waiting; -+ -+ bool (*write_atomic)(struct console *con, struct cons_write_context *wctxt); -+ bool (*write_thread)(struct console *con, struct cons_write_context *wctxt); -+ void (*port_lock)(struct console *con, bool do_lock, unsigned long *flags); -+ -+ struct cons_context_data __percpu *pcpu_data; - }; - - #ifdef CONFIG_LOCKDEP -@@ -329,6 +489,24 @@ static inline bool console_is_registered(const struct console *con) - lockdep_assert_console_list_lock_held(); \ - hlist_for_each_entry(con, &console_list, node) - -+#ifdef CONFIG_PRINTK -+extern enum cons_prio cons_atomic_enter(enum cons_prio prio); -+extern void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio); -+extern bool console_can_proceed(struct cons_write_context *wctxt); -+extern bool console_enter_unsafe(struct cons_write_context *wctxt); -+extern bool console_exit_unsafe(struct cons_write_context *wctxt); -+extern bool console_try_acquire(struct cons_write_context *wctxt); -+extern bool console_release(struct cons_write_context *wctxt); -+#else -+static inline enum cons_prio cons_atomic_enter(enum cons_prio prio) { return CONS_PRIO_NONE; } -+static inline void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio) { } -+static inline bool console_can_proceed(struct cons_write_context *wctxt) { return false; } -+static inline bool console_enter_unsafe(struct cons_write_context *wctxt) { return false; } -+static inline bool console_exit_unsafe(struct cons_write_context *wctxt) { return false; } -+static inline bool console_try_acquire(struct cons_write_context *wctxt) { return false; } -+static inline bool console_release(struct cons_write_context *wctxt) { return false; } -+#endif -+ - extern int console_set_on_cmdline; - extern struct console *early_console; - -diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h -index d95ab85f96ba5..3dc3704a3cdbb 100644 ---- a/include/linux/entry-common.h -+++ b/include/linux/entry-common.h -@@ -57,9 +57,15 @@ - # define ARCH_EXIT_TO_USER_MODE_WORK (0) - #endif - -+#ifdef CONFIG_PREEMPT_LAZY -+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) -+#else -+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED) -+#endif -+ - #define EXIT_TO_USER_MODE_WORK \ - (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ -- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ -+ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ - ARCH_EXIT_TO_USER_MODE_WORK) - - /** -diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h -index a92bce40b04b3..bf82980f569df 100644 ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -605,6 +605,35 @@ extern void __raise_softirq_irqoff(unsigned int nr); - extern void raise_softirq_irqoff(unsigned int nr); - extern void raise_softirq(unsigned int nr); - -+#ifdef CONFIG_PREEMPT_RT -+DECLARE_PER_CPU(struct task_struct *, timersd); -+DECLARE_PER_CPU(unsigned long, pending_timer_softirq); -+ -+extern void raise_timer_softirq(void); -+extern void raise_hrtimer_softirq(void); -+ -+static inline unsigned int local_pending_timers(void) -+{ -+ return __this_cpu_read(pending_timer_softirq); -+} -+ -+#else -+static inline void raise_timer_softirq(void) -+{ -+ raise_softirq(TIMER_SOFTIRQ); -+} -+ -+static inline void raise_hrtimer_softirq(void) -+{ -+ raise_softirq_irqoff(HRTIMER_SOFTIRQ); -+} -+ -+static inline unsigned int local_pending_timers(void) -+{ -+ return local_softirq_pending(); -+} -+#endif -+ - DECLARE_PER_CPU(struct task_struct *, ksoftirqd); - - static inline struct task_struct *this_cpu_ksoftirqd(void) -diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h -index 09d4f17c8d3b6..7376c1df9c901 100644 ---- a/include/linux/io-mapping.h -+++ b/include/linux/io-mapping.h -@@ -69,7 +69,10 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, - - BUG_ON(offset >= mapping->size); - phys_addr = mapping->base + offset; -- preempt_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_disable(); -+ else -+ migrate_disable(); - pagefault_disable(); - return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); - } -@@ -79,7 +82,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) - { - kunmap_local_indexed((void __force *)vaddr); - pagefault_enable(); -- preempt_enable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_enable(); -+ else -+ migrate_enable(); - } - - static inline void __iomem * -@@ -162,7 +168,10 @@ static inline void __iomem * - io_mapping_map_atomic_wc(struct io_mapping *mapping, - unsigned long offset) - { -- preempt_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_disable(); -+ else -+ migrate_disable(); - pagefault_disable(); - return io_mapping_map_wc(mapping, offset, PAGE_SIZE); - } -@@ -172,7 +181,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) - { - io_mapping_unmap(vaddr); - pagefault_enable(); -- preempt_enable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_enable(); -+ else -+ migrate_enable(); - } - - static inline void __iomem * -diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h -index c35f04f636f15..0c014424b1312 100644 ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -3202,7 +3202,11 @@ struct softnet_data { - int defer_count; - int defer_ipi_scheduled; - struct sk_buff *defer_list; -+#ifndef CONFIG_PREEMPT_RT - call_single_data_t defer_csd; -+#else -+ struct work_struct defer_work; -+#endif - }; - - static inline void input_queue_head_incr(struct softnet_data *sd) -diff --git a/include/linux/preempt.h b/include/linux/preempt.h -index 0df425bf9bd75..05338f00a5907 100644 ---- a/include/linux/preempt.h -+++ b/include/linux/preempt.h -@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val); - #define preempt_count_inc() preempt_count_add(1) - #define preempt_count_dec() preempt_count_sub(1) - -+#ifdef CONFIG_PREEMPT_LAZY -+#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) -+#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) -+#define inc_preempt_lazy_count() add_preempt_lazy_count(1) -+#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) -+#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) -+#else -+#define add_preempt_lazy_count(val) do { } while (0) -+#define sub_preempt_lazy_count(val) do { } while (0) -+#define inc_preempt_lazy_count() do { } while (0) -+#define dec_preempt_lazy_count() do { } while (0) -+#define preempt_lazy_count() (0) -+#endif -+ - #ifdef CONFIG_PREEMPT_COUNT - - #define preempt_disable() \ -@@ -204,6 +218,12 @@ do { \ - barrier(); \ - } while (0) - -+#define preempt_lazy_disable() \ -+do { \ -+ inc_preempt_lazy_count(); \ -+ barrier(); \ -+} while (0) -+ - #define sched_preempt_enable_no_resched() \ - do { \ - barrier(); \ -@@ -218,15 +238,21 @@ do { \ - #define preempt_enable() \ - do { \ - barrier(); \ -- if (unlikely(preempt_count_dec_and_test())) \ -+ if (unlikely(preempt_count_dec_and_test())) { \ -+ instrumentation_begin(); \ - __preempt_schedule(); \ -+ instrumentation_end(); \ -+ } \ - } while (0) - - #define preempt_enable_notrace() \ - do { \ - barrier(); \ -- if (unlikely(__preempt_count_dec_and_test())) \ -+ if (unlikely(__preempt_count_dec_and_test())) { \ -+ instrumentation_begin(); \ - __preempt_schedule_notrace(); \ -+ instrumentation_end(); \ -+ } \ - } while (0) - - #define preempt_check_resched() \ -@@ -235,6 +261,18 @@ do { \ - __preempt_schedule(); \ - } while (0) - -+/* -+ * open code preempt_check_resched() because it is not exported to modules and -+ * used by local_unlock() or bpf_enable_instrumentation(). -+ */ -+#define preempt_lazy_enable() \ -+do { \ -+ dec_preempt_lazy_count(); \ -+ barrier(); \ -+ if (should_resched(0)) \ -+ __preempt_schedule(); \ -+} while (0) -+ - #else /* !CONFIG_PREEMPTION */ - #define preempt_enable() \ - do { \ -@@ -242,6 +280,12 @@ do { \ - preempt_count_dec(); \ - } while (0) - -+#define preempt_lazy_enable() \ -+do { \ -+ dec_preempt_lazy_count(); \ -+ barrier(); \ -+} while (0) -+ - #define preempt_enable_notrace() \ - do { \ - barrier(); \ -@@ -282,6 +326,9 @@ do { \ - #define preempt_enable_notrace() barrier() - #define preemptible() 0 - -+#define preempt_lazy_disable() barrier() -+#define preempt_lazy_enable() barrier() -+ - #endif /* CONFIG_PREEMPT_COUNT */ - - #ifdef MODULE -@@ -300,7 +347,7 @@ do { \ - } while (0) - #define preempt_fold_need_resched() \ - do { \ -- if (tif_need_resched()) \ -+ if (tif_need_resched_now()) \ - set_preempt_need_resched(); \ - } while (0) - -@@ -416,8 +463,15 @@ extern void migrate_enable(void); - - #else - --static inline void migrate_disable(void) { } --static inline void migrate_enable(void) { } -+static inline void migrate_disable(void) -+{ -+ preempt_lazy_disable(); -+} -+ -+static inline void migrate_enable(void) -+{ -+ preempt_lazy_enable(); -+} - - #endif /* CONFIG_SMP */ - -diff --git a/include/linux/printk.h b/include/linux/printk.h -index 8ef499ab3c1ed..b55662624ff87 100644 ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -139,6 +139,7 @@ void early_printk(const char *s, ...) { } - #endif - - struct dev_printk_info; -+struct cons_write_context; - - #ifdef CONFIG_PRINTK - asmlinkage __printf(4, 0) -@@ -157,15 +158,17 @@ int _printk(const char *fmt, ...); - */ - __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); - --extern void __printk_safe_enter(void); --extern void __printk_safe_exit(void); -+extern void __printk_safe_enter(unsigned long *flags); -+extern void __printk_safe_exit(unsigned long *flags); -+extern void __printk_deferred_enter(void); -+extern void __printk_deferred_exit(void); - /* - * The printk_deferred_enter/exit macros are available only as a hack for - * some code paths that need to defer all printk console printing. Interrupts - * must be disabled for the deferred duration. - */ --#define printk_deferred_enter __printk_safe_enter --#define printk_deferred_exit __printk_safe_exit -+#define printk_deferred_enter() __printk_deferred_enter() -+#define printk_deferred_exit() __printk_deferred_exit() - - /* - * Please don't use printk_ratelimit(), because it shares ratelimiting state -@@ -192,6 +195,8 @@ void show_regs_print_info(const char *log_lvl); - extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; - extern asmlinkage void dump_stack(void) __cold; - void printk_trigger_flush(void); -+extern void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, -+ bool skip_unsafe); - #else - static inline __printf(1, 0) - int vprintk(const char *s, va_list args) -@@ -271,6 +276,12 @@ static inline void dump_stack(void) - static inline void printk_trigger_flush(void) - { - } -+ -+static inline void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, -+ bool skip_unsafe) -+{ -+} -+ - #endif - - #ifdef CONFIG_SMP -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 63d242164b1a9..593d7e7c2e4c7 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -303,6 +303,11 @@ extern long schedule_timeout_idle(long timeout); - asmlinkage void schedule(void); - extern void schedule_preempt_disabled(void); - asmlinkage void preempt_schedule_irq(void); -+ -+extern void sched_submit_work(void); -+extern void sched_resume_work(void); -+extern void schedule_rtmutex(void); -+ - #ifdef CONFIG_PREEMPT_RT - extern void schedule_rtlock(void); - #endif -@@ -2059,6 +2064,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) - return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); - } - -+#ifdef CONFIG_PREEMPT_LAZY -+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) -+{ -+ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); -+} -+ -+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) -+{ -+ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); -+} -+ -+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) -+{ -+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); -+} -+ -+static inline int need_resched_lazy(void) -+{ -+ return test_thread_flag(TIF_NEED_RESCHED_LAZY); -+} -+ -+static inline int need_resched_now(void) -+{ -+ return test_thread_flag(TIF_NEED_RESCHED); -+} -+ -+#else -+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } -+static inline int need_resched_lazy(void) { return 0; } -+ -+static inline int need_resched_now(void) -+{ -+ return test_thread_flag(TIF_NEED_RESCHED); -+} -+ -+#endif -+ - /* - * cond_resched() and cond_resched_lock(): latency reduction via - * explicit rescheduling in places that are safe. The return -diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h -index 20099268fa257..b38ce53576000 100644 ---- a/include/linux/sched/signal.h -+++ b/include/linux/sched/signal.h -@@ -22,6 +22,7 @@ struct sighand_struct { - refcount_t count; - wait_queue_head_t signalfd_wqh; - struct k_sigaction action[_NSIG]; -+ struct sigqueue *sigqueue_cache; - }; - - /* -@@ -135,7 +136,7 @@ struct signal_struct { - #ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ -- int posix_timer_id; -+ unsigned int next_posix_timer_id; - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ -@@ -349,6 +350,7 @@ extern int send_sig(int, struct task_struct *, int); - extern int zap_other_threads(struct task_struct *p); - extern struct sigqueue *sigqueue_alloc(void); - extern void sigqueue_free(struct sigqueue *); -+extern void sigqueue_free_cached_entry(struct sigqueue *q); - extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); - extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h -index 19376bee96676..9055a22992edc 100644 ---- a/include/linux/serial_8250.h -+++ b/include/linux/serial_8250.h -@@ -125,6 +125,8 @@ struct uart_8250_port { - #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA - unsigned char msr_saved_flags; - -+ bool console_newline_needed; -+ - struct uart_8250_dma *dma; - const struct uart_8250_ops *ops; - -@@ -139,6 +141,9 @@ struct uart_8250_port { - /* Serial port overrun backoff */ - struct delayed_work overrun_backoff; - u32 overrun_backoff_time_ms; -+ -+ struct cons_write_context wctxt; -+ int cookie; - }; - - static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) -@@ -178,8 +183,10 @@ void serial8250_tx_chars(struct uart_8250_port *up); - unsigned int serial8250_modem_status(struct uart_8250_port *up); - void serial8250_init_port(struct uart_8250_port *up); - void serial8250_set_defaults(struct uart_8250_port *up); --void serial8250_console_write(struct uart_8250_port *up, const char *s, -- unsigned int count); -+bool serial8250_console_write_atomic(struct uart_8250_port *up, -+ struct cons_write_context *wctxt); -+bool serial8250_console_write_thread(struct uart_8250_port *up, -+ struct cons_write_context *wctxt); - int serial8250_console_setup(struct uart_port *port, char *options, bool probe); - int serial8250_console_exit(struct uart_port *port); - -diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h -index c02646884fa83..c1acb81b81948 100644 ---- a/include/linux/thread_info.h -+++ b/include/linux/thread_info.h -@@ -178,6 +178,26 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti - #endif /* !CONFIG_GENERIC_ENTRY */ - - #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H -+# ifdef CONFIG_PREEMPT_LAZY -+ -+static __always_inline bool tif_need_resched(void) -+{ -+ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY); -+} -+ -+static __always_inline bool tif_need_resched_now(void) -+{ -+ return arch_test_bit(TIF_NEED_RESCHED, -+ (unsigned long *)(¤t_thread_info()->flags)); -+} -+ -+static __always_inline bool tif_need_resched_lazy(void) -+{ -+ return arch_test_bit(TIF_NEED_RESCHED_LAZY, -+ (unsigned long *)(¤t_thread_info()->flags)); -+} -+ -+# else /* !CONFIG_PREEMPT_LAZY */ - - static __always_inline bool tif_need_resched(void) - { -@@ -185,7 +205,38 @@ static __always_inline bool tif_need_resched(void) - (unsigned long *)(¤t_thread_info()->flags)); - } - --#else -+static __always_inline bool tif_need_resched_now(void) -+{ -+ return tif_need_resched(); -+} -+ -+static __always_inline bool tif_need_resched_lazy(void) -+{ -+ return false; -+} -+ -+# endif /* CONFIG_PREEMPT_LAZY */ -+#else /* !_ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ -+# ifdef CONFIG_PREEMPT_LAZY -+ -+static __always_inline bool tif_need_resched(void) -+{ -+ return read_thread_flags() & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY); -+} -+ -+static __always_inline bool tif_need_resched_now(void) -+{ -+ return test_bit(TIF_NEED_RESCHED, -+ (unsigned long *)(¤t_thread_info()->flags)); -+} -+ -+static __always_inline bool tif_need_resched_lazy(void) -+{ -+ return test_bit(TIF_NEED_RESCHED_LAZY, -+ (unsigned long *)(¤t_thread_info()->flags)); -+} -+ -+# else /* !CONFIG_PREEMPT_LAZY */ - - static __always_inline bool tif_need_resched(void) - { -@@ -193,6 +244,17 @@ static __always_inline bool tif_need_resched(void) - (unsigned long *)(¤t_thread_info()->flags)); - } - -+static __always_inline bool tif_need_resched_now(void) -+{ -+ return tif_need_resched(); -+} -+ -+static __always_inline bool tif_need_resched_lazy(void) -+{ -+ return false; -+} -+ -+# endif /* !CONFIG_PREEMPT_LAZY */ - #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ - - #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES -diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h -index 0e373222a6df8..47017fcf5481f 100644 ---- a/include/linux/trace_events.h -+++ b/include/linux/trace_events.h -@@ -70,6 +70,7 @@ struct trace_entry { - unsigned char flags; - unsigned char preempt_count; - int pid; -+ unsigned char preempt_lazy_count; - }; - - #define TRACE_EVENT_TYPE_MAX \ -@@ -158,9 +159,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, - unsigned int trace_ctx) - { - entry->preempt_count = trace_ctx & 0xff; -+ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; - entry->pid = current->pid; - entry->type = type; -- entry->flags = trace_ctx >> 16; -+ entry->flags = trace_ctx >> 24; - } - - unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); -@@ -171,7 +173,13 @@ enum trace_flag_type { - TRACE_FLAG_NEED_RESCHED = 0x04, - TRACE_FLAG_HARDIRQ = 0x08, - TRACE_FLAG_SOFTIRQ = 0x10, -+#ifdef CONFIG_PREEMPT_LAZY -+ TRACE_FLAG_PREEMPT_RESCHED = 0x00, -+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20, -+#else -+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x00, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, -+#endif - TRACE_FLAG_NMI = 0x40, - TRACE_FLAG_BH_OFF = 0x80, - }; -diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h -index 3e8619c72f774..b4bc2828fa09f 100644 ---- a/include/trace/events/timer.h -+++ b/include/trace/events/timer.h -@@ -158,7 +158,11 @@ DEFINE_EVENT(timer_class, timer_cancel, - { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ - { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ - { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ -- { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) -+ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \ -+ { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \ -+ { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \ -+ { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \ -+ { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) - - /** - * hrtimer_init - called when the hrtimer is initialized -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index c2f1fd95a8214..260c08efeb486 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -1,5 +1,11 @@ - # SPDX-License-Identifier: GPL-2.0-only - -+config HAVE_PREEMPT_LAZY -+ bool -+ -+config PREEMPT_LAZY -+ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT -+ - config PREEMPT_NONE_BUILD - bool - -diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c -index 5c7e9ba7cd6b2..e9139dfc1f0a8 100644 ---- a/kernel/debug/kdb/kdb_io.c -+++ b/kernel/debug/kdb/kdb_io.c -@@ -576,6 +576,8 @@ static void kdb_msg_write(const char *msg, int msg_len) - continue; - if (c == dbg_io_ops->cons) - continue; -+ if (!c->write) -+ continue; - /* - * Set oops_in_progress to encourage the console drivers to - * disregard their internal spin locks: in the current calling -diff --git a/kernel/entry/common.c b/kernel/entry/common.c -index be61332c66b54..c6301e520d47b 100644 ---- a/kernel/entry/common.c -+++ b/kernel/entry/common.c -@@ -155,7 +155,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, - - local_irq_enable_exit_to_user(ti_work); - -- if (ti_work & _TIF_NEED_RESCHED) -+ if (ti_work & _TIF_NEED_RESCHED_MASK) - schedule(); - - if (ti_work & _TIF_UPROBE) -@@ -386,7 +386,7 @@ void raw_irqentry_exit_cond_resched(void) - rcu_irq_exit_check_preempt(); - if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) - WARN_ON_ONCE(!on_thread_stack()); -- if (need_resched()) -+ if (should_resched(0)) - preempt_schedule_irq(); - } - } -diff --git a/kernel/fork.c b/kernel/fork.c -index ea332319dffea..d78954a3834cc 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -1665,6 +1665,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) - RCU_INIT_POINTER(tsk->sighand, sig); - if (!sig) - return -ENOMEM; -+ sig->sigqueue_cache = NULL; - - refcount_set(&sig->count, 1); - spin_lock_irq(¤t->sighand->siglock); -@@ -1681,7 +1682,17 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) - void __cleanup_sighand(struct sighand_struct *sighand) - { - if (refcount_dec_and_test(&sighand->count)) { -+ struct sigqueue *sigqueue = NULL; -+ - signalfd_cleanup(sighand); -+ spin_lock_irq(&sighand->siglock); -+ if (sighand->sigqueue_cache) { -+ sigqueue = sighand->sigqueue_cache; -+ sighand->sigqueue_cache = NULL; -+ } -+ spin_unlock_irq(&sighand->siglock); -+ -+ sigqueue_free_cached_entry(sigqueue); - /* - * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it - * without an RCU grace period, see __lock_task_sighand(). -diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c -index 0408aab80941b..ee063d28f94f8 100644 ---- a/kernel/ksysfs.c -+++ b/kernel/ksysfs.c -@@ -167,6 +167,15 @@ KERNEL_ATTR_RO(vmcoreinfo); - - #endif /* CONFIG_CRASH_CORE */ - -+#if defined(CONFIG_PREEMPT_RT) -+static ssize_t realtime_show(struct kobject *kobj, -+ struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%d\n", 1); -+} -+KERNEL_ATTR_RO(realtime); -+#endif -+ - /* whether file capabilities are enabled */ - static ssize_t fscaps_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -@@ -259,6 +268,9 @@ static struct attribute * kernel_attrs[] = { - #ifndef CONFIG_TINY_RCU - &rcu_expedited_attr.attr, - &rcu_normal_attr.attr, -+#endif -+#ifdef CONFIG_PREEMPT_RT -+ &realtime_attr.attr, - #endif - NULL - }; -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 728f434de2bbf..479a9487edcc2 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, - return try_cmpxchg_acquire(&lock->owner, &old, new); - } - -+static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) -+{ -+ return rt_mutex_cmpxchg_acquire(lock, NULL, current); -+} -+ - static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, - struct task_struct *old, - struct task_struct *new) -@@ -297,6 +302,24 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, - - } - -+static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); -+ -+static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) -+{ -+ /* -+ * With debug enabled rt_mutex_cmpxchg trylock() will always fail, -+ * which will unconditionally invoke sched_submit/resume_work() in -+ * the slow path of __rt_mutex_lock() and __ww_rt_mutex_lock() even -+ * in the non-contended case. -+ * -+ * Avoid that by using rt_mutex_slow_trylock() which is covered by -+ * the debug code and can acquire a non-contended rtmutex. On -+ * success the callsite avoids the sched_submit/resume_work() -+ * dance. -+ */ -+ return rt_mutex_slowtrylock(lock); -+} -+ - static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, - struct task_struct *old, - struct task_struct *new) -@@ -1555,7 +1578,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, - raw_spin_unlock_irq(&lock->wait_lock); - - if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) -- schedule(); -+ schedule_rtmutex(); - - raw_spin_lock_irq(&lock->wait_lock); - set_current_state(state); -@@ -1584,7 +1607,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, - WARN(1, "rtmutex deadlock detected\n"); - while (1) { - set_current_state(TASK_INTERRUPTIBLE); -- schedule(); -+ schedule_rtmutex(); - } - } - -@@ -1679,6 +1702,12 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, - unsigned long flags; - int ret; - -+ /* -+ * The task is about to sleep. Invoke sched_submit_work() before -+ * blocking as that might take locks and corrupt tsk::pi_blocked_on. -+ */ -+ sched_submit_work(); -+ - /* - * Technically we could use raw_spin_[un]lock_irq() here, but this can - * be called in early boot if the cmpxchg() fast path is disabled -@@ -1691,13 +1720,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, - ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - -+ sched_resume_work(); - return ret; - } - - static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, - unsigned int state) - { -- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) -+ lockdep_assert(!current->pi_blocked_on); -+ -+ if (likely(rt_mutex_try_acquire(lock))) - return 0; - - return rt_mutex_slowlock(lock, NULL, state); -diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c -index c201aadb93017..5be92ca5afabc 100644 ---- a/kernel/locking/rwbase_rt.c -+++ b/kernel/locking/rwbase_rt.c -@@ -72,15 +72,6 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, - int ret; - - raw_spin_lock_irq(&rtm->wait_lock); -- /* -- * Allow readers, as long as the writer has not completely -- * acquired the semaphore for write. -- */ -- if (atomic_read(&rwb->readers) != WRITER_BIAS) { -- atomic_inc(&rwb->readers); -- raw_spin_unlock_irq(&rtm->wait_lock); -- return 0; -- } - - /* - * Call into the slow lock path with the rtmutex->wait_lock -@@ -140,10 +131,23 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, - static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, - unsigned int state) - { -+ int ret; -+ -+ lockdep_assert(!current->pi_blocked_on); -+ - if (rwbase_read_trylock(rwb)) - return 0; - -- return __rwbase_read_lock(rwb, state); -+ /* -+ * The task is about to sleep. For rwsems this submits work as that -+ * might take locks and corrupt tsk::pi_blocked_on. Must be -+ * explicit here because __rwbase_read_lock() cannot invoke -+ * rt_mutex_slowlock(). NOP for rwlocks. -+ */ -+ rwbase_sched_submit_work(); -+ ret = __rwbase_read_lock(rwb, state); -+ rwbase_sched_resume_work(); -+ return ret; - } - - static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, -@@ -239,7 +243,10 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, - struct rt_mutex_base *rtm = &rwb->rtmutex; - unsigned long flags; - -- /* Take the rtmutex as a first step */ -+ /* -+ * Take the rtmutex as a first step. For rwsem this will also -+ * invoke sched_submit_work() to flush IO and workers. -+ */ - if (rwbase_rtmutex_lock_state(rtm, state)) - return -EINTR; - -diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index 9eabd585ce7af..e304db9ebfd95 100644 ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -1415,6 +1415,12 @@ static inline void __downgrade_write(struct rw_semaphore *sem) - #define rwbase_rtmutex_lock_state(rtm, state) \ - __rt_mutex_lock(rtm, state) - -+#define rwbase_sched_submit_work() \ -+ sched_submit_work() -+ -+#define rwbase_sched_resume_work() \ -+ sched_resume_work() -+ - #define rwbase_rtmutex_slowlock_locked(rtm, state) \ - __rt_mutex_slowlock_locked(rtm, NULL, state) - -diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c -index 48a19ed8486d8..9fe282cd145d9 100644 ---- a/kernel/locking/spinlock_rt.c -+++ b/kernel/locking/spinlock_rt.c -@@ -37,6 +37,8 @@ - - static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) - { -+ lockdep_assert(!current->pi_blocked_on); -+ - if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) - rtlock_slowlock(rtm); - } -@@ -159,6 +161,9 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state) - return 0; - } - -+static __always_inline void rwbase_sched_submit_work(void) { } -+static __always_inline void rwbase_sched_resume_work(void) { } -+ - static __always_inline int - rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state) - { -diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c -index d1473c624105c..c7196de838edc 100644 ---- a/kernel/locking/ww_rt_mutex.c -+++ b/kernel/locking/ww_rt_mutex.c -@@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, - } - mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip); - -- if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) { -+ if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) { - if (ww_ctx) - ww_mutex_set_context_fastpath(lock, ww_ctx); - return 0; -diff --git a/kernel/panic.c b/kernel/panic.c -index 5cfea8302d23a..190f7f2bc6cfd 100644 ---- a/kernel/panic.c -+++ b/kernel/panic.c -@@ -275,6 +275,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec) - */ - void panic(const char *fmt, ...) - { -+ enum cons_prio prev_prio; - static char buf[1024]; - va_list args; - long i, i_next = 0, len; -@@ -322,6 +323,8 @@ void panic(const char *fmt, ...) - if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) - panic_smp_self_stop(); - -+ prev_prio = cons_atomic_enter(CONS_PRIO_PANIC); -+ - console_verbose(); - bust_spinlocks(1); - va_start(args, fmt); -@@ -382,6 +385,8 @@ void panic(const char *fmt, ...) - if (_crash_kexec_post_notifiers) - __crash_kexec(NULL); - -+ cons_atomic_flush(NULL, true); -+ - console_unblank(); - - /* -@@ -406,6 +411,7 @@ void panic(const char *fmt, ...) - * We can't use the "normal" timers since we just panicked. - */ - pr_emerg("Rebooting in %d seconds..\n", panic_timeout); -+ cons_atomic_flush(NULL, true); - - for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { - touch_nmi_watchdog(); -@@ -424,6 +430,7 @@ void panic(const char *fmt, ...) - */ - if (panic_reboot_mode != REBOOT_UNDEFINED) - reboot_mode = panic_reboot_mode; -+ cons_atomic_flush(NULL, true); - emergency_restart(); - } - #ifdef __sparc__ -@@ -436,12 +443,16 @@ void panic(const char *fmt, ...) - } - #endif - #if defined(CONFIG_S390) -+ cons_atomic_flush(NULL, true); - disabled_wait(); - #endif - pr_emerg("---[ end Kernel panic - not syncing: %s ]---\n", buf); - - /* Do not scroll important messages printed above */ - suppress_printk = 1; -+ -+ cons_atomic_exit(CONS_PRIO_PANIC, prev_prio); -+ - local_irq_enable(); - for (i = 0; ; i += PANIC_TIMER_STEP) { - touch_softlockup_watchdog(); -@@ -652,6 +663,10 @@ struct warn_args { - void __warn(const char *file, int line, void *caller, unsigned taint, - struct pt_regs *regs, struct warn_args *args) - { -+ enum cons_prio prev_prio; -+ -+ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY); -+ - disable_trace_on_warning(); - - if (file) -@@ -682,6 +697,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, - - /* Just a warning, don't kill lockdep. */ - add_taint(taint, LOCKDEP_STILL_OK); -+ -+ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio); - } - - #ifndef __WARN_FLAGS -diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile -index f5b388e810b9f..b36683bd2f821 100644 ---- a/kernel/printk/Makefile -+++ b/kernel/printk/Makefile -@@ -1,6 +1,6 @@ - # SPDX-License-Identifier: GPL-2.0-only - obj-y = printk.o --obj-$(CONFIG_PRINTK) += printk_safe.o -+obj-$(CONFIG_PRINTK) += printk_safe.o printk_nobkl.o - obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o - obj-$(CONFIG_PRINTK_INDEX) += index.o - -diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h -index 2a17704136f1d..6631fd70542f9 100644 ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -3,6 +3,8 @@ - * internal.h - printk internal definitions - */ - #include -+#include -+#include "printk_ringbuffer.h" - - #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) - void __init printk_sysctl_init(void); -@@ -12,8 +14,13 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, - #define printk_sysctl_init() do { } while (0) - #endif - --#ifdef CONFIG_PRINTK -+#define con_printk(lvl, con, fmt, ...) \ -+ printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ -+ (con->flags & CON_NO_BKL) ? "" : "legacy ", \ -+ (con->flags & CON_BOOT) ? "boot" : "", \ -+ con->name, con->index, ##__VA_ARGS__) - -+#ifdef CONFIG_PRINTK - #ifdef CONFIG_PRINTK_CALLER - #define PRINTK_PREFIX_MAX 48 - #else -@@ -35,6 +42,12 @@ enum printk_info_flags { - LOG_CONT = 8, /* text is a fragment of a continuation line */ - }; - -+extern struct printk_ringbuffer *prb; -+extern bool have_bkl_console; -+extern bool printk_threads_enabled; -+ -+extern bool have_boot_console; -+ - __printf(4, 0) - int vprintk_store(int facility, int level, - const struct dev_printk_info *dev_info, -@@ -45,28 +58,98 @@ __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); - - bool printk_percpu_data_ready(void); - -+/* -+ * The printk_safe_enter()/_exit() macros mark code blocks using locks that -+ * would lead to deadlock if an interrupting context were to call printk() -+ * while the interrupted context was within such code blocks. -+ * -+ * When a CPU is in such a code block, an interrupting context calling -+ * printk() will only log the new message to the lockless ringbuffer and -+ * then trigger console printing using irqwork. -+ */ -+ - #define printk_safe_enter_irqsave(flags) \ - do { \ -- local_irq_save(flags); \ -- __printk_safe_enter(); \ -+ __printk_safe_enter(&flags); \ - } while (0) - - #define printk_safe_exit_irqrestore(flags) \ - do { \ -- __printk_safe_exit(); \ -- local_irq_restore(flags); \ -+ __printk_safe_exit(&flags); \ - } while (0) - - void defer_console_output(void); - - u16 printk_parse_prefix(const char *text, int *level, - enum printk_info_flags *flags); -+ -+u64 cons_read_seq(struct console *con); -+void cons_nobkl_cleanup(struct console *con); -+bool cons_nobkl_init(struct console *con); -+bool cons_alloc_percpu_data(struct console *con); -+void cons_kthread_create(struct console *con); -+void cons_wake_threads(void); -+void cons_force_seq(struct console *con, u64 seq); -+void console_bkl_kthread_create(void); -+ -+/* -+ * Check if the given console is currently capable and allowed to print -+ * records. If the caller only works with certain types of consoles, the -+ * caller is responsible for checking the console type before calling -+ * this function. -+ */ -+static inline bool console_is_usable(struct console *con, short flags) -+{ -+ if (!(flags & CON_ENABLED)) -+ return false; -+ -+ if ((flags & CON_SUSPENDED)) -+ return false; -+ -+ /* -+ * The usability of a console varies depending on whether -+ * it is a NOBKL console or not. -+ */ -+ -+ if (flags & CON_NO_BKL) { -+ if (have_boot_console) -+ return false; -+ -+ } else { -+ if (!con->write) -+ return false; -+ /* -+ * Console drivers may assume that per-cpu resources have -+ * been allocated. So unless they're explicitly marked as -+ * being able to cope (CON_ANYTIME) don't call them until -+ * this CPU is officially up. -+ */ -+ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) -+ return false; -+ } -+ -+ return true; -+} -+ -+/** -+ * cons_kthread_wake - Wake up a printk thread -+ * @con: Console to operate on -+ */ -+static inline void cons_kthread_wake(struct console *con) -+{ -+ rcuwait_wake_up(&con->rcuwait); -+} -+ - #else - - #define PRINTK_PREFIX_MAX 0 - #define PRINTK_MESSAGE_MAX 0 - #define PRINTKRB_RECORD_MAX 0 - -+static inline void cons_kthread_wake(struct console *con) { } -+static inline void cons_kthread_create(struct console *con) { } -+#define printk_threads_enabled (false) -+ - /* - * In !PRINTK builds we still export console_sem - * semaphore and some of console functions (console_unlock()/etc.), so -@@ -76,8 +159,15 @@ u16 printk_parse_prefix(const char *text, int *level, - #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) - - static inline bool printk_percpu_data_ready(void) { return false; } -+static inline bool cons_nobkl_init(struct console *con) { return true; } -+static inline void cons_nobkl_cleanup(struct console *con) { } -+static inline bool console_is_usable(struct console *con, short flags) { return false; } -+static inline void cons_force_seq(struct console *con, u64 seq) { } -+ - #endif /* CONFIG_PRINTK */ - -+extern bool have_boot_console; -+ - /** - * struct printk_buffers - Buffers to read/format/output printk messages. - * @outbuf: After formatting, contains text to output. -@@ -103,3 +193,28 @@ struct printk_message { - u64 seq; - unsigned long dropped; - }; -+ -+/** -+ * struct cons_context_data - console context data -+ * @wctxt: Write context per priority level -+ * @pbufs: Buffer for storing the text -+ * -+ * Used for early boot and for per CPU data. -+ * -+ * The write contexts are allocated to avoid having them on stack, e.g. in -+ * warn() or panic(). -+ */ -+struct cons_context_data { -+ struct cons_write_context wctxt[CONS_PRIO_MAX]; -+ struct printk_buffers pbufs; -+}; -+ -+bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -+ bool is_extended, bool may_supress); -+ -+#ifdef CONFIG_PRINTK -+ -+void console_prepend_dropped(struct printk_message *pmsg, -+ unsigned long dropped); -+ -+#endif -diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c -index fd0c9f913940a..e2466366d4f84 100644 ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -442,6 +442,21 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; - /* syslog_lock protects syslog_* variables and write access to clear_seq. */ - static DEFINE_MUTEX(syslog_lock); - -+/* -+ * Specifies if a BKL console was ever registered. Used to determine if the -+ * console lock/unlock dance is needed for console printing. -+ */ -+bool have_bkl_console; -+ -+/* -+ * Specifies if a boot console is registered. Used to determine if NOBKL -+ * consoles may be used since NOBKL consoles cannot synchronize with boot -+ * consoles. -+ */ -+bool have_boot_console; -+ -+static int unregister_console_locked(struct console *console); -+ - #ifdef CONFIG_PRINTK - DECLARE_WAIT_QUEUE_HEAD(log_wait); - /* All 3 protected by @syslog_lock. */ -@@ -492,7 +507,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, - - static struct printk_ringbuffer printk_rb_dynamic; - --static struct printk_ringbuffer *prb = &printk_rb_static; -+struct printk_ringbuffer *prb = &printk_rb_static; - - /* - * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before -@@ -696,9 +711,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, - return len; - } - --static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -- bool is_extended, bool may_supress); -- - /* /dev/kmsg - userspace message inject/listen interface */ - struct devkmsg_user { - atomic64_t seq; -@@ -1100,7 +1112,19 @@ static inline void log_buf_add_cpu(void) {} - - static void __init set_percpu_data_ready(void) - { -+ struct hlist_node *tmp; -+ struct console *con; -+ -+ console_list_lock(); -+ -+ hlist_for_each_entry_safe(con, tmp, &console_list, node) { -+ if (!cons_alloc_percpu_data(con)) -+ unregister_console_locked(con); -+ } -+ - __printk_percpu_data_ready = true; -+ -+ console_list_unlock(); - } - - static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, -@@ -2276,6 +2300,7 @@ asmlinkage int vprintk_emit(int facility, int level, - const struct dev_printk_info *dev_info, - const char *fmt, va_list args) - { -+ struct cons_write_context wctxt = { }; - int printed_len; - bool in_sched = false; - -@@ -2296,16 +2321,25 @@ asmlinkage int vprintk_emit(int facility, int level, - - printed_len = vprintk_store(facility, level, dev_info, fmt, args); - -+ /* -+ * The caller may be holding system-critical or -+ * timing-sensitive locks. Disable preemption during -+ * printing of all remaining records to all consoles so that -+ * this context can return as soon as possible. Hopefully -+ * another printk() caller will take over the printing. -+ */ -+ preempt_disable(); -+ -+ /* -+ * Flush the non-BKL consoles. This only leads to direct atomic -+ * printing for non-BKL consoles that do not have a printer -+ * thread available. Otherwise the printer thread will perform -+ * the printing. -+ */ -+ cons_atomic_flush(&wctxt, true); -+ - /* If called from the scheduler, we can not call up(). */ -- if (!in_sched) { -- /* -- * The caller may be holding system-critical or -- * timing-sensitive locks. Disable preemption during -- * printing of all remaining records to all consoles so that -- * this context can return as soon as possible. Hopefully -- * another printk() caller will take over the printing. -- */ -- preempt_disable(); -+ if (!in_sched && have_bkl_console && !IS_ENABLED(CONFIG_PREEMPT_RT)) { - /* - * Try to acquire and then immediately release the console - * semaphore. The release will print out buffers. With the -@@ -2314,10 +2348,15 @@ asmlinkage int vprintk_emit(int facility, int level, - */ - if (console_trylock_spinning()) - console_unlock(); -- preempt_enable(); - } - -- wake_up_klogd(); -+ preempt_enable(); -+ -+ cons_wake_threads(); -+ if (in_sched) -+ defer_console_output(); -+ else -+ wake_up_klogd(); - return printed_len; - } - EXPORT_SYMBOL(vprintk_emit); -@@ -2556,10 +2595,26 @@ MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to hig - */ - void suspend_console(void) - { -+ struct console *con; -+ - if (!console_suspend_enabled) - return; - pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); - pr_flush(1000, true); -+ -+ console_list_lock(); -+ for_each_console(con) -+ console_srcu_write_flags(con, con->flags | CON_SUSPENDED); -+ console_list_unlock(); -+ -+ /* -+ * Ensure that all SRCU list walks have completed. All printing -+ * contexts must be able to see that they are suspended so that it -+ * is guaranteed that all printing has stopped when this function -+ * completes. -+ */ -+ synchronize_srcu(&console_srcu); -+ - console_lock(); - console_suspended = 1; - up_console_sem(); -@@ -2567,11 +2622,39 @@ void suspend_console(void) - - void resume_console(void) - { -+ struct console *con; -+ short flags; -+ int cookie; -+ - if (!console_suspend_enabled) - return; - down_console_sem(); - console_suspended = 0; - console_unlock(); -+ -+ console_list_lock(); -+ for_each_console(con) -+ console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED); -+ console_list_unlock(); -+ -+ /* -+ * Ensure that all SRCU list walks have completed. All printing -+ * contexts must be able to see they are no longer suspended so -+ * that they are guaranteed to wake up and resume printing. -+ */ -+ synchronize_srcu(&console_srcu); -+ -+ cookie = console_srcu_read_lock(); -+ for_each_console_srcu(con) { -+ flags = console_srcu_read_flags(con); -+ if (flags & CON_NO_BKL) -+ cons_kthread_wake(con); -+ } -+ console_srcu_read_unlock(cookie); -+ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && have_bkl_console) -+ wake_up_interruptible(&log_wait); -+ - pr_flush(1000, true); - } - -@@ -2586,7 +2669,7 @@ void resume_console(void) - */ - static int console_cpu_notify(unsigned int cpu) - { -- if (!cpuhp_tasks_frozen) { -+ if (!cpuhp_tasks_frozen && have_bkl_console) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); -@@ -2661,33 +2744,6 @@ static bool abandon_console_lock_in_panic(void) - return atomic_read(&panic_cpu) != raw_smp_processor_id(); - } - --/* -- * Check if the given console is currently capable and allowed to print -- * records. -- * -- * Requires the console_srcu_read_lock. -- */ --static inline bool console_is_usable(struct console *con) --{ -- short flags = console_srcu_read_flags(con); -- -- if (!(flags & CON_ENABLED)) -- return false; -- -- if (!con->write) -- return false; -- -- /* -- * Console drivers may assume that per-cpu resources have been -- * allocated. So unless they're explicitly marked as being able to -- * cope (CON_ANYTIME) don't call them until this CPU is officially up. -- */ -- if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME)) -- return false; -- -- return true; --} -- - static void __console_unlock(void) - { - console_locked = 0; -@@ -2709,7 +2765,7 @@ static void __console_unlock(void) - * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. - */ - #ifdef CONFIG_PRINTK --static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) -+void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) - { - struct printk_buffers *pbufs = pmsg->pbufs; - const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); -@@ -2741,7 +2797,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d - pmsg->outbuf_len += len; - } - #else --#define console_prepend_dropped(pmsg, dropped) -+static inline void console_prepend_dropped(struct printk_message *pmsg, -+ unsigned long dropped) { } - #endif /* CONFIG_PRINTK */ - - /* -@@ -2763,8 +2820,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d - * of @pmsg are valid. (See the documentation of struct printk_message - * for information about the @pmsg fields.) - */ --static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -- bool is_extended, bool may_suppress) -+bool printk_get_next_message(struct printk_message *pmsg, u64 seq, -+ bool is_extended, bool may_suppress) - { - static int panic_console_dropped; - -@@ -2933,9 +2990,14 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove - - cookie = console_srcu_read_lock(); - for_each_console_srcu(con) { -+ short flags = console_srcu_read_flags(con); - bool progress; - -- if (!console_is_usable(con)) -+ /* console_flush_all() is only for legacy consoles. */ -+ if (flags & CON_NO_BKL) -+ continue; -+ -+ if (!console_is_usable(con, flags)) - continue; - any_usable = true; - -@@ -2973,30 +3035,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove - return false; - } - --/** -- * console_unlock - unblock the console subsystem from printing -- * -- * Releases the console_lock which the caller holds to block printing of -- * the console subsystem. -- * -- * While the console_lock was held, console output may have been buffered -- * by printk(). If this is the case, console_unlock(); emits -- * the output prior to releasing the lock. -- * -- * console_unlock(); may be called from any context. -- */ --void console_unlock(void) -+static u64 console_flush_and_unlock(void) - { - bool do_cond_resched; - bool handover; - bool flushed; - u64 next_seq; - -- if (console_suspended) { -- up_console_sem(); -- return; -- } -- - /* - * Console drivers are called with interrupts disabled, so - * @console_may_schedule should be cleared before; however, we may -@@ -3033,6 +3078,39 @@ void console_unlock(void) - * fails, another context is already handling the printing. - */ - } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); -+ -+ return next_seq; -+} -+ -+/** -+ * console_unlock - unblock the console subsystem from printing -+ * -+ * Releases the console_lock which the caller holds to block printing of -+ * the console subsystem. -+ * -+ * While the console_lock was held, console output may have been buffered -+ * by printk(). If this is the case, console_unlock(); emits -+ * the output prior to releasing the lock. -+ * -+ * console_unlock(); may be called from any context. -+ */ -+void console_unlock(void) -+{ -+ if (console_suspended) { -+ up_console_sem(); -+ return; -+ } -+ -+ /* -+ * PREEMPT_RT relies on kthread and atomic consoles for printing. -+ * It never attempts to print from console_unlock(). -+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ __console_unlock(); -+ return; -+ } -+ -+ console_flush_and_unlock(); - } - EXPORT_SYMBOL(console_unlock); - -@@ -3057,6 +3135,9 @@ void console_unblank(void) - struct console *c; - int cookie; - -+ if (!have_bkl_console) -+ return; -+ - /* - * Stop console printing because the unblank() callback may - * assume the console is not within its write() callback. -@@ -3065,6 +3146,10 @@ void console_unblank(void) - * In that case, attempt a trylock as best-effort. - */ - if (oops_in_progress) { -+ /* Semaphores are not NMI-safe. */ -+ if (in_nmi()) -+ return; -+ - if (down_trylock_console_sem() != 0) - return; - } else -@@ -3094,23 +3179,46 @@ void console_unblank(void) - */ - void console_flush_on_panic(enum con_flush_mode mode) - { -+ struct console *c; -+ short flags; -+ int cookie; -+ u64 seq; -+ -+ seq = prb_first_valid_seq(prb); -+ -+ /* -+ * Safely flush the atomic consoles before trying to flush any -+ * BKL/legacy consoles. -+ */ -+ if (mode == CONSOLE_REPLAY_ALL) { -+ cookie = console_srcu_read_lock(); -+ for_each_console_srcu(c) { -+ flags = console_srcu_read_flags(c); -+ if (flags & CON_NO_BKL) -+ cons_force_seq(c, seq); -+ } -+ console_srcu_read_unlock(cookie); -+ } -+ cons_atomic_flush(NULL, true); -+ -+ if (!have_bkl_console) -+ return; -+ - /* - * If someone else is holding the console lock, trylock will fail - * and may_schedule may be set. Ignore and proceed to unlock so - * that messages are flushed out. As this can be called from any - * context and we don't want to get preempted while flushing, - * ensure may_schedule is cleared. -+ * -+ * Since semaphores are not NMI-safe, the console lock must be -+ * ignored if the panic is in NMI context. - */ -- console_trylock(); -+ if (!in_nmi()) -+ console_trylock(); - console_may_schedule = 0; - - if (mode == CONSOLE_REPLAY_ALL) { -- struct console *c; -- int cookie; -- u64 seq; -- -- seq = prb_first_valid_seq(prb); -- - cookie = console_srcu_read_lock(); - for_each_console_srcu(c) { - /* -@@ -3122,7 +3230,8 @@ void console_flush_on_panic(enum con_flush_mode mode) - } - console_srcu_read_unlock(cookie); - } -- console_unlock(); -+ if (!in_nmi()) -+ console_unlock(); - } - - /* -@@ -3179,13 +3288,118 @@ EXPORT_SYMBOL(console_stop); - - void console_start(struct console *console) - { -+ short flags; -+ - console_list_lock(); - console_srcu_write_flags(console, console->flags | CON_ENABLED); -+ flags = console->flags; - console_list_unlock(); -+ -+ /* -+ * Ensure that all SRCU list walks have completed. The related -+ * printing context must be able to see it is enabled so that -+ * it is guaranteed to wake up and resume printing. -+ */ -+ synchronize_srcu(&console_srcu); -+ -+ if (flags & CON_NO_BKL) -+ cons_kthread_wake(console); -+ else if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ wake_up_interruptible(&log_wait); -+ - __pr_flush(console, 1000, true); - } - EXPORT_SYMBOL(console_start); - -+static struct task_struct *console_bkl_kthread; -+ -+static bool printer_should_wake(u64 seq) -+{ -+ bool available = false; -+ struct console *con; -+ int cookie; -+ -+ if (kthread_should_stop()) -+ return true; -+ -+ cookie = console_srcu_read_lock(); -+ for_each_console_srcu(con) { -+ short flags = console_srcu_read_flags(con); -+ -+ if (flags & CON_NO_BKL) -+ continue; -+ if (!console_is_usable(con, flags)) -+ continue; -+ /* -+ * It is safe to read @seq because only this -+ * thread context updates @seq. -+ */ -+ if (prb_read_valid(prb, con->seq, NULL)) { -+ available = true; -+ break; -+ } -+ } -+ console_srcu_read_unlock(cookie); -+ -+ return available; -+} -+ -+static int console_bkl_kthread_func(void *unused) -+{ -+ u64 seq = 0; -+ int error; -+ -+ for (;;) { -+ error = wait_event_interruptible(log_wait, printer_should_wake(seq)); -+ -+ if (kthread_should_stop()) -+ break; -+ -+ if (error) -+ continue; -+ -+ console_lock(); -+ if (console_suspended) -+ up_console_sem(); -+ else -+ seq = console_flush_and_unlock(); -+ } -+ return 0; -+} -+ -+void console_bkl_kthread_create(void) -+{ -+ struct task_struct *kt; -+ struct console *c; -+ -+ lockdep_assert_held(&console_mutex); -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ return; -+ -+ if (!printk_threads_enabled || console_bkl_kthread) -+ return; -+ -+ for_each_console(c) { -+ if (c->flags & CON_BOOT) -+ return; -+ } -+ -+ kt = kthread_run(console_bkl_kthread_func, NULL, "pr/bkl"); -+ if (IS_ERR(kt)) { -+ pr_err("unable to start BKL printing thread\n"); -+ return; -+ } -+ -+ console_bkl_kthread = kt; -+ -+ /* -+ * It is important that console printing threads are scheduled -+ * shortly after a printk call and with generous runtime budgets. -+ */ -+ sched_set_normal(console_bkl_kthread, -20); -+} -+ - static int __read_mostly keep_bootcon; - - static int __init keep_bootcon_setup(char *str) -@@ -3269,11 +3483,6 @@ static void try_enable_default_console(struct console *newcon) - newcon->flags |= CON_CONSDEV; - } - --#define con_printk(lvl, con, fmt, ...) \ -- printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ -- (con->flags & CON_BOOT) ? "boot" : "", \ -- con->name, con->index, ##__VA_ARGS__) -- - static void console_init_seq(struct console *newcon, bool bootcon_registered) - { - struct console *con; -@@ -3338,8 +3547,6 @@ static void console_init_seq(struct console *newcon, bool bootcon_registered) - #define console_first() \ - hlist_entry(console_list.first, struct console, node) - --static int unregister_console_locked(struct console *console); -- - /* - * The console driver calls this routine during kernel initialization - * to register the console printing procedure with printk() and to -@@ -3431,6 +3638,16 @@ void register_console(struct console *newcon) - newcon->dropped = 0; - console_init_seq(newcon, bootcon_registered); - -+ if (!(newcon->flags & CON_NO_BKL)) { -+ have_bkl_console = true; -+ console_bkl_kthread_create(); -+ } else if (!cons_nobkl_init(newcon)) { -+ goto unlock; -+ } -+ -+ if (newcon->flags & CON_BOOT) -+ have_boot_console = true; -+ - /* - * Put this console in the list - keep the - * preferred driver at the head of the list. -@@ -3474,6 +3691,9 @@ void register_console(struct console *newcon) - if (con->flags & CON_BOOT) - unregister_console_locked(con); - } -+ -+ /* All boot consoles have been unregistered. */ -+ have_boot_console = false; - } - unlock: - console_list_unlock(); -@@ -3483,11 +3703,13 @@ EXPORT_SYMBOL(register_console); - /* Must be called under console_list_lock(). */ - static int unregister_console_locked(struct console *console) - { -+ struct console *c; -+ bool is_boot_con; - int res; - - lockdep_assert_console_list_lock_held(); - -- con_printk(KERN_INFO, console, "disabled\n"); -+ is_boot_con = console->flags & CON_BOOT; - - res = _braille_unregister_console(console); - if (res < 0) -@@ -3495,12 +3717,13 @@ static int unregister_console_locked(struct console *console) - if (res > 0) - return 0; - -- /* Disable it unconditionally */ -- console_srcu_write_flags(console, console->flags & ~CON_ENABLED); -- - if (!console_is_registered_locked(console)) - return -ENODEV; - -+ console_srcu_write_flags(console, console->flags & ~CON_ENABLED); -+ -+ con_printk(KERN_INFO, console, "disabled\n"); -+ - hlist_del_init_rcu(&console->node); - - /* -@@ -3522,11 +3745,23 @@ static int unregister_console_locked(struct console *console) - */ - synchronize_srcu(&console_srcu); - -+ if (console->flags & CON_NO_BKL) -+ cons_nobkl_cleanup(console); -+ - console_sysfs_notify(); - - if (console->exit) - res = console->exit(console); - -+ /* -+ * Each time a boot console unregisters, try to start up the printing -+ * threads. They will only start if this was the last boot console. -+ */ -+ if (is_boot_con) { -+ for_each_console(c) -+ cons_kthread_create(c); -+ } -+ - return res; - } - -@@ -3688,31 +3923,36 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre - - /* - * Hold the console_lock to guarantee safe access to -- * console->seq and to prevent changes to @console_suspended -- * until all consoles have been processed. -+ * console->seq. - */ - console_lock(); - - cookie = console_srcu_read_lock(); - for_each_console_srcu(c) { -+ short flags; -+ - if (con && con != c) - continue; -- if (!console_is_usable(c)) -+ -+ flags = console_srcu_read_flags(c); -+ -+ if (!console_is_usable(c, flags)) - continue; -+ -+ /* -+ * Since the console is locked, use this opportunity -+ * to update console->seq for NOBKL consoles. -+ */ -+ if (flags & CON_NO_BKL) -+ c->seq = cons_read_seq(c); -+ - printk_seq = c->seq; - if (printk_seq < seq) - diff += seq - printk_seq; - } - console_srcu_read_unlock(cookie); - -- /* -- * If consoles are suspended, it cannot be expected that they -- * make forward progress, so timeout immediately. @diff is -- * still used to return a valid flush status. -- */ -- if (console_suspended) -- remaining = 0; -- else if (diff != last_diff && reset_on_progress) -+ if (diff != last_diff && reset_on_progress) - remaining = timeout_ms; - - console_unlock(); -@@ -3770,9 +4010,17 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) - int pending = this_cpu_xchg(printk_pending, 0); - - if (pending & PRINTK_PENDING_OUTPUT) { -- /* If trylock fails, someone else is doing the printing */ -- if (console_trylock()) -- console_unlock(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ /* The BKL thread waits on @log_wait. */ -+ pending |= PRINTK_PENDING_WAKEUP; -+ } else { -+ /* -+ * If trylock fails, some other context -+ * will do the printing. -+ */ -+ if (console_trylock()) -+ console_unlock(); -+ } - } - - if (pending & PRINTK_PENDING_WAKEUP) -@@ -3807,33 +4055,58 @@ static void __wake_up_klogd(int val) - preempt_enable(); - } - -+/** -+ * wake_up_klogd - Wake kernel logging daemon -+ * -+ * Use this function when new records have been added to the ringbuffer -+ * and the console printing for those records is handled elsewhere. In -+ * this case only the logging daemon needs to be woken. -+ * -+ * Context: Any context. -+ */ - void wake_up_klogd(void) - { - __wake_up_klogd(PRINTK_PENDING_WAKEUP); - } - -+/** -+ * defer_console_output - Wake kernel logging daemon and trigger -+ * console printing in a deferred context -+ * -+ * Use this function when new records have been added to the ringbuffer -+ * but the current context is unable to perform the console printing. -+ * This function also wakes the logging daemon. -+ * -+ * Context: Any context. -+ */ - void defer_console_output(void) - { -+ int val = PRINTK_PENDING_WAKEUP; -+ - /* - * New messages may have been added directly to the ringbuffer - * using vprintk_store(), so wake any waiters as well. - */ -- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); -+ if (have_bkl_console) -+ val |= PRINTK_PENDING_OUTPUT; -+ __wake_up_klogd(val); - } - - void printk_trigger_flush(void) - { -+ struct cons_write_context wctxt = { }; -+ -+ preempt_disable(); -+ cons_atomic_flush(&wctxt, true); -+ preempt_enable(); -+ -+ cons_wake_threads(); - defer_console_output(); - } - - int vprintk_deferred(const char *fmt, va_list args) - { -- int r; -- -- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); -- defer_console_output(); -- -- return r; -+ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); - } - - int _printk_deferred(const char *fmt, ...) -diff --git a/kernel/printk/printk_nobkl.c b/kernel/printk/printk_nobkl.c -new file mode 100644 -index 0000000000000..e0b818a4f8b38 ---- /dev/null -+++ b/kernel/printk/printk_nobkl.c -@@ -0,0 +1,1825 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+// Copyright (C) 2022 Linutronix GmbH, John Ogness -+// Copyright (C) 2022 Intel, Thomas Gleixner -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "printk_ringbuffer.h" -+#include "internal.h" -+/* -+ * Printk implementation for consoles that do not depend on the BKL style -+ * console_lock mechanism. -+ * -+ * Console is locked on a CPU when state::locked is set and state:cpu == -+ * current CPU. This is valid for the current execution context. -+ * -+ * Nesting execution contexts on the same CPU can carefully take over -+ * if the driver allows reentrancy via state::unsafe = false. When the -+ * interrupted context resumes it checks the state before entering -+ * an unsafe region and aborts the operation if it detects a takeover. -+ * -+ * In case of panic or emergency the nesting context can take over the -+ * console forcefully. The write callback is then invoked with the unsafe -+ * flag set in the write context data, which allows the driver side to avoid -+ * locks and to evaluate the driver state so it can use an emergency path -+ * or repair the state instead of blindly assuming that it works. -+ * -+ * If the interrupted context touches the assigned record buffer after -+ * takeover, it does not cause harm because at the same execution level -+ * there is no concurrency on the same CPU. A threaded printer always has -+ * its own record buffer so it can never interfere with any of the per CPU -+ * record buffers. -+ * -+ * A concurrent writer on a different CPU can request to take over the -+ * console by: -+ * -+ * 1) Carefully writing the desired state into state[REQ] -+ * if there is no same or higher priority request pending. -+ * This locks state[REQ] except for higher priority -+ * waiters. -+ * -+ * 2) Setting state[CUR].req_prio unless a same or higher -+ * priority waiter won the race. -+ * -+ * 3) Carefully spin on state[CUR] until that is locked with the -+ * expected state. When the state is not the expected one then it -+ * has to verify that state[REQ] is still the same and that -+ * state[CUR] has not been taken over or unlocked. -+ * -+ * The unlocker hands over to state[REQ], but only if state[CUR] -+ * matches. -+ * -+ * In case that the owner does not react on the request and does not make -+ * observable progress, the waiter will timeout and can then decide to do -+ * a hostile takeover. -+ */ -+ -+#define copy_full_state(_dst, _src) do { _dst = _src; } while (0) -+#define copy_bit_state(_dst, _src) do { _dst.bits = _src.bits; } while (0) -+ -+#ifdef CONFIG_64BIT -+#define copy_seq_state64(_dst, _src) do { _dst.seq = _src.seq; } while (0) -+#else -+#define copy_seq_state64(_dst, _src) do { } while (0) -+#endif -+ -+enum state_selector { -+ CON_STATE_CUR, -+ CON_STATE_REQ, -+}; -+ -+/** -+ * cons_state_set - Helper function to set the console state -+ * @con: Console to update -+ * @which: Selects real state or handover state -+ * @new: The new state to write -+ * -+ * Only to be used when the console is not yet or no longer visible in the -+ * system. Otherwise use cons_state_try_cmpxchg(). -+ */ -+static inline void cons_state_set(struct console *con, enum state_selector which, -+ struct cons_state *new) -+{ -+ atomic_long_set(&ACCESS_PRIVATE(con, atomic_state[which]), new->atom); -+} -+ -+/** -+ * cons_state_read - Helper function to read the console state -+ * @con: Console to update -+ * @which: Selects real state or handover state -+ * @state: The state to store the result -+ */ -+static inline void cons_state_read(struct console *con, enum state_selector which, -+ struct cons_state *state) -+{ -+ state->atom = atomic_long_read(&ACCESS_PRIVATE(con, atomic_state[which])); -+} -+ -+/** -+ * cons_state_try_cmpxchg() - Helper function for atomic_long_try_cmpxchg() on console state -+ * @con: Console to update -+ * @which: Selects real state or handover state -+ * @old: Old/expected state -+ * @new: New state -+ * -+ * Returns: True on success, false on fail -+ */ -+static inline bool cons_state_try_cmpxchg(struct console *con, -+ enum state_selector which, -+ struct cons_state *old, -+ struct cons_state *new) -+{ -+ return atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_state[which]), -+ &old->atom, new->atom); -+} -+ -+/** -+ * cons_state_full_match - Check whether the full state matches -+ * @cur: The state to check -+ * @prev: The previous state -+ * -+ * Returns: True if matching, false otherwise. -+ * -+ * Check the full state including state::seq on 64bit. For take over -+ * detection. -+ */ -+static inline bool cons_state_full_match(struct cons_state cur, -+ struct cons_state prev) -+{ -+ /* -+ * req_prio can be set by a concurrent writer for friendly -+ * handover. Ignore it in the comparison. -+ */ -+ cur.req_prio = prev.req_prio; -+ return cur.atom == prev.atom; -+} -+ -+/** -+ * cons_state_bits_match - Check for matching state bits -+ * @cur: The state to check -+ * @prev: The previous state -+ * -+ * Returns: True if state matches, false otherwise. -+ * -+ * Contrary to cons_state_full_match this checks only the bits and ignores -+ * a sequence change on 64bits. On 32bit the two functions are identical. -+ */ -+static inline bool cons_state_bits_match(struct cons_state cur, struct cons_state prev) -+{ -+ /* -+ * req_prio can be set by a concurrent writer for friendly -+ * handover. Ignore it in the comparison. -+ */ -+ cur.req_prio = prev.req_prio; -+ return cur.bits == prev.bits; -+} -+ -+/** -+ * cons_check_panic - Check whether a remote CPU is in panic -+ * -+ * Returns: True if a remote CPU is in panic, false otherwise. -+ */ -+static inline bool cons_check_panic(void) -+{ -+ unsigned int pcpu = atomic_read(&panic_cpu); -+ -+ return pcpu != PANIC_CPU_INVALID && pcpu != smp_processor_id(); -+} -+ -+static struct cons_context_data early_cons_ctxt_data __initdata; -+ -+/** -+ * cons_context_set_pbufs - Set the output text buffer for the current context -+ * @ctxt: Pointer to the acquire context -+ * -+ * Buffer selection: -+ * 1) Early boot uses the global (initdata) buffer -+ * 2) Printer threads use the dynamically allocated per-console buffers -+ * 3) All other contexts use the per CPU buffers -+ * -+ * This guarantees that there is no concurrency on the output records ever. -+ * Early boot and per CPU nesting is not a problem. The takeover logic -+ * tells the interrupted context that the buffer has been overwritten. -+ * -+ * There are two critical regions that matter: -+ * -+ * 1) Context is filling the buffer with a record. After interruption -+ * it continues to sprintf() the record and before it goes to -+ * write it out, it checks the state, notices the takeover, discards -+ * the content and backs out. -+ * -+ * 2) Context is in a unsafe critical region in the driver. After -+ * interruption it might read overwritten data from the output -+ * buffer. When it leaves the critical region it notices and backs -+ * out. Hostile takeovers in driver critical regions are best effort -+ * and there is not much that can be done about that. -+ */ -+static __ref void cons_context_set_pbufs(struct cons_context *ctxt) -+{ -+ struct console *con = ctxt->console; -+ -+ /* Thread context or early boot? */ -+ if (ctxt->thread) -+ ctxt->pbufs = con->thread_pbufs; -+ else if (!con->pcpu_data) -+ ctxt->pbufs = &early_cons_ctxt_data.pbufs; -+ else -+ ctxt->pbufs = &(this_cpu_ptr(con->pcpu_data)->pbufs); -+} -+ -+/** -+ * cons_seq_init - Helper function to initialize the console sequence -+ * @con: Console to work on -+ * -+ * Set @con->atomic_seq to the starting record, or if that record no -+ * longer exists, the oldest available record. For init only. Do not -+ * use for runtime updates. -+ */ -+static void cons_seq_init(struct console *con) -+{ -+ u32 seq = (u32)max_t(u64, con->seq, prb_first_valid_seq(prb)); -+#ifdef CONFIG_64BIT -+ struct cons_state state; -+ -+ cons_state_read(con, CON_STATE_CUR, &state); -+ state.seq = seq; -+ cons_state_set(con, CON_STATE_CUR, &state); -+#else -+ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq); -+#endif -+} -+ -+/** -+ * cons_force_seq - Force a specified sequence number for a console -+ * @con: Console to work on -+ * @seq: Sequence number to force -+ * -+ * This function is only intended to be used in emergency situations. In -+ * particular: console_flush_on_panic(CONSOLE_REPLAY_ALL) -+ */ -+void cons_force_seq(struct console *con, u64 seq) -+{ -+#ifdef CONFIG_64BIT -+ struct cons_state old; -+ struct cons_state new; -+ -+ do { -+ cons_state_read(con, CON_STATE_CUR, &old); -+ copy_bit_state(new, old); -+ new.seq = seq; -+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)); -+#else -+ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), seq); -+#endif -+} -+ -+static inline u64 cons_expand_seq(u64 seq) -+{ -+ u64 rbseq; -+ -+ /* -+ * The provided sequence is only the lower 32bits of the ringbuffer -+ * sequence. It needs to be expanded to 64bit. Get the next sequence -+ * number from the ringbuffer and fold it. -+ */ -+ rbseq = prb_next_seq(prb); -+ seq = rbseq - ((u32)rbseq - (u32)seq); -+ -+ return seq; -+} -+ -+/** -+ * cons_read_seq - Read the current console sequence -+ * @con: Console to read the sequence of -+ * -+ * Returns: Sequence number of the next record to print on @con. -+ */ -+u64 cons_read_seq(struct console *con) -+{ -+ u64 seq; -+#ifdef CONFIG_64BIT -+ struct cons_state state; -+ -+ cons_state_read(con, CON_STATE_CUR, &state); -+ seq = state.seq; -+#else -+ seq = atomic_read(&ACCESS_PRIVATE(con, atomic_seq)); -+#endif -+ return cons_expand_seq(seq); -+} -+ -+/** -+ * cons_context_set_seq - Setup the context with the next sequence to print -+ * @ctxt: Pointer to an acquire context that contains -+ * all information about the acquire mode -+ * -+ * On return the retrieved sequence number is stored in ctxt->oldseq. -+ * -+ * The sequence number is safe in forceful takeover situations. -+ * -+ * Either the writer succeeded to update before it got interrupted -+ * or it failed. In the latter case the takeover will print the -+ * same line again. -+ * -+ * The sequence is only the lower 32bits of the ringbuffer sequence. The -+ * ringbuffer must be 2^31 records ahead to get out of sync. This needs -+ * some care when starting a console, i.e setting the sequence to 0 is -+ * wrong. It has to be set to the oldest valid sequence in the ringbuffer -+ * as that cannot be more than 2^31 records away -+ * -+ * On 64bit the 32bit sequence is part of console::state, which is saved -+ * in @ctxt->state. This prevents the 32bit update race. -+ */ -+static void cons_context_set_seq(struct cons_context *ctxt) -+{ -+#ifdef CONFIG_64BIT -+ ctxt->oldseq = ctxt->state.seq; -+#else -+ ctxt->oldseq = atomic_read(&ACCESS_PRIVATE(ctxt->console, atomic_seq)); -+#endif -+ ctxt->oldseq = cons_expand_seq(ctxt->oldseq); -+ ctxt->newseq = ctxt->oldseq; -+} -+ -+/** -+ * cons_seq_try_update - Try to update the console sequence number -+ * @ctxt: Pointer to an acquire context that contains -+ * all information about the acquire mode -+ * -+ * Returns: True if the console sequence was updated, false otherwise. -+ * -+ * Internal helper as the logic is different on 32bit and 64bit. -+ * -+ * On 32 bit the sequence is separate from state and therefore -+ * subject to a subtle race in the case of hostile takeovers. -+ * -+ * On 64 bit the sequence is part of the state and therefore safe -+ * vs. hostile takeovers. -+ * -+ * In case of fail the console has been taken over and @ctxt is -+ * invalid. Caller has to reacquire the console. -+ */ -+#ifdef CONFIG_64BIT -+static bool cons_seq_try_update(struct cons_context *ctxt) -+{ -+ struct console *con = ctxt->console; -+ struct cons_state old; -+ struct cons_state new; -+ -+ cons_state_read(con, CON_STATE_CUR, &old); -+ do { -+ /* Make sure this context is still the owner. */ -+ if (!cons_state_bits_match(old, ctxt->state)) -+ return false; -+ -+ /* Preserve bit state */ -+ copy_bit_state(new, old); -+ new.seq = ctxt->newseq; -+ -+ /* -+ * Can race with hostile takeover or with a handover -+ * request. -+ */ -+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)); -+ -+ copy_full_state(ctxt->state, new); -+ ctxt->oldseq = ctxt->newseq; -+ -+ return true; -+} -+#else -+static bool cons_release(struct cons_context *ctxt); -+static bool cons_seq_try_update(struct cons_context *ctxt) -+{ -+ struct console *con = ctxt->console; -+ struct cons_state state; -+ int pcpu; -+ u32 old; -+ u32 new; -+ -+ /* -+ * There is a corner case that needs to be considered here: -+ * -+ * CPU0 CPU1 -+ * printk() -+ * acquire() -> emergency -+ * write() acquire() -+ * update_seq() -+ * state == OK -+ * --> NMI -+ * takeover() -+ * <--- write() -+ * cmpxchg() succeeds update_seq() -+ * cmpxchg() fails -+ * -+ * There is nothing that can be done about this other than having -+ * yet another state bit that needs to be tracked and analyzed, -+ * but fails to cover the problem completely. -+ * -+ * No other scenarios expose such a problem. On same CPU takeovers -+ * the cmpxchg() always fails on the interrupted context after the -+ * interrupting context finished printing, but that's fine as it -+ * does not own the console anymore. The state check after the -+ * failed cmpxchg prevents that. -+ */ -+ cons_state_read(con, CON_STATE_CUR, &state); -+ /* Make sure this context is still the owner. */ -+ if (!cons_state_bits_match(state, ctxt->state)) -+ return false; -+ -+ /* -+ * Get the original sequence number that was retrieved -+ * from @con->atomic_seq. @con->atomic_seq should be still -+ * the same. 32bit truncates. See cons_context_set_seq(). -+ */ -+ old = (u32)ctxt->oldseq; -+ new = (u32)ctxt->newseq; -+ if (atomic_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_seq), &old, new)) { -+ ctxt->oldseq = ctxt->newseq; -+ return true; -+ } -+ -+ /* -+ * Reread the state. If this context does not own the console anymore -+ * then it cannot touch the sequence again. -+ */ -+ cons_state_read(con, CON_STATE_CUR, &state); -+ if (!cons_state_bits_match(state, ctxt->state)) -+ return false; -+ -+ pcpu = atomic_read(&panic_cpu); -+ if (pcpu == smp_processor_id()) { -+ /* -+ * This is the panic CPU. Emitting a warning here does not -+ * help at all. The callchain is clear and the priority is -+ * to get the messages out. In the worst case duplicated -+ * ones. That's a job for postprocessing. -+ */ -+ atomic_set(&ACCESS_PRIVATE(con, atomic_seq), new); -+ ctxt->oldseq = ctxt->newseq; -+ return true; -+ } -+ -+ /* -+ * Only emit a warning when this happens outside of a panic -+ * situation as on panic it's neither useful nor helping to let the -+ * panic CPU get the important stuff out. -+ */ -+ WARN_ON_ONCE(pcpu == PANIC_CPU_INVALID); -+ -+ cons_release(ctxt); -+ return false; -+} -+#endif -+ -+/** -+ * cons_cleanup_handover - Cleanup a handover request -+ * @ctxt: Pointer to acquire context -+ * -+ * @ctxt->hov_state contains the state to clean up -+ */ -+static void cons_cleanup_handover(struct cons_context *ctxt) -+{ -+ struct console *con = ctxt->console; -+ struct cons_state new; -+ -+ /* -+ * No loop required. Either hov_state is still the same or -+ * not. -+ */ -+ new.atom = 0; -+ cons_state_try_cmpxchg(con, CON_STATE_REQ, &ctxt->hov_state, &new); -+} -+ -+/** -+ * cons_setup_handover - Setup a handover request -+ * @ctxt: Pointer to acquire context -+ * -+ * Returns: True if a handover request was setup, false otherwise. -+ * -+ * On success @ctxt->hov_state contains the requested handover state -+ * -+ * On failure this context is not allowed to request a handover from the -+ * current owner. Reasons would be priority too low or a remote CPU in panic. -+ * In both cases this context should give up trying to acquire the console. -+ */ -+static bool cons_setup_handover(struct cons_context *ctxt) -+{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ struct cons_state old; -+ struct cons_state hstate = { -+ .locked = 1, -+ .cur_prio = ctxt->prio, -+ .cpu = cpu, -+ }; -+ -+ /* -+ * Try to store hstate in @con->atomic_state[REQ]. This might -+ * race with a higher priority waiter. -+ */ -+ cons_state_read(con, CON_STATE_REQ, &old); -+ do { -+ if (cons_check_panic()) -+ return false; -+ -+ /* Same or higher priority waiter exists? */ -+ if (old.cur_prio >= ctxt->prio) -+ return false; -+ -+ } while (!cons_state_try_cmpxchg(con, CON_STATE_REQ, &old, &hstate)); -+ -+ /* Save that state for comparison in spinwait */ -+ copy_full_state(ctxt->hov_state, hstate); -+ return true; -+} -+ -+/** -+ * cons_setup_request - Setup a handover request in state[CUR] -+ * @ctxt: Pointer to acquire context -+ * @old: The state that was used to make the decision to spin wait -+ * -+ * Returns: True if a handover request was setup in state[CUR], false -+ * otherwise. -+ * -+ * On success @ctxt->req_state contains the request state that was set in -+ * state[CUR] -+ * -+ * On failure this context encountered unexpected state values. This -+ * context should retry the full handover request setup process (the -+ * handover request setup by cons_setup_handover() is now invalidated -+ * and must be performed again). -+ */ -+static bool cons_setup_request(struct cons_context *ctxt, struct cons_state old) -+{ -+ struct console *con = ctxt->console; -+ struct cons_state cur; -+ struct cons_state new; -+ -+ /* Now set the request in state[CUR] */ -+ cons_state_read(con, CON_STATE_CUR, &cur); -+ do { -+ if (cons_check_panic()) -+ goto cleanup; -+ -+ /* Bit state changed vs. the decision to spinwait? */ -+ if (!cons_state_bits_match(cur, old)) -+ goto cleanup; -+ -+ /* -+ * A higher or equal priority context already setup a -+ * request? -+ */ -+ if (cur.req_prio >= ctxt->prio) -+ goto cleanup; -+ -+ /* Setup a request for handover. */ -+ copy_full_state(new, cur); -+ new.req_prio = ctxt->prio; -+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new)); -+ -+ /* Save that state for comparison in spinwait */ -+ copy_bit_state(ctxt->req_state, new); -+ return true; -+ -+cleanup: -+ cons_cleanup_handover(ctxt); -+ return false; -+} -+ -+/** -+ * cons_try_acquire_spin - Complete the spinwait attempt -+ * @ctxt: Pointer to an acquire context that contains -+ * all information about the acquire mode -+ * -+ * @ctxt->hov_state contains the handover state that was set in -+ * state[REQ] -+ * @ctxt->req_state contains the request state that was set in -+ * state[CUR] -+ * -+ * Returns: 0 if successfully locked. -EBUSY on timeout. -EAGAIN on -+ * unexpected state values. -+ * -+ * On success @ctxt->state contains the new state that was set in -+ * state[CUR] -+ * -+ * On -EBUSY failure this context timed out. This context should either -+ * give up or attempt a hostile takeover. -+ * -+ * On -EAGAIN failure this context encountered unexpected state values. -+ * This context should retry the full handover request setup process (the -+ * handover request setup by cons_setup_handover() is now invalidated and -+ * must be performed again). -+ */ -+static int cons_try_acquire_spin(struct cons_context *ctxt) -+{ -+ struct console *con = ctxt->console; -+ struct cons_state cur; -+ struct cons_state new; -+ int err = -EAGAIN; -+ int timeout; -+ -+ /* Now wait for the other side to hand over */ -+ for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { -+ /* Timeout immediately if a remote panic is detected. */ -+ if (cons_check_panic()) -+ break; -+ -+ cons_state_read(con, CON_STATE_CUR, &cur); -+ -+ /* -+ * If the real state of the console matches the handover state -+ * that this context setup, then the handover was a success -+ * and this context is now the owner. -+ * -+ * Note that this might have raced with a new higher priority -+ * requester coming in after the lock was handed over. -+ * However, that requester will see that the owner changes and -+ * setup a new request for the current owner (this context). -+ */ -+ if (cons_state_bits_match(cur, ctxt->hov_state)) -+ goto success; -+ -+ /* -+ * If state changed since the request was made, give up as -+ * it is no longer consistent. This must include -+ * state::req_prio since there could be a higher priority -+ * request available. -+ */ -+ if (cur.bits != ctxt->req_state.bits) -+ goto cleanup; -+ -+ /* -+ * Finally check whether the handover state is still -+ * the same. -+ */ -+ cons_state_read(con, CON_STATE_REQ, &cur); -+ if (cur.atom != ctxt->hov_state.atom) -+ goto cleanup; -+ -+ /* Account time */ -+ if (timeout > 0) -+ udelay(1); -+ } -+ -+ /* -+ * Timeout. Cleanup the handover state and carefully try to reset -+ * req_prio in the real state. The reset is important to ensure -+ * that the owner does not hand over the lock after this context -+ * has given up waiting. -+ */ -+ cons_cleanup_handover(ctxt); -+ -+ cons_state_read(con, CON_STATE_CUR, &cur); -+ do { -+ /* -+ * The timeout might have raced with the owner coming late -+ * and handing it over gracefully. -+ */ -+ if (cons_state_bits_match(cur, ctxt->hov_state)) -+ goto success; -+ -+ /* -+ * Validate that the state matches with the state at request -+ * time. If this check fails, there is already a higher -+ * priority context waiting or the owner has changed (either -+ * by higher priority or by hostile takeover). In all fail -+ * cases this context is no longer in line for a handover to -+ * take place, so no reset is necessary. -+ */ -+ if (cur.bits != ctxt->req_state.bits) -+ goto cleanup; -+ -+ copy_full_state(new, cur); -+ new.req_prio = 0; -+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &cur, &new)); -+ /* Reset worked. Report timeout. */ -+ return -EBUSY; -+ -+success: -+ /* Store the real state */ -+ copy_full_state(ctxt->state, cur); -+ ctxt->hostile = false; -+ err = 0; -+ -+cleanup: -+ cons_cleanup_handover(ctxt); -+ return err; -+} -+ -+/** -+ * __cons_try_acquire - Try to acquire the console for printk output -+ * @ctxt: Pointer to an acquire context that contains -+ * all information about the acquire mode -+ * -+ * Returns: True if the acquire was successful. False on fail. -+ * -+ * In case of success @ctxt->state contains the acquisition -+ * state. -+ * -+ * In case of fail @ctxt->old_state contains the state -+ * that was read from @con->state for analysis by the caller. -+ */ -+static bool __cons_try_acquire(struct cons_context *ctxt) -+{ -+ unsigned int cpu = smp_processor_id(); -+ struct console *con = ctxt->console; -+ short flags = console_srcu_read_flags(con); -+ struct cons_state old; -+ struct cons_state new; -+ int err; -+ -+ if (WARN_ON_ONCE(!(flags & CON_NO_BKL))) -+ return false; -+again: -+ cons_state_read(con, CON_STATE_CUR, &old); -+ -+ /* Preserve it for the caller and for spinwait */ -+ copy_full_state(ctxt->old_state, old); -+ -+ if (cons_check_panic()) -+ return false; -+ -+ /* Set up the new state for takeover */ -+ copy_full_state(new, old); -+ new.locked = 1; -+ new.thread = ctxt->thread; -+ new.cur_prio = ctxt->prio; -+ new.req_prio = CONS_PRIO_NONE; -+ new.cpu = cpu; -+ -+ /* Attempt to acquire it directly if unlocked */ -+ if (!old.locked) { -+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) -+ goto again; -+ -+ ctxt->hostile = false; -+ copy_full_state(ctxt->state, new); -+ goto success; -+ } -+ -+ /* -+ * A threaded printer context will never spin or perform a -+ * hostile takeover. The atomic writer will wake the thread -+ * when it is done with the important output. -+ */ -+ if (ctxt->thread) -+ return false; -+ -+ /* -+ * If the active context is on the same CPU then there is -+ * obviously no handshake possible. -+ */ -+ if (old.cpu == cpu) -+ goto check_hostile; -+ -+ /* -+ * If a handover request with same or higher priority is already -+ * pending then this context cannot setup a handover request. -+ */ -+ if (old.req_prio >= ctxt->prio) -+ goto check_hostile; -+ -+ /* -+ * If the caller did not request spin-waiting then performing a -+ * handover is not an option. -+ */ -+ if (!ctxt->spinwait) -+ goto check_hostile; -+ -+ /* -+ * Setup the request in state[REQ]. If this fails then this -+ * context is not allowed to setup a handover request. -+ */ -+ if (!cons_setup_handover(ctxt)) -+ goto check_hostile; -+ -+ /* -+ * Setup the request in state[CUR]. Hand in the state that was -+ * used to make the decision to spinwait above, for comparison. If -+ * this fails then unexpected state values were encountered and the -+ * full request setup process is retried. -+ */ -+ if (!cons_setup_request(ctxt, old)) -+ goto again; -+ -+ /* -+ * Spin-wait to acquire the console. If this fails then unexpected -+ * state values were encountered (for example, a hostile takeover by -+ * another context) and the full request setup process is retried. -+ */ -+ err = cons_try_acquire_spin(ctxt); -+ if (err) { -+ if (err == -EAGAIN) -+ goto again; -+ goto check_hostile; -+ } -+success: -+ /* Common updates on success */ -+ cons_context_set_seq(ctxt); -+ cons_context_set_pbufs(ctxt); -+ return true; -+ -+check_hostile: -+ if (!ctxt->hostile) -+ return false; -+ -+ if (cons_check_panic()) -+ return false; -+ -+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) -+ goto again; -+ -+ copy_full_state(ctxt->state, new); -+ goto success; -+} -+ -+/** -+ * cons_try_acquire - Try to acquire the console for printk output -+ * @ctxt: Pointer to an acquire context that contains -+ * all information about the acquire mode -+ * -+ * Returns: True if the acquire was successful. False on fail. -+ * -+ * In case of success @ctxt->state contains the acquisition -+ * state. -+ * -+ * In case of fail @ctxt->old_state contains the state -+ * that was read from @con->state for analysis by the caller. -+ */ -+static bool cons_try_acquire(struct cons_context *ctxt) -+{ -+ if (__cons_try_acquire(ctxt)) -+ return true; -+ -+ ctxt->state.atom = 0; -+ return false; -+} -+ -+/** -+ * __cons_release - Release the console after output is done -+ * @ctxt: The acquire context that contains the state -+ * at cons_try_acquire() -+ * -+ * Returns: True if the release was regular -+ * -+ * False if the console is in unusable state or was handed over -+ * with handshake or taken over hostile without handshake. -+ * -+ * The return value tells the caller whether it needs to evaluate further -+ * printing. -+ */ -+static bool __cons_release(struct cons_context *ctxt) -+{ -+ struct console *con = ctxt->console; -+ short flags = console_srcu_read_flags(con); -+ struct cons_state hstate; -+ struct cons_state old; -+ struct cons_state new; -+ -+ if (WARN_ON_ONCE(!(flags & CON_NO_BKL))) -+ return false; -+ -+ cons_state_read(con, CON_STATE_CUR, &old); -+again: -+ if (!cons_state_bits_match(old, ctxt->state)) -+ return false; -+ -+ /* Release it directly when no handover request is pending. */ -+ if (!old.req_prio) -+ goto unlock; -+ -+ /* Read the handover target state */ -+ cons_state_read(con, CON_STATE_REQ, &hstate); -+ -+ /* If the waiter gave up hstate is 0 */ -+ if (!hstate.atom) -+ goto unlock; -+ -+ /* -+ * If a higher priority waiter raced against a lower priority -+ * waiter then unlock instead of handing over to either. The -+ * higher priority waiter will notice the updated state and -+ * retry. -+ */ -+ if (hstate.cur_prio != old.req_prio) -+ goto unlock; -+ -+ /* Switch the state and preserve the sequence on 64bit */ -+ copy_bit_state(new, hstate); -+ copy_seq_state64(new, old); -+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) -+ goto again; -+ -+ return true; -+ -+unlock: -+ /* Clear the state and preserve the sequence on 64bit */ -+ new.atom = 0; -+ copy_seq_state64(new, old); -+ if (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &old, &new)) -+ goto again; -+ -+ return true; -+} -+ -+bool printk_threads_enabled __ro_after_init; -+static bool printk_force_atomic __initdata; -+ -+/** -+ * cons_release - Release the console after output is done -+ * @ctxt: The acquire context that contains the state -+ * at cons_try_acquire() -+ * -+ * Returns: True if the release was regular -+ * -+ * False if the console is in unusable state or was handed over -+ * with handshake or taken over hostile without handshake. -+ * -+ * The return value tells the caller whether it needs to evaluate further -+ * printing. -+ */ -+static bool cons_release(struct cons_context *ctxt) -+{ -+ bool ret = __cons_release(ctxt); -+ -+ /* Invalidate the buffer pointer. It is no longer valid. */ -+ ctxt->pbufs = NULL; -+ -+ ctxt->state.atom = 0; -+ return ret; -+} -+ -+bool console_try_acquire(struct cons_write_context *wctxt) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ -+ return cons_try_acquire(ctxt); -+} -+EXPORT_SYMBOL_GPL(console_try_acquire); -+ -+bool console_release(struct cons_write_context *wctxt) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ -+ return cons_release(ctxt); -+} -+EXPORT_SYMBOL_GPL(console_release); -+ -+/** -+ * cons_alloc_percpu_data - Allocate percpu data for a console -+ * @con: Console to allocate for -+ * -+ * Returns: True on success. False otherwise and the console cannot be used. -+ * -+ * If it is not yet possible to allocate per CPU data, success is returned. -+ * When per CPU data becomes possible, set_percpu_data_ready() will call -+ * this function again for all registered consoles. -+ */ -+bool cons_alloc_percpu_data(struct console *con) -+{ -+ if (!printk_percpu_data_ready()) -+ return true; -+ -+ con->pcpu_data = alloc_percpu(typeof(*con->pcpu_data)); -+ if (con->pcpu_data) -+ return true; -+ -+ con_printk(KERN_WARNING, con, "failed to allocate percpu buffers\n"); -+ return false; -+} -+ -+/** -+ * cons_free_percpu_data - Free percpu data of a console on unregister -+ * @con: Console to clean up -+ */ -+static void cons_free_percpu_data(struct console *con) -+{ -+ if (!con->pcpu_data) -+ return; -+ -+ free_percpu(con->pcpu_data); -+ con->pcpu_data = NULL; -+} -+ -+/** -+ * console_can_proceed - Check whether printing can proceed -+ * @wctxt: The write context that was handed to the write function -+ * -+ * Returns: True if the state is correct. False if a handover -+ * has been requested or if the console was taken -+ * over. -+ * -+ * Must be invoked after the record was dumped into the assigned record -+ * buffer and at appropriate safe places in the driver. For unsafe driver -+ * sections see console_enter_unsafe(). -+ * -+ * When this function returns false then the calling context is not allowed -+ * to go forward and has to back out immediately and carefully. The buffer -+ * content is no longer trusted either and the console lock is no longer -+ * held. -+ */ -+bool console_can_proceed(struct cons_write_context *wctxt) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ struct console *con = ctxt->console; -+ struct cons_state state; -+ -+ cons_state_read(con, CON_STATE_CUR, &state); -+ /* Store it for analysis or reuse */ -+ copy_full_state(ctxt->old_state, state); -+ -+ /* Make sure this context is still the owner. */ -+ if (!cons_state_full_match(state, ctxt->state)) -+ return false; -+ -+ /* -+ * Having a safe point for take over and eventually a few -+ * duplicated characters or a full line is way better than a -+ * hostile takeover. Post processing can take care of the garbage. -+ * Continue if the requested priority is not sufficient. -+ */ -+ if (state.req_prio <= state.cur_prio) -+ return true; -+ -+ /* -+ * A console printer within an unsafe region is allowed to continue. -+ * It can perform the handover when exiting the safe region. Otherwise -+ * a hostile takeover will be necessary. -+ */ -+ if (state.unsafe) -+ return true; -+ -+ /* Release and hand over */ -+ cons_release(ctxt); -+ /* -+ * This does not check whether the handover succeeded. The -+ * outermost callsite has to make the final decision whether printing -+ * should continue or not (via reacquire, possibly hostile). The -+ * console is unlocked already so go back all the way instead of -+ * trying to implement heuristics in tons of places. -+ */ -+ return false; -+} -+EXPORT_SYMBOL_GPL(console_can_proceed); -+ -+/** -+ * __console_update_unsafe - Update the unsafe bit in @con->atomic_state -+ * @wctxt: The write context that was handed to the write function -+ * -+ * Returns: True if the state is correct. False if a handover -+ * has been requested or if the console was taken -+ * over. -+ * -+ * Must be invoked before an unsafe driver section is entered. -+ * -+ * When this function returns false then the calling context is not allowed -+ * to go forward and has to back out immediately and carefully. The buffer -+ * content is no longer trusted either and the console lock is no longer -+ * held. -+ * -+ * Internal helper to avoid duplicated code -+ */ -+static bool __console_update_unsafe(struct cons_write_context *wctxt, bool unsafe) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ struct console *con = ctxt->console; -+ struct cons_state new; -+ -+ do { -+ if (!console_can_proceed(wctxt)) -+ return false; -+ /* -+ * console_can_proceed() saved the real state in -+ * ctxt->old_state -+ */ -+ copy_full_state(new, ctxt->old_state); -+ new.unsafe = unsafe; -+ -+ } while (!cons_state_try_cmpxchg(con, CON_STATE_CUR, &ctxt->old_state, &new)); -+ -+ copy_full_state(ctxt->state, new); -+ return true; -+} -+ -+/** -+ * console_enter_unsafe - Enter an unsafe region in the driver -+ * @wctxt: The write context that was handed to the write function -+ * -+ * Returns: True if the state is correct. False if a handover -+ * has been requested or if the console was taken -+ * over. -+ * -+ * Must be invoked before an unsafe driver section is entered. -+ * -+ * When this function returns false then the calling context is not allowed -+ * to go forward and has to back out immediately and carefully. The buffer -+ * content is no longer trusted either and the console lock is no longer -+ * held. -+ */ -+bool console_enter_unsafe(struct cons_write_context *wctxt) -+{ -+ return __console_update_unsafe(wctxt, true); -+} -+EXPORT_SYMBOL_GPL(console_enter_unsafe); -+ -+/** -+ * console_exit_unsafe - Exit an unsafe region in the driver -+ * @wctxt: The write context that was handed to the write function -+ * -+ * Returns: True if the state is correct. False if a handover -+ * has been requested or if the console was taken -+ * over. -+ * -+ * Must be invoked before an unsafe driver section is exited. -+ * -+ * When this function returns false then the calling context is not allowed -+ * to go forward and has to back out immediately and carefully. The buffer -+ * content is no longer trusted either and the console lock is no longer -+ * held. -+ */ -+bool console_exit_unsafe(struct cons_write_context *wctxt) -+{ -+ return __console_update_unsafe(wctxt, false); -+} -+EXPORT_SYMBOL_GPL(console_exit_unsafe); -+ -+/** -+ * cons_get_record - Fill the buffer with the next pending ringbuffer record -+ * @wctxt: The write context which will be handed to the write function -+ * -+ * Returns: True if there are records available. If the next record should -+ * be printed, the output buffer is filled and @wctxt->outbuf -+ * points to the text to print. If @wctxt->outbuf is NULL after -+ * the call, the record should not be printed but the caller must -+ * still update the console sequence number. -+ * -+ * False means that there are no pending records anymore and the -+ * printing can stop. -+ */ -+static bool cons_get_record(struct cons_write_context *wctxt) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ struct console *con = ctxt->console; -+ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; -+ struct printk_message pmsg = { -+ .pbufs = ctxt->pbufs, -+ }; -+ -+ if (!printk_get_next_message(&pmsg, ctxt->newseq, is_extended, true)) -+ return false; -+ -+ ctxt->newseq = pmsg.seq; -+ ctxt->dropped += pmsg.dropped; -+ -+ if (pmsg.outbuf_len == 0) { -+ wctxt->outbuf = NULL; -+ } else { -+ if (ctxt->dropped && !is_extended) -+ console_prepend_dropped(&pmsg, ctxt->dropped); -+ wctxt->outbuf = &pmsg.pbufs->outbuf[0]; -+ } -+ -+ wctxt->len = pmsg.outbuf_len; -+ -+ return true; -+} -+ -+/** -+ * cons_emit_record - Emit record in the acquired context -+ * @wctxt: The write context that will be handed to the write function -+ * -+ * Returns: False if the operation was aborted (takeover or handover). -+ * True otherwise -+ * -+ * When false is returned, the caller is not allowed to touch console state. -+ * The console is owned by someone else. If the caller wants to print more -+ * it has to reacquire the console first. -+ * -+ * When true is returned, @wctxt->ctxt.backlog indicates whether there are -+ * still records pending in the ringbuffer, -+ */ -+static bool cons_emit_record(struct cons_write_context *wctxt) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ struct console *con = ctxt->console; -+ bool done = false; -+ -+ /* -+ * @con->dropped is not protected in case of hostile takeovers so -+ * the update below is racy. Annotate it accordingly. -+ */ -+ ctxt->dropped = data_race(READ_ONCE(con->dropped)); -+ -+ /* Fill the output buffer with the next record */ -+ ctxt->backlog = cons_get_record(wctxt); -+ if (!ctxt->backlog) -+ return true; -+ -+ /* Safety point. Don't touch state in case of takeover */ -+ if (!console_can_proceed(wctxt)) -+ return false; -+ -+ /* Counterpart to the read above */ -+ WRITE_ONCE(con->dropped, ctxt->dropped); -+ -+ /* -+ * In case of skipped records, Update sequence state in @con. -+ */ -+ if (!wctxt->outbuf) -+ goto update; -+ -+ /* Tell the driver about potential unsafe state */ -+ wctxt->unsafe = ctxt->state.unsafe; -+ -+ if (!ctxt->thread && con->write_atomic) { -+ done = con->write_atomic(con, wctxt); -+ } else if (ctxt->thread && con->write_thread) { -+ done = con->write_thread(con, wctxt); -+ } else { -+ cons_release(ctxt); -+ WARN_ON_ONCE(1); -+ return false; -+ } -+ -+ /* If not done, the write was aborted due to takeover */ -+ if (!done) -+ return false; -+ -+ /* If there was a dropped message, it has now been output. */ -+ if (ctxt->dropped) { -+ ctxt->dropped = 0; -+ /* Counterpart to the read above */ -+ WRITE_ONCE(con->dropped, ctxt->dropped); -+ } -+update: -+ ctxt->newseq++; -+ /* -+ * The sequence update attempt is not part of console_release() -+ * because in panic situations the console is not released by -+ * the panic CPU until all records are written. On 32bit the -+ * sequence is separate from state anyway. -+ */ -+ return cons_seq_try_update(ctxt); -+} -+ -+/** -+ * cons_kthread_should_wakeup - Check whether the printk thread should wakeup -+ * @con: Console to operate on -+ * @ctxt: The acquire context that contains the state -+ * at console_acquire() -+ * -+ * Returns: True if the thread should shutdown or if the console is allowed to -+ * print and a record is available. False otherwise -+ * -+ * After the thread wakes up, it must first check if it should shutdown before -+ * attempting any printing. -+ */ -+static bool cons_kthread_should_wakeup(struct console *con, struct cons_context *ctxt) -+{ -+ bool is_usable; -+ short flags; -+ int cookie; -+ -+ if (kthread_should_stop()) -+ return true; -+ -+ cookie = console_srcu_read_lock(); -+ flags = console_srcu_read_flags(con); -+ is_usable = console_is_usable(con, flags); -+ console_srcu_read_unlock(cookie); -+ -+ if (!is_usable) -+ return false; -+ -+ /* This reads state and sequence on 64bit. On 32bit only state */ -+ cons_state_read(con, CON_STATE_CUR, &ctxt->state); -+ -+ /* -+ * Atomic printing is running on some other CPU. The owner -+ * will wake the console thread on unlock if necessary. -+ */ -+ if (ctxt->state.locked) -+ return false; -+ -+ /* Bring the sequence in @ctxt up to date */ -+ cons_context_set_seq(ctxt); -+ -+ return prb_read_valid(prb, ctxt->oldseq, NULL); -+} -+ -+/** -+ * cons_kthread_func - The printk thread function -+ * @__console: Console to operate on -+ */ -+static int cons_kthread_func(void *__console) -+{ -+ struct console *con = __console; -+ struct cons_write_context wctxt = { -+ .ctxt.console = con, -+ .ctxt.prio = CONS_PRIO_NORMAL, -+ .ctxt.thread = 1, -+ }; -+ struct cons_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt); -+ unsigned long flags; -+ short con_flags; -+ bool backlog; -+ int cookie; -+ int ret; -+ -+ for (;;) { -+ atomic_inc(&con->kthread_waiting); -+ -+ /* -+ * Provides a full memory barrier vs. cons_kthread_wake(). -+ */ -+ ret = rcuwait_wait_event(&con->rcuwait, -+ cons_kthread_should_wakeup(con, ctxt), -+ TASK_INTERRUPTIBLE); -+ -+ atomic_dec(&con->kthread_waiting); -+ -+ if (kthread_should_stop()) -+ break; -+ -+ /* Wait was interrupted by a spurious signal, go back to sleep */ -+ if (ret) -+ continue; -+ -+ for (;;) { -+ cookie = console_srcu_read_lock(); -+ -+ /* -+ * Ensure this stays on the CPU to make handover and -+ * takeover possible. -+ */ -+ if (con->port_lock) -+ con->port_lock(con, true, &flags); -+ else -+ migrate_disable(); -+ -+ /* -+ * Try to acquire the console without attempting to -+ * take over. If an atomic printer wants to hand -+ * back to the thread it simply wakes it up. -+ */ -+ if (!cons_try_acquire(ctxt)) -+ break; -+ -+ con_flags = console_srcu_read_flags(con); -+ -+ if (console_is_usable(con, con_flags)) { -+ /* -+ * If the emit fails, this context is no -+ * longer the owner. Abort the processing and -+ * wait for new records to print. -+ */ -+ if (!cons_emit_record(&wctxt)) -+ break; -+ backlog = ctxt->backlog; -+ } else { -+ backlog = false; -+ } -+ -+ /* -+ * If the release fails, this context was not the -+ * owner. Abort the processing and wait for new -+ * records to print. -+ */ -+ if (!cons_release(ctxt)) -+ break; -+ -+ /* Backlog done? */ -+ if (!backlog) -+ break; -+ -+ if (con->port_lock) -+ con->port_lock(con, false, &flags); -+ else -+ migrate_enable(); -+ -+ console_srcu_read_unlock(cookie); -+ -+ cond_resched(); -+ } -+ if (con->port_lock) -+ con->port_lock(con, false, &flags); -+ else -+ migrate_enable(); -+ -+ console_srcu_read_unlock(cookie); -+ } -+ return 0; -+} -+ -+/** -+ * cons_irq_work - irq work to wake printk thread -+ * @irq_work: The irq work to operate on -+ */ -+static void cons_irq_work(struct irq_work *irq_work) -+{ -+ struct console *con = container_of(irq_work, struct console, irq_work); -+ -+ cons_kthread_wake(con); -+} -+ -+/** -+ * cons_wake_threads - Wake up printing threads -+ * -+ * A printing thread is only woken if it is within the @kthread_waiting -+ * block. If it is not within the block (or enters the block later), it -+ * will see any new records and continue printing on its own. -+ */ -+void cons_wake_threads(void) -+{ -+ struct console *con; -+ int cookie; -+ -+ cookie = console_srcu_read_lock(); -+ for_each_console_srcu(con) { -+ if (con->kthread && atomic_read(&con->kthread_waiting)) -+ irq_work_queue(&con->irq_work); -+ } -+ console_srcu_read_unlock(cookie); -+} -+ -+/** -+ * struct cons_cpu_state - Per CPU printk context state -+ * @prio: The current context priority level -+ * @nesting: Per priority nest counter -+ */ -+struct cons_cpu_state { -+ enum cons_prio prio; -+ int nesting[CONS_PRIO_MAX]; -+}; -+ -+static DEFINE_PER_CPU(struct cons_cpu_state, cons_pcpu_state); -+static struct cons_cpu_state early_cons_pcpu_state __initdata; -+ -+/** -+ * cons_get_cpu_state - Get the per CPU console state pointer -+ * -+ * Returns either a pointer to the per CPU state of the current CPU or to -+ * the init data state during early boot. -+ */ -+static __ref struct cons_cpu_state *cons_get_cpu_state(void) -+{ -+ if (!printk_percpu_data_ready()) -+ return &early_cons_pcpu_state; -+ -+ return this_cpu_ptr(&cons_pcpu_state); -+} -+ -+/** -+ * cons_get_wctxt - Get the write context for atomic printing -+ * @con: Console to operate on -+ * @prio: Priority of the context -+ * -+ * Returns either the per CPU context or the builtin context for -+ * early boot. -+ */ -+static __ref struct cons_write_context *cons_get_wctxt(struct console *con, -+ enum cons_prio prio) -+{ -+ if (!con->pcpu_data) -+ return &early_cons_ctxt_data.wctxt[prio]; -+ -+ return &this_cpu_ptr(con->pcpu_data)->wctxt[prio]; -+} -+ -+/** -+ * cons_atomic_try_acquire - Try to acquire the console for atomic printing -+ * @con: The console to acquire -+ * @ctxt: The console context instance to work on -+ * @prio: The priority of the current context -+ */ -+static bool cons_atomic_try_acquire(struct console *con, struct cons_context *ctxt, -+ enum cons_prio prio, bool skip_unsafe) -+{ -+ memset(ctxt, 0, sizeof(*ctxt)); -+ ctxt->console = con; -+ ctxt->spinwait_max_us = 2000; -+ ctxt->prio = prio; -+ ctxt->spinwait = 1; -+ -+ /* Try to acquire it directly or via a friendly handover */ -+ if (cons_try_acquire(ctxt)) -+ return true; -+ -+ /* Investigate whether a hostile takeover is due */ -+ if (ctxt->old_state.cur_prio >= prio) -+ return false; -+ -+ if (!ctxt->old_state.unsafe || !skip_unsafe) -+ ctxt->hostile = 1; -+ return cons_try_acquire(ctxt); -+} -+ -+/** -+ * cons_atomic_flush_con - Flush one console in atomic mode -+ * @wctxt: The write context struct to use for this context -+ * @con: The console to flush -+ * @prio: The priority of the current context -+ * @skip_unsafe: True, to avoid unsafe hostile takeovers -+ */ -+static void cons_atomic_flush_con(struct cons_write_context *wctxt, struct console *con, -+ enum cons_prio prio, bool skip_unsafe) -+{ -+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); -+ bool wake_thread = false; -+ short flags; -+ -+ if (!cons_atomic_try_acquire(con, ctxt, prio, skip_unsafe)) -+ return; -+ -+ do { -+ flags = console_srcu_read_flags(con); -+ -+ if (!console_is_usable(con, flags)) -+ break; -+ -+ /* -+ * For normal prio messages let the printer thread handle -+ * the printing if it is available. -+ */ -+ if (prio <= CONS_PRIO_NORMAL && con->kthread) { -+ wake_thread = true; -+ break; -+ } -+ -+ /* -+ * cons_emit_record() returns false when the console was -+ * handed over or taken over. In both cases the context is -+ * no longer valid. -+ */ -+ if (!cons_emit_record(wctxt)) -+ return; -+ } while (ctxt->backlog); -+ -+ cons_release(ctxt); -+ -+ if (wake_thread && atomic_read(&con->kthread_waiting)) -+ irq_work_queue(&con->irq_work); -+} -+ -+/** -+ * cons_atomic_flush - Flush consoles in atomic mode if required -+ * @printk_caller_wctxt: The write context struct to use for this -+ * context (for printk() context only) -+ * @skip_unsafe: True, to avoid unsafe hostile takeovers -+ */ -+void cons_atomic_flush(struct cons_write_context *printk_caller_wctxt, bool skip_unsafe) -+{ -+ struct cons_write_context *wctxt; -+ struct cons_cpu_state *cpu_state; -+ struct console *con; -+ short flags; -+ int cookie; -+ -+ cpu_state = cons_get_cpu_state(); -+ -+ /* -+ * When in an elevated priority, the printk() calls are not -+ * individually flushed. This is to allow the full output to -+ * be dumped to the ringbuffer before starting with printing -+ * the backlog. -+ */ -+ if (cpu_state->prio > CONS_PRIO_NORMAL && printk_caller_wctxt) -+ return; -+ -+ /* -+ * Let the outermost write of this priority print. This avoids -+ * nasty hackery for nested WARN() where the printing itself -+ * generates one. -+ * -+ * cpu_state->prio <= CONS_PRIO_NORMAL is not subject to nesting -+ * and can proceed in order to allow atomic printing when consoles -+ * do not have a printer thread. -+ */ -+ if (cpu_state->prio > CONS_PRIO_NORMAL && -+ cpu_state->nesting[cpu_state->prio] != 1) -+ return; -+ -+ cookie = console_srcu_read_lock(); -+ for_each_console_srcu(con) { -+ if (!con->write_atomic) -+ continue; -+ -+ flags = console_srcu_read_flags(con); -+ -+ if (!console_is_usable(con, flags)) -+ continue; -+ -+ if (cpu_state->prio > CONS_PRIO_NORMAL || !con->kthread) { -+ if (printk_caller_wctxt) -+ wctxt = printk_caller_wctxt; -+ else -+ wctxt = cons_get_wctxt(con, cpu_state->prio); -+ cons_atomic_flush_con(wctxt, con, cpu_state->prio, skip_unsafe); -+ } -+ } -+ console_srcu_read_unlock(cookie); -+} -+ -+/** -+ * cons_atomic_enter - Enter a context that enforces atomic printing -+ * @prio: Priority of the context -+ * -+ * Returns: The previous priority that needs to be fed into -+ * the corresponding cons_atomic_exit() -+ */ -+enum cons_prio cons_atomic_enter(enum cons_prio prio) -+{ -+ struct cons_cpu_state *cpu_state; -+ enum cons_prio prev_prio; -+ -+ migrate_disable(); -+ cpu_state = cons_get_cpu_state(); -+ -+ prev_prio = cpu_state->prio; -+ if (prev_prio < prio) -+ cpu_state->prio = prio; -+ -+ /* -+ * Increment the nesting on @cpu_state->prio so a WARN() -+ * nested into a panic printout does not attempt to -+ * scribble state. -+ */ -+ cpu_state->nesting[cpu_state->prio]++; -+ -+ return prev_prio; -+} -+ -+/** -+ * cons_atomic_exit - Exit a context that enforces atomic printing -+ * @prio: Priority of the context to leave -+ * @prev_prio: Priority of the previous context for restore -+ * -+ * @prev_prio is the priority returned by the corresponding cons_atomic_enter(). -+ */ -+void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio) -+{ -+ struct cons_cpu_state *cpu_state; -+ -+ cons_atomic_flush(NULL, true); -+ -+ cpu_state = cons_get_cpu_state(); -+ -+ if (cpu_state->prio == CONS_PRIO_PANIC) -+ cons_atomic_flush(NULL, false); -+ -+ /* -+ * Undo the nesting of cons_atomic_enter() at the CPU state -+ * priority. -+ */ -+ cpu_state->nesting[cpu_state->prio]--; -+ -+ /* -+ * Restore the previous priority, which was returned by -+ * cons_atomic_enter(). -+ */ -+ cpu_state->prio = prev_prio; -+ -+ migrate_enable(); -+} -+ -+/** -+ * cons_kthread_stop - Stop a printk thread -+ * @con: Console to operate on -+ */ -+static void cons_kthread_stop(struct console *con) -+{ -+ lockdep_assert_console_list_lock_held(); -+ -+ if (!con->kthread) -+ return; -+ -+ kthread_stop(con->kthread); -+ con->kthread = NULL; -+ -+ kfree(con->thread_pbufs); -+ con->thread_pbufs = NULL; -+} -+ -+/** -+ * cons_kthread_create - Create a printk thread -+ * @con: Console to operate on -+ * -+ * If it fails, let the console proceed. The atomic part might -+ * be usable and useful. -+ */ -+void cons_kthread_create(struct console *con) -+{ -+ struct task_struct *kt; -+ struct console *c; -+ -+ lockdep_assert_console_list_lock_held(); -+ -+ if (!(con->flags & CON_NO_BKL) || !con->write_thread) -+ return; -+ -+ if (!printk_threads_enabled || con->kthread) -+ return; -+ -+ /* -+ * Printer threads cannot be started as long as any boot console is -+ * registered because there is no way to synchronize the hardware -+ * registers between boot console code and regular console code. -+ */ -+ for_each_console(c) { -+ if (c->flags & CON_BOOT) -+ return; -+ } -+ have_boot_console = false; -+ -+ con->thread_pbufs = kmalloc(sizeof(*con->thread_pbufs), GFP_KERNEL); -+ if (!con->thread_pbufs) { -+ con_printk(KERN_ERR, con, "failed to allocate printing thread buffers\n"); -+ return; -+ } -+ -+ kt = kthread_run(cons_kthread_func, con, "pr/%s%d", con->name, con->index); -+ if (IS_ERR(kt)) { -+ con_printk(KERN_ERR, con, "failed to start printing thread\n"); -+ kfree(con->thread_pbufs); -+ con->thread_pbufs = NULL; -+ return; -+ } -+ -+ con->kthread = kt; -+ -+ /* -+ * It is important that console printing threads are scheduled -+ * shortly after a printk call and with generous runtime budgets. -+ */ -+ sched_set_normal(con->kthread, -20); -+} -+ -+static int __init printk_setup_threads(void) -+{ -+ struct console *con; -+ -+ if (printk_force_atomic) -+ return 0; -+ -+ console_list_lock(); -+ printk_threads_enabled = true; -+ for_each_console(con) -+ cons_kthread_create(con); -+ if (have_bkl_console) -+ console_bkl_kthread_create(); -+ console_list_unlock(); -+ return 0; -+} -+early_initcall(printk_setup_threads); -+ -+/** -+ * cons_nobkl_init - Initialize the NOBKL console specific data -+ * @con: Console to initialize -+ * -+ * Returns: True on success. False otherwise and the console cannot be used. -+ */ -+bool cons_nobkl_init(struct console *con) -+{ -+ struct cons_state state = { }; -+ -+ if (!cons_alloc_percpu_data(con)) -+ return false; -+ -+ rcuwait_init(&con->rcuwait); -+ atomic_set(&con->kthread_waiting, 0); -+ init_irq_work(&con->irq_work, cons_irq_work); -+ cons_state_set(con, CON_STATE_CUR, &state); -+ cons_state_set(con, CON_STATE_REQ, &state); -+ cons_seq_init(con); -+ cons_kthread_create(con); -+ return true; -+} -+ -+/** -+ * cons_nobkl_cleanup - Cleanup the NOBKL console specific data -+ * @con: Console to cleanup -+ */ -+void cons_nobkl_cleanup(struct console *con) -+{ -+ struct cons_state state = { }; -+ -+ cons_kthread_stop(con); -+ cons_state_set(con, CON_STATE_CUR, &state); -+ cons_state_set(con, CON_STATE_REQ, &state); -+ cons_free_percpu_data(con); -+} -+ -+/** -+ * printk_kthread_shutdown - shutdown all threaded printers -+ * -+ * On system shutdown all threaded printers are stopped. This allows printk -+ * to transition back to atomic printing, thus providing a robust mechanism -+ * for the final shutdown/reboot messages to be output. -+ */ -+static void printk_kthread_shutdown(void) -+{ -+ struct console *con; -+ -+ console_list_lock(); -+ for_each_console(con) { -+ if (con->flags & CON_NO_BKL) -+ cons_kthread_stop(con); -+ } -+ console_list_unlock(); -+} -+ -+static struct syscore_ops printk_syscore_ops = { -+ .shutdown = printk_kthread_shutdown, -+}; -+ -+static int __init printk_init_ops(void) -+{ -+ register_syscore_ops(&printk_syscore_ops); -+ return 0; -+} -+device_initcall(printk_init_ops); -diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c -index ef0f9a2044da1..5c1470bd60bcb 100644 ---- a/kernel/printk/printk_safe.c -+++ b/kernel/printk/printk_safe.c -@@ -12,18 +12,41 @@ - - #include "internal.h" - --static DEFINE_PER_CPU(int, printk_context); -+struct printk_context { -+ local_lock_t cpu; -+ int recursion; -+}; -+ -+static DEFINE_PER_CPU(struct printk_context, printk_context) = { -+ .cpu = INIT_LOCAL_LOCK(cpu), -+}; - - /* Can be preempted by NMI. */ --void __printk_safe_enter(void) -+void __printk_safe_enter(unsigned long *flags) - { -- this_cpu_inc(printk_context); -+ WARN_ON_ONCE(in_nmi()); -+ local_lock_irqsave(&printk_context.cpu, *flags); -+ this_cpu_inc(printk_context.recursion); - } - - /* Can be preempted by NMI. */ --void __printk_safe_exit(void) -+void __printk_safe_exit(unsigned long *flags) - { -- this_cpu_dec(printk_context); -+ WARN_ON_ONCE(in_nmi()); -+ this_cpu_dec(printk_context.recursion); -+ local_unlock_irqrestore(&printk_context.cpu, *flags); -+} -+ -+void __printk_deferred_enter(void) -+{ -+ WARN_ON_ONCE(!in_atomic()); -+ this_cpu_inc(printk_context.recursion); -+} -+ -+void __printk_deferred_exit(void) -+{ -+ WARN_ON_ONCE(!in_atomic()); -+ this_cpu_dec(printk_context.recursion); - } - - asmlinkage int vprintk(const char *fmt, va_list args) -@@ -38,13 +61,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) - * Use the main logbuf even in NMI. But avoid calling console - * drivers that might have their own locks. - */ -- if (this_cpu_read(printk_context) || in_nmi()) { -- int len; -- -- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); -- defer_console_output(); -- return len; -- } -+ if (this_cpu_read(printk_context.recursion) || in_nmi()) -+ return vprintk_deferred(fmt, args); - - /* No obstacles. */ - return vprintk_default(fmt, args); -diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c -index 8e6c023212cb3..34f8adf7c0a39 100644 ---- a/kernel/rcu/rcutorture.c -+++ b/kernel/rcu/rcutorture.c -@@ -2407,6 +2407,12 @@ static int rcutorture_booster_init(unsigned int cpu) - WARN_ON_ONCE(!t); - sp.sched_priority = 2; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); -+#ifdef CONFIG_PREEMPT_RT -+ t = per_cpu(timersd, cpu); -+ WARN_ON_ONCE(!t); -+ sp.sched_priority = 2; -+ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); -+#endif - } - - /* Don't allow time recalculation while creating a new task. */ -diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h -index b10b8349bb2a4..804306204d0d0 100644 ---- a/kernel/rcu/tree_stall.h -+++ b/kernel/rcu/tree_stall.h -@@ -8,6 +8,7 @@ - */ - - #include -+#include - - ////////////////////////////////////////////////////////////////////////////// - // -@@ -582,6 +583,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void) - - static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) - { -+ enum cons_prio prev_prio; - int cpu; - unsigned long flags; - unsigned long gpa; -@@ -597,6 +599,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) - if (rcu_stall_is_suppressed()) - return; - -+ prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY); -+ - /* - * OK, time to rat on our buddy... - * See Documentation/RCU/stallwarn.rst for info on how to debug -@@ -651,6 +655,8 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) - panic_on_rcu_stall(); - - rcu_force_quiescent_state(); /* Kick them all. */ -+ -+ cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio); - } - - static void print_cpu_stall(unsigned long gps) -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 0d18c3969f904..a57a1a3beeba1 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -1042,6 +1042,46 @@ void resched_curr(struct rq *rq) - trace_sched_wake_idle_without_ipi(cpu); - } - -+#ifdef CONFIG_PREEMPT_LAZY -+ -+static int tsk_is_polling(struct task_struct *p) -+{ -+#ifdef TIF_POLLING_NRFLAG -+ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -+#else -+ return 0; -+#endif -+} -+ -+void resched_curr_lazy(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ if (!sched_feat(PREEMPT_LAZY)) { -+ resched_curr(rq); -+ return; -+ } -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ if (test_tsk_need_resched_lazy(curr)) -+ return; -+ -+ set_tsk_need_resched_lazy(curr); -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) -+ return; -+ -+ /* NEED_RESCHED_LAZY must be visible before we test polling */ -+ smp_mb(); -+ if (!tsk_is_polling(curr)) -+ smp_send_reschedule(cpu); -+} -+#endif -+ - void resched_cpu(int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -2230,6 +2270,7 @@ void migrate_disable(void) - preempt_disable(); - this_rq()->nr_pinned++; - p->migration_disabled = 1; -+ preempt_lazy_disable(); - preempt_enable(); - } - EXPORT_SYMBOL_GPL(migrate_disable); -@@ -2265,6 +2306,7 @@ void migrate_enable(void) - barrier(); - p->migration_disabled = 0; - this_rq()->nr_pinned--; -+ preempt_lazy_enable(); - preempt_enable(); - } - EXPORT_SYMBOL_GPL(migrate_enable); -@@ -3318,6 +3360,76 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, - } - #endif /* CONFIG_NUMA_BALANCING */ - -+#ifdef CONFIG_PREEMPT_RT -+ -+/* -+ * Consider: -+ * -+ * set_special_state(X); -+ * -+ * do_things() -+ * // Somewhere in there is an rtlock that can be contended: -+ * current_save_and_set_rtlock_wait_state(); -+ * [...] -+ * schedule_rtlock(); (A) -+ * [...] -+ * current_restore_rtlock_saved_state(); -+ * -+ * schedule(); (B) -+ * -+ * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an -+ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its -+ * state to X. For things like ptrace (X=TASK_TRACED), the task could have more -+ * work to do upon acquiring the lock in do_things() before whoever called -+ * wait_task_inactive() should return. IOW, we have to wait for: -+ * -+ * p.saved_state = TASK_RUNNING -+ * p.__state = X -+ * -+ * which implies the task isn't blocked on an RT lock and got to schedule() (B). -+ * -+ * Also see comments in ttwu_state_match(). -+ */ -+ -+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) -+{ -+ unsigned long flags; -+ bool mismatch; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ if (READ_ONCE(p->__state) & match_state) -+ mismatch = false; -+ else if (READ_ONCE(p->saved_state) & match_state) -+ mismatch = false; -+ else -+ mismatch = true; -+ -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ return mismatch; -+} -+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, -+ bool *wait) -+{ -+ if (READ_ONCE(p->__state) & match_state) -+ return true; -+ if (READ_ONCE(p->saved_state) & match_state) { -+ *wait = true; -+ return true; -+ } -+ return false; -+} -+#else -+static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) -+{ -+ return !(READ_ONCE(p->__state) & match_state); -+} -+static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, -+ bool *wait) -+{ -+ return (READ_ONCE(p->__state) & match_state); -+} -+#endif -+ - /* - * wait_task_inactive - wait for a thread to unschedule. - * -@@ -3336,7 +3448,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, - */ - unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) - { -- int running, queued; -+ bool running, wait; - struct rq_flags rf; - unsigned long ncsw; - struct rq *rq; -@@ -3362,7 +3474,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state - * is actually now running somewhere else! - */ - while (task_on_cpu(rq, p)) { -- if (!(READ_ONCE(p->__state) & match_state)) -+ if (state_mismatch(p, match_state)) - return 0; - cpu_relax(); - } -@@ -3375,9 +3487,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state - rq = task_rq_lock(p, &rf); - trace_sched_wait_task(p); - running = task_on_cpu(rq, p); -- queued = task_on_rq_queued(p); -+ wait = task_on_rq_queued(p); - ncsw = 0; -- if (READ_ONCE(p->__state) & match_state) -+ -+ if (state_match(p, match_state, &wait)) - ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ - task_rq_unlock(rq, p, &rf); - -@@ -3407,7 +3520,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state - * running right now), it's preempted, and we should - * yield - it could be a while. - */ -- if (unlikely(queued)) { -+ if (unlikely(wait)) { - ktime_t to = NSEC_PER_SEC / HZ; - - set_current_state(TASK_UNINTERRUPTIBLE); -@@ -4712,6 +4825,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - p->on_cpu = 0; - #endif - init_task_preempt_count(p); -+#ifdef CONFIG_HAVE_PREEMPT_LAZY -+ task_thread_info(p)->preempt_lazy_count = 0; -+#endif - #ifdef CONFIG_SMP - plist_node_init(&p->pushable_tasks, MAX_PRIO); - RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -6588,6 +6704,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) - - next = pick_next_task(rq, prev, &rf); - clear_tsk_need_resched(prev); -+ clear_tsk_need_resched_lazy(prev); - clear_preempt_need_resched(); - #ifdef CONFIG_SCHED_DEBUG - rq->last_seen_need_resched_ns = 0; -@@ -6648,14 +6765,11 @@ void __noreturn do_task_dead(void) - cpu_relax(); - } - --static inline void sched_submit_work(struct task_struct *tsk) -+void sched_submit_work(void) - { -- unsigned int task_flags; -+ struct task_struct *tsk = current; -+ unsigned int task_flags = tsk->flags; - -- if (task_is_running(tsk)) -- return; -- -- task_flags = tsk->flags; - /* - * If a worker goes to sleep, notify and ask workqueue whether it - * wants to wake up a task to maintain concurrency. -@@ -6681,8 +6795,10 @@ static inline void sched_submit_work(struct task_struct *tsk) - blk_flush_plug(tsk->plug, true); - } - --static void sched_update_worker(struct task_struct *tsk) -+void sched_resume_work(void) - { -+ struct task_struct *tsk = current; -+ - if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { - if (tsk->flags & PF_WQ_WORKER) - wq_worker_running(tsk); -@@ -6691,20 +6807,29 @@ static void sched_update_worker(struct task_struct *tsk) - } - } - --asmlinkage __visible void __sched schedule(void) -+static void schedule_loop(unsigned int sched_mode) - { -- struct task_struct *tsk = current; -- -- sched_submit_work(tsk); - do { - preempt_disable(); -- __schedule(SM_NONE); -+ __schedule(sched_mode); - sched_preempt_enable_no_resched(); - } while (need_resched()); -- sched_update_worker(tsk); -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ if (!task_is_running(current)) -+ sched_submit_work(); -+ schedule_loop(SM_NONE); -+ sched_resume_work(); - } - EXPORT_SYMBOL(schedule); - -+void schedule_rtmutex(void) -+{ -+ schedule_loop(SM_NONE); -+} -+ - /* - * synchronize_rcu_tasks() makes sure that no task is stuck in preempted - * state (have scheduled out non-voluntarily) by making sure that all -@@ -6764,11 +6889,7 @@ void __sched schedule_preempt_disabled(void) - #ifdef CONFIG_PREEMPT_RT - void __sched notrace schedule_rtlock(void) - { -- do { -- preempt_disable(); -- __schedule(SM_RTLOCK_WAIT); -- sched_preempt_enable_no_resched(); -- } while (need_resched()); -+ schedule_loop(SM_RTLOCK_WAIT); - } - NOKPROBE_SYMBOL(schedule_rtlock); - #endif -@@ -6802,6 +6923,30 @@ static void __sched notrace preempt_schedule_common(void) - } while (need_resched()); - } - -+#ifdef CONFIG_PREEMPT_LAZY -+/* -+ * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is -+ * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as -+ * preempt_lazy_count counter >0. -+ */ -+static __always_inline int preemptible_lazy(void) -+{ -+ if (test_thread_flag(TIF_NEED_RESCHED)) -+ return 1; -+ if (current_thread_info()->preempt_lazy_count) -+ return 0; -+ return 1; -+} -+ -+#else -+ -+static inline int preemptible_lazy(void) -+{ -+ return 1; -+} -+ -+#endif -+ - #ifdef CONFIG_PREEMPTION - /* - * This is the entry point to schedule() from in-kernel preemption -@@ -6815,6 +6960,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) - */ - if (likely(!preemptible())) - return; -+ if (!preemptible_lazy()) -+ return; - preempt_schedule_common(); - } - NOKPROBE_SYMBOL(preempt_schedule); -@@ -6862,6 +7009,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) - if (likely(!preemptible())) - return; - -+ if (!preemptible_lazy()) -+ return; -+ - do { - /* - * Because the function tracer can trace preempt_count_sub() -@@ -9167,7 +9317,9 @@ void __init init_idle(struct task_struct *idle, int cpu) - - /* Set the preempt count _outside_ the spinlocks! */ - init_idle_preempt_count(idle, cpu); -- -+#ifdef CONFIG_HAVE_PREEMPT_LAZY -+ task_thread_info(idle)->preempt_lazy_count = 0; -+#endif - /* - * The idle tasks have their own, simple scheduling class: - */ -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index ed89be0aa6503..46ffbbfde97b0 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -4948,7 +4948,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) - - delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; - if (delta_exec > ideal_runtime) { -- resched_curr(rq_of(cfs_rq)); -+ resched_curr_lazy(rq_of(cfs_rq)); - /* - * The current task ran long enough, ensure it doesn't get - * re-elected due to buddy favours. -@@ -4972,7 +4972,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) - return; - - if (delta > ideal_runtime) -- resched_curr(rq_of(cfs_rq)); -+ resched_curr_lazy(rq_of(cfs_rq)); - } - - static void -@@ -5118,7 +5118,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) - * validating it and just reschedule. - */ - if (queued) { -- resched_curr(rq_of(cfs_rq)); -+ resched_curr_lazy(rq_of(cfs_rq)); - return; - } - /* -@@ -5267,7 +5267,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) - * hierarchy can be throttled - */ - if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) -- resched_curr(rq_of(cfs_rq)); -+ resched_curr_lazy(rq_of(cfs_rq)); - } - - static __always_inline -@@ -6142,7 +6142,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) - - if (delta < 0) { - if (task_current(rq, p)) -- resched_curr(rq); -+ resched_curr_lazy(rq); - return; - } - hrtick_start(rq, delta); -@@ -7871,7 +7871,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ - return; - - preempt: -- resched_curr(rq); -+ resched_curr_lazy(rq); - /* - * Only set the backward buddy when the current task is still - * on the rq. This can happen when a wakeup gets interleaved -@@ -12036,7 +12036,7 @@ static void task_fork_fair(struct task_struct *p) - * 'current' within the tree based on its new key value. - */ - swap(curr->vruntime, se->vruntime); -- resched_curr(rq); -+ resched_curr_lazy(rq); - } - - se->vruntime -= cfs_rq->min_vruntime; -@@ -12063,7 +12063,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) - */ - if (task_current(rq, p)) { - if (p->prio > oldprio) -- resched_curr(rq); -+ resched_curr_lazy(rq); - } else - check_preempt_curr(rq, p, 0); - } -diff --git a/kernel/sched/features.h b/kernel/sched/features.h -index ee7f23c76bd33..e13090e33f3c4 100644 ---- a/kernel/sched/features.h -+++ b/kernel/sched/features.h -@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) - - #ifdef CONFIG_PREEMPT_RT - SCHED_FEAT(TTWU_QUEUE, false) -+# ifdef CONFIG_PREEMPT_LAZY -+SCHED_FEAT(PREEMPT_LAZY, true) -+# endif - #else - - /* -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 3e8df6d31c1e3..6f272ef973675 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -2370,6 +2370,15 @@ extern void reweight_task(struct task_struct *p, int prio); - extern void resched_curr(struct rq *rq); - extern void resched_cpu(int cpu); - -+#ifdef CONFIG_PREEMPT_LAZY -+extern void resched_curr_lazy(struct rq *rq); -+#else -+static inline void resched_curr_lazy(struct rq *rq) -+{ -+ resched_curr(rq); -+} -+#endif -+ - extern struct rt_bandwidth def_rt_bandwidth; - extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); - extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); -diff --git a/kernel/signal.c b/kernel/signal.c -index 8cb28f1df2941..138d68cfc204d 100644 ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -432,7 +432,18 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, - return NULL; - - if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { -- q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); -+ -+ if (!sigqueue_flags) { -+ struct sighand_struct *sighand = t->sighand; -+ -+ lockdep_assert_held(&sighand->siglock); -+ if (sighand->sigqueue_cache) { -+ q = sighand->sigqueue_cache; -+ sighand->sigqueue_cache = NULL; -+ } -+ } -+ if (!q) -+ q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); - } else { - print_dropped_signal(sig); - } -@@ -447,14 +458,43 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, - return q; - } - --static void __sigqueue_free(struct sigqueue *q) -+static bool sigqueue_cleanup_accounting(struct sigqueue *q) - { - if (q->flags & SIGQUEUE_PREALLOC) -- return; -+ return false; - if (q->ucounts) { - dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); - q->ucounts = NULL; - } -+ return true; -+} -+ -+static void __sigqueue_free(struct sigqueue *q) -+{ -+ if (!sigqueue_cleanup_accounting(q)) -+ return; -+ kmem_cache_free(sigqueue_cachep, q); -+} -+ -+void sigqueue_free_cached_entry(struct sigqueue *q) -+{ -+ if (!q) -+ return; -+ kmem_cache_free(sigqueue_cachep, q); -+} -+ -+static void sigqueue_cache_or_free(struct sigqueue *q) -+{ -+ struct sighand_struct *sighand = current->sighand; -+ -+ if (!sigqueue_cleanup_accounting(q)) -+ return; -+ -+ lockdep_assert_held(&sighand->siglock); -+ if (!sighand->sigqueue_cache) { -+ sighand->sigqueue_cache = q; -+ return; -+ } - kmem_cache_free(sigqueue_cachep, q); - } - -@@ -594,7 +634,7 @@ static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *i - (info->si_code == SI_TIMER) && - (info->si_sys_private); - -- __sigqueue_free(first); -+ sigqueue_cache_or_free(first); - } else { - /* - * Ok, it wasn't in the queue. This must be -@@ -2296,15 +2336,31 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, - do_notify_parent_cldstop(current, false, why); - - /* -- * Don't want to allow preemption here, because -- * sys_ptrace() needs this task to be inactive. -+ * The previous do_notify_parent_cldstop() invocation woke ptracer. -+ * One a PREEMPTION kernel this can result in preemption requirement -+ * which will be fulfilled after read_unlock() and the ptracer will be -+ * put on the CPU. -+ * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for -+ * this task wait in schedule(). If this task gets preempted then it -+ * remains enqueued on the runqueue. The ptracer will observe this and -+ * then sleep for a delay of one HZ tick. In the meantime this task -+ * gets scheduled, enters schedule() and will wait for the ptracer. - * -- * XXX: implement read_unlock_no_resched(). -+ * This preemption point is not bad from correctness point of view but -+ * extends the runtime by one HZ tick time due to the ptracer's sleep. -+ * The preempt-disable section ensures that there will be no preemption -+ * between unlock and schedule() and so improving the performance since -+ * the ptracer has no reason to sleep. -+ * -+ * This optimisation is not doable on PREEMPT_RT due to the spinlock_t -+ * within the preempt-disable section. - */ -- preempt_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_disable(); - read_unlock(&tasklist_lock); - cgroup_enter_frozen(); -- preempt_enable_no_resched(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_enable_no_resched(); - schedule(); - cgroup_leave_frozen(true); - -diff --git a/kernel/softirq.c b/kernel/softirq.c -index c8a6913c067d9..af9e879bbbf75 100644 ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -80,21 +80,6 @@ static void wakeup_softirqd(void) - wake_up_process(tsk); - } - --/* -- * If ksoftirqd is scheduled, we do not want to process pending softirqs -- * right now. Let ksoftirqd handle this at its own rate, to get fairness, -- * unless we're doing some of the synchronous softirqs. -- */ --#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ)) --static bool ksoftirqd_running(unsigned long pending) --{ -- struct task_struct *tsk = __this_cpu_read(ksoftirqd); -- -- if (pending & SOFTIRQ_NOW_MASK) -- return false; -- return tsk && task_is_running(tsk) && !__kthread_should_park(tsk); --} -- - #ifdef CONFIG_TRACE_IRQFLAGS - DEFINE_PER_CPU(int, hardirqs_enabled); - DEFINE_PER_CPU(int, hardirq_context); -@@ -236,7 +221,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) - goto out; - - pending = local_softirq_pending(); -- if (!pending || ksoftirqd_running(pending)) -+ if (!pending) - goto out; - - /* -@@ -432,9 +417,6 @@ static inline bool should_wake_ksoftirqd(void) - - static inline void invoke_softirq(void) - { -- if (ksoftirqd_running(local_softirq_pending())) -- return; -- - if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) { - #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK - /* -@@ -468,7 +450,7 @@ asmlinkage __visible void do_softirq(void) - - pending = local_softirq_pending(); - -- if (pending && !ksoftirqd_running(pending)) -+ if (pending) - do_softirq_own_stack(); - - local_irq_restore(flags); -@@ -637,6 +619,24 @@ static inline void tick_irq_exit(void) - #endif - } - -+#ifdef CONFIG_PREEMPT_RT -+DEFINE_PER_CPU(struct task_struct *, timersd); -+DEFINE_PER_CPU(unsigned long, pending_timer_softirq); -+ -+static void wake_timersd(void) -+{ -+ struct task_struct *tsk = __this_cpu_read(timersd); -+ -+ if (tsk) -+ wake_up_process(tsk); -+} -+ -+#else -+ -+static inline void wake_timersd(void) { } -+ -+#endif -+ - static inline void __irq_exit_rcu(void) - { - #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED -@@ -649,6 +649,10 @@ static inline void __irq_exit_rcu(void) - if (!in_interrupt() && local_softirq_pending()) - invoke_softirq(); - -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() && -+ !(in_nmi() | in_hardirq())) -+ wake_timersd(); -+ - tick_irq_exit(); - } - -@@ -976,12 +980,70 @@ static struct smp_hotplug_thread softirq_threads = { - .thread_comm = "ksoftirqd/%u", - }; - -+#ifdef CONFIG_PREEMPT_RT -+static void timersd_setup(unsigned int cpu) -+{ -+ sched_set_fifo_low(current); -+} -+ -+static int timersd_should_run(unsigned int cpu) -+{ -+ return local_pending_timers(); -+} -+ -+static void run_timersd(unsigned int cpu) -+{ -+ unsigned int timer_si; -+ -+ ksoftirqd_run_begin(); -+ -+ timer_si = local_pending_timers(); -+ __this_cpu_write(pending_timer_softirq, 0); -+ or_softirq_pending(timer_si); -+ -+ __do_softirq(); -+ -+ ksoftirqd_run_end(); -+} -+ -+static void raise_ktimers_thread(unsigned int nr) -+{ -+ trace_softirq_raise(nr); -+ __this_cpu_or(pending_timer_softirq, 1 << nr); -+} -+ -+void raise_hrtimer_softirq(void) -+{ -+ raise_ktimers_thread(HRTIMER_SOFTIRQ); -+} -+ -+void raise_timer_softirq(void) -+{ -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ raise_ktimers_thread(TIMER_SOFTIRQ); -+ wake_timersd(); -+ local_irq_restore(flags); -+} -+ -+static struct smp_hotplug_thread timer_threads = { -+ .store = &timersd, -+ .setup = timersd_setup, -+ .thread_should_run = timersd_should_run, -+ .thread_fn = run_timersd, -+ .thread_comm = "ktimers/%u", -+}; -+#endif -+ - static __init int spawn_ksoftirqd(void) - { - cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, - takeover_tasklets); - BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); -- -+#ifdef CONFIG_PREEMPT_RT -+ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); -+#endif - return 0; - } - early_initcall(spawn_ksoftirqd); -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index e8c08292defcb..10c1246cdba76 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) - if (!ktime_before(now, cpu_base->softirq_expires_next)) { - cpu_base->softirq_expires_next = KTIME_MAX; - cpu_base->softirq_activated = 1; -- raise_softirq_irqoff(HRTIMER_SOFTIRQ); -+ raise_hrtimer_softirq(); - } - - __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); -@@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void) - if (!ktime_before(now, cpu_base->softirq_expires_next)) { - cpu_base->softirq_expires_next = KTIME_MAX; - cpu_base->softirq_activated = 1; -- raise_softirq_irqoff(HRTIMER_SOFTIRQ); -+ raise_hrtimer_softirq(); - } - - __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); -diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c -index 808a247205a9a..c0a32aab8f276 100644 ---- a/kernel/time/posix-timers.c -+++ b/kernel/time/posix-timers.c -@@ -140,25 +140,29 @@ static struct k_itimer *posix_timer_by_id(timer_t id) - static int posix_timer_add(struct k_itimer *timer) - { - struct signal_struct *sig = current->signal; -- int first_free_id = sig->posix_timer_id; - struct hlist_head *head; -- int ret = -ENOENT; -+ unsigned int start, id; - -- do { -+ /* Can be written by a different task concurrently in the loop below */ -+ start = READ_ONCE(sig->next_posix_timer_id); -+ -+ for (id = ~start; start != id; id++) { - spin_lock(&hash_lock); -- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; -- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { -+ id = sig->next_posix_timer_id; -+ -+ /* Write the next ID back. Clamp it to the positive space */ -+ WRITE_ONCE(sig->next_posix_timer_id, (id + 1) & INT_MAX); -+ -+ head = &posix_timers_hashtable[hash(sig, id)]; -+ if (!__posix_timers_find(head, sig, id)) { - hlist_add_head_rcu(&timer->t_hash, head); -- ret = sig->posix_timer_id; -+ spin_unlock(&hash_lock); -+ return id; - } -- if (++sig->posix_timer_id < 0) -- sig->posix_timer_id = 0; -- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) -- /* Loop over all possible ids completed */ -- ret = -EAGAIN; - spin_unlock(&hash_lock); -- } while (ret == -ENOENT); -- return ret; -+ } -+ /* POSIX return code when no timer ID could be allocated */ -+ return -EAGAIN; - } - - static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) -@@ -1037,27 +1041,59 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) - } - - /* -- * return timer owned by the process, used by exit_itimers -+ * Delete a timer if it is armed, remove it from the hash and schedule it -+ * for RCU freeing. - */ - static void itimer_delete(struct k_itimer *timer) - { --retry_delete: -- spin_lock_irq(&timer->it_lock); -+ unsigned long flags; - -+retry_delete: -+ /* -+ * irqsave is required to make timer_wait_running() work. -+ */ -+ spin_lock_irqsave(&timer->it_lock, flags); -+ -+ /* -+ * Even if the timer is not longer accessible from other tasks -+ * it still might be armed and queued in the underlying timer -+ * mechanism. Worse, that timer mechanism might run the expiry -+ * function concurrently. -+ */ - if (timer_delete_hook(timer) == TIMER_RETRY) { -- spin_unlock_irq(&timer->it_lock); -+ /* -+ * Timer is expired concurrently, prevent livelocks -+ * and pointless spinning on RT. -+ * -+ * The CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y case is -+ * irrelevant here because obviously the exiting task -+ * cannot be expiring timer in task work concurrently. -+ * Ditto for CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n as the -+ * tick interrupt cannot run on this CPU because the above -+ * spin_lock disabled interrupts. -+ * -+ * timer_wait_running() drops timer::it_lock, which opens -+ * the possibility for another task to delete the timer. -+ * -+ * That's not possible here because this is invoked from -+ * do_exit() only for the last thread of the thread group. -+ * So no other task can access that timer. -+ */ -+ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) -+ return; -+ - goto retry_delete; - } - list_del(&timer->list); - -- spin_unlock_irq(&timer->it_lock); -+ spin_unlock_irqrestore(&timer->it_lock, flags); - release_posix_timer(timer, IT_ID_SET); - } - - /* -- * This is called by do_exit or de_thread, only when nobody else can -- * modify the signal->posix_timers list. Yet we need sighand->siglock -- * to prevent the race with /proc/pid/timers. -+ * Invoked from do_exit() when the last thread of a thread group exits. -+ * At that point no other task can access the timers of the dying -+ * task anymore. - */ - void exit_itimers(struct task_struct *tsk) - { -@@ -1067,10 +1103,12 @@ void exit_itimers(struct task_struct *tsk) - if (list_empty(&tsk->signal->posix_timers)) - return; - -+ /* Protect against concurrent read via /proc/$PID/timers */ - spin_lock_irq(&tsk->sighand->siglock); - list_replace_init(&tsk->signal->posix_timers, &timers); - spin_unlock_irq(&tsk->sighand->siglock); - -+ /* The timers are not longer accessible via tsk::signal */ - while (!list_empty(&timers)) { - tmr = list_first_entry(&timers, struct k_itimer, list); - itimer_delete(tmr); -diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c -index a46506f7ec6d0..1ae9e4e8a0715 100644 ---- a/kernel/time/tick-sched.c -+++ b/kernel/time/tick-sched.c -@@ -789,7 +789,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) - - static inline bool local_timer_softirq_pending(void) - { -- return local_softirq_pending() & BIT(TIMER_SOFTIRQ); -+ return local_pending_timers() & BIT(TIMER_SOFTIRQ); - } - - static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index 63a8ce7177dd4..7cad6fe3c035c 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -2054,7 +2054,7 @@ static void run_local_timers(void) - if (time_before(jiffies, base->next_expiry)) - return; - } -- raise_softirq(TIMER_SOFTIRQ); -+ raise_timer_softirq(); - } - - /* -diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index 5909aaf2f4c08..2867def70f441 100644 ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -2694,11 +2694,19 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) - if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) - trace_flags |= TRACE_FLAG_BH_OFF; - -- if (tif_need_resched()) -+ if (tif_need_resched_now()) - trace_flags |= TRACE_FLAG_NEED_RESCHED; -+#ifdef CONFIG_PREEMPT_LAZY -+ /* Run out of bits. Share the LAZY and PREEMPT_RESCHED */ -+ if (need_resched_lazy()) -+ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; -+#else - if (test_preempt_need_resched()) - trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; -- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | -+#endif -+ -+ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | -+ (preempt_lazy_count() & 0xff) << 16 | - (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; - } - -@@ -4287,15 +4295,17 @@ unsigned long trace_total_entries(struct trace_array *tr) - - static void print_lat_help_header(struct seq_file *m) - { -- seq_puts(m, "# _------=> CPU# \n" -- "# / _-----=> irqs-off/BH-disabled\n" -- "# | / _----=> need-resched \n" -- "# || / _---=> hardirq/softirq \n" -- "# ||| / _--=> preempt-depth \n" -- "# |||| / _-=> migrate-disable \n" -- "# ||||| / delay \n" -- "# cmd pid |||||| time | caller \n" -- "# \\ / |||||| \\ | / \n"); -+ seq_puts(m, "# _--------=> CPU# \n" -+ "# / _-------=> irqs-off/BH-disabled\n" -+ "# | / _------=> need-resched \n" -+ "# || / _-----=> need-resched-lazy\n" -+ "# ||| / _----=> hardirq/softirq \n" -+ "# |||| / _---=> preempt-depth \n" -+ "# ||||| / _--=> preempt-lazy-depth\n" -+ "# |||||| / _-=> migrate-disable \n" -+ "# ||||||| / delay \n" -+ "# cmd pid |||||||| time | caller \n" -+ "# \\ / |||||||| \\ | / \n"); - } - - static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -4329,14 +4339,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file - - print_event_info(buf, m); - -- seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); -- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); -- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); -- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); -- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); -- seq_printf(m, "# %.*s|||| / delay\n", prec, space); -- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); -- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); -+ seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space); -+ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); -+ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space); -+ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); -+ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); -+ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); -+ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); -+ seq_printf(m, "# %.*s|||||| / delay\n", prec, space); -+ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); -+ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); - } - - void -diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c -index 654ffa40457aa..b2d52f8355b70 100644 ---- a/kernel/trace/trace_events.c -+++ b/kernel/trace/trace_events.c -@@ -208,6 +208,7 @@ static int trace_define_common_fields(void) - /* Holds both preempt_count and migrate_disable */ - __common_field(unsigned char, preempt_count); - __common_field(int, pid); -+ __common_field(unsigned char, preempt_lazy_count); - - return ret; - } -diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c -index bd475a00f96d1..89d4a3bfdc6d5 100644 ---- a/kernel/trace/trace_output.c -+++ b/kernel/trace/trace_output.c -@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) - { - char hardsoft_irq; - char need_resched; -+ char need_resched_lazy; - char irqs_off; - int hardirq; - int softirq; -@@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) - - switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | - TRACE_FLAG_PREEMPT_RESCHED)) { -+#ifndef CONFIG_PREEMPT_LAZY - case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: - need_resched = 'N'; - break; -+#endif - case TRACE_FLAG_NEED_RESCHED: - need_resched = 'n'; - break; -+#ifndef CONFIG_PREEMPT_LAZY - case TRACE_FLAG_PREEMPT_RESCHED: - need_resched = 'p'; - break; -+#endif - default: - need_resched = '.'; - break; - } - -+ need_resched_lazy = -+ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; -+ - hardsoft_irq = - (nmi && hardirq) ? 'Z' : - nmi ? 'z' : -@@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) - softirq ? 's' : - '.' ; - -- trace_seq_printf(s, "%c%c%c", -- irqs_off, need_resched, hardsoft_irq); -+ trace_seq_printf(s, "%c%c%c%c", -+ irqs_off, need_resched, need_resched_lazy, -+ hardsoft_irq); - - if (entry->preempt_count & 0xf) - trace_seq_printf(s, "%x", entry->preempt_count & 0xf); - else - trace_seq_putc(s, '.'); - -+ if (entry->preempt_lazy_count) -+ trace_seq_printf(s, "%x", entry->preempt_lazy_count); -+ else -+ trace_seq_putc(s, '.'); -+ - if (entry->preempt_count & 0xf0) - trace_seq_printf(s, "%x", entry->preempt_count >> 4); - else -diff --git a/localversion-rt b/localversion-rt -new file mode 100644 -index 0000000000000..18777ec0c27d4 ---- /dev/null -+++ b/localversion-rt -@@ -0,0 +1 @@ -+-rt15 -diff --git a/net/core/dev.c b/net/core/dev.c -index 1488f700bf819..8c3b70160be8c 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -4573,15 +4573,6 @@ static void rps_trigger_softirq(void *data) - - #endif /* CONFIG_RPS */ - --/* Called from hardirq (IPI) context */ --static void trigger_rx_softirq(void *data) --{ -- struct softnet_data *sd = data; -- -- __raise_softirq_irqoff(NET_RX_SOFTIRQ); -- smp_store_release(&sd->defer_ipi_scheduled, 0); --} -- - /* - * Check if this softnet_data structure is another cpu one - * If yes, queue it to our IPI list and return 1 -@@ -6632,6 +6623,30 @@ static void skb_defer_free_flush(struct softnet_data *sd) - } - } - -+#ifndef CONFIG_PREEMPT_RT -+/* Called from hardirq (IPI) context */ -+static void trigger_rx_softirq(void *data) -+{ -+ struct softnet_data *sd = data; -+ -+ __raise_softirq_irqoff(NET_RX_SOFTIRQ); -+ smp_store_release(&sd->defer_ipi_scheduled, 0); -+} -+ -+#else -+ -+static void trigger_rx_softirq(struct work_struct *defer_work) -+{ -+ struct softnet_data *sd; -+ -+ sd = container_of(defer_work, struct softnet_data, defer_work); -+ smp_store_release(&sd->defer_ipi_scheduled, 0); -+ local_bh_disable(); -+ skb_defer_free_flush(sd); -+ local_bh_enable(); -+} -+#endif -+ - static __latent_entropy void net_rx_action(struct softirq_action *h) - { - struct softnet_data *sd = this_cpu_ptr(&softnet_data); -@@ -11409,7 +11424,11 @@ static int __init net_dev_init(void) - INIT_CSD(&sd->csd, rps_trigger_softirq, sd); - sd->cpu = i; - #endif -+#ifndef CONFIG_PREEMPT_RT - INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); -+#else -+ INIT_WORK(&sd->defer_work, trigger_rx_softirq); -+#endif - spin_lock_init(&sd->defer_lock); - - init_gro_hash(&sd->backlog); -diff --git a/net/core/skbuff.c b/net/core/skbuff.c -index 14bb41aafee30..3f8dac23205c6 100644 ---- a/net/core/skbuff.c -+++ b/net/core/skbuff.c -@@ -6856,6 +6856,11 @@ nodefer: __kfree_skb(skb); - /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU - * if we are unlucky enough (this seems very unlikely). - */ -- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) -+ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { -+#ifndef CONFIG_PREEMPT_RT - smp_call_function_single_async(cpu, &sd->defer_csd); -+#else -+ schedule_work_on(cpu, &sd->defer_work); -+#endif -+ } - } diff --git a/packages/sysutils/system-utils/sources/scripts/volume_sense b/packages/sysutils/system-utils/sources/scripts/volume_sense index d3bb51a6f..0731213b5 100755 --- a/packages/sysutils/system-utils/sources/scripts/volume_sense +++ b/packages/sysutils/system-utils/sources/scripts/volume_sense @@ -24,7 +24,8 @@ VOL_EVENT='*(KEY_VOLUME*, value *' # This matches all volume events VOL_UP='*UP), value *' # Differentiate 'up' volume event VOL_DOWN='*DOWN), value *' #Differentiate 'down' volume event -V_FUNC_KEY_EVENT='*('${DEVICE_VOL_MODIFIER}'), value *' +V_FUNC_KEYA_EVENT='*('${DEVICE_FUNC_KEYA_MODIFIER}'), value *' +V_FUNC_KEYB_EVENT='*('${DEVICE_FUNC_KEYB_MODIFIER}'), value *' # Matches if a button was pressed (1), released (0) or held down (2) PRESS='value [1-9]' @@ -37,7 +38,8 @@ VOLUME_REPEAT_MOD=4 BRIGHTNESS_REPEAT_MOD=4 # Variable to keep track of Fn being currently pressed -FUNC_PRESSED=no +FUNCA_PRESSED=no +FUNCB_PRESSED=no CONTROLLER_DISCONNECTED="*error reading: No such device" DEVICE_DISCONNECTED="*error reading: No such device" @@ -122,15 +124,18 @@ mkcontroller 2>/dev/null ||: REPEAT_NUM=0 continue fi - # Setup for 'brightness' if Fn pressed - if [[ "${FUNC_PRESSED}" == "yes" ]]; then - COMMAND=/usr/bin/brightness + if [[ "${FUNCA_PRESSED}" == "yes" ]]; then + COMMAND="/usr/bin/brightness" UP="up" DOWN="down" REPEAT_MOD=${BRIGHTNESS_REPEAT_MOD} - # Default to 'volume' if Fn is not pressed + elif [[ "${FUNCB_PRESSED}" == "yes" ]]; then + COMMAND="/usr/bin/say" + UP="-b" + DOWN="-b" else + # Default to 'volume' if Fn a or b is not pressed COMMAND="/usr/bin/system_utils vol" UP="+" DOWN="-" @@ -152,21 +157,36 @@ mkcontroller 2>/dev/null ||: # Run the commands to adjust volume/brightness if [[ "${line}" == ${VOL_UP} ]]; then ${COMMAND} ${UP} ${INCREMENT_AMOUNT} > /dev/null + read -t 0.1 elif [[ "${line}" == ${VOL_DOWN} ]]; then ${COMMAND} ${DOWN} ${INCREMENT_AMOUNT} > /dev/null + read -t 0.1 fi ;; - (${V_FUNC_KEY_EVENT}) + (${V_FUNC_KEYA_EVENT}) #Reset the number of repeats when Fn is pressed/release # as repeat speed is different between volume/brightness REPEAT_NUM=0 if [[ "${line}" =~ ${PRESS} ]]; then - FUNC_PRESSED=yes + FUNCA_PRESSED=yes elif [[ "${line}" =~ ${RELEASE} ]]; then - FUNC_PRESSED=no + FUNCA_PRESSED=no + fi + ;; + + (${V_FUNC_KEYB_EVENT}) + + #Reset the number of repeats when Fn is pressed/release + # as repeat speed is different between volume/brightness + REPEAT_NUM=0 + + if [[ "${line}" =~ ${PRESS} ]]; then + FUNCB_PRESSED=yes + elif [[ "${line}" =~ ${RELEASE} ]]; then + FUNCB_PRESSED=no fi ;; esac diff --git a/packages/tools/jslisten/config/jslisten_hotkeys b/packages/tools/jslisten/config/jslisten_hotkeys index 004151934..6ec3296fd 100644 --- a/packages/tools/jslisten/config/jslisten_hotkeys +++ b/packages/tools/jslisten/config/jslisten_hotkeys @@ -19,6 +19,3 @@ BTN_BACK60S=${DEVICE_BTN_TL2} BTN_KILL_HOTKEY=${DEVICE_BTN_TL} BTN_KILL_SELECTA=${DEVICE_BTN_SELECT} BTN_KILL_SELECTB=${DEVICE_BTN_START} -BTN_BAT_TL2=${DEVICE_BTN_TL2} -BTN_BAT_TR2=${DEVICE_BTN_TR2} -BTN_BAT_HOTKEY=${DEVICE_BTN_SELECT} diff --git a/packages/tools/jslisten/config/jslisten_profile b/packages/tools/jslisten/config/jslisten_profile index f186525ee..1fe910579 100644 --- a/packages/tools/jslisten/config/jslisten_profile +++ b/packages/tools/jslisten/config/jslisten_profile @@ -59,11 +59,6 @@ button1=${BTN_KILL_HOTKEY} button2=${BTN_KILL_SELECTA} button3=${BTN_KILL_SELECTB} -[BatteryPercent] -program="/usr/bin/say -b" -button1=${BTN_BAT_HOTKEY} -button2=${BTN_BAT_TL2} -button3=${BTN_BAT_TR2} EOF systemctl start jslisten elif [ "$1" == "stop" ] diff --git a/projects/PC/devices/AMD64/device.config b/projects/PC/devices/AMD64/device.config index df1a70bcf..59f8266f6 100644 --- a/projects/PC/devices/AMD64/device.config +++ b/projects/PC/devices/AMD64/device.config @@ -2,7 +2,7 @@ DEVICE_VOLUMECTL=true DEVICE_AUDIO_MIXER="Master" DEVICE_PLAYBACK_PATH_SPK="SPK" DEVICE_PLAYBACK_PATH_HP="HP" -DEVICE_VOL_MODIFIER="ABS_RZ" +DEVICE_FUNC_KEYA_MODIFIER="ABS_RZ" UI_SERVICE="weston.service" DEVICE_BRIGHTNESS="hardware"