diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 11ecf09aadc86..98aa5a478719c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -33,6 +33,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_MEMTEST @@ -70,7 +71,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL @@ -113,6 +114,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ @@ -128,6 +130,7 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM select RTC_LIB select SYS_SUPPORTS_APM_EMULATION select THREAD_INFO_IN_TASK diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index aecc403b28804..1b56e56f8f415 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -62,6 +62,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ __u32 cpu; /* cpu */ __u32 cpu_domain; /* cpu domain */ struct cpu_context_save cpu_context; /* cpu context */ @@ -133,6 +134,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ +#define TIF_NEED_RESCHED_LAZY 9 #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -147,6 +149,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* Checks for any syscall work in entry-common.S */ @@ -156,7 +159,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, /* * Change these and you break ASM code in entry-common.S */ -#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ + _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ _TIF_NOTIFY_SIGNAL) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2c8d76fd7c662..c3bdec7d2df9c 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -43,6 +43,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); 
DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c39303e5c2347..cfb4660e9feab 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -222,11 +222,18 @@ ENDPROC(__dabt_svc) #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 + bne 1f @ return from exception + ldr r0, [tsk, #TI_FLAGS] @ get flags + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set + blne svc_preempt @ preempt! + + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r8, #0 @ if preempt lazy count != 0 movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED_LAZY blne svc_preempt +1: #endif svc_exit r5, irq = 1 @ return from exception @@ -241,8 +248,14 @@ ENDPROC(__irq_svc) 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED + bne 1b + tst r0, #_TIF_NEED_RESCHED_LAZY reteq r8 @ go again - b 1b + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r0, #0 @ if preempt lazy count != 0 + beq 1b + ret r8 @ go again + #endif __und_fault: diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ea128e32e8ca8..3671a4214d6f4 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) */ trace_hardirqs_off(); do { - if (likely(thread_flags & _TIF_NEED_RESCHED)) { + if (likely(thread_flags & (_TIF_NEED_RESCHED | + _TIF_NEED_RESCHED_LAZY))) { schedule(); } else { if (unlikely(!user_mode(regs))) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 46cccd6bf705a..480a1976a9dce 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -421,6 +421,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); + if (interrupts_enabled(regs)) + local_irq_enable(); + if (user_mode(regs)) goto bad_area; @@ -491,6 +494,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + if (interrupts_enabled(regs)) + local_irq_enable(); + do_bad_area(addr, fsr, regs); return 0; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3795eb5ba1cdd..6922949e61b71 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -93,6 +93,7 @@ config ARM64 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_RT select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT @@ -200,6 +201,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_PREEMPT_LAZY select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_FUNCTION_ARG_ACCESS_API select MMU_GATHER_RCU_TABLE_FREE diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index 0159b625cc7f0..a5486918e5eeb 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -71,13 +71,36 @@ static inline bool __preempt_count_dec_and_test(void) * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE * pair.
*/ - return !pc || !READ_ONCE(ti->preempt_count); + if (!pc || !READ_ONCE(ti->preempt_count)) + return true; +#ifdef CONFIG_PREEMPT_LAZY + if ((pc & ~PREEMPT_NEED_RESCHED)) + return false; + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else + return false; +#endif } static inline bool should_resched(int preempt_offset) { +#ifdef CONFIG_PREEMPT_LAZY + u64 pc = READ_ONCE(current_thread_info()->preempt_count); + if (pc == preempt_offset) + return true; + + if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset) + return false; + + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else u64 pc = READ_ONCE(current_thread_info()->preempt_count); return pc == preempt_offset; +#endif } #ifdef CONFIG_PREEMPTION diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 848739c15de82..4b7148fd5551f 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -26,6 +26,7 @@ struct thread_info { #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ union { u64 preempt_count; /* 0 => preemptible, <0 => bug */ struct { @@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ +#define TIF_NEED_RESCHED_LAZY 7 #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_struct *dst, #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) -#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ + _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ _TIF_NOTIFY_SIGNAL) @@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_struct *dst, _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) + #ifdef CONFIG_SHADOW_CALL_STACK #define INIT_SCS \ .scs_base = init_shadow_call_stack, \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 1197e7679882e..e74c0415f67ea 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -32,6 +32,7 @@ int main(void) DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); #endif diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 9ad911f1647c8..545c41a84411e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -1103,7 +1103,7 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, unsigned 
long thread_flags) { do { - if (thread_flags & _TIF_NEED_RESCHED) { + if (thread_flags & _TIF_NEED_RESCHED_MASK) { /* Unmask Debug and SError for the next task */ local_daif_restore(DAIF_PROCCTX_NOIRQ); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cbe7bb029aec8..ad5bcc255f4e3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -149,6 +149,7 @@ config PPC select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST @@ -241,8 +242,10 @@ config PPC select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM select HAVE_RSEQ select HAVE_SETUP_PER_CPU_AREA if PPC64 select HAVE_SOFTIRQ_ON_OWN_STACK diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h index 1c8460e235838..b1653c160bab9 100644 --- a/arch/powerpc/include/asm/stackprotector.h +++ b/arch/powerpc/include/asm/stackprotector.h @@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void) unsigned long canary; /* Try to get a semi random initial value. */ +#ifdef CONFIG_PREEMPT_RT + canary = (unsigned long)&canary; +#else canary = get_random_canary(); +#endif canary ^= mftb(); canary ^= LINUX_VERSION_CODE; canary &= CANARY_MASK; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index af58f1ed3952e..520864de8bb27 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -53,6 +53,8 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_lazy_count; /* 0 => preemptable, + <0 => BUG */ #ifdef CONFIG_SMP unsigned int cpu; #endif @@ -77,6 +79,7 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .preempt_count = INIT_PREEMPT_COUNT, \ + .preempt_lazy_count = 0, \ .flags = 0, \ } @@ -102,6 +105,7 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -117,6 +121,7 @@ void arch_setup_new_exec(void); #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 20 /* 32 bit binary */ + /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<msr & MSR_EE)); again: - if (IS_ENABLED(CONFIG_PREEMPT)) { + if (IS_ENABLED(CONFIG_PREEMPTION)) { /* Return to preemptible kernel context */ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) preempt_schedule_irq(); + } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) { + if ((preempt_count() == 0) && + (current_thread_info()->preempt_lazy_count == 0)) + preempt_schedule_irq(); } } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dadfcef5d6db4..3bfe55d82b042 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -260,12 +260,17 @@ static char *get_mmu_str(void) 
static int __die(const char *str, struct pt_regs *regs, long err) { + const char *pr = ""; + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); + if (IS_ENABLED(CONFIG_PREEMPTION)) + pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", PAGE_SIZE / 1024, get_mmu_str(), - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", + pr, IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index dcb398d5e0093..2cfa432afdb12 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -221,6 +221,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 + depends on !PREEMPT_RT select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 561adac690229..61c4c0610aa6a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -24,6 +24,7 @@ #include #include #include +#include <linux/local_lock.h> #include #include #include @@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, return ret; } -static DEFINE_PER_CPU(__be64 *, tce_page); +struct tce_page { + __be64 * page; + local_lock_t lock; +}; +static DEFINE_PER_CPU(struct tce_page, tce_page) = { + .lock = INIT_LOCAL_LOCK(lock), +}; static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, @@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, direction, attrs); } - local_irq_save(flags); /* to protect tcep and the page behind it */ + /* to protect tcep and the page behind it */ + local_lock_irqsave(&tce_page.lock, flags); - tcep = __this_cpu_read(tce_page); + tcep = __this_cpu_read(tce_page.page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { - local_irq_restore(flags); + local_unlock_irqrestore(&tce_page.lock, flags); return tce_build_pSeriesLP(tbl->it_index, tcenum, tceshift, npages, uaddr, direction, attrs); } - __this_cpu_write(tce_page, tcep); + __this_cpu_write(tce_page.page, tcep); } rpn = __pa(uaddr) >> tceshift; @@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum += limit; } while (npages > 0 && !rc); - local_irq_restore(flags); + local_unlock_irqrestore(&tce_page.lock, flags); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; @@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, DMA_BIDIRECTIONAL, 0); } - local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __this_cpu_read(tce_page); + /* to protect tcep and the page behind it */ + local_lock_irq(&tce_page.lock); + tcep = __this_cpu_read(tce_page.page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); if (!tcep) { - local_irq_enable(); + local_unlock_irq(&tce_page.lock); return -ENOMEM; } - __this_cpu_write(tce_page, tcep); + __this_cpu_write(tce_page.page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; @@
-492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, /* error cleanup: caller will clear whole range */ - local_irq_enable(); + local_unlock_irq(&tce_page.lock); return rc; } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 159c025ebb03e..4d62ceece1bb0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -109,6 +109,7 @@ config X86 select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN + select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS @@ -243,6 +244,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT select MMU_GATHER_MERGE_VMAS select HAVE_POSIX_CPU_TIMERS_TASK_WORK diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 5f6daea1ee248..cd20b4a5719a4 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val) * a decrement which hits zero means we have no preempt_count and should * reschedule. */ -static __always_inline bool __preempt_count_dec_and_test(void) +static __always_inline bool ____preempt_count_dec_and_test(void) { return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); } +static __always_inline bool __preempt_count_dec_and_test(void) +{ + if (____preempt_count_dec_and_test()) + return true; +#ifdef CONFIG_PREEMPT_LAZY + if (preempt_count()) + return false; + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else + return false; +#endif +} + /* * Returns true when we need to resched and can (barring IRQ state). */ static __always_inline bool should_resched(int preempt_offset) { +#ifdef CONFIG_PREEMPT_LAZY + u32 tmp; + tmp = raw_cpu_read_4(__preempt_count); + if (tmp == preempt_offset) + return true; + + /* preempt count == 0 ? 
*/ + tmp &= ~PREEMPT_NEED_RESCHED; + if (tmp != preempt_offset) + return false; + /* XXX PREEMPT_LOCK_OFFSET */ + if (current_thread_info()->preempt_lazy_count) + return false; + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); +#endif } #ifdef CONFIG_PREEMPTION diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f0cb881c1d690..fd8fb76f324fc 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,6 +57,8 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ u32 status; /* thread synchronous flags */ + int preempt_lazy_count; /* 0 => lazy preemptable + <0 => BUG */ #ifdef CONFIG_SMP u32 cpu; /* current CPU */ #endif @@ -65,6 +67,7 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .flags = 0, \ + .preempt_lazy_count = 0, \ } #else /* !__ASSEMBLY__ */ @@ -92,6 +95,7 @@ struct thread_info { #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ +#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -115,6 +119,7 @@ struct thread_info { #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c index fac8ff983aec8..65fb9bad1577a 100644 --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c @@ -115,7 +115,7 @@ static irqreturn_t bcma_gpio_irq_handler(int irq, void *dev_id) return IRQ_NONE; for_each_set_bit(gpio, &irqs, gc->ngpio) - generic_handle_irq(irq_find_mapping(gc->irq.domain, gpio)); + generic_handle_domain_irq_safe(gc->irq.domain, gpio); bcma_chipco_gpio_polarity(cc, irqs, val & irqs); return IRQ_HANDLED; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 226ea76cc8197..4043d909d41bf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -60,6 +60,40 @@ static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); +#ifdef CONFIG_PREEMPT_RT +static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) +{ + size_t index; + + for (index = 0; index < num_pages; index++) + spin_lock_init(&zram->table[index].lock); +} + +static int zram_slot_trylock(struct zram *zram, u32 index) +{ + int ret; + + ret = spin_trylock(&zram->table[index].lock); + if (ret) + __set_bit(ZRAM_LOCK, &zram->table[index].flags); + return ret; +} + +static void zram_slot_lock(struct zram *zram, u32 index) +{ + spin_lock(&zram->table[index].lock); + __set_bit(ZRAM_LOCK, &zram->table[index].flags); +} + +static void zram_slot_unlock(struct zram *zram, u32 index) +{ + __clear_bit(ZRAM_LOCK, &zram->table[index].flags); + spin_unlock(&zram->table[index].lock); +} + +#else + +static void zram_meta_init_table_locks(struct zram *zram, size_t 
num_pages) { } static int zram_slot_trylock(struct zram *zram, u32 index) { @@ -75,6 +109,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) { bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); } +#endif static inline bool init_done(struct zram *zram) { @@ -1198,6 +1233,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); + zram_meta_init_table_locks(zram, num_pages); return true; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 80c3b43b4828f..ff021a9728d1e 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -63,6 +63,9 @@ struct zram_table_entry { unsigned long element; }; unsigned long flags; +#ifdef CONFIG_PREEMPT_RT + spinlock_t lock; +#endif #ifdef CONFIG_ZRAM_MEMORY_TRACKING ktime_t ac_time; #endif diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index bcff6429e0b4f..4a9ae338a2bdf 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da return container_of(data, struct tpm_tis_tcg_phy, priv); } +#ifdef CONFIG_PREEMPT_RT +/* + * Flushes previous write operations to chip so that a subsequent + * ioread*()s won't stall a cpu. + */ +static inline void tpm_tis_flush(void __iomem *iobase) +{ + ioread8(iobase + TPM_ACCESS(0)); +} +#else +#define tpm_tis_flush(iobase) do { } while (0) +#endif + +static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) +{ + iowrite8(b, iobase + addr); + tpm_tis_flush(iobase); +} + +static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) +{ + iowrite32(b, iobase + addr); + tpm_tis_flush(iobase); +} + static int interrupts = -1; module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); @@ -185,12 +210,12 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, switch (io_mode) { case TPM_TIS_PHYS_8: while (len--) - iowrite8(*value++, phy->iobase + addr); + tpm_tis_iowrite8(*value++, phy->iobase, addr); break; case TPM_TIS_PHYS_16: return -EINVAL; case TPM_TIS_PHYS_32: - iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr); + tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr); break; } diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index 64cb060d9d753..77a41151c921b 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -273,10 +273,8 @@ static irqreturn_t mlxbf2_gpio_irq_handler(int irq, void *ptr) pending = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_CAUSE_EVTEN0); writel(pending, gs->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE); - for_each_set_bit(level, &pending, gc->ngpio) { - int gpio_irq = irq_find_mapping(gc->irq.domain, level); - generic_handle_irq(gpio_irq); - } + for_each_set_bit(level, &pending, gc->ngpio) + generic_handle_domain_irq_safe(gc->irq.domain, level); return IRQ_RETVAL(pending); } diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 7ae3b7d67fcfc..844f54f1daea9 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -3,7 +3,6 @@ config DRM_I915 tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" depends on DRM depends on X86 && PCI - depends on !PREEMPT_RT select INTEL_GTT if X86 select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c 
b/drivers/gpu/drm/i915/display/intel_crtc.c index 4442aa355f868..23085e82c3ed5 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -522,7 +522,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) */ intel_psr_wait_for_idle_locked(new_crtc_state); - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; @@ -547,11 +548,13 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) break; } - local_irq_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); timeout = schedule_timeout(timeout); - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); } finish_wait(wq, &wait); @@ -584,7 +587,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) return; irq_disable: - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) @@ -685,7 +689,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) */ intel_vrr_send_push(new_crtc_state); - local_irq_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); if (intel_vgpu_active(dev_priv)) return; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index ecc990ec1b952..8d04b10681f0d 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) /* Kick the work once more to drain the signalers, and disarm the irq */ irq_work_sync(&b->irq_work); while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { - local_irq_disable(); - signal_irq_work(&b->irq_work); - local_irq_enable(); + irq_work_queue(&b->irq_work); cond_resched(); + irq_work_sync(&b->irq_work); } } diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index c718e6dc40b51..0e592999b7d60 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1302,7 +1302,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock(&sched_engine->lock); + spin_lock_irq(&sched_engine->lock); /* * If the queue is higher priority than the last @@ -1402,7 +1402,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. 
*/ - spin_unlock(&sched_engine->lock); + spin_unlock_irq(&sched_engine->lock); return; } } @@ -1428,7 +1428,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.sched_engine->lock); - spin_unlock(&engine->sched_engine->lock); + spin_unlock_irq(&engine->sched_engine->lock); return; /* leave this for another sibling */ } @@ -1590,7 +1590,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ sched_engine->queue_priority_hint = queue_prio(sched_engine); i915_sched_engine_reset_on_empty(sched_engine); - spin_unlock(&sched_engine->lock); + spin_unlock_irq(&sched_engine->lock); /* * We can skip poking the HW if we ended up with exactly the same set @@ -1616,13 +1616,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } } -static void execlists_dequeue_irq(struct intel_engine_cs *engine) -{ - local_irq_disable(); /* Suspend interrupts across request submission */ - execlists_dequeue(engine); - local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ -} - static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); @@ -2468,7 +2461,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) } if (!engine->execlists.pending[0]) { - execlists_dequeue_irq(engine); + execlists_dequeue(engine); start_timeslice(engine); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 73cebc6aa6507..98305fb393413 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -917,7 +917,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, */ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); /* Get optional system timestamp before query. */ if (stime) @@ -981,7 +982,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, if (etime) *etime = ktime_get(); - /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. 
*/ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 62fad16a55e84..af07927650b24 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -612,7 +612,6 @@ bool __i915_request_submit(struct i915_request *request) RQ_TRACE(request, "\n"); - GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->sched_engine->lock); /* @@ -721,7 +720,6 @@ void __i915_request_unsubmit(struct i915_request *request) */ RQ_TRACE(request, "\n"); - GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->sched_engine->lock); /* diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 37b5c9e9d260e..73f29d8008f0c 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -6,6 +6,10 @@ #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _I915_TRACE_H_ +#ifdef CONFIG_PREEMPT_RT +#define NOTRACE +#endif + #include #include #include @@ -323,7 +327,7 @@ DEFINE_EVENT(i915_request, i915_request_add, TP_ARGS(rq) ); -#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) DEFINE_EVENT(i915_request, i915_request_guc_submit, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index c10d68cdc3ca5..593f3a7e0e4fc 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -294,7 +294,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. 
*/ -#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) +#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) #else # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) diff --git a/drivers/net/ethernet/alacritech/slic.h b/drivers/net/ethernet/alacritech/slic.h index 4eecbdfff3ff1..82071d0e5f7fc 100644 --- a/drivers/net/ethernet/alacritech/slic.h +++ b/drivers/net/ethernet/alacritech/slic.h @@ -288,13 +288,13 @@ do { \ u64_stats_update_end(&(st)->syncp); \ } while (0) -#define SLIC_GET_STATS_COUNTER(newst, st, counter) \ -{ \ - unsigned int start; \ +#define SLIC_GET_STATS_COUNTER(newst, st, counter) \ +{ \ + unsigned int start; \ do { \ - start = u64_stats_fetch_begin_irq(&(st)->syncp); \ - newst = (st)->counter; \ - } while (u64_stats_fetch_retry_irq(&(st)->syncp, start)); \ + start = u64_stats_fetch_begin(&(st)->syncp); \ + newst = (st)->counter; \ + } while (u64_stats_fetch_retry(&(st)->syncp, start)); \ } struct slic_upr { diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 39242c5a17290..8f81d288c4880 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -118,9 +118,9 @@ static void ena_safe_update_stat(u64 *src, u64 *dst, unsigned int start; do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); *(dst) = *src; - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); } static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6a356a6cee15a..1c5d482990806 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3270,10 +3270,10 @@ static void ena_get_stats64(struct net_device *netdev, tx_ring = &adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&tx_ring->syncp); + start = u64_stats_fetch_begin(&tx_ring->syncp); packets = tx_ring->tx_stats.cnt; bytes = tx_ring->tx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); + } while (u64_stats_fetch_retry(&tx_ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; @@ -3281,20 +3281,20 @@ static void ena_get_stats64(struct net_device *netdev, rx_ring = &adapter->rx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&rx_ring->syncp); + start = u64_stats_fetch_begin(&rx_ring->syncp); packets = rx_ring->rx_stats.cnt; bytes = rx_ring->rx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); + } while (u64_stats_fetch_retry(&rx_ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; } do { - start = u64_stats_fetch_begin_irq(&adapter->syncp); + start = u64_stats_fetch_begin(&adapter->syncp); rx_drops = adapter->dev_stats.rx_drops; tx_drops = adapter->dev_stats.tx_drops; - } while (u64_stats_fetch_retry_irq(&adapter->syncp, start)); + } while (u64_stats_fetch_retry(&adapter->syncp, start)); stats->rx_dropped = rx_drops; stats->tx_dropped = tx_drops; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 25129e723b575..1e8d902e1c8ea 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -934,7 +934,7 @@ unsigned 
int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) /* This data should mimic aq_ethtool_queue_rx_stat_names structure */ do { count = 0; - start = u64_stats_fetch_begin_irq(&self->stats.rx.syncp); + start = u64_stats_fetch_begin(&self->stats.rx.syncp); data[count] = self->stats.rx.packets; data[++count] = self->stats.rx.jumbo_packets; data[++count] = self->stats.rx.lro_packets; @@ -951,15 +951,15 @@ unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) data[++count] = self->stats.rx.xdp_tx; data[++count] = self->stats.rx.xdp_invalid; data[++count] = self->stats.rx.xdp_redirect; - } while (u64_stats_fetch_retry_irq(&self->stats.rx.syncp, start)); + } while (u64_stats_fetch_retry(&self->stats.rx.syncp, start)); } else { /* This data should mimic aq_ethtool_queue_tx_stat_names structure */ do { count = 0; - start = u64_stats_fetch_begin_irq(&self->stats.tx.syncp); + start = u64_stats_fetch_begin(&self->stats.tx.syncp); data[count] = self->stats.tx.packets; data[++count] = self->stats.tx.queue_restarts; - } while (u64_stats_fetch_retry_irq(&self->stats.tx.syncp, start)); + } while (u64_stats_fetch_retry(&self->stats.tx.syncp, start)); } return ++count; diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c index 6ba5b024a7be7..25e7beb68e515 100644 --- a/drivers/net/ethernet/asix/ax88796c_main.c +++ b/drivers/net/ethernet/asix/ax88796c_main.c @@ -662,12 +662,12 @@ static void ax88796c_get_stats64(struct net_device *ndev, s = per_cpu_ptr(ax_local->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); rx_packets = u64_stats_read(&s->rx_packets); rx_bytes = u64_stats_read(&s->rx_bytes); tx_packets = u64_stats_read(&s->tx_packets); tx_bytes = u64_stats_read(&s->tx_bytes); - } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index e5857e88c2076..caf1714f36a18 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1680,7 +1680,7 @@ static void b44_get_stats64(struct net_device *dev, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&hwstat->syncp); + start = u64_stats_fetch_begin(&hwstat->syncp); /* Convert HW stats into rtnl_link_stats64 stats. 
*/ nstat->rx_packets = hwstat->rx_pkts; @@ -1714,7 +1714,7 @@ static void b44_get_stats64(struct net_device *dev, /* Carrier lost counter seems to be broken for some devices */ nstat->tx_carrier_errors = hwstat->tx_carrier_lost; #endif - } while (u64_stats_fetch_retry_irq(&hwstat->syncp, start)); + } while (u64_stats_fetch_retry(&hwstat->syncp, start)); } @@ -2082,12 +2082,12 @@ static void b44_get_ethtool_stats(struct net_device *dev, do { data_src = &hwstat->tx_good_octets; data_dst = data; - start = u64_stats_fetch_begin_irq(&hwstat->syncp); + start = u64_stats_fetch_begin(&hwstat->syncp); for (i = 0; i < ARRAY_SIZE(b44_gstrings); i++) *data_dst++ = *data_src++; - } while (u64_stats_fetch_retry_irq(&hwstat->syncp, start)); + } while (u64_stats_fetch_retry(&hwstat->syncp, start)); } static void b44_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 47fc8e6963d59..98d5bd15ee433 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -457,10 +457,10 @@ static void bcm_sysport_update_tx_stats(struct bcm_sysport_priv *priv, for (q = 0; q < priv->netdev->num_tx_queues; q++) { ring = &priv->tx_rings[q]; do { - start = u64_stats_fetch_begin_irq(&priv->syncp); + start = u64_stats_fetch_begin(&priv->syncp); bytes = ring->bytes; packets = ring->packets; - } while (u64_stats_fetch_retry_irq(&priv->syncp, start)); + } while (u64_stats_fetch_retry(&priv->syncp, start)); *tx_bytes += bytes; *tx_packets += packets; @@ -504,9 +504,9 @@ static void bcm_sysport_get_stats(struct net_device *dev, if (s->stat_sizeof == sizeof(u64) && s->type == BCM_SYSPORT_STAT_NETDEV64) { do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); data[i] = *(u64 *)p; - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); } else data[i] = *(u32 *)p; j++; @@ -1878,10 +1878,10 @@ static void bcm_sysport_get_stats64(struct net_device *dev, &stats->tx_packets); do { - start = u64_stats_fetch_begin_irq(&priv->syncp); + start = u64_stats_fetch_begin(&priv->syncp); stats->rx_packets = stats64->rx_packets; stats->rx_bytes = stats64->rx_bytes; - } while (u64_stats_fetch_retry_irq(&priv->syncp, start)); + } while (u64_stats_fetch_retry(&priv->syncp, start)); } static void bcm_sysport_netif_start(struct net_device *dev) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 6dae768671e3d..9e6de2f968fa3 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1919,7 +1919,7 @@ static void gmac_get_stats64(struct net_device *netdev, /* Racing with RX NAPI */ do { - start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); + start = u64_stats_fetch_begin(&port->rx_stats_syncp); stats->rx_packets = port->stats.rx_packets; stats->rx_bytes = port->stats.rx_bytes; @@ -1931,11 +1931,11 @@ static void gmac_get_stats64(struct net_device *netdev, stats->rx_crc_errors = port->stats.rx_crc_errors; stats->rx_frame_errors = port->stats.rx_frame_errors; - } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); + } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); /* Racing with MIB and TX completion interrupts */ do { - start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); + start = u64_stats_fetch_begin(&port->ir_stats_syncp); stats->tx_errors = port->stats.tx_errors; stats->tx_packets = 
port->stats.tx_packets; @@ -1945,15 +1945,15 @@ static void gmac_get_stats64(struct net_device *netdev, stats->rx_missed_errors = port->stats.rx_missed_errors; stats->rx_fifo_errors = port->stats.rx_fifo_errors; - } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); + } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); /* Racing with hard_start_xmit */ do { - start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); + start = u64_stats_fetch_begin(&port->tx_stats_syncp); stats->tx_dropped = port->stats.tx_dropped; - } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); + } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); stats->rx_dropped += stats->rx_missed_errors; } @@ -2031,18 +2031,18 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, /* Racing with MIB interrupt */ do { p = values; - start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); + start = u64_stats_fetch_begin(&port->ir_stats_syncp); for (i = 0; i < RX_STATS_NUM; i++) *p++ = port->hw_stats[i]; - } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); + } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); values = p; /* Racing with RX NAPI */ do { p = values; - start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); + start = u64_stats_fetch_begin(&port->rx_stats_syncp); for (i = 0; i < RX_STATUS_NUM; i++) *p++ = port->rx_stats[i]; @@ -2050,13 +2050,13 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, *p++ = port->rx_csum_stats[i]; *p++ = port->rx_napi_exits; - } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); + } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); values = p; /* Racing with TX start_xmit */ do { p = values; - start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); + start = u64_stats_fetch_begin(&port->tx_stats_syncp); for (i = 0; i < TX_MAX_FRAGS; i++) { *values++ = port->tx_frag_stats[i]; @@ -2065,7 +2065,7 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, *values++ = port->tx_frags_linearized; *values++ = port->tx_hw_csummed; - } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); + } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); } static int gmac_get_ksettings(struct net_device *netdev, diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index bd0df189d8719..39e7a4a3c15e6 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -389,10 +389,10 @@ static void be_get_ethtool_stats(struct net_device *netdev, struct be_rx_stats *stats = rx_stats(rxo); do { - start = u64_stats_fetch_begin_irq(&stats->sync); + start = u64_stats_fetch_begin(&stats->sync); data[base] = stats->rx_bytes; data[base + 1] = stats->rx_pkts; - } while (u64_stats_fetch_retry_irq(&stats->sync, start)); + } while (u64_stats_fetch_retry(&stats->sync, start)); for (i = 2; i < ETHTOOL_RXSTATS_NUM; i++) { p = (u8 *)stats + et_rx_stats[i].offset; @@ -405,19 +405,19 @@ static void be_get_ethtool_stats(struct net_device *netdev, struct be_tx_stats *stats = tx_stats(txo); do { - start = u64_stats_fetch_begin_irq(&stats->sync_compl); + start = u64_stats_fetch_begin(&stats->sync_compl); data[base] = stats->tx_compl; - } while (u64_stats_fetch_retry_irq(&stats->sync_compl, start)); + } while (u64_stats_fetch_retry(&stats->sync_compl, start)); do { - start = u64_stats_fetch_begin_irq(&stats->sync); + start = u64_stats_fetch_begin(&stats->sync); for (i = 1; i < 
ETHTOOL_TXSTATS_NUM; i++) { p = (u8 *)stats + et_tx_stats[i].offset; data[base + i] = (et_tx_stats[i].size == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - } while (u64_stats_fetch_retry_irq(&stats->sync, start)); + } while (u64_stats_fetch_retry(&stats->sync, start)); base += ETHTOOL_TXSTATS_NUM; } } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 414362febbb9d..9350c901aa27b 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -665,10 +665,10 @@ static void be_get_stats64(struct net_device *netdev, const struct be_rx_stats *rx_stats = rx_stats(rxo); do { - start = u64_stats_fetch_begin_irq(&rx_stats->sync); + start = u64_stats_fetch_begin(&rx_stats->sync); pkts = rx_stats(rxo)->rx_pkts; bytes = rx_stats(rxo)->rx_bytes; - } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start)); + } while (u64_stats_fetch_retry(&rx_stats->sync, start)); stats->rx_packets += pkts; stats->rx_bytes += bytes; stats->multicast += rx_stats(rxo)->rx_mcast_pkts; @@ -680,10 +680,10 @@ static void be_get_stats64(struct net_device *netdev, const struct be_tx_stats *tx_stats = tx_stats(txo); do { - start = u64_stats_fetch_begin_irq(&tx_stats->sync); + start = u64_stats_fetch_begin(&tx_stats->sync); pkts = tx_stats(txo)->tx_pkts; bytes = tx_stats(txo)->tx_bytes; - } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start)); + } while (u64_stats_fetch_retry(&tx_stats->sync, start)); stats->tx_packets += pkts; stats->tx_bytes += bytes; } @@ -2155,16 +2155,16 @@ static int be_get_new_eqd(struct be_eq_obj *eqo) for_all_rx_queues_on_eq(adapter, eqo, rxo, i) { do { - start = u64_stats_fetch_begin_irq(&rxo->stats.sync); + start = u64_stats_fetch_begin(&rxo->stats.sync); rx_pkts += rxo->stats.rx_pkts; - } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start)); + } while (u64_stats_fetch_retry(&rxo->stats.sync, start)); } for_all_tx_queues_on_eq(adapter, eqo, txo, i) { do { - start = u64_stats_fetch_begin_irq(&txo->stats.sync); + start = u64_stats_fetch_begin(&txo->stats.sync); tx_pkts += txo->stats.tx_reqs; - } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start)); + } while (u64_stats_fetch_retry(&txo->stats.sync, start)); } /* Skip, if wrapped around or first calculation */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h index 671f51135c269..53b7e95213a85 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -206,9 +206,9 @@ struct funeth_rxq { #define FUN_QSTAT_READ(q, seq, stats_copy) \ do { \ - seq = u64_stats_fetch_begin_irq(&(q)->syncp); \ + seq = u64_stats_fetch_begin(&(q)->syncp); \ stats_copy = (q)->stats; \ - } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq))) + } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) #define FUN_INT_NAME_LEN (IFNAMSIZ + 16) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 7b9a2d9d96243..50b384910c839 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -177,14 +177,14 @@ gve_get_ethtool_stats(struct net_device *netdev, struct gve_rx_ring *rx = &priv->rx[ring]; start = - u64_stats_fetch_begin_irq(&priv->rx[ring].statss); + u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = 
rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; rx_bytes += tmp_rx_bytes; @@ -198,10 +198,10 @@ gve_get_ethtool_stats(struct net_device *netdev, if (priv->tx) { do { start = - u64_stats_fetch_begin_irq(&priv->tx[ring].statss); + u64_stats_fetch_begin(&priv->tx[ring].statss); tmp_tx_pkts = priv->tx[ring].pkt_done; tmp_tx_bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); tx_pkts += tmp_tx_pkts; tx_bytes += tmp_tx_bytes; @@ -259,13 +259,13 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->fill_cnt - rx->cnt; do { start = - u64_stats_fetch_begin_irq(&priv->rx[ring].statss); + u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; - } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; data[i++] = rx->rx_cont_packet_cnt; @@ -331,9 +331,9 @@ gve_get_ethtool_stats(struct net_device *netdev, } do { start = - u64_stats_fetch_begin_irq(&priv->tx[ring].statss); + u64_stats_fetch_begin(&priv->tx[ring].statss); tmp_tx_bytes = tx->bytes_done; - } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); data[i++] = tmp_tx_bytes; data[i++] = tx->wake_queue; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 044db3ebb071c..6cafee55efc32 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -51,10 +51,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin_irq(&priv->rx[ring].statss); + u64_stats_fetch_begin(&priv->rx[ring].statss); packets = priv->rx[ring].rpackets; bytes = priv->rx[ring].rbytes; - } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); s->rx_packets += packets; s->rx_bytes += bytes; @@ -64,10 +64,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { do { start = - u64_stats_fetch_begin_irq(&priv->tx[ring].statss); + u64_stats_fetch_begin(&priv->tx[ring].statss); packets = priv->tx[ring].pkt_done; bytes = priv->tx[ring].bytes_done; - } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); s->tx_packets += packets; s->tx_bytes += bytes; @@ -1274,9 +1274,9 @@ void gve_handle_report_stats(struct gve_priv *priv) } do { - start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss); + start = u64_stats_fetch_begin(&priv->tx[idx].statss); tx_bytes = priv->tx[idx].bytes_done; - } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start)); + } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(TX_WAKE_CNT), .value = cpu_to_be64(priv->tx[idx].wake_queue), diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 35d70041b9e84..f82e98263307a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2486,7 +2486,7 @@ static void hns3_fetch_stats(struct rtnl_link_stats64 *stats, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); if (is_tx) { stats->tx_bytes += ring->stats.tx_bytes; stats->tx_packets += ring->stats.tx_pkts; @@ -2520,7 +2520,7 @@ static void hns3_fetch_stats(struct rtnl_link_stats64 *stats, stats->multicast += ring->stats.rx_multicast; stats->rx_length_errors += ring->stats.err_pkt_len; } - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); } static void hns3_nic_get_stats64(struct net_device *netdev, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index e5828a658caf4..a866bea651103 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -74,14 +74,14 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) unsigned int start; do { - start = u64_stats_fetch_begin_irq(&rxq_stats->syncp); + start = u64_stats_fetch_begin(&rxq_stats->syncp); stats->pkts = rxq_stats->pkts; stats->bytes = rxq_stats->bytes; stats->errors = rxq_stats->csum_errors + rxq_stats->other_errors; stats->csum_errors = rxq_stats->csum_errors; stats->other_errors = rxq_stats->other_errors; - } while (u64_stats_fetch_retry_irq(&rxq_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); } /** diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 3b6c7b5857376..5051cdff2384b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -99,14 +99,14 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) unsigned int start; do { - start = u64_stats_fetch_begin_irq(&txq_stats->syncp); + start = u64_stats_fetch_begin(&txq_stats->syncp); stats->pkts = txq_stats->pkts; stats->bytes = txq_stats->bytes; stats->tx_busy = txq_stats->tx_busy; stats->tx_wake = txq_stats->tx_wake; stats->tx_dropped = txq_stats->tx_dropped; stats->big_frags_pkts = txq_stats->big_frags_pkts; - } while (u64_stats_fetch_retry_irq(&txq_stats->syncp, start)); + } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); } /** diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 2cca9e84e31e1..34ab5ff9823b7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1229,10 +1229,10 @@ static void fm10k_get_stats64(struct net_device *netdev, continue; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; @@ -1245,10 +1245,10 @@ static void fm10k_get_stats64(struct net_device *netdev, continue; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while 
(u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index e9cd0fa6a0d2f..90f2eee78a3ee 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -154,7 +154,7 @@ __i40e_add_ethtool_stats(u64 **data, void *pointer, * @ring: the ring to copy * * Queue statistics must be copied while protected by - * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats. + * u64_stats_fetch_begin, so we can't directly use i40e_add_ethtool_stats. * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the * ring pointer is null, zero out the queue stat values and update the data * pointer. Otherwise safely copy the stats from the ring into the supplied @@ -172,16 +172,16 @@ i40e_add_queue_stats(u64 **data, struct i40e_ring *ring) /* To avoid invalid statistics values, ensure that we keep retrying * the copy until we get a consistent value according to - * u64_stats_fetch_retry_irq. But first, make sure our ring is + * u64_stats_fetch_retry. But first, make sure our ring is * non-null before attempting to access its syncp. */ do { - start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); + start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp); for (i = 0; i < size; i++) { i40e_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); } - } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); /* Once we successfully copy the stats in, update the data pointer */ *data += size; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e3d9804aeb25e..09a9f67d9ebc0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -418,10 +418,10 @@ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; @@ -471,10 +471,10 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, if (!ring) continue; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; @@ -896,10 +896,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) continue; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; @@ -914,10 +914,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) continue; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + 
} while (u64_stats_fetch_retry(&p->syncp, start)); rx_b += bytes; rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; @@ -934,10 +934,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) continue; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index e535d4c3da49d..fafa3406e0bcc 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -147,7 +147,7 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, * @ring: the ring to copy * * Queue statistics must be copied while protected by - * u64_stats_fetch_begin_irq, so we can't directly use iavf_add_ethtool_stats. + * u64_stats_fetch_begin, so we can't directly use iavf_add_ethtool_stats. * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the * ring pointer is null, zero out the queue stat values and update the data * pointer. Otherwise safely copy the stats from the ring into the supplied @@ -165,14 +165,14 @@ iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) /* To avoid invalid statistics values, ensure that we keep retrying * the copy until we get a consistent value according to - * u64_stats_fetch_retry_irq. But first, make sure our ring is + * u64_stats_fetch_retry. But first, make sure our ring is * non-null before attempting to access its syncp. */ do { - start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); + start = !ring ? 
0 : u64_stats_fetch_begin(&ring->syncp); for (i = 0; i < size; i++) iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); - } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); /* Once we successfully copy the stats in, update the data pointer */ *data += size; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index e109cb93886be..b7394c7e5eed2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6295,10 +6295,10 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, unsigned int start; do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); *pkts = stats.pkts; *bytes = stats.bytes; - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); } /** diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index c14fc871dd417..23c6fcfcb905c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2311,15 +2311,15 @@ static void igb_get_ethtool_stats(struct net_device *netdev, ring = adapter->tx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); data[i] = ring->tx_stats.packets; data[i+1] = ring->tx_stats.bytes; data[i+2] = ring->tx_stats.restart_queue; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); + start = u64_stats_fetch_begin(&ring->tx_syncp2); restart2 = ring->tx_stats.restart_queue2; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp2, start)); data[i+2] += restart2; i += IGB_TX_QUEUE_STATS_LEN; @@ -2327,13 +2327,13 @@ static void igb_get_ethtool_stats(struct net_device *netdev, for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); data[i] = ring->rx_stats.packets; data[i+1] = ring->rx_stats.bytes; data[i+2] = ring->rx_stats.drops; data[i+3] = ring->rx_stats.csum_err; data[i+4] = ring->rx_stats.alloc_failed; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); i += IGB_RX_QUEUE_STATS_LEN; } spin_unlock(&adapter->stats64_lock); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 2796e81d27260..98df55dc1e933 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6633,10 +6633,10 @@ void igb_update_stats(struct igb_adapter *adapter) } do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -6649,10 +6649,10 @@ void igb_update_stats(struct igb_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; 
_packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 8cc077b712add..5a26a7805ef80 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -839,15 +839,15 @@ static void igc_ethtool_get_stats(struct net_device *netdev, ring = adapter->tx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); data[i] = ring->tx_stats.packets; data[i + 1] = ring->tx_stats.bytes; data[i + 2] = ring->tx_stats.restart_queue; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); + start = u64_stats_fetch_begin(&ring->tx_syncp2); restart2 = ring->tx_stats.restart_queue2; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp2, start)); data[i + 2] += restart2; i += IGC_TX_QUEUE_STATS_LEN; @@ -855,13 +855,13 @@ static void igc_ethtool_get_stats(struct net_device *netdev, for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); data[i] = ring->rx_stats.packets; data[i + 1] = ring->rx_stats.bytes; data[i + 2] = ring->rx_stats.drops; data[i + 3] = ring->rx_stats.csum_err; data[i + 4] = ring->rx_stats.alloc_failed; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); i += IGC_RX_QUEUE_STATS_LEN; } spin_unlock(&adapter->stats64_lock); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ebff0e04045d6..944299b06cc3d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4645,10 +4645,10 @@ void igc_update_stats(struct igc_adapter *adapter) } do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -4662,10 +4662,10 @@ void igc_update_stats(struct igc_adapter *adapter) struct igc_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; _packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 04f453eabef64..51bcf0df3adcc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1335,10 +1335,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i+1] = ring->stats.bytes; - } while 
(u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) { @@ -1351,10 +1351,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i+1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d1e430b8c8aa1..01c5548f181d5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9041,10 +9041,10 @@ static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats, if (ring) { do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; } @@ -9064,10 +9064,10 @@ static void ixgbe_get_stats64(struct net_device *netdev, if (ring) { do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index fed46872af2bf..b4632b67ab143 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -458,10 +458,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i + 1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } @@ -475,10 +475,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i + 1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } @@ -492,10 +492,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i + 1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 2f12fbe229c15..1d31b8cff4f10 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4350,10 +4350,10 @@ static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats, if (ring) { do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); bytes = 
ring->stats.bytes; packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_bytes += bytes; stats->tx_packets += packets; } @@ -4376,10 +4376,10 @@ static void ixgbevf_get_stats(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) { ring = adapter->rx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); bytes = ring->stats.bytes; packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_bytes += bytes; stats->rx_packets += packets; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 0caa2df87c044..89ea3ef0ee162 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -813,14 +813,14 @@ mvneta_get_stats64(struct net_device *dev, cpu_stats = per_cpu_ptr(pp->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); rx_packets = cpu_stats->es.ps.rx_packets; rx_bytes = cpu_stats->es.ps.rx_bytes; rx_dropped = cpu_stats->rx_dropped; rx_errors = cpu_stats->rx_errors; tx_packets = cpu_stats->es.ps.tx_packets; tx_bytes = cpu_stats->es.ps.tx_bytes; - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; @@ -4762,7 +4762,7 @@ mvneta_ethtool_update_pcpu_stats(struct mvneta_port *pp, stats = per_cpu_ptr(pp->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); skb_alloc_error = stats->es.skb_alloc_error; refill_error = stats->es.refill_error; xdp_redirect = stats->es.ps.xdp_redirect; @@ -4772,7 +4772,7 @@ mvneta_ethtool_update_pcpu_stats(struct mvneta_port *pp, xdp_xmit_err = stats->es.ps.xdp_xmit_err; xdp_tx = stats->es.ps.xdp_tx; xdp_tx_err = stats->es.ps.xdp_tx_err; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); es->skb_alloc_error += skb_alloc_error; es->refill_error += refill_error; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index eaa51cd7456b6..9dd8e0315dd4f 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2008,7 +2008,7 @@ mvpp2_get_xdp_stats(struct mvpp2_port *port, struct mvpp2_pcpu_stats *xdp_stats) cpu_stats = per_cpu_ptr(port->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); xdp_redirect = cpu_stats->xdp_redirect; xdp_pass = cpu_stats->xdp_pass; xdp_drop = cpu_stats->xdp_drop; @@ -2016,7 +2016,7 @@ mvpp2_get_xdp_stats(struct mvpp2_port *port, struct mvpp2_pcpu_stats *xdp_stats) xdp_xmit_err = cpu_stats->xdp_xmit_err; xdp_tx = cpu_stats->xdp_tx; xdp_tx_err = cpu_stats->xdp_tx_err; - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); xdp_stats->xdp_redirect += xdp_redirect; xdp_stats->xdp_pass += xdp_pass; @@ -5115,12 +5115,12 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) cpu_stats = per_cpu_ptr(port->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = 
u64_stats_fetch_begin(&cpu_stats->syncp); rx_packets = cpu_stats->rx_packets; rx_bytes = cpu_stats->rx_bytes; tx_packets = cpu_stats->tx_packets; tx_bytes = cpu_stats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index bbea5458000bf..c9bb92187719c 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -3894,19 +3894,19 @@ static void sky2_get_stats(struct net_device *dev, u64 _bytes, _packets; do { - start = u64_stats_fetch_begin_irq(&sky2->rx_stats.syncp); + start = u64_stats_fetch_begin(&sky2->rx_stats.syncp); _bytes = sky2->rx_stats.bytes; _packets = sky2->rx_stats.packets; - } while (u64_stats_fetch_retry_irq(&sky2->rx_stats.syncp, start)); + } while (u64_stats_fetch_retry(&sky2->rx_stats.syncp, start)); stats->rx_packets = _packets; stats->rx_bytes = _bytes; do { - start = u64_stats_fetch_begin_irq(&sky2->tx_stats.syncp); + start = u64_stats_fetch_begin(&sky2->tx_stats.syncp); _bytes = sky2->tx_stats.bytes; _packets = sky2->tx_stats.packets; - } while (u64_stats_fetch_retry_irq(&sky2->tx_stats.syncp, start)); + } while (u64_stats_fetch_retry(&sky2->tx_stats.syncp, start)); stats->tx_packets = _packets; stats->tx_bytes = _bytes; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b344632beaddf..988927f8c5d7d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -853,7 +853,7 @@ static void mtk_get_stats64(struct net_device *dev, } do { - start = u64_stats_fetch_begin_irq(&hw_stats->syncp); + start = u64_stats_fetch_begin(&hw_stats->syncp); storage->rx_packets = hw_stats->rx_packets; storage->tx_packets = hw_stats->tx_packets; storage->rx_bytes = hw_stats->rx_bytes; @@ -865,7 +865,7 @@ static void mtk_get_stats64(struct net_device *dev, storage->rx_crc_errors = hw_stats->rx_fcs_errors; storage->rx_errors = hw_stats->rx_checksum_errors; storage->tx_aborted_errors = hw_stats->tx_skip; - } while (u64_stats_fetch_retry_irq(&hw_stats->syncp, start)); + } while (u64_stats_fetch_retry(&hw_stats->syncp, start)); storage->tx_errors = dev->stats.tx_errors; storage->rx_dropped = dev->stats.rx_dropped; @@ -3664,13 +3664,13 @@ static void mtk_get_ethtool_stats(struct net_device *dev, do { data_dst = data; - start = u64_stats_fetch_begin_irq(&hwstats->syncp); + start = u64_stats_fetch_begin(&hwstats->syncp); for (i = 0; i < ARRAY_SIZE(mtk_ethtool_stats); i++) *data_dst++ = *(data_src + mtk_ethtool_stats[i].offset); if (mtk_page_pool_enabled(mac->hw)) mtk_ethtool_pp_stats(mac->hw, data_dst); - } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); + } while (u64_stats_fetch_retry(&hwstats->syncp, start)); } static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 30c7b0e157218..fa2753318cdf7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -827,12 +827,12 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, for_each_possible_cpu(i) { p = per_cpu_ptr(mlxsw_sp_port->pcpu_stats, i); do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); rx_packets = p->rx_packets; 
rx_bytes = p->rx_bytes; tx_packets = p->tx_packets; tx_bytes = p->tx_bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9259a74eca40b..318dbbb482797 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -315,10 +315,10 @@ static void mana_get_stats64(struct net_device *ndev, rx_stats = &apc->rxqs[q]->stats; do { - start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + start = u64_stats_fetch_begin(&rx_stats->syncp); packets = rx_stats->packets; bytes = rx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); st->rx_packets += packets; st->rx_bytes += bytes; @@ -328,10 +328,10 @@ static void mana_get_stats64(struct net_device *ndev, tx_stats = &apc->tx_qp[q].txq.stats; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + start = u64_stats_fetch_begin(&tx_stats->syncp); packets = tx_stats->packets; bytes = tx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); st->tx_packets += packets; st->tx_bytes += bytes; diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index c530db76880f0..96d55c91c9698 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -90,13 +90,13 @@ static void mana_get_ethtool_stats(struct net_device *ndev, rx_stats = &apc->rxqs[q]->stats; do { - start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + start = u64_stats_fetch_begin(&rx_stats->syncp); packets = rx_stats->packets; bytes = rx_stats->bytes; xdp_drop = rx_stats->xdp_drop; xdp_tx = rx_stats->xdp_tx; xdp_redirect = rx_stats->xdp_redirect; - } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; @@ -109,11 +109,11 @@ static void mana_get_ethtool_stats(struct net_device *ndev, tx_stats = &apc->tx_qp[q].txq.stats; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + start = u64_stats_fetch_begin(&tx_stats->syncp); packets = tx_stats->packets; bytes = tx_stats->bytes; xdp_xmit = tx_stats->xdp_xmit; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 349a2b1a19a24..cf4d6f1129fa2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1630,21 +1630,21 @@ static void nfp_net_stat64(struct net_device *netdev, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); + start = u64_stats_fetch_begin(&r_vec->rx_sync); data[0] = r_vec->rx_pkts; data[1] = r_vec->rx_bytes; data[2] = r_vec->rx_drops; - } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); stats->rx_packets += data[0]; stats->rx_bytes += data[1]; stats->rx_dropped += data[2]; do { - start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); + start = 
u64_stats_fetch_begin(&r_vec->tx_sync); data[0] = r_vec->tx_pkts; data[1] = r_vec->tx_bytes; data[2] = r_vec->tx_errors; - } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); stats->tx_packets += data[0]; stats->tx_bytes += data[1]; stats->tx_errors += data[2]; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index b1b1b648e40cb..eeb1455a4e5db 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -649,7 +649,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) unsigned int start; do { - start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); + start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); data[0] = nn->r_vecs[i].rx_pkts; tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; @@ -657,10 +657,10 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[3] = nn->r_vecs[i].hw_csum_rx_error; tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; tmp[5] = nn->r_vecs[i].hw_tls_rx; - } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); + } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); do { - start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); + start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; tmp[6] = nn->r_vecs[i].hw_csum_tx; @@ -670,7 +670,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[10] = nn->r_vecs[i].hw_tls_tx; tmp[11] = nn->r_vecs[i].tls_tx_fallback; tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; - } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); + } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); data += NN_RVEC_PER_Q_STATS; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 8b77582bdfa01..a6b6ca1fd55ee 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -134,13 +134,13 @@ nfp_repr_get_host_stats64(const struct net_device *netdev, repr_stats = per_cpu_ptr(repr->stats, i); do { - start = u64_stats_fetch_begin_irq(&repr_stats->syncp); + start = u64_stats_fetch_begin(&repr_stats->syncp); tbytes = repr_stats->tx_bytes; tpkts = repr_stats->tx_packets; tdrops = repr_stats->tx_drops; rbytes = repr_stats->rx_bytes; rpkts = repr_stats->rx_packets; - } while (u64_stats_fetch_retry_irq(&repr_stats->syncp, start)); + } while (u64_stats_fetch_retry(&repr_stats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 5116badaf0919..50ebbd7e91c48 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1734,12 +1734,12 @@ static void nv_get_stats(int cpu, struct fe_priv *np, u64 tx_packets, tx_bytes, tx_dropped; do { - syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp); + syncp_start = u64_stats_fetch_begin(&np->swstats_rx_syncp); rx_packets = src->stat_rx_packets; rx_bytes = src->stat_rx_bytes; rx_dropped = src->stat_rx_dropped; rx_missed_errors = src->stat_rx_missed_errors; - } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start)); + } while (u64_stats_fetch_retry(&np->swstats_rx_syncp, syncp_start)); 
storage->rx_packets += rx_packets; storage->rx_bytes += rx_bytes; @@ -1747,11 +1747,11 @@ static void nv_get_stats(int cpu, struct fe_priv *np, storage->rx_missed_errors += rx_missed_errors; do { - syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp); + syncp_start = u64_stats_fetch_begin(&np->swstats_tx_syncp); tx_packets = src->stat_tx_packets; tx_bytes = src->stat_tx_bytes; tx_dropped = src->stat_tx_dropped; - } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start)); + } while (u64_stats_fetch_retry(&np->swstats_tx_syncp, syncp_start)); storage->tx_packets += tx_packets; storage->tx_bytes += tx_bytes; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 1b2119b1d48aa..3f5e6572d20e7 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -135,9 +135,9 @@ static void rmnet_get_stats64(struct net_device *dev, pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu); do { - start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); + start = u64_stats_fetch_begin(&pcpu_ptr->syncp); snapshot = pcpu_ptr->stats; /* struct assignment */ - } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start)); + } while (u64_stats_fetch_retry(&pcpu_ptr->syncp, start)); total_stats.rx_pkts += snapshot.rx_pkts; total_stats.rx_bytes += snapshot.rx_bytes; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 15b40fd93cd2e..82bd0eb614634 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2532,16 +2532,16 @@ rtl8139_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) netdev_stats_to_stats64(stats, &dev->stats); do { - start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp); + start = u64_stats_fetch_begin(&tp->rx_stats.syncp); stats->rx_packets = tp->rx_stats.packets; stats->rx_bytes = tp->rx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&tp->rx_stats.syncp, start)); + } while (u64_stats_fetch_retry(&tp->rx_stats.syncp, start)); do { - start = u64_stats_fetch_begin_irq(&tp->tx_stats.syncp); + start = u64_stats_fetch_begin(&tp->tx_stats.syncp); stats->tx_packets = tp->tx_stats.packets; stats->tx_bytes = tp->tx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start)); + } while (u64_stats_fetch_retry(&tp->tx_stats.syncp, start)); } /* Set or clear the multicast filter for this adaptor. 
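Every hunk above follows the same reader shape: snapshot the per-CPU (or per-ring) counters between u64_stats_fetch_begin() and u64_stats_fetch_retry(), and redo the copy if a writer was active in between. The conversion is mechanical because the plain and _irq fetch helpers are equivalent on the reader side here; serializing against concurrent updates is the writer's job, so disabling interrupts in the reader added nothing. Below is a minimal, self-contained sketch of the resulting pattern; the foo_* names and the per-CPU counter layout are illustrative assumptions, not taken from any driver in this series.

#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

struct foo_pcpu_stats {
	u64			packets;
	u64			bytes;
	struct u64_stats_sync	syncp;
};

struct foo_priv {
	struct foo_pcpu_stats __percpu *stats;	/* assumed per-CPU counters */
};

static void foo_get_stats64(struct net_device *dev,
			    struct rtnl_link_stats64 *stats)
{
	struct foo_priv *priv = netdev_priv(dev);
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct foo_pcpu_stats *p = per_cpu_ptr(priv->stats, cpu);
		unsigned int start;
		u64 packets, bytes;

		do {
			/* Snapshot the counters; retry if a writer raced us. */
			start = u64_stats_fetch_begin(&p->syncp);
			packets = p->packets;
			bytes = p->bytes;
		} while (u64_stats_fetch_retry(&p->syncp, start));

		stats->rx_packets += packets;
		stats->rx_bytes += bytes;
	}
}

Writers keep bracketing their updates with u64_stats_update_begin()/u64_stats_update_end() on the same syncp, which is what the retry loop synchronizes against.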
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index f0c8de2c60755..d4f7238333bb7 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1506,16 +1506,16 @@ static void ave_get_stats64(struct net_device *ndev, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&priv->stats_rx.syncp); + start = u64_stats_fetch_begin(&priv->stats_rx.syncp); stats->rx_packets = priv->stats_rx.packets; stats->rx_bytes = priv->stats_rx.bytes; - } while (u64_stats_fetch_retry_irq(&priv->stats_rx.syncp, start)); + } while (u64_stats_fetch_retry(&priv->stats_rx.syncp, start)); do { - start = u64_stats_fetch_begin_irq(&priv->stats_tx.syncp); + start = u64_stats_fetch_begin(&priv->stats_tx.syncp); stats->tx_packets = priv->stats_tx.packets; stats->tx_bytes = priv->stats_tx.bytes; - } while (u64_stats_fetch_retry_irq(&priv->stats_tx.syncp, start)); + } while (u64_stats_fetch_retry(&priv->stats_tx.syncp, start)); stats->rx_errors = priv->stats_rx.errors; stats->tx_errors = priv->stats_tx.errors; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f4a6b590a1e39..1b62400c19049 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1365,12 +1365,12 @@ static void am65_cpsw_nuss_ndo_get_stats(struct net_device *dev, cpu_stats = per_cpu_ptr(ndev_priv->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); rx_packets = cpu_stats->rx_packets; rx_bytes = cpu_stats->rx_bytes; tx_packets = cpu_stats->tx_packets; tx_bytes = cpu_stats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index b15d44261e766..68c7b2c05aab3 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1916,16 +1916,16 @@ netcp_get_stats(struct net_device *ndev, struct rtnl_link_stats64 *stats) unsigned int start; do { - start = u64_stats_fetch_begin_irq(&p->syncp_rx); + start = u64_stats_fetch_begin(&p->syncp_rx); rxpackets = p->rx_packets; rxbytes = p->rx_bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp_rx, start)); + } while (u64_stats_fetch_retry(&p->syncp_rx, start)); do { - start = u64_stats_fetch_begin_irq(&p->syncp_tx); + start = u64_stats_fetch_begin(&p->syncp_tx); txpackets = p->tx_packets; txbytes = p->tx_bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp_tx, start)); + } while (u64_stats_fetch_retry(&p->syncp_tx, start)); stats->rx_packets = rxpackets; stats->rx_bytes = rxbytes; diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 509c5e9b29dfa..5301c907b5ae3 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -2217,16 +2217,16 @@ rhine_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) netdev_stats_to_stats64(stats, &dev->stats); do { - start = u64_stats_fetch_begin_irq(&rp->rx_stats.syncp); + start = u64_stats_fetch_begin(&rp->rx_stats.syncp); stats->rx_packets = rp->rx_stats.packets; stats->rx_bytes = rp->rx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&rp->rx_stats.syncp, start)); + } while (u64_stats_fetch_retry(&rp->rx_stats.syncp, start)); do { - start = 
u64_stats_fetch_begin_irq(&rp->tx_stats.syncp); + start = u64_stats_fetch_begin(&rp->tx_stats.syncp); stats->tx_packets = rp->tx_stats.packets; stats->tx_bytes = rp->tx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&rp->tx_stats.syncp, start)); + } while (u64_stats_fetch_retry(&rp->tx_stats.syncp, start)); } static void rhine_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9262988d26a32..2c233b59e7d93 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1305,16 +1305,16 @@ axienet_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) netdev_stats_to_stats64(stats, &dev->stats); do { - start = u64_stats_fetch_begin_irq(&lp->rx_stat_sync); + start = u64_stats_fetch_begin(&lp->rx_stat_sync); stats->rx_packets = u64_stats_read(&lp->rx_packets); stats->rx_bytes = u64_stats_read(&lp->rx_bytes); - } while (u64_stats_fetch_retry_irq(&lp->rx_stat_sync, start)); + } while (u64_stats_fetch_retry(&lp->rx_stat_sync, start)); do { - start = u64_stats_fetch_begin_irq(&lp->tx_stat_sync); + start = u64_stats_fetch_begin(&lp->tx_stat_sync); stats->tx_packets = u64_stats_read(&lp->tx_packets); stats->tx_bytes = u64_stats_read(&lp->tx_bytes); - } while (u64_stats_fetch_retry_irq(&lp->tx_stat_sync, start)); + } while (u64_stats_fetch_retry(&lp->tx_stat_sync, start)); } static const struct net_device_ops axienet_netdev_ops = { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 8113ac17ab70a..2fd8b9c51e839 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1264,12 +1264,12 @@ static void netvsc_get_vf_stats(struct net_device *net, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); rx_packets = stats->rx_packets; tx_packets = stats->tx_packets; rx_bytes = stats->rx_bytes; tx_bytes = stats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); tot->rx_packets += rx_packets; tot->tx_packets += tx_packets; @@ -1294,12 +1294,12 @@ static void netvsc_get_pcpu_stats(struct net_device *net, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); this_tot->vf_rx_packets = stats->rx_packets; this_tot->vf_tx_packets = stats->tx_packets; this_tot->vf_rx_bytes = stats->rx_bytes; this_tot->vf_tx_bytes = stats->tx_bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); this_tot->rx_packets = this_tot->vf_rx_packets; this_tot->tx_packets = this_tot->vf_tx_packets; this_tot->rx_bytes = this_tot->vf_rx_bytes; @@ -1318,20 +1318,20 @@ static void netvsc_get_pcpu_stats(struct net_device *net, tx_stats = &nvchan->tx_stats; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + start = u64_stats_fetch_begin(&tx_stats->syncp); packets = tx_stats->packets; bytes = tx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); this_tot->tx_bytes += bytes; this_tot->tx_packets += packets; rx_stats = &nvchan->rx_stats; do { - start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + start = u64_stats_fetch_begin(&rx_stats->syncp); packets = rx_stats->packets; bytes = rx_stats->bytes; - } while 
(u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); this_tot->rx_bytes += bytes; this_tot->rx_packets += packets; @@ -1370,21 +1370,21 @@ static void netvsc_get_stats64(struct net_device *net, tx_stats = &nvchan->tx_stats; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + start = u64_stats_fetch_begin(&tx_stats->syncp); packets = tx_stats->packets; bytes = tx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); t->tx_bytes += bytes; t->tx_packets += packets; rx_stats = &nvchan->rx_stats; do { - start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + start = u64_stats_fetch_begin(&rx_stats->syncp); packets = rx_stats->packets; bytes = rx_stats->bytes; multicast = rx_stats->multicast + rx_stats->broadcast; - } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); t->rx_bytes += bytes; t->rx_packets += packets; @@ -1527,24 +1527,24 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, tx_stats = &nvdev->chan_table[j].tx_stats; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + start = u64_stats_fetch_begin(&tx_stats->syncp); packets = tx_stats->packets; bytes = tx_stats->bytes; xdp_xmit = tx_stats->xdp_xmit; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; data[i++] = xdp_xmit; rx_stats = &nvdev->chan_table[j].rx_stats; do { - start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + start = u64_stats_fetch_begin(&rx_stats->syncp); packets = rx_stats->packets; bytes = rx_stats->bytes; xdp_drop = rx_stats->xdp_drop; xdp_redirect = rx_stats->xdp_redirect; xdp_tx = rx_stats->xdp_tx; - } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; data[i++] = xdp_drop; diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 1c64d5347b8e0..78253ad57b2ef 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -162,18 +162,18 @@ static void ifb_stats64(struct net_device *dev, for (i = 0; i < dev->num_tx_queues; i++,txp++) { do { - start = u64_stats_fetch_begin_irq(&txp->rx_stats.sync); + start = u64_stats_fetch_begin(&txp->rx_stats.sync); packets = txp->rx_stats.packets; bytes = txp->rx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&txp->rx_stats.sync, start)); + } while (u64_stats_fetch_retry(&txp->rx_stats.sync, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; do { - start = u64_stats_fetch_begin_irq(&txp->tx_stats.sync); + start = u64_stats_fetch_begin(&txp->tx_stats.sync); packets = txp->tx_stats.packets; bytes = txp->tx_stats.bytes; - } while (u64_stats_fetch_retry_irq(&txp->tx_stats.sync, start)); + } while (u64_stats_fetch_retry(&txp->tx_stats.sync, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; } @@ -245,12 +245,12 @@ static void ifb_fill_stats_data(u64 **data, int j; do { - start = u64_stats_fetch_begin_irq(&q_stats->sync); + start = u64_stats_fetch_begin(&q_stats->sync); for (j = 0; j < IFB_Q_STATS_LEN; j++) { offset = ifb_q_stats_desc[j].offset; (*data)[j] = *(u64 *)(stats_base + offset); } - } while (u64_stats_fetch_retry_irq(&q_stats->sync, start)); + } while (u64_stats_fetch_retry(&q_stats->sync, start)); *data += IFB_Q_STATS_LEN; } diff --git a/drivers/net/ipvlan/ipvlan_main.c 
b/drivers/net/ipvlan/ipvlan_main.c index 49ba8a50dfb1e..8a58d74638cd8 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -299,13 +299,13 @@ static void ipvlan_get_stats64(struct net_device *dev, for_each_possible_cpu(idx) { pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); do { - strt= u64_stats_fetch_begin_irq(&pcptr->syncp); + strt = u64_stats_fetch_begin(&pcptr->syncp); rx_pkts = u64_stats_read(&pcptr->rx_pkts); rx_bytes = u64_stats_read(&pcptr->rx_bytes); rx_mcast = u64_stats_read(&pcptr->rx_mcast); tx_pkts = u64_stats_read(&pcptr->tx_pkts); tx_bytes = u64_stats_read(&pcptr->tx_bytes); - } while (u64_stats_fetch_retry_irq(&pcptr->syncp, + } while (u64_stats_fetch_retry(&pcptr->syncp, strt)); s->rx_packets += rx_pkts; diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 14e8d04cb4347..c4ad98d39ea60 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -106,10 +106,10 @@ void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes) lb_stats = per_cpu_ptr(dev->lstats, i); do { - start = u64_stats_fetch_begin_irq(&lb_stats->syncp); + start = u64_stats_fetch_begin(&lb_stats->syncp); tpackets = u64_stats_read(&lb_stats->packets); tbytes = u64_stats_read(&lb_stats->bytes); - } while (u64_stats_fetch_retry_irq(&lb_stats->syncp, start)); + } while (u64_stats_fetch_retry(&lb_stats->syncp, start)); *bytes += tbytes; *packets += tpackets; } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c6d271e5687e9..5056f3cd5699a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2823,9 +2823,9 @@ static void get_rx_sc_stats(struct net_device *dev, stats = per_cpu_ptr(rx_sc->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->InOctetsValidated += tmp.InOctetsValidated; sum->InOctetsDecrypted += tmp.InOctetsDecrypted; @@ -2904,9 +2904,9 @@ static void get_tx_sc_stats(struct net_device *dev, stats = per_cpu_ptr(macsec_priv(dev)->secy.tx_sc.stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->OutPktsProtected += tmp.OutPktsProtected; sum->OutPktsEncrypted += tmp.OutPktsEncrypted; @@ -2960,9 +2960,9 @@ static void get_secy_stats(struct net_device *dev, struct macsec_dev_stats *sum) stats = per_cpu_ptr(macsec_priv(dev)->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); memcpy(&tmp, &stats->stats, sizeof(tmp)); - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); sum->OutPktsUntagged += tmp.OutPktsUntagged; sum->InPktsUntagged += tmp.InPktsUntagged; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 1080d6ebff63b..a1c7823f0ba66 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -948,13 +948,13 @@ static void macvlan_dev_get_stats64(struct net_device *dev, for_each_possible_cpu(i) { p = per_cpu_ptr(vlan->pcpu_stats, i); do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); rx_packets = u64_stats_read(&p->rx_packets); rx_bytes = u64_stats_read(&p->rx_bytes); 
rx_multicast = u64_stats_read(&p->rx_multicast); tx_packets = u64_stats_read(&p->tx_packets); tx_bytes = u64_stats_read(&p->tx_bytes); - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index 0b1b6f650104b..ff302144029de 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -104,19 +104,19 @@ static void mhi_ndo_get_stats64(struct net_device *ndev, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp); + start = u64_stats_fetch_begin(&mhi_netdev->stats.rx_syncp); stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets); stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes); stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors); - } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start)); + } while (u64_stats_fetch_retry(&mhi_netdev->stats.rx_syncp, start)); do { - start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp); + start = u64_stats_fetch_begin(&mhi_netdev->stats.tx_syncp); stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets); stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes); stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors); stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped); - } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start)); + } while (u64_stats_fetch_retry(&mhi_netdev->stats.tx_syncp, start)); } static const struct net_device_ops mhi_netdev_ops = { diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 9a1a5b2036240..e470e3398abc2 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -67,10 +67,10 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) unsigned int start; do { - start = u64_stats_fetch_begin_irq(&ns->syncp); + start = u64_stats_fetch_begin(&ns->syncp); stats->tx_bytes = ns->tx_bytes; stats->tx_packets = ns->tx_packets; - } while (u64_stats_fetch_retry_irq(&ns->syncp, start)); + } while (u64_stats_fetch_retry(&ns->syncp, start)); } static int diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 154a3c0a6dfd8..3de937141c168 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1865,13 +1865,13 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) for_each_possible_cpu(i) { p = per_cpu_ptr(team->pcpu_stats, i); do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); rx_packets = u64_stats_read(&p->rx_packets); rx_bytes = u64_stats_read(&p->rx_bytes); rx_multicast = u64_stats_read(&p->rx_multicast); tx_packets = u64_stats_read(&p->tx_packets); tx_bytes = u64_stats_read(&p->tx_bytes); - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index b095a4b4957bb..18d99fda997cf 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -466,9 +466,9 @@ static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats, struct lb_stats tmp; do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); tmp.tx_bytes = cpu_stats->tx_bytes; - } while 
(u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); acc_stats->tx_bytes += tmp.tx_bytes; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 466da01ba2e3e..2da7cfcfe1c31 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -182,12 +182,12 @@ static void veth_get_ethtool_stats(struct net_device *dev, size_t offset; do { - start = u64_stats_fetch_begin_irq(&rq_stats->syncp); + start = u64_stats_fetch_begin(&rq_stats->syncp); for (j = 0; j < VETH_RQ_STATS_LEN; j++) { offset = veth_rq_stats_desc[j].offset; data[idx + j] = *(u64 *)(stats_base + offset); } - } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); idx += VETH_RQ_STATS_LEN; } @@ -203,12 +203,12 @@ static void veth_get_ethtool_stats(struct net_device *dev, tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; do { - start = u64_stats_fetch_begin_irq(&rq_stats->syncp); + start = u64_stats_fetch_begin(&rq_stats->syncp); for (j = 0; j < VETH_TQ_STATS_LEN; j++) { offset = veth_tq_stats_desc[j].offset; data[tx_idx + j] += *(u64 *)(base + offset); } - } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); } } @@ -379,13 +379,13 @@ static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) unsigned int start; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; xdp_tx_err = stats->vs.xdp_tx_err; packets = stats->vs.xdp_packets; bytes = stats->vs.xdp_bytes; drops = stats->vs.rx_drops; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; result->xdp_tx_err += xdp_tx_err; result->xdp_packets += packets; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9cce7dec7366d..a94d9d8f67fd0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2066,18 +2066,18 @@ static void virtnet_stats(struct net_device *dev, struct send_queue *sq = &vi->sq[i]; do { - start = u64_stats_fetch_begin_irq(&sq->stats.syncp); + start = u64_stats_fetch_begin(&sq->stats.syncp); tpackets = sq->stats.packets; tbytes = sq->stats.bytes; terrors = sq->stats.tx_timeouts; - } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); + } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); do { - start = u64_stats_fetch_begin_irq(&rq->stats.syncp); + start = u64_stats_fetch_begin(&rq->stats.syncp); rpackets = rq->stats.packets; rbytes = rq->stats.bytes; rdrops = rq->stats.drops; - } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); + } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; @@ -2688,12 +2688,12 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, stats_base = (u8 *)&rq->stats; do { - start = u64_stats_fetch_begin_irq(&rq->stats.syncp); + start = u64_stats_fetch_begin(&rq->stats.syncp); for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { offset = virtnet_rq_stats_desc[j].offset; data[idx + j] = *(u64 *)(stats_base + offset); } - } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); + } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); idx += VIRTNET_RQ_STATS_LEN; } @@ -2702,12 +2702,12 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, stats_base = (u8 *)&sq->stats; do { - start = 
u64_stats_fetch_begin_irq(&sq->stats.syncp); + start = u64_stats_fetch_begin(&sq->stats.syncp); for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { offset = virtnet_sq_stats_desc[j].offset; data[idx + j] = *(u64 *)(stats_base + offset); } - } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); + } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); idx += VIRTNET_SQ_STATS_LEN; } } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 5df7a0abc39d5..191ebc482f0c1 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -159,13 +159,13 @@ static void vrf_get_stats64(struct net_device *dev, dstats = per_cpu_ptr(dev->dstats, i); do { - start = u64_stats_fetch_begin_irq(&dstats->syncp); + start = u64_stats_fetch_begin(&dstats->syncp); tbytes = dstats->tx_bytes; tpkts = dstats->tx_pkts; tdrops = dstats->tx_drps; rbytes = dstats->rx_bytes; rpkts = dstats->rx_pkts; - } while (u64_stats_fetch_retry_irq(&dstats->syncp, start)); + } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; stats->tx_dropped += tdrops; diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 3e04af4c5daa1..a3de081cda5ee 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -129,9 +129,9 @@ static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode, pstats = per_cpu_ptr(vninode->stats, i); do { - start = u64_stats_fetch_begin_irq(&pstats->syncp); + start = u64_stats_fetch_begin(&pstats->syncp); memcpy(&temp, &pstats->stats, sizeof(temp)); - } while (u64_stats_fetch_retry_irq(&pstats->syncp, start)); + } while (u64_stats_fetch_retry(&pstats->syncp, start)); dest->rx_packets += temp.rx_packets; dest->rx_bytes += temp.rx_bytes; diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index 6872782e8dd89..22b5939a42bb3 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -456,19 +456,19 @@ static void mhi_mbim_ndo_get_stats64(struct net_device *ndev, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&link->rx_syncp); + start = u64_stats_fetch_begin(&link->rx_syncp); stats->rx_packets = u64_stats_read(&link->rx_packets); stats->rx_bytes = u64_stats_read(&link->rx_bytes); stats->rx_errors = u64_stats_read(&link->rx_errors); - } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start)); + } while (u64_stats_fetch_retry(&link->rx_syncp, start)); do { - start = u64_stats_fetch_begin_irq(&link->tx_syncp); + start = u64_stats_fetch_begin(&link->tx_syncp); stats->tx_packets = u64_stats_read(&link->tx_packets); stats->tx_bytes = u64_stats_read(&link->tx_bytes); stats->tx_errors = u64_stats_read(&link->tx_errors); stats->tx_dropped = u64_stats_read(&link->tx_dropped); - } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start)); + } while (u64_stats_fetch_retry(&link->tx_syncp, start)); } static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev, diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 27a11cc08c61e..df4dc02638a00 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1392,16 +1392,16 @@ static void xennet_get_stats64(struct net_device *dev, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + start = u64_stats_fetch_begin(&tx_stats->syncp); tx_packets = tx_stats->packets; tx_bytes = tx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); do { 
- start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + start = u64_stats_fetch_begin(&rx_stats->syncp); rx_packets = rx_stats->packets; rx_bytes = rx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); tot->rx_packets += rx_packets; tot->tx_packets += tx_packets; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 2a4b3efb7e12b..9f6ed09538cd0 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -639,7 +639,7 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id) if (!(regval & PIN_IRQ_PENDING) || !(regval & BIT(INTERRUPT_MASK_OFF))) continue; - generic_handle_domain_irq(gc->irq.domain, irqnr + i); + generic_handle_domain_irq_safe(gc->irq.domain, irqnr + i); /* Clear interrupt. * We must read the pin register again, in case the diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c index 617dbf98980ec..97cfbc520a02c 100644 --- a/drivers/platform/x86/intel/int0002_vgpio.c +++ b/drivers/platform/x86/intel/int0002_vgpio.c @@ -125,8 +125,7 @@ static irqreturn_t int0002_irq(int irq, void *data) if (!(gpe_sts_reg & GPE0A_PME_B0_STS_BIT)) return IRQ_NONE; - generic_handle_irq(irq_find_mapping(chip->irq.domain, - GPE0A_PME_B0_VIRT_GPIO_PIN)); + generic_handle_domain_irq_safe(chip->irq.domain, GPE0A_PME_B0_VIRT_GPIO_PIN); pm_wakeup_hard_event(chip->parent); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 4b42f2302a8a8..d4f77f6688cf7 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -127,10 +127,10 @@ do { \ unsigned int start; \ pcpu_stats = per_cpu_ptr(in, i); \ do { \ - start = u64_stats_fetch_begin_irq( \ + start = u64_stats_fetch_begin( \ &pcpu_stats->syncp); \ inc = u64_stats_read(&pcpu_stats->field); \ - } while (u64_stats_fetch_retry_irq( \ + } while (u64_stats_fetch_retry( \ &pcpu_stats->syncp, start)); \ ret += inc; \ } \ diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c index 2de3896489c84..897cb8db5084f 100644 --- a/drivers/ssb/driver_gpio.c +++ b/drivers/ssb/driver_gpio.c @@ -132,7 +132,8 @@ static irqreturn_t ssb_gpio_irq_chipco_handler(int irq, void *dev_id) return IRQ_NONE; for_each_set_bit(gpio, &irqs, bus->gpio.ngpio) - generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio)); + generic_handle_domain_irq_safe(bus->irq_domain, gpio); + ssb_chipco_gpio_polarity(chipco, irqs, val & irqs); return IRQ_HANDLED; @@ -330,7 +331,8 @@ static irqreturn_t ssb_gpio_irq_extif_handler(int irq, void *dev_id) return IRQ_NONE; for_each_set_bit(gpio, &irqs, bus->gpio.ngpio) - generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio)); + generic_handle_domain_irq_safe(bus->irq_domain, gpio); + ssb_extif_gpio_polarity(extif, irqs, val & irqs); return IRQ_HANDLED; diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 287153d325365..81f5fce6e895f 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -177,12 +177,74 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) up->dl_write(up, value); } +static inline int serial8250_in_IER(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + unsigned long flags; + bool is_console; + int ier; + + is_console = uart_console(port); + + if (is_console) + printk_cpu_sync_get_irqsave(flags); + + ier = serial_in(up, UART_IER); + + if (is_console) + printk_cpu_sync_put_irqrestore(flags); + + return ier; +} + +static inline void serial8250_set_IER(struct 
uart_8250_port *up, int ier) +{ + struct uart_port *port = &up->port; + unsigned long flags; + bool is_console; + + is_console = uart_console(port); + + if (is_console) + printk_cpu_sync_get_irqsave(flags); + + serial_out(up, UART_IER, ier); + + if (is_console) + printk_cpu_sync_put_irqrestore(flags); +} + +static inline int serial8250_clear_IER(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + unsigned int clearval = 0; + unsigned long flags; + bool is_console; + int prior; + + is_console = uart_console(port); + + if (up->capabilities & UART_CAP_UUE) + clearval = UART_IER_UUE; + + if (is_console) + printk_cpu_sync_get_irqsave(flags); + + prior = serial_in(up, UART_IER); + serial_out(up, UART_IER, clearval); + + if (is_console) + printk_cpu_sync_put_irqrestore(flags); + + return prior; +} + static inline bool serial8250_set_THRI(struct uart_8250_port *up) { if (up->ier & UART_IER_THRI) return false; up->ier |= UART_IER_THRI; - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); return true; } @@ -191,7 +253,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); return true; } diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 9d2a7856784f7..7cc6b527c088b 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -278,7 +278,7 @@ static void __aspeed_vuart_set_throttle(struct uart_8250_port *up, up->ier &= ~irqs; if (!throttle) up->ier |= irqs; - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); } static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle) { diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c index 8efdc271eb75f..d30c74618411f 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -609,7 +609,7 @@ static int brcmuart_startup(struct uart_port *port) * will handle this. 
*/ up->ier &= ~UART_IER_RDI; - serial_port_out(port, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); priv->tx_running = false; priv->dma.rx_dma = NULL; @@ -775,10 +775,12 @@ static int brcmuart_handle_irq(struct uart_port *p) unsigned int iir = serial_port_in(p, UART_IIR); struct brcmuart_priv *priv = p->private_data; struct uart_8250_port *up = up_to_u8250p(p); + unsigned long cs_flags; unsigned int status; unsigned long flags; unsigned int ier; unsigned int mcr; + bool is_console; int handled = 0; /* @@ -789,6 +791,10 @@ static int brcmuart_handle_irq(struct uart_port *p) spin_lock_irqsave(&p->lock, flags); status = serial_port_in(p, UART_LSR); if ((status & UART_LSR_DR) == 0) { + is_console = uart_console(p); + + if (is_console) + printk_cpu_sync_get_irqsave(cs_flags); ier = serial_port_in(p, UART_IER); /* @@ -809,6 +815,9 @@ static int brcmuart_handle_irq(struct uart_port *p) serial_port_in(p, UART_RX); } + if (is_console) + printk_cpu_sync_put_irqrestore(cs_flags); + handled = 1; } spin_unlock_irqrestore(&p->lock, flags); @@ -823,8 +832,10 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t) struct brcmuart_priv *priv = container_of(t, struct brcmuart_priv, hrt); struct uart_port *p = priv->up; struct uart_8250_port *up = up_to_u8250p(p); + unsigned long cs_flags; unsigned int status; unsigned long flags; + bool is_console; if (priv->shutdown) return HRTIMER_NORESTART; @@ -846,12 +857,20 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t) /* re-enable receive unless upper layer has disabled it */ if ((up->ier & (UART_IER_RLSI | UART_IER_RDI)) == (UART_IER_RLSI | UART_IER_RDI)) { + is_console = uart_console(p); + + if (is_console) + printk_cpu_sync_get_irqsave(cs_flags); + status = serial_port_in(p, UART_IER); status |= (UART_IER_RLSI | UART_IER_RDI); serial_port_out(p, UART_IER, status); status = serial_port_in(p, UART_MCR); status |= UART_MCR_RTS; serial_port_out(p, UART_MCR, status); + + if (is_console) + printk_cpu_sync_put_irqrestore(cs_flags); } spin_unlock_irqrestore(&p->lock, flags); return HRTIMER_NORESTART; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 94fbf0add2ce2..196d0c55dfe99 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -255,8 +255,11 @@ static void serial8250_timeout(struct timer_list *t) static void serial8250_backup_timeout(struct timer_list *t) { struct uart_8250_port *up = from_timer(up, t, timer); + struct uart_port *port = &up->port; unsigned int iir, ier = 0, lsr; + unsigned long cs_flags; unsigned long flags; + bool is_console; spin_lock_irqsave(&up->port.lock, flags); @@ -265,8 +268,16 @@ static void serial8250_backup_timeout(struct timer_list *t) * based handler. 
*/ if (up->port.irq) { + is_console = uart_console(port); + + if (is_console) + printk_cpu_sync_get_irqsave(cs_flags); + ier = serial_in(up, UART_IER); serial_out(up, UART_IER, 0); + + if (is_console) + printk_cpu_sync_put_irqrestore(cs_flags); } iir = serial_in(up, UART_IIR); @@ -289,7 +300,7 @@ static void serial8250_backup_timeout(struct timer_list *t) serial8250_tx_chars(up); if (up->port.irq) - serial_out(up, UART_IER, ier); + serial8250_set_IER(up, ier); spin_unlock_irqrestore(&up->port.lock, flags); @@ -575,6 +586,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) #ifdef CONFIG_SERIAL_8250_CONSOLE +static void univ8250_console_write_atomic(struct console *co, const char *s, + unsigned int count) +{ + struct uart_8250_port *up = &serial8250_ports[co->index]; + + serial8250_console_write_atomic(up, s, count); +} + static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { @@ -668,6 +687,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx, static struct console univ8250_console = { .name = "ttyS", + .write_atomic = univ8250_console_write_atomic, .write = univ8250_console_write, .device = uart_console_device, .setup = univ8250_console_setup, @@ -961,7 +981,7 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work) spin_lock_irqsave(&port->lock, flags); up->ier |= UART_IER_RLSI | UART_IER_RDI; up->port.read_status_mask |= UART_LSR_DR; - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); spin_unlock_irqrestore(&port->lock, flags); } diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 314a05e009df9..9809517de8270 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -179,6 +179,8 @@ static void xr17v35x_set_divisor(struct uart_port *p, unsigned int baud, static int xr17v35x_startup(struct uart_port *port) { + struct uart_8250_port *up = up_to_u8250p(port); + /* * First enable access to IER [7:5], ISR [5:4], FCR [5:4], * MCR [7:5] and MSR [7:0] @@ -189,7 +191,7 @@ static int xr17v35x_startup(struct uart_port *port) * Make sure all interrups are masked until initialization is * complete and the FIFOs are cleared */ - serial_port_out(port, UART_IER, 0); + serial8250_set_IER(up, 0); return serial8250_do_startup(port); } diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 8aad15622a2e5..74bb85b705e7f 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -58,7 +58,8 @@ int fsl8250_handle_irq(struct uart_port *port) if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { unsigned long delay; - up->ier = port->serial_in(port, UART_IER); + up->ier = serial8250_in_IER(up); + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { port->ops->stop_rx(port); } else { diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c index 2b2f5d8d24b91..2b78e6c394fb9 100644 --- a/drivers/tty/serial/8250/8250_ingenic.c +++ b/drivers/tty/serial/8250/8250_ingenic.c @@ -146,6 +146,7 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart", static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) { + struct uart_8250_port *up = up_to_u8250p(p); int ier; switch (offset) { @@ -167,7 +168,7 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) * If we have enabled modem status IRQs we should enable * modem mode. 
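The call-site conversions in these hunks replace direct UART_IER accesses (serial_out(up, UART_IER, ...) / p->serial_in(p, UART_IER)) with the serial8250_set_IER()/serial8250_in_IER() helpers added in 8250.h, so the printk_cpu_sync bracketing for console ports lives in exactly one place. A minimal, hedged sketch of what a converted call site looks like; my_8250_mask_rx() is a hypothetical helper modelled on serial8250_stop_rx() and is not part of the patch:

/*
 * Hedged sketch, not part of the patch: a driver-side helper masking RX
 * interrupts after the conversion. serial8250_set_IER() performs the
 * printk_cpu_sync_get_irqsave()/put_irqrestore() bracketing itself when
 * the port is registered as a console, so the caller only needs the
 * normal port lock.
 */
static void my_8250_mask_rx(struct uart_8250_port *up)
{
        unsigned long flags;

        spin_lock_irqsave(&up->port.lock, flags);
        up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
        serial8250_set_IER(up, up->ier);        /* was: serial_out(up, UART_IER, up->ier) */
        spin_unlock_irqrestore(&up->port.lock, flags);
}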
*/ - ier = p->serial_in(p, UART_IER); + ier = serial8250_in_IER(up); if (ier & UART_IER_MSI) value |= UART_MCR_MDCE | UART_MCR_FCM; diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index 54051ec7b4992..6092c75808fb9 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -222,12 +222,40 @@ static void mtk8250_shutdown(struct uart_port *port) static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) { - serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); + struct uart_port *port = &up->port; + unsigned long flags; + bool is_console; + int ier; + + is_console = uart_console(port); + + if (is_console) + printk_cpu_sync_get_irqsave(flags); + + ier = serial_in(up, UART_IER); + serial_out(up, UART_IER, ier & (~mask)); + + if (is_console) + printk_cpu_sync_put_irqrestore(flags); } static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) { - serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); + struct uart_port *port = &up->port; + unsigned long flags; + bool is_console; + int ier; + + is_console = uart_console(port); + + if (is_console) + printk_cpu_sync_get_irqsave(flags); + + ier = serial_in(up, UART_IER); + serial_out(up, UART_IER, ier | mask); + + if (is_console) + printk_cpu_sync_put_irqrestore(flags); } static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 38ee3e42251af..8dc983a8cad15 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -325,7 +325,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) /* drop TCR + TLR access, we setup XON/XOFF later */ serial8250_out_MCR(up, up->mcr); - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_dl_write(up, priv->quot); @@ -515,7 +515,7 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, serial_out(up, UART_EFR, efr | UART_EFR_ECB); serial_out(up, UART_LCR, 0); - serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0); + serial8250_set_IER(up, (state != 0) ? 
UART_IERX_SLEEP : 0); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(up, UART_EFR, efr); serial_out(up, UART_LCR, 0); @@ -636,7 +636,7 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) { unsigned long delay; - up->ier = port->serial_in(port, UART_IER); + up->ier = serial8250_in_IER(up); if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { port->ops->stop_rx(port); } else { @@ -696,7 +696,7 @@ static int omap_8250_startup(struct uart_port *port) goto err; up->ier = UART_IER_RLSI | UART_IER_RDI; - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); #ifdef CONFIG_PM up->capabilities |= UART_CAP_RPM; @@ -737,7 +737,7 @@ static void omap_8250_shutdown(struct uart_port *port) serial_out(up, UART_OMAP_EFR2, 0x0); up->ier = 0; - serial_out(up, UART_IER, 0); + serial8250_set_IER(up, 0); if (up->dma) serial8250_release_dma(up); @@ -785,7 +785,7 @@ static void omap_8250_unthrottle(struct uart_port *port) up->dma->rx_dma(up); up->ier |= UART_IER_RLSI | UART_IER_RDI; port->read_status_mask |= UART_LSR_DR; - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); spin_unlock_irqrestore(&port->lock, flags); pm_runtime_mark_last_busy(port->dev); @@ -876,7 +876,7 @@ static void __dma_rx_complete(void *param) __dma_rx_do_complete(p); if (!priv->throttled) { p->ier |= UART_IER_RLSI | UART_IER_RDI; - serial_out(p, UART_IER, p->ier); + serial8250_set_IER(p, p->ier); if (!(priv->habit & UART_HAS_EFR2)) omap_8250_rx_dma(p); } @@ -933,7 +933,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p) * callback to run. */ p->ier &= ~(UART_IER_RLSI | UART_IER_RDI); - serial_out(p, UART_IER, p->ier); + serial8250_set_IER(p, p->ier); } goto out; } @@ -1148,12 +1148,12 @@ static void am654_8250_handle_rx_dma(struct uart_8250_port *up, u8 iir, * periodic timeouts, re-enable interrupts. */ up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); omap_8250_rx_dma_flush(up); serial_in(up, UART_IIR); serial_out(up, UART_OMAP_EFR2, 0x0); up->ier |= UART_IER_RLSI | UART_IER_RDI; - serial_out(up, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); } } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2030a92ac66e7..326549603740d 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -743,7 +743,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } - serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); + serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0); if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); @@ -1017,8 +1017,11 @@ static int broken_efr(struct uart_8250_port *up) */ static void autoconfig_16550a(struct uart_8250_port *up) { + struct uart_port *port = &up->port; unsigned char status1, status2; unsigned int iersave; + unsigned long flags; + bool is_console; up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; @@ -1130,6 +1133,11 @@ static void autoconfig_16550a(struct uart_8250_port *up) return; } + is_console = uart_console(port); + + if (is_console) + printk_cpu_sync_get_irqsave(flags); + /* * Try writing and reading the UART_IER_UUE bit (b6). 
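The mtk8250 and autoconfig paths above touch UART_IER directly instead of going through the helpers, so they apply the same bracketing inline: take the printk CPU sync only when the port is a console, then do the register dance. A hedged sketch of that idiom in isolation; my_probe_ier() and everything it probes are hypothetical:

/*
 * Hedged sketch of the bracketing idiom, not part of the patch. Only the
 * printk_cpu_sync_*() usage mirrors the hunks above; the probing itself
 * is a placeholder.
 */
static void my_probe_ier(struct uart_8250_port *up)
{
        bool is_console = uart_console(&up->port);
        unsigned long flags;
        int saved;

        if (is_console)
                printk_cpu_sync_get_irqsave(flags);

        saved = serial_in(up, UART_IER);
        serial_out(up, UART_IER, 0);            /* quiesce the port while probing */
        /* ... probe registers here ... */
        serial_out(up, UART_IER, saved);

        if (is_console)
                printk_cpu_sync_put_irqrestore(flags);
}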
* If it works, this is probably one of the Xscale platform's @@ -1165,6 +1173,9 @@ static void autoconfig_16550a(struct uart_8250_port *up) } serial_out(up, UART_IER, iersave); + if (is_console) + printk_cpu_sync_put_irqrestore(flags); + /* * We distinguish between 16550A and U6 16550A by counting * how many bytes are in the FIFO. @@ -1187,8 +1198,10 @@ static void autoconfig(struct uart_8250_port *up) unsigned char status1, scratch, scratch2, scratch3; unsigned char save_lcr, save_mcr; struct uart_port *port = &up->port; + unsigned long cs_flags; unsigned long flags; unsigned int old_capabilities; + bool is_console; if (!port->iobase && !port->mapbase && !port->membase) return; @@ -1206,6 +1219,11 @@ static void autoconfig(struct uart_8250_port *up) up->bugs = 0; if (!(port->flags & UPF_BUGGY_UART)) { + is_console = uart_console(port); + + if (is_console) + printk_cpu_sync_get_irqsave(cs_flags); + /* * Do a simple existence test first; if we fail this, * there's no point trying anything else. @@ -1235,6 +1253,10 @@ static void autoconfig(struct uart_8250_port *up) #endif scratch3 = serial_in(up, UART_IER) & 0x0f; serial_out(up, UART_IER, scratch); + + if (is_console) + printk_cpu_sync_put_irqrestore(cs_flags); + if (scratch2 != 0 || scratch3 != 0x0F) { /* * We failed; there's nothing here @@ -1332,10 +1354,7 @@ static void autoconfig(struct uart_8250_port *up) serial8250_out_MCR(up, save_mcr); serial8250_clear_fifos(up); serial_in(up, UART_RX); - if (up->capabilities & UART_CAP_UUE) - serial_out(up, UART_IER, UART_IER_UUE); - else - serial_out(up, UART_IER, 0); + serial8250_clear_IER(up); out_unlock: spin_unlock_irqrestore(&port->lock, flags); @@ -1361,7 +1380,9 @@ static void autoconfig_irq(struct uart_8250_port *up) unsigned char save_mcr, save_ier; unsigned char save_ICP = 0; unsigned int ICP = 0; + unsigned long flags; unsigned long irqs; + bool is_console; int irq; if (port->flags & UPF_FOURPORT) { @@ -1371,8 +1392,12 @@ static void autoconfig_irq(struct uart_8250_port *up) inb_p(ICP); } - if (uart_console(port)) + is_console = uart_console(port); + + if (is_console) { console_lock(); + printk_cpu_sync_get_irqsave(flags); + } /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); @@ -1404,8 +1429,10 @@ static void autoconfig_irq(struct uart_8250_port *up) if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); - if (uart_console(port)) + if (is_console) { + printk_cpu_sync_put_irqrestore(flags); console_unlock(); + } port->irq = (irq > 0) ? 
irq : 0; } @@ -1418,7 +1445,7 @@ static void serial8250_stop_rx(struct uart_port *port) up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); up->port.read_status_mask &= ~UART_LSR_DR; - serial_port_out(port, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); serial8250_rpm_put(up); } @@ -1448,7 +1475,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p) serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; - serial_port_out(&p->port, UART_IER, p->ier); + serial8250_set_IER(p, p->ier); } } EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); @@ -1697,7 +1724,7 @@ static void serial8250_disable_ms(struct uart_port *port) mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; - serial_port_out(port, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); } static void serial8250_enable_ms(struct uart_port *port) @@ -1713,7 +1740,7 @@ static void serial8250_enable_ms(struct uart_port *port) up->ier |= UART_IER_MSI; serial8250_rpm_get(up); - serial_port_out(port, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); serial8250_rpm_put(up); } @@ -2144,14 +2171,7 @@ static void serial8250_put_poll_char(struct uart_port *port, struct uart_8250_port *up = up_to_u8250p(port); serial8250_rpm_get(up); - /* - * First save the IER then disable the interrupts - */ - ier = serial_port_in(port, UART_IER); - if (up->capabilities & UART_CAP_UUE) - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); + ier = serial8250_clear_IER(up); wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); /* @@ -2164,7 +2184,7 @@ static void serial8250_put_poll_char(struct uart_port *port, * and restore the IER */ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); - serial_port_out(port, UART_IER, ier); + serial8250_set_IER(up, ier); serial8250_rpm_put(up); } @@ -2173,8 +2193,10 @@ static void serial8250_put_poll_char(struct uart_port *port, int serial8250_do_startup(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); + unsigned long cs_flags; unsigned long flags; unsigned char iir; + bool is_console; int retval; u16 lsr; @@ -2195,7 +2217,7 @@ int serial8250_do_startup(struct uart_port *port) up->acr = 0; serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); serial_port_out(port, UART_EFR, UART_EFR_ECB); - serial_port_out(port, UART_IER, 0); + serial8250_set_IER(up, 0); serial_port_out(port, UART_LCR, 0); serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); @@ -2205,7 +2227,7 @@ int serial8250_do_startup(struct uart_port *port) if (port->type == PORT_DA830) { /* Reset the port */ - serial_port_out(port, UART_IER, 0); + serial8250_set_IER(up, 0); serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); mdelay(10); @@ -2304,6 +2326,8 @@ int serial8250_do_startup(struct uart_port *port) if (retval) goto out; + is_console = uart_console(port); + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; @@ -2320,6 +2344,9 @@ int serial8250_do_startup(struct uart_port *port) */ spin_lock_irqsave(&port->lock, flags); + if (is_console) + printk_cpu_sync_get_irqsave(cs_flags); + wait_for_xmitr(up, UART_LSR_THRE); serial_port_out_sync(port, UART_IER, UART_IER_THRI); udelay(1); /* allow THRE to set */ @@ -2330,6 +2357,9 @@ int serial8250_do_startup(struct uart_port *port) iir = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); + if (is_console) + printk_cpu_sync_put_irqrestore(cs_flags); + spin_unlock_irqrestore(&port->lock, flags); if (port->irqflags & IRQF_SHARED) @@ -2384,10 +2414,14 @@ 
int serial8250_do_startup(struct uart_port *port) * Do a quick test to see if we receive an interrupt when we enable * the TX irq. */ + if (is_console) + printk_cpu_sync_get_irqsave(cs_flags); serial_port_out(port, UART_IER, UART_IER_THRI); lsr = serial_port_in(port, UART_LSR); iir = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); + if (is_console) + printk_cpu_sync_put_irqrestore(cs_flags); if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { if (!(up->bugs & UART_BUG_TXEN)) { @@ -2419,7 +2453,7 @@ int serial8250_do_startup(struct uart_port *port) if (up->dma) { const char *msg = NULL; - if (uart_console(port)) + if (is_console) msg = "forbid DMA for kernel console"; else if (serial8250_request_dma(up)) msg = "failed to request DMA"; @@ -2470,7 +2504,7 @@ void serial8250_do_shutdown(struct uart_port *port) */ spin_lock_irqsave(&port->lock, flags); up->ier = 0; - serial_port_out(port, UART_IER, 0); + serial8250_set_IER(up, 0); spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); @@ -2836,7 +2870,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; - serial_port_out(port, UART_IER, up->ier); + serial8250_set_IER(up, up->ier); if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; @@ -3301,7 +3335,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); #ifdef CONFIG_SERIAL_8250_CONSOLE -static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) +static void serial8250_console_putchar_locked(struct uart_port *port, unsigned char ch) { struct uart_8250_port *up = up_to_u8250p(port); @@ -3309,6 +3343,18 @@ static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) serial_port_out(port, UART_TX, ch); } +static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) +{ + struct uart_8250_port *up = up_to_u8250p(port); + unsigned long flags; + + wait_for_xmitr(up, UART_LSR_THRE); + + printk_cpu_sync_get_irqsave(flags); + serial8250_console_putchar_locked(port, ch); + printk_cpu_sync_put_irqrestore(flags); +} + /* * Restore serial console when h/w power-off detected */ @@ -3335,6 +3381,32 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } +void serial8250_console_write_atomic(struct uart_8250_port *up, + const char *s, unsigned int count) +{ + struct uart_port *port = &up->port; + unsigned long flags; + unsigned int ier; + + printk_cpu_sync_get_irqsave(flags); + + touch_nmi_watchdog(); + + ier = serial8250_clear_IER(up); + + if (atomic_fetch_inc(&up->console_printing)) { + uart_console_write(port, "\n", 1, + serial8250_console_putchar_locked); + } + uart_console_write(port, s, count, serial8250_console_putchar_locked); + atomic_dec(&up->console_printing); + + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); + serial8250_set_IER(up, ier); + + printk_cpu_sync_put_irqrestore(flags); +} + /* * Print a string to the serial port using the device FIFO * @@ -3380,24 +3452,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, struct uart_port *port = &up->port; unsigned long flags; unsigned int ier, use_fifo; - int locked = 1; touch_nmi_watchdog(); - if (oops_in_progress) - locked = spin_trylock_irqsave(&port->lock, flags); - else - spin_lock_irqsave(&port->lock, flags); + spin_lock_irqsave(&port->lock, flags); - /* - * First save the IER then disable the interrupts - */ - ier = serial_port_in(port, UART_IER); - - if 
(up->capabilities & UART_CAP_UUE) - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); + ier = serial8250_clear_IER(up); /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { @@ -3431,10 +3491,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, */ !(up->port.flags & UPF_CONS_FLOW); + atomic_inc(&up->console_printing); if (likely(use_fifo)) serial8250_console_fifo_write(up, s, count); else uart_console_write(port, s, count, serial8250_console_putchar); + atomic_dec(&up->console_printing); /* * Finally, wait for transmitter to become empty @@ -3447,8 +3509,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, if (em485->tx_stopped) up->rs485_stop_tx(up); } - - serial_port_out(port, UART_IER, ier); + serial8250_set_IER(up, ier); /* * The receive handling will happen properly because the @@ -3460,8 +3521,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, if (up->msr_saved_flags) serial8250_modem_status(up); - if (locked) - spin_unlock_irqrestore(&port->lock, flags); + spin_unlock_irqrestore(&port->lock, flags); } static unsigned int probe_baud(struct uart_port *port) @@ -3481,6 +3541,7 @@ static unsigned int probe_baud(struct uart_port *port) int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { + struct uart_8250_port *up = up_to_u8250p(port); int baud = 9600; int bits = 8; int parity = 'n'; @@ -3490,6 +3551,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) if (!port->iobase && !port->membase) return -ENODEV; + atomic_set(&up->console_printing, 0); + if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); else if (probe) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index d0b49e15fbf5e..02c308467339c 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -9,6 +9,7 @@ config SERIAL_8250 depends on !S390 select SERIAL_CORE select SERIAL_MCTRL_GPIO if GPIOLIB + select HAVE_ATOMIC_CONSOLE help This selects whether you want to include the driver for the standard serial ports. The standard answer is Y. People who might say N diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 15f0e4d88c5a0..ffdb001e3d109 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2308,18 +2308,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) { struct uart_amba_port *uap = amba_ports[co->index]; unsigned int old_cr = 0, new_cr; - unsigned long flags; + unsigned long flags = 0; int locked = 1; clk_enable(uap->clk); - local_irq_save(flags); + /* + * local_irq_save(flags); + * + * This local_irq_save() is nonsense. If we come in via sysrq + * handling then interrupts are already disabled. Aside of + * that the port.sysrq check is racy on SMP regardless. 
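With serial8250_console_write_atomic() and the univ8250 .write_atomic hook above, an 8250-based console exposes both the regular, lock-taking write path and an atomic path that relies only on the printk CPU sync. A hedged sketch of how such a console could be wired up; all myuart_* identifiers are hypothetical and not part of this patch:

/*
 * Hedged sketch: wiring a .write_atomic callback the way univ8250 does.
 * All myuart_* identifiers are hypothetical. The atomic callback must
 * rely only on printk_cpu_sync_*() for serialization, never on sleeping
 * locks or the port spinlock.
 */
static void myuart_console_write_atomic(struct console *co, const char *s,
                                        unsigned int count)
{
        struct uart_8250_port *up = &myuart_ports[co->index];

        serial8250_console_write_atomic(up, s, count);
}

static struct console myuart_console = {
        .name           = "ttyMY",
        .write          = myuart_console_write,         /* normal, lock-taking path */
        .write_atomic   = myuart_console_write_atomic,  /* emergency/atomic path */
        .device         = uart_console_device,
        .setup          = myuart_console_setup,
        .flags          = CON_PRINTBUFFER,
        .index          = -1,
};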
+ */ if (uap->port.sysrq) locked = 0; else if (oops_in_progress) - locked = spin_trylock(&uap->port.lock); + locked = spin_trylock_irqsave(&uap->port.lock, flags); else - spin_lock(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* * First save the CR then disable the interrupts @@ -2345,8 +2351,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) pl011_write(old_cr, uap, REG_CR); if (locked) - spin_unlock(&uap->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&uap->port.lock, flags); clk_disable(uap->clk); } diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 0aa666e247d57..d7130d1ae64c0 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1240,13 +1240,10 @@ serial_omap_console_write(struct console *co, const char *s, unsigned int ier; int locked = 1; - local_irq_save(flags); - if (up->port.sysrq) - locked = 0; - else if (oops_in_progress) - locked = spin_trylock(&up->port.lock); + if (up->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&up->port.lock, flags); else - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); /* * First save the IER then disable the interrupts @@ -1273,8 +1270,7 @@ serial_omap_console_write(struct console *co, const char *s, check_modem_status(up); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } static int __init diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index d2b2720db6ca7..18e623325887f 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -581,6 +581,7 @@ void __handle_sysrq(int key, bool check_mask) rcu_sysrq_start(); rcu_read_lock(); + printk_prefer_direct_enter(); /* * Raise the apparent loglevel to maximum so that the sysrq header * is shown to provide the user with positive feedback. We do not @@ -622,6 +623,7 @@ void __handle_sysrq(int key, bool check_mask) pr_cont("\n"); console_loglevel = orig_log_level; } + printk_prefer_direct_exit(); rcu_read_unlock(); rcu_sysrq_end(); diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h index 4e0e50e7ac153..173e979b84a93 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.h +++ b/drivers/vdpa/vdpa_user/iova_domain.h @@ -14,7 +14,6 @@ #include #include #include -#include #define IOVA_START_PFN 1 diff --git a/fs/dcache.c b/fs/dcache.c index bb0c4d0038dbd..2ee8636016ee9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2597,15 +2597,7 @@ EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { - /* - * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT - * kernels spin_lock() implicitly disables preemption, but not on - * PREEMPT_RT. So for RT it has to be done explicitly to protect - * the sequence count write side critical section against a reader - * or another writer preempting, which would result in a live lock. 
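The pl011 and omap-serial console writers above stop open-coding local_irq_save() and instead take the port lock with the irqsave variants, falling back to a trylock when the write may recurse from sysrq or oops handling. A hedged sketch of the resulting pattern for a generic UART console; the myuart_* names are hypothetical:

/*
 * Hedged sketch of the locking pattern pl011 and omap-serial converge on:
 * take port->lock with the irqsave variant, but only trylock when the
 * write may recurse (sysrq in progress or an oops being printed).
 * myuart_* names are hypothetical.
 */
static void myuart_console_write(struct console *co, const char *s,
                                 unsigned int count)
{
        struct uart_port *port = &myuart_ports[co->index].port;
        unsigned long flags;
        int locked = 1;

        if (port->sysrq || oops_in_progress)
                locked = spin_trylock_irqsave(&port->lock, flags);
        else
                spin_lock_irqsave(&port->lock, flags);

        uart_console_write(port, s, count, myuart_console_putchar);

        if (locked)
                spin_unlock_irqrestore(&port->lock, flags);
}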
- */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); for (;;) { unsigned n = dir->i_dir_seq; if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) @@ -2618,8 +2610,7 @@ static inline void end_dir_add(struct inode *dir, unsigned int n, wait_queue_head_t *d_wait) { smp_store_release(&dir->i_dir_seq, n + 2); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable_nested(); wake_up_all(d_wait); } diff --git a/include/linux/console.h b/include/linux/console.h index 8c1686e2c2337..8a813cbaf9285 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,6 +16,7 @@ #include #include +#include struct vc_data; struct console_font_op; @@ -137,9 +138,19 @@ static inline int con_debug_leave(void) #define CON_BRL (32) /* Used for a braille device */ #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE +struct console_atomic_data { + u64 seq; + char *text; + char *ext_text; + char *dropped_text; +}; +#endif + struct console { char name[16]; void (*write)(struct console *, const char *, unsigned); + void (*write_atomic)(struct console *, const char *, unsigned); int (*read)(struct console *, char *, unsigned); struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); @@ -152,7 +163,26 @@ struct console { uint ispeed; uint ospeed; u64 seq; - unsigned long dropped; + atomic_long_t dropped; +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + struct console_atomic_data *atomic_data; +#endif + struct task_struct *thread; + bool blocked; + + /* + * The per-console lock is used by printing kthreads to synchronize + * this console with callers of console_lock(). This is necessary in + * order to allow printing kthreads to run in parallel to each other, + * while each safely accessing the @blocked field and synchronizing + * against direct printing via console_lock/console_unlock. + * + * Note: For synchronizing against direct printing via + * console_trylock/console_unlock, see the static global + * variable @console_kthreads_active. 
+ */ + struct mutex lock; + void *data; struct console *next; }; @@ -167,6 +197,7 @@ extern int console_set_on_cmdline; extern struct console *early_console; enum con_flush_mode { + CONSOLE_ATOMIC_FLUSH_PENDING, CONSOLE_FLUSH_PENDING, CONSOLE_REPLAY_ALL, }; diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 84a466b176cf4..df6d17bc30aa3 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -57,9 +57,15 @@ # define ARCH_EXIT_TO_USER_MODE_WORK (0) #endif +#ifdef CONFIG_PREEMPT_LAZY +# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) +#else +# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED) +#endif + #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a92bce40b04b3..bf82980f569df 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -605,6 +605,35 @@ extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +#ifdef CONFIG_PREEMPT_RT +DECLARE_PER_CPU(struct task_struct *, timersd); +DECLARE_PER_CPU(unsigned long, pending_timer_softirq); + +extern void raise_timer_softirq(void); +extern void raise_hrtimer_softirq(void); + +static inline unsigned int local_pending_timers(void) +{ + return __this_cpu_read(pending_timer_softirq); +} + +#else +static inline void raise_timer_softirq(void) +{ + raise_softirq(TIMER_SOFTIRQ); +} + +static inline void raise_hrtimer_softirq(void) +{ + raise_softirq_irqoff(HRTIMER_SOFTIRQ); +} + +static inline unsigned int local_pending_timers(void) +{ + return local_softirq_pending(); +} +#endif + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 1cd4e36890fbf..844a8e30e6de5 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -169,6 +169,7 @@ int generic_handle_irq_safe(unsigned int irq); * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1f1099dac3f05..1023f349af716 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -435,7 +435,6 @@ enum xhlock_context_t { XHLOCK_CTX_NR, }; -#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) /* * To initialize a lockdep_map statically use this macro. * Note that _name must not be NULL. diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 15ae78cd28536..b8728d11c9490 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm); #define VM_WARN(cond, format...) 
BUILD_BUG_ON_INVALID(cond) #endif +#ifdef CONFIG_DEBUG_VM_IRQSOFF +#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled()) +#else +#define VM_WARN_ON_IRQS_ENABLED() do { } while (0) +#endif + #ifdef CONFIG_DEBUG_VIRTUAL #define VIRTUAL_BUG_ON(cond) BUG_ON(cond) #else diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 05d6f3facd5a5..5e6b840f5a9ac 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3156,7 +3156,11 @@ struct softnet_data { int defer_count; int defer_ipi_scheduled; struct sk_buff *defer_list; +#ifndef CONFIG_PREEMPT_RT call_single_data_t defer_csd; +#else + struct work_struct defer_work; +#endif }; static inline void input_queue_head_incr(struct softnet_data *sd) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index b4381f255a5ca..12f59cdaaedda 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -196,6 +196,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) +#ifdef CONFIG_PREEMPT_LAZY +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) +#else +#define add_preempt_lazy_count(val) do { } while (0) +#define sub_preempt_lazy_count(val) do { } while (0) +#define inc_preempt_lazy_count() do { } while (0) +#define dec_preempt_lazy_count() do { } while (0) +#define preempt_lazy_count() (0) +#endif + #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ @@ -204,6 +218,12 @@ do { \ barrier(); \ } while (0) +#define preempt_lazy_disable() \ +do { \ + inc_preempt_lazy_count(); \ + barrier(); \ +} while (0) + #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ @@ -235,6 +255,18 @@ do { \ __preempt_schedule(); \ } while (0) +/* + * open code preempt_check_resched() because it is not exported to modules and + * used by local_unlock() or bpf_enable_instrumentation(). 
+ */ +#define preempt_lazy_enable() \ +do { \ + dec_preempt_lazy_count(); \ + barrier(); \ + if (should_resched(0)) \ + __preempt_schedule(); \ +} while (0) + #else /* !CONFIG_PREEMPTION */ #define preempt_enable() \ do { \ @@ -242,6 +274,12 @@ do { \ preempt_count_dec(); \ } while (0) +#define preempt_lazy_enable() \ +do { \ + dec_preempt_lazy_count(); \ + barrier(); \ +} while (0) + #define preempt_enable_notrace() \ do { \ barrier(); \ @@ -282,6 +320,9 @@ do { \ #define preempt_enable_notrace() barrier() #define preemptible() 0 +#define preempt_lazy_disable() barrier() +#define preempt_lazy_enable() barrier() + #endif /* CONFIG_PREEMPT_COUNT */ #ifdef MODULE @@ -300,7 +341,7 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ - if (tif_need_resched()) \ + if (tif_need_resched_now()) \ set_preempt_need_resched(); \ } while (0) @@ -416,9 +457,58 @@ extern void migrate_enable(void); #else -static inline void migrate_disable(void) { } -static inline void migrate_enable(void) { } +static inline void migrate_disable(void) +{ + preempt_lazy_disable(); +} + +static inline void migrate_enable(void) +{ + preempt_lazy_enable(); +} #endif /* CONFIG_SMP */ +/** + * preempt_disable_nested - Disable preemption inside a normally preempt disabled section + * + * Use for code which requires preemption protection inside a critical + * section which has preemption disabled implicitly on non-PREEMPT_RT + * enabled kernels, by e.g.: + * - holding a spinlock/rwlock + * - soft interrupt context + * - regular interrupt handlers + * + * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft + * interrupt context and regular interrupt handlers are preemptible and + * only prevent migration. preempt_disable_nested() ensures that preemption + * is disabled for cases which require CPU local serialization even on + * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP. + * + * The use cases are code sequences which are not serialized by a + * particular lock instance, e.g.: + * - seqcount write side critical sections where the seqcount is not + * associated to a particular lock and therefore the automatic + * protection mechanism does not work. This prevents a live lock + * against a preempting high priority reader. + * - RMW per CPU variable updates like vmstat. + */ +/* Macro to avoid header recursion hell vs. 
lockdep */ +#define preempt_disable_nested() \ +do { \ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) \ + preempt_disable(); \ + else \ + lockdep_assert_preemption_disabled(); \ +} while (0) + +/** + * preempt_enable_nested - Undo the effect of preempt_disable_nested() + */ +static __always_inline void preempt_enable_nested(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); +} + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index cf7d666ab1f8e..f88ec15f83dcc 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -169,7 +169,11 @@ extern void __printk_safe_exit(void); #define printk_deferred_enter __printk_safe_enter #define printk_deferred_exit __printk_safe_exit +extern void printk_prefer_direct_enter(void); +extern void printk_prefer_direct_exit(void); + extern bool pr_flush(int timeout_ms, bool reset_on_progress); +extern void try_block_console_kthreads(int timeout_ms); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -221,11 +225,23 @@ static inline void printk_deferred_exit(void) { } +static inline void printk_prefer_direct_enter(void) +{ +} + +static inline void printk_prefer_direct_exit(void) +{ +} + static inline bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } +static inline void try_block_console_kthreads(int timeout_ms) +{ +} + static inline int printk_ratelimit(void) { return 0; diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 8f416c5e929ea..c0ef596f340b5 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -1,7 +1,7 @@ #ifndef __LINUX_RWLOCK_H #define __LINUX_RWLOCK_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d82d6d326701..e1623b3001c5b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2038,6 +2038,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } +#ifdef CONFIG_PREEMPT_LAZY +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) +{ + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); +} + +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) +{ + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); +} + +static inline int need_resched_lazy(void) +{ + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +} + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +#else +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } +static inline int need_resched_lazy(void) { return 0; } + +static inline int need_resched_now(void) +{ + return test_thread_flag(TIF_NEED_RESCHED); +} + +#endif + /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. 
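preempt_disable_nested()/preempt_enable_nested() above replace the open-coded IS_ENABLED(CONFIG_PREEMPT_RT) checks, as the fs/dcache.c hunk earlier in this series shows. A hedged sketch of a seqcount write side using them; the my_stats structure and function are entirely hypothetical:

/*
 * Hedged sketch of a seqcount write side using the new helpers; my_stats
 * is hypothetical. On !PREEMPT_RT the spinlock already disables
 * preemption and preempt_disable_nested() merely asserts that; on
 * PREEMPT_RT it really disables preemption so a preempting reader cannot
 * live-lock against the interrupted write section.
 */
struct my_stats {
        spinlock_t      lock;
        seqcount_t      seq;
        u64             packets;
};

static void my_stats_add(struct my_stats *s, u64 n)
{
        spin_lock(&s->lock);
        preempt_disable_nested();
        write_seqcount_begin(&s->seq);
        s->packets += n;
        write_seqcount_end(&s->seq);
        preempt_enable_nested();
        spin_unlock(&s->lock);
}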
The return diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 16e3d75a324c7..ee1f719a21678 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -7,6 +7,7 @@ #ifndef _LINUX_SERIAL_8250_H #define _LINUX_SERIAL_8250_H +#include #include #include #include @@ -125,6 +126,8 @@ struct uart_8250_port { #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA unsigned char msr_saved_flags; + atomic_t console_printing; + struct uart_8250_dma *dma; const struct uart_8250_ops *ops; @@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_8250_port *up); void serial8250_set_defaults(struct uart_8250_port *up); void serial8250_console_write(struct uart_8250_port *up, const char *s, unsigned int count); +void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s, + unsigned int count); int serial8250_console_setup(struct uart_port *port, char *options, bool probe); int serial8250_console_exit(struct uart_port *port); diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 5c0c5174155d0..1341f7d62da44 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SPINLOCK_H #define __LINUX_SPINLOCK_H +#define __LINUX_INSIDE_SPINLOCK_H /* * include/linux/spinlock.h - generic spinlock/rwlock declarations @@ -492,4 +493,5 @@ int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, void free_bucket_spinlocks(spinlock_t *locks); +#undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 51fa0dab68c4d..89eb6f4c659c7 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_API_SMP_H #define __LINUX_SPINLOCK_API_SMP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index b8ba00ccccdeb..819aeba1c87e6 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_API_UP_H #define __LINUX_SPINLOCK_API_UP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index 835aedaf68acd..61c49b16f69ab 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -2,7 +2,7 @@ #ifndef __LINUX_SPINLOCK_RT_H #define __LINUX_SPINLOCK_RT_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H #error Do not include directly. 
Use spinlock.h #endif diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 16521074b6f7c..c87204247592f 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_UP_H #define __LINUX_SPINLOCK_UP_H -#ifndef __LINUX_SPINLOCK_H +#ifndef __LINUX_INSIDE_SPINLOCK_H # error "please don't include this file directly" #endif diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9f392ec76f2bb..779e0e96b9cb0 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -177,7 +177,17 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti clear_ti_thread_flag(task_thread_info(t), TIF_##fl) #endif /* !CONFIG_GENERIC_ENTRY */ -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#ifdef CONFIG_PREEMPT_LAZY +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY)) +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY) + +#else +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) +#define tif_need_resched_lazy() 0 +#endif #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 20749bd9db718..224bf60d6563c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -70,6 +70,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + unsigned char preempt_lazy_count; }; #define TRACE_EVENT_TYPE_MAX \ @@ -159,9 +160,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, unsigned int trace_ctx) { entry->preempt_count = trace_ctx & 0xff; + entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; entry->pid = current->pid; entry->type = type; - entry->flags = trace_ctx >> 16; + entry->flags = trace_ctx >> 24; } unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); @@ -172,7 +174,13 @@ enum trace_flag_type { TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, +#ifdef CONFIG_PREEMPT_LAZY + TRACE_FLAG_PREEMPT_RESCHED = 0x00, + TRACE_FLAG_NEED_RESCHED_LAZY = 0x20, +#else + TRACE_FLAG_NEED_RESCHED_LAZY = 0x00, TRACE_FLAG_PREEMPT_RESCHED = 0x20, +#endif TRACE_FLAG_NMI = 0x40, TRACE_FLAG_BH_OFF = 0x80, }; diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index 6ad4e9032d538..ffe48e69b3f3a 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -8,7 +8,7 @@ * * Key points : * - * - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP. + * - Use a seqcount on 32-bit * - The whole thing is a no-op on 64-bit architectures. * * Usage constraints: @@ -20,7 +20,8 @@ * writer and also spin forever. * * 3) Write side must use the _irqsave() variant if other writers, or a reader, - * can be invoked from an IRQ context. + * can be invoked from an IRQ context. On 64bit systems this variant does not + * disable interrupts. * * 4) If reader fetches several counters, there is no guarantee the whole values * are consistent w.r.t. each other (remember point #2: seqcounts are not @@ -29,11 +30,6 @@ * 5) Readers are allowed to sleep or be preempted/interrupted: they perform * pure reads. 
* - * 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats - * might be updated from a hardirq or softirq context (remember point #1: - * seqcounts are not used for UP kernels). 32-bit UP stat readers could read - * corrupted 64-bit values otherwise. - * * Usage : * * Stats producer (writer) should use following template granted it already got @@ -66,7 +62,7 @@ #include struct u64_stats_sync { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) +#if BITS_PER_LONG == 32 seqcount_t seq; #endif }; @@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p) local64_inc(&p->v); } -#else +static inline void u64_stats_init(struct u64_stats_sync *syncp) { } +static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { } +static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { } +static inline unsigned long __u64_stats_irqsave(void) { return 0; } +static inline void __u64_stats_irqrestore(unsigned long flags) { } +static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) +{ + return 0; +} +static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) +{ + return false; +} + +#else /* 64 bit */ typedef struct { u64 v; @@ -123,122 +134,82 @@ static inline void u64_stats_inc(u64_stats_t *p) { p->v++; } -#endif -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) -#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) -#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { + seqcount_init(&syncp->seq); } -#endif -static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) +static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); write_seqcount_begin(&syncp->seq); -#endif } -static inline void u64_stats_update_end(struct u64_stats_sync *syncp) +static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) write_seqcount_end(&syncp->seq); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); -#endif + preempt_enable_nested(); } -static inline unsigned long -u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +static inline unsigned long __u64_stats_irqsave(void) { - unsigned long flags = 0; + unsigned long flags; -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); - else - local_irq_save(flags); - write_seqcount_begin(&syncp->seq); -#endif + local_irq_save(flags); return flags; } -static inline void -u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, - unsigned long flags) +static inline void __u64_stats_irqrestore(unsigned long flags) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - write_seqcount_end(&syncp->seq); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); - else - local_irq_restore(flags); -#endif + local_irq_restore(flags); } static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) return read_seqcount_begin(&syncp->seq); -#else - return 0; -#endif +} + +static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int 
start) +{ + return read_seqcount_retry(&syncp->seq, start); +} +#endif /* !64 bit */ + +static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) +{ + __u64_stats_update_begin(syncp); +} + +static inline void u64_stats_update_end(struct u64_stats_sync *syncp) +{ + __u64_stats_update_end(syncp); +} + +static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) +{ + unsigned long flags = __u64_stats_irqsave(); + + __u64_stats_update_begin(syncp); + return flags; +} + +static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) +{ + __u64_stats_update_end(syncp); + __u64_stats_irqrestore(flags); } static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) - preempt_disable(); -#endif return __u64_stats_fetch_begin(syncp); } -static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, - unsigned int start) -{ -#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) - return read_seqcount_retry(&syncp->seq, start); -#else - return false; -#endif -} - static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { -#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) - preempt_enable(); -#endif - return __u64_stats_fetch_retry(syncp, start); -} - -/* - * In case irq handlers can update u64 counters, readers can use following helpers - * - SMP 32bit arches use seqcount protection, irq safe. - * - UP 32bit must disable irqs. - * - 64bit have no problem atomically reading u64 values, irq safe. - */ -static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) -{ -#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) - preempt_disable(); -#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) - local_irq_disable(); -#endif - return __u64_stats_fetch_begin(syncp); -} - -static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, - unsigned int start) -{ -#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) - preempt_enable(); -#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) - local_irq_enable(); -#endif return __u64_stats_fetch_retry(syncp, start); } diff --git a/init/Kconfig b/init/Kconfig index 532362fcfe31f..08ec5f25e6642 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1574,6 +1574,10 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. 
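After the u64_stats_sync rework above, the _irq fetch helpers are gone and readers always pair u64_stats_fetch_begin() with u64_stats_fetch_retry(); on 64-bit both collapse to no-ops, on 32-bit they are seqcount reads. A hedged sketch of the resulting writer/reader pairing, with my_pcpu_stats and both functions entirely hypothetical:

/*
 * Hedged sketch of the writer/reader pairing after the rework; the
 * my_pcpu_stats structure and both functions are hypothetical. The
 * writer is assumed to run in an already serialized context (e.g.
 * softirq), since on 32-bit u64_stats_update_begin() now only
 * asserts/provides the nested preemption protection.
 */
struct my_pcpu_stats {
        u64_stats_t             rx_bytes;
        struct u64_stats_sync   syncp;
};

static void my_stats_rx(struct my_pcpu_stats *stats, unsigned int len)
{
        u64_stats_update_begin(&stats->syncp);
        u64_stats_add(&stats->rx_bytes, len);
        u64_stats_update_end(&stats->syncp);
}

static u64 my_stats_read_rx(const struct my_pcpu_stats *stats)
{
        unsigned int start;
        u64 bytes;

        do {
                start = u64_stats_fetch_begin(&stats->syncp);
                bytes = u64_stats_read(&stats->rx_bytes);
        } while (u64_stats_fetch_retry(&stats->syncp, start));

        return bytes;
}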
+config HAVE_ATOMIC_CONSOLE + bool + default n + config BUG bool "BUG() support" if EXPERT default y diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index c2f1fd95a8214..260c08efeb486 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,5 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only +config HAVE_PREEMPT_LAZY + bool + +config PREEMPT_LAZY + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT + config PREEMPT_NONE_BUILD bool diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 22e7a805c6723..b492e482b63a9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2107,11 +2107,11 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, st = per_cpu_ptr(prog->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&st->syncp); + start = u64_stats_fetch_begin(&st->syncp); tnsecs = u64_stats_read(&st->nsecs); tcnt = u64_stats_read(&st->cnt); tmisses = u64_stats_read(&st->misses); - } while (u64_stats_fetch_retry_irq(&st->syncp, start)); + } while (u64_stats_fetch_retry(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; misses += tmisses; diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 063068a9ea9b3..26b772720b227 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -153,7 +153,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, local_irq_enable_exit_to_user(ti_work); - if (ti_work & _TIF_NEED_RESCHED) + if (ti_work & _TIF_NEED_RESCHED_MASK) schedule(); if (ti_work & _TIF_UPROBE) @@ -381,7 +381,7 @@ void raw_irqentry_exit_cond_resched(void) rcu_irq_exit_check_preempt(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON_ONCE(!on_thread_stack()); - if (need_resched()) + if (should_resched(0)) preempt_schedule_irq(); } } diff --git a/kernel/hung_task.c b/kernel/hung_task.c index bb2354f73dedc..19c9de825d248 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -127,6 +127,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) * complain: */ if (sysctl_hung_task_warnings) { + printk_prefer_direct_enter(); + if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", @@ -142,6 +144,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_all_cpu_backtrace) hung_task_show_all_bt = true; + + printk_prefer_direct_exit(); } touch_nmi_watchdog(); @@ -204,12 +208,17 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } unlock: rcu_read_unlock(); - if (hung_task_show_lock) + if (hung_task_show_lock) { + printk_prefer_direct_enter(); debug_show_all_locks(); + printk_prefer_direct_exit(); + } if (hung_task_show_all_bt) { hung_task_show_all_bt = false; + printk_prefer_direct_enter(); trigger_all_cpu_backtrace(); + printk_prefer_direct_exit(); } if (hung_task_call_panic) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 5db0230aa6b52..476a3fecb8c53 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -705,6 +705,30 @@ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); + /** + * generic_handle_irq_safe - Invoke the handler for a HW irq belonging + * to a domain from any context. + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process context). 
It + * will report an error if not invoked from IRQ context and the irq has been + * marked to enforce IRQ-context only. + */ +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = handle_irq_desc(irq_resolve_mapping(domain, hwirq)); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe); + /** * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging * to a domain. diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index b1292a57c2a53..a6514db7ef58e 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -137,6 +137,15 @@ KERNEL_ATTR_RO(vmcoreinfo); #endif /* CONFIG_CRASH_CORE */ +#if defined(CONFIG_PREEMPT_RT) +static ssize_t realtime_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 1); +} +KERNEL_ATTR_RO(realtime); +#endif + /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -227,6 +236,9 @@ static struct attribute * kernel_attrs[] = { #ifndef CONFIG_TINY_RCU &rcu_expedited_attr.attr, &rcu_normal_attr.attr, +#endif +#ifdef CONFIG_PREEMPT_RT + &realtime_attr.attr, #endif NULL }; diff --git a/kernel/panic.c b/kernel/panic.c index c6eb8f8db0c05..c4e8896e3caba 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -257,7 +257,6 @@ void panic(const char *fmt, ...) panic_smp_self_stop(); console_verbose(); - bust_spinlocks(1); va_start(args, fmt); len = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); @@ -274,6 +273,11 @@ void panic(const char *fmt, ...) dump_stack(); #endif + /* If atomic consoles are available, flush the kernel log. */ + console_flush_on_panic(CONSOLE_ATOMIC_FLUSH_PENDING); + + bust_spinlocks(1); + /* * If kgdb is enabled, give it a chance to run before we stop all * the other CPUs or else we won't be able to debug processes left @@ -297,6 +301,7 @@ void panic(const char *fmt, ...) * unfortunately means it may not be hardened to work in a * panic situation. */ + try_block_console_kthreads(10000); smp_send_stop(); } else { /* @@ -304,6 +309,7 @@ void panic(const char *fmt, ...) * kmsg_dump, we will need architecture dependent extra * works in addition to stopping other CPUs. */ + try_block_console_kthreads(10000); crash_smp_send_stop(); } @@ -604,6 +610,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, { disable_trace_on_warning(); + printk_prefer_direct_enter(); + if (file) pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", raw_smp_processor_id(), current->pid, file, line, @@ -633,6 +641,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, /* Just a warning, don't kill lockdep. 
*/ add_taint(taint, LOCKDEP_STILL_OK); + + printk_prefer_direct_exit(); } #ifndef __WARN_FLAGS diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index d947ca6c84f99..e7d8578860adf 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -20,6 +20,8 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +extern bool block_console_kthreads; + __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a1a81fd9889bb..f1f9ce9b23f60 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,36 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* Number of registered extended console drivers. */ static int nr_ext_console_drivers; +/* + * Used to synchronize printing kthreads against direct printing via + * console_trylock/console_unlock. + * + * Values: + * -1 = console kthreads atomically blocked (via global trylock) + * 0 = no kthread printing, console not locked (via trylock) + * >0 = kthread(s) actively printing + * + * Note: For synchronizing against direct printing via + * console_lock/console_unlock, see the @lock variable in + * struct console. + */ +static atomic_t console_kthreads_active = ATOMIC_INIT(0); + +#define console_kthreads_atomic_tryblock() \ + (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) +#define console_kthreads_atomic_unblock() \ + atomic_cmpxchg(&console_kthreads_active, -1, 0) +#define console_kthreads_atomically_blocked() \ + (atomic_read(&console_kthreads_active) == -1) + +#define console_kthread_printing_tryenter() \ + atomic_inc_unless_negative(&console_kthreads_active) +#define console_kthread_printing_exit() \ + atomic_dec(&console_kthreads_active) + +/* Block console kthreads to avoid processing new messages. */ +bool block_console_kthreads; + /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -271,14 +302,49 @@ static bool panic_in_progress(void) } /* - * This is used for debugging the mess that is the VT code by - * keeping track if we have the console semaphore held. It's - * definitely not the perfect debug tool (we don't know if _WE_ - * hold it and are racing, but it helps tracking those weird code - * paths in the console code where we end up in places I want - * locked without the console semaphore held). + * Tracks whether kthread printers are all blocked. A value of true implies + * that the console is locked via console_lock() or the console is suspended. + * Writing to this variable requires holding @console_sem. */ -static int console_locked, console_suspended; +static bool console_kthreads_blocked; + +/* + * Block all kthread printers from a schedulable context. + * + * Requires holding @console_sem. + */ +static void console_kthreads_block(void) +{ + struct console *con; + + for_each_console(con) { + mutex_lock(&con->lock); + con->blocked = true; + mutex_unlock(&con->lock); + } + + console_kthreads_blocked = true; +} + +/* + * Unblock all kthread printers from a schedulable context. + * + * Requires holding @console_sem. 
+ */ +static void console_kthreads_unblock(void) +{ + struct console *con; + + for_each_console(con) { + mutex_lock(&con->lock); + con->blocked = false; + mutex_unlock(&con->lock); + } + + console_kthreads_blocked = false; +} + +static int console_suspended; /* * Array of consoles built from command line options (console=) @@ -361,7 +427,75 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); +/* + * A flag to signify if printk_activate_kthreads() has already started the + * kthread printers. If true, any later registered consoles must start their + * own kthread directly. The flag is write protected by the console_lock. + */ +static bool printk_kthreads_available; + #ifdef CONFIG_PRINTK +static atomic_t printk_prefer_direct = ATOMIC_INIT(0); + +/** + * printk_prefer_direct_enter - cause printk() calls to attempt direct + * printing to all enabled consoles + * + * Since it is not possible to call into the console printing code from any + * context, there is no guarantee that direct printing will occur. + * + * This globally effects all printk() callers. + * + * Context: Any context. + */ +void printk_prefer_direct_enter(void) +{ + atomic_inc(&printk_prefer_direct); +} + +/** + * printk_prefer_direct_exit - restore printk() behavior + * + * Context: Any context. + */ +void printk_prefer_direct_exit(void) +{ + WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); +} + +/* + * Calling printk() always wakes kthread printers so that they can + * flush the new message to their respective consoles. Also, if direct + * printing is allowed, printk() tries to flush the messages directly. + * + * Direct printing is allowed in situations when the kthreads + * are not available or the system is in a problematic state. + * + * See the implementation about possible races. + */ +static inline bool allow_direct_printing(void) +{ + /* + * Checking kthread availability is a possible race because the + * kthread printers can become permanently disabled during runtime. + * However, doing that requires holding the console_lock, so any + * pending messages will be direct printed by console_unlock(). + */ + if (!printk_kthreads_available) + return true; + + /* + * Prefer direct printing when the system is in a problematic state. + * The context that sets this state will always see the updated value. + * The other contexts do not care. Anyway, direct printing is just a + * best effort. The direct output is only possible when console_lock + * is not already taken and no kthread printers are actively printing. + */ + return (system_state > SYSTEM_RUNNING || + oops_in_progress || + atomic_read(&printk_prefer_direct)); +} + DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -1850,6 +1984,7 @@ static int console_lock_spinning_disable_and_check(void) return 1; } +#if !IS_ENABLED(CONFIG_PREEMPT_RT) /** * console_trylock_spinning - try to get console_lock by busy waiting * @@ -1923,6 +2058,7 @@ static int console_trylock_spinning(void) return 1; } +#endif /* CONFIG_PREEMPT_RT */ /* * Call the specified console driver, asking it to write out the specified @@ -1930,19 +2066,28 @@ static int console_trylock_spinning(void) * dropped, a dropped message will be written out first. 
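The printk_prefer_direct_enter()/printk_prefer_direct_exit() pair introduced above is what the rest of the series wraps around emergency output (hung_task, __warn(), RCU stall warnings, the reboot paths): while the counter is elevated, allow_direct_printing() lets printk() attempt direct console output instead of relying solely on the per-console kthreads. A minimal sketch of the calling pattern; my_report_fatal_error() is an invented example, not taken from the patch:

static void my_report_fatal_error(void)
{
	/*
	 * Prefer direct printing for everything emitted in this window,
	 * in case the printing kthreads can no longer run.
	 */
	printk_prefer_direct_enter();

	pr_emerg("my-driver: unrecoverable hardware fault\n");
	dump_stack();

	printk_prefer_direct_exit();
}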
*/ static void call_console_driver(struct console *con, const char *text, size_t len, - char *dropped_text) + char *dropped_text, bool atomic_printing) { + unsigned long dropped = 0; size_t dropped_len; - if (con->dropped && dropped_text) { + if (dropped_text) + dropped = atomic_long_xchg_relaxed(&con->dropped, 0); + + if (dropped) { dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX, "** %lu printk messages dropped **\n", - con->dropped); - con->dropped = 0; - con->write(con, dropped_text, dropped_len); + dropped); + if (atomic_printing) + con->write_atomic(con, dropped_text, dropped_len); + else + con->write(con, dropped_text, dropped_len); } - con->write(con, text, len); + if (atomic_printing) + con->write_atomic(con, text, len); + else + con->write(con, text, len); } /* @@ -2252,10 +2397,22 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (!in_sched && allow_direct_printing()) { +#if IS_ENABLED(CONFIG_PREEMPT_RT) + /* + * Use the non-spinning trylock since PREEMPT_RT does not + * support console lock handovers. + * + * Direct printing will most likely involve taking spinlocks. + * For PREEMPT_RT, this is only allowed if in a preemptible + * context. + */ + if (preemptible() && console_trylock()) + console_unlock(); +#else /* * The caller may be holding system-critical or - * timing-sensitive locks. Disable preemption during + * timing-sensitive locks. Disable preemption during direct * printing of all remaining records to all consoles so that * this context can return as soon as possible. Hopefully * another printk() caller will take over the printing. @@ -2270,6 +2427,7 @@ asmlinkage int vprintk_emit(int facility, int level, if (console_trylock_spinning()) console_unlock(); preempt_enable(); +#endif } wake_up_klogd(); @@ -2296,8 +2454,80 @@ asmlinkage __visible int _printk(const char *fmt, ...) 
} EXPORT_SYMBOL(_printk); +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE +static void __free_atomic_data(struct console_atomic_data *d) +{ + kfree(d->text); + kfree(d->ext_text); + kfree(d->dropped_text); +} + +static void free_atomic_data(struct console_atomic_data *d) +{ + int count = 1; + int i; + + if (!d) + return; + +#ifdef CONFIG_HAVE_NMI + count = 2; +#endif + + for (i = 0; i < count; i++) + __free_atomic_data(&d[i]); + kfree(d); +} + +static int __alloc_atomic_data(struct console_atomic_data *d, short flags) +{ + d->text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!d->text) + return -1; + + if (flags & CON_EXTENDED) { + d->ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); + if (!d->ext_text) + return -1; + } else { + d->dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); + if (!d->dropped_text) + return -1; + } + + return 0; +} + +static struct console_atomic_data *alloc_atomic_data(short flags) +{ + struct console_atomic_data *d; + int count = 1; + int i; + +#ifdef CONFIG_HAVE_NMI + count = 2; +#endif + + d = kzalloc(sizeof(*d) * count, GFP_KERNEL); + if (!d) + goto err_out; + + for (i = 0; i < count; i++) { + if (__alloc_atomic_data(&d[i], flags) != 0) + goto err_out; + } + + return d; +err_out: + free_atomic_data(d); + return NULL; +} +#endif /* CONFIG_HAVE_ATOMIC_CONSOLE */ + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); +static void printk_start_kthread(struct console *con); + #else /* CONFIG_PRINTK */ #define CONSOLE_LOG_MAX 0 @@ -2308,6 +2538,8 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre #define prb_first_valid_seq(rb) 0 #define prb_next_seq(rb) 0 +#define free_atomic_data(d) + static u64 syslog_seq; static size_t record_print_text(const struct printk_record *r, @@ -2326,11 +2558,13 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, static void console_lock_spinning_enable(void) { } static int console_lock_spinning_disable_and_check(void) { return 0; } static void call_console_driver(struct console *con, const char *text, size_t len, - char *dropped_text) + char *dropped_text, bool atomic_printing) { } static bool suppress_message_printing(int level) { return false; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } +static void printk_start_kthread(struct console *con) { } +static bool allow_direct_printing(void) { return true; } #endif /* CONFIG_PRINTK */ @@ -2549,6 +2783,14 @@ static int console_cpu_notify(unsigned int cpu) /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); + else { + /* + * If a new CPU comes online, the conditions for + * printer_should_wake() may have changed for some + * kthread printer with !CON_ANYTIME. + */ + wake_up_klogd(); + } } return 0; } @@ -2568,7 +2810,7 @@ void console_lock(void) down_console_sem(); if (console_suspended) return; - console_locked = 1; + console_kthreads_block(); console_may_schedule = 1; } EXPORT_SYMBOL(console_lock); @@ -2589,15 +2831,30 @@ int console_trylock(void) up_console_sem(); return 0; } - console_locked = 1; + if (!console_kthreads_atomic_tryblock()) { + up_console_sem(); + return 0; + } console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); +/* + * This is used to help to make sure that certain paths within the VT code are + * running with the console lock held. 
It is definitely not the perfect debug + * tool (it is not known if the VT code is the task holding the console lock), + * but it helps tracking those weird code paths in the console code such as + * when the console is suspended: where the console is not locked but no + * console printing may occur. + * + * Note: This returns true when the console is suspended but is not locked. + * This is intentional because the VT code must consider that situation + * the same as if the console was locked. + */ int is_console_locked(void) { - return console_locked; + return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); } EXPORT_SYMBOL(is_console_locked); @@ -2620,18 +2877,9 @@ static bool abandon_console_lock_in_panic(void) return atomic_read(&panic_cpu) != raw_smp_processor_id(); } -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires the console_lock. - */ -static inline bool console_is_usable(struct console *con) +static inline bool __console_is_usable(short flags) { - if (!(con->flags & CON_ENABLED)) - return false; - - if (!con->write) + if (!(flags & CON_ENABLED)) return false; /* @@ -2640,18 +2888,116 @@ static inline bool console_is_usable(struct console *con) * cope (CON_ANYTIME) don't call them until this CPU is officially up. */ if (!cpu_online(raw_smp_processor_id()) && - !(con->flags & CON_ANYTIME)) + !(flags & CON_ANYTIME)) return false; return true; } +/* + * Check if the given console is currently capable and allowed to print + * records. + * + * Requires holding the console_lock. + */ +static inline bool console_is_usable(struct console *con, bool atomic_printing) +{ + if (atomic_printing) { +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + if (!con->write_atomic) + return false; + if (!con->atomic_data) + return false; +#else + return false; +#endif + } else if (!con->write) { + return false; + } + + return __console_is_usable(con->flags); +} + static void __console_unlock(void) { - console_locked = 0; + /* + * Depending on whether console_lock() or console_trylock() was used, + * appropriately allow the kthread printers to continue. + */ + if (console_kthreads_blocked) + console_kthreads_unblock(); + else + console_kthreads_atomic_unblock(); + + /* + * New records may have arrived while the console was locked. + * Wake the kthread printers to print them. + */ + wake_up_klogd(); + up_console_sem(); } +static u64 read_console_seq(struct console *con) +{ +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + unsigned long flags; + u64 seq2; + u64 seq; + + if (!con->atomic_data) + return con->seq; + + printk_cpu_sync_get_irqsave(flags); + + seq = con->seq; + seq2 = con->atomic_data[0].seq; + if (seq2 > seq) + seq = seq2; +#ifdef CONFIG_HAVE_NMI + seq2 = con->atomic_data[1].seq; + if (seq2 > seq) + seq = seq2; +#endif + + printk_cpu_sync_put_irqrestore(flags); + + return seq; +#else /* CONFIG_HAVE_ATOMIC_CONSOLE */ + return con->seq; +#endif +} + +static void write_console_seq(struct console *con, u64 val, bool atomic_printing) +{ +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + unsigned long flags; + u64 *seq; + + if (!con->atomic_data) { + con->seq = val; + return; + } + + printk_cpu_sync_get_irqsave(flags); + + if (atomic_printing) { + seq = &con->atomic_data[0].seq; +#ifdef CONFIG_HAVE_NMI + if (in_nmi()) + seq = &con->atomic_data[1].seq; +#endif + } else { + seq = &con->seq; + } + *seq = val; + + printk_cpu_sync_put_irqrestore(flags); +#else /* CONFIG_HAVE_ATOMIC_CONSOLE */ + con->seq = val; +#endif +} + /* * Print one record for the given console. 
The record printed is whatever * record is the next available record for the given console. @@ -2664,36 +3010,47 @@ static void __console_unlock(void) * If dropped messages should be printed, @dropped_text is a buffer of size * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL. * + * @atomic_printing specifies if atomic printing should be used. + * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the - * console_lock. Otherwise it is set to false. + * console_lock. Otherwise it is set to false. A NULL pointer may be provided + * to disable allowing the console_lock to be taken over by a printk waiter. * * Returns false if the given console has no next record to print, otherwise * true. * - * Requires the console_lock. + * Requires the console_lock if @handover is non-NULL. + * Requires con->lock otherwise. */ -static bool console_emit_next_record(struct console *con, char *text, char *ext_text, - char *dropped_text, bool *handover) +static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool atomic_printing, + bool *handover) { - static int panic_console_dropped; + static atomic_t panic_console_dropped = ATOMIC_INIT(0); struct printk_info info; struct printk_record r; unsigned long flags; char *write_text; size_t len; + u64 seq; prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - *handover = false; + if (handover) + *handover = false; - if (!prb_read_valid(prb, con->seq, &r)) + seq = read_console_seq(con); + + if (!prb_read_valid(prb, seq, &r)) return false; - if (con->seq != r.info->seq) { - con->dropped += r.info->seq - con->seq; - con->seq = r.info->seq; - if (panic_in_progress() && panic_console_dropped++ > 10) { + if (seq != r.info->seq) { + atomic_long_add((unsigned long)(r.info->seq - seq), &con->dropped); + write_console_seq(con, r.info->seq, atomic_printing); + seq = r.info->seq; + if (panic_in_progress() && + atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { suppress_panic_printk = 1; pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); } @@ -2701,7 +3058,7 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ /* Skip record that has level above the console loglevel. */ if (suppress_message_printing(r.info->level)) { - con->seq++; + write_console_seq(con, seq + 1, atomic_printing); goto skip; } @@ -2715,31 +3072,65 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); } - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. - * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); + if (handover) { + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). 
+ */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); - stop_critical_timings(); /* don't trace print latency */ - call_console_driver(con, write_text, len, dropped_text); - start_critical_timings(); + /* don't trace irqsoff print latency */ + stop_critical_timings(); + } - con->seq++; + call_console_driver(con, write_text, len, dropped_text, atomic_printing); - *handover = console_lock_spinning_disable_and_check(); - printk_safe_exit_irqrestore(flags); + write_console_seq(con, seq + 1, atomic_printing); + + if (handover) { + start_critical_timings(); + *handover = console_lock_spinning_disable_and_check(); + printk_safe_exit_irqrestore(flags); + } skip: return true; } +/* + * Print a record for a given console, but allow another printk() caller to + * take over the console_lock and continue printing. + * + * Requires the console_lock, but depending on @handover after the call, the + * caller may no longer have the console_lock. + * + * See __console_emit_next_record() for argument and return details. + */ +static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, + char *dropped_text, bool *handover) +{ + /* + * Handovers are only supported if threaded printers are atomically + * blocked. The context taking over the console_lock may be atomic. + * + * PREEMPT_RT also does not support handovers because the spinning + * waiter can cause large latencies. + */ + if (!console_kthreads_atomically_blocked() || + IS_ENABLED(CONFIG_PREEMPT_RT)) { + *handover = false; + handover = NULL; + } + + return __console_emit_next_record(con, text, ext_text, dropped_text, false, handover); +} + /* * Print out all remaining records to all consoles. * @@ -2758,8 +3149,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ * were flushed to all usable consoles. A returned false informs the caller * that everything was not flushed (either there were no usable consoles or * another context has taken over printing or it is a panic situation and this - * is not the panic CPU). Regardless the reason, the caller should assume it - * is not useful to immediately try again. + * is not the panic CPU or direct printing is not preferred). Regardless the + * reason, the caller should assume it is not useful to immediately try again. * * Requires the console_lock. */ @@ -2776,24 +3167,26 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove *handover = false; do { + /* Let the kthread printers do the work if they can. */ + if (!allow_direct_printing()) + return false; + any_progress = false; for_each_console(con) { bool progress; - if (!console_is_usable(con)) + if (!console_is_usable(con, false)) continue; any_usable = true; if (con->flags & CON_EXTENDED) { /* Extended consoles do not print "dropped messages". 
*/ - progress = console_emit_next_record(con, &text[0], - &ext_text[0], NULL, - handover); + progress = console_emit_next_record_transferable(con, &text[0], + &ext_text[0], NULL, handover); } else { - progress = console_emit_next_record(con, &text[0], - NULL, &dropped_text[0], - handover); + progress = console_emit_next_record_transferable(con, &text[0], + NULL, &dropped_text[0], handover); } if (*handover) return false; @@ -2818,6 +3211,68 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove return any_usable; } +#if defined(CONFIG_HAVE_ATOMIC_CONSOLE) && defined(CONFIG_PRINTK) +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool atomic_printing); + +static void atomic_console_flush_all(void) +{ + unsigned long flags; + struct console *con; + bool any_progress; + int index = 0; + + if (console_suspended) + return; + +#ifdef CONFIG_HAVE_NMI + if (in_nmi()) + index = 1; +#endif + + printk_cpu_sync_get_irqsave(flags); + + do { + any_progress = false; + + for_each_console(con) { + bool progress; + + if (!console_is_usable(con, true)) + continue; + + if (con->flags & CON_EXTENDED) { + /* Extended consoles do not print "dropped messages". */ + progress = console_emit_next_record(con, + &con->atomic_data->text[index], + &con->atomic_data->ext_text[index], + NULL, + true); + } else { + progress = console_emit_next_record(con, + &con->atomic_data->text[index], + NULL, + &con->atomic_data->dropped_text[index], + true); + } + + if (!progress) + continue; + any_progress = true; + + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); + } + } while (any_progress); + + printk_cpu_sync_put_irqrestore(flags); +} +#else /* CONFIG_HAVE_ATOMIC_CONSOLE && CONFIG_PRINTK */ +#define atomic_console_flush_all() +#endif + /** * console_unlock - unlock the console system * @@ -2908,10 +3363,13 @@ void console_unblank(void) if (oops_in_progress) { if (down_trylock_console_sem() != 0) return; + if (!console_kthreads_atomic_tryblock()) { + up_console_sem(); + return; + } } else console_lock(); - console_locked = 1; console_may_schedule = 0; for_each_console(c) if ((c->flags & CON_ENABLED) && c->unblank) @@ -2930,6 +3388,11 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + if (mode == CONSOLE_ATOMIC_FLUSH_PENDING) { + atomic_console_flush_all(); + return; + } + /* * If someone else is holding the console lock, trylock will fail * and may_schedule may be set. Ignore and proceed to unlock so @@ -2946,7 +3409,7 @@ void console_flush_on_panic(enum con_flush_mode mode) seq = prb_first_valid_seq(prb); for_each_console(c) - c->seq = seq; + write_console_seq(c, seq, false); } console_unlock(); } @@ -3189,16 +3652,27 @@ void register_console(struct console *newcon) if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; - newcon->dropped = 0; + atomic_long_set(&newcon->dropped, 0); + newcon->thread = NULL; + newcon->blocked = true; + mutex_init(&newcon->lock); +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + newcon->atomic_data = NULL; +#endif + if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); - newcon->seq = syslog_seq; + write_console_seq(newcon, syslog_seq, false); mutex_unlock(&syslog_lock); } else { /* Begin with next message. 
*/ - newcon->seq = prb_next_seq(prb); + write_console_seq(newcon, prb_next_seq(prb), false); } + + if (printk_kthreads_available) + printk_start_kthread(newcon); + console_unlock(); console_sysfs_notify(); @@ -3225,6 +3699,7 @@ EXPORT_SYMBOL(register_console); int unregister_console(struct console *console) { + struct task_struct *thd; struct console *con; int res; @@ -3265,9 +3740,26 @@ int unregister_console(struct console *console) console_drivers->flags |= CON_CONSDEV; console->flags &= ~CON_ENABLED; + + /* + * console->thread can only be cleared under the console lock. But + * stopping the thread must be done without the console lock. The + * task that clears @thread is the task that stops the kthread. + */ + thd = console->thread; + console->thread = NULL; + console_unlock(); + + if (thd) + kthread_stop(thd); + console_sysfs_notify(); +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + free_atomic_data(console->atomic_data); +#endif + if (console->exit) res = console->exit(console); @@ -3361,6 +3853,20 @@ static int __init printk_late_init(void) } late_initcall(printk_late_init); +static int __init printk_activate_kthreads(void) +{ + struct console *con; + + console_lock(); + printk_kthreads_available = true; + for_each_console(con) + printk_start_kthread(con); + console_unlock(); + + return 0; +} +early_initcall(printk_activate_kthreads); + #if defined CONFIG_PRINTK /* If @con is specified, only wait for that console. Otherwise wait for all. */ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) @@ -3384,7 +3890,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre for_each_console(c) { if (con && con != c) continue; - if (!console_is_usable(c)) + if (!console_is_usable(c, false)) continue; printk_seq = c->seq; if (printk_seq < seq) @@ -3444,11 +3950,215 @@ bool pr_flush(int timeout_ms, bool reset_on_progress) } EXPORT_SYMBOL(pr_flush); +static void __printk_fallback_preferred_direct(void) +{ + printk_prefer_direct_enter(); + pr_err("falling back to preferred direct printing\n"); + printk_kthreads_available = false; +} + +/* + * Enter preferred direct printing, but never exit. Mark console threads as + * unavailable. The system is then forever in preferred direct printing and + * any printing threads will exit. + * + * Must *not* be called under console_lock. Use + * __printk_fallback_preferred_direct() if already holding console_lock. + */ +static void printk_fallback_preferred_direct(void) +{ + console_lock(); + __printk_fallback_preferred_direct(); + console_unlock(); +} + +/* + * Print a record for a given console, not allowing another printk() caller + * to take over. This is appropriate for contexts that do not have the + * console_lock. + * + * See __console_emit_next_record() for argument and return details. + */ +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool atomic_printing) +{ + return __console_emit_next_record(con, text, ext_text, dropped_text, + atomic_printing, NULL); +} + +static bool printer_should_wake(struct console *con, u64 seq) +{ + short flags; + + if (kthread_should_stop() || !printk_kthreads_available) + return true; + + if (con->blocked || + console_kthreads_atomically_blocked() || + block_console_kthreads || + system_state > SYSTEM_RUNNING || + oops_in_progress) { + return false; + } + + /* + * This is an unsafe read from con->flags, but a false positive is + * not a problem. 
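From a console driver's point of view, the registration changes above mean a console may now provide a ->write_atomic() callback next to ->write(): the per-console kthread uses ->write(), while atomic_console_flush_all() uses ->write_atomic() for CONSOLE_ATOMIC_FLUSH_PENDING flushing when CONFIG_HAVE_ATOMIC_CONSOLE is selected. A hedged sketch of such a registration; the my_uart_* functions are hypothetical placeholders, not from the patch:

static void my_uart_write(struct console *con, const char *s, unsigned int count)
{
	my_uart_tx_buffered(s, count);	/* hypothetical: normal, lock-taking path */
}

static void my_uart_write_atomic(struct console *con, const char *s, unsigned int count)
{
	my_uart_tx_polled(s, count);	/* hypothetical: must not sleep or wait on other CPUs */
}

static struct console my_uart_console = {
	.name		= "myuart",
	.write		= my_uart_write,
	.write_atomic	= my_uart_write_atomic,
	.flags		= CON_PRINTBUFFER,
	.index		= -1,
};

After register_console(&my_uart_console), a "pr/myuart0" kthread is started once printk_kthreads_available is set; the atomic path is reserved for panic-time flushing.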
Worst case it would allow the printer to wake up + * although it is disabled. But the printer will notice that when + * attempting to print and instead go back to sleep. + */ + flags = data_race(READ_ONCE(con->flags)); + + if (!__console_is_usable(flags)) + return false; + + return prb_read_valid(prb, seq, NULL); +} + +static int printk_kthread_func(void *data) +{ + struct console *con = data; + char *dropped_text = NULL; + char *ext_text = NULL; + u64 seq = 0; + char *text; + int error; + +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + if (con->write_atomic) + con->atomic_data = alloc_atomic_data(con->flags); +#endif + + text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!text) { + con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); + printk_fallback_preferred_direct(); + goto out; + } + + if (con->flags & CON_EXTENDED) { + ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); + if (!ext_text) { + con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); + printk_fallback_preferred_direct(); + goto out; + } + } else { + dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); + if (!dropped_text) { + con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); + printk_fallback_preferred_direct(); + goto out; + } + } + + con_printk(KERN_INFO, con, "printing thread started\n"); + + for (;;) { + /* + * Guarantee this task is visible on the waitqueue before + * checking the wake condition. + * + * The full memory barrier within set_current_state() of + * prepare_to_wait_event() pairs with the full memory barrier + * within wq_has_sleeper(). + * + * This pairs with __wake_up_klogd:A. + */ + error = wait_event_interruptible(log_wait, + printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ + + if (kthread_should_stop() || !printk_kthreads_available) + break; + + if (error) + continue; + + error = mutex_lock_interruptible(&con->lock); + if (error) + continue; + + if (con->blocked || + !console_kthread_printing_tryenter()) { + /* Another context has locked the console_lock. */ + mutex_unlock(&con->lock); + continue; + } + + /* + * Although this context has not locked the console_lock, it + * is known that the console_lock is not locked and it is not + * possible for any other context to lock the console_lock. + * Therefore it is safe to read con->flags. + */ + + if (!__console_is_usable(con->flags)) { + console_kthread_printing_exit(); + mutex_unlock(&con->lock); + continue; + } + + /* + * Even though the printk kthread is always preemptible, it is + * still not allowed to call cond_resched() from within + * console drivers. The task may become non-preemptible in the + * console driver call chain. For example, vt_console_print() + * takes a spinlock and then can call into fbcon_redraw(), + * which can conditionally invoke cond_resched(). + */ + console_may_schedule = 0; + console_emit_next_record(con, text, ext_text, dropped_text, false); + + seq = con->seq; + + console_kthread_printing_exit(); + + mutex_unlock(&con->lock); + } + + con_printk(KERN_INFO, con, "printing thread stopped\n"); +out: + kfree(dropped_text); + kfree(ext_text); + kfree(text); + + console_lock(); + /* + * If this kthread is being stopped by another task, con->thread will + * already be NULL. That is fine. The important thing is that it is + * NULL after the kthread exits. + */ + con->thread = NULL; + console_unlock(); + + return 0; +} + +/* Must be called under console_lock. */ +static void printk_start_kthread(struct console *con) +{ + /* + * Do not start a kthread if there is no write() callback. 
The + * kthreads assume the write() callback exists. + */ + if (!con->write) + return; + + con->thread = kthread_run(printk_kthread_func, con, + "pr/%s%d", con->name, con->index); + if (IS_ERR(con->thread)) { + con->thread = NULL; + con_printk(KERN_ERR, con, "unable to start printing thread\n"); + __printk_fallback_preferred_direct(); + return; + } +} + /* * Delayed printk version, for scheduler-internal messages: */ -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_OUTPUT 0x02 +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_DIRECT_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); @@ -3456,10 +4166,14 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_OUTPUT) { + if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { + printk_prefer_direct_enter(); + /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); + + printk_prefer_direct_exit(); } if (pending & PRINTK_PENDING_WAKEUP) @@ -3484,10 +4198,11 @@ static void __wake_up_klogd(int val) * prepare_to_wait_event(), which is called after ___wait_event() adds * the waiter but before it has checked the wait condition. * - * This pairs with devkmsg_read:A and syslog_print:A. + * This pairs with devkmsg_read:A, syslog_print:A, and + * printk_kthread_func:A. */ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ - (val & PRINTK_PENDING_OUTPUT)) { + (val & PRINTK_PENDING_DIRECT_OUTPUT)) { this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } @@ -3505,7 +4220,17 @@ void defer_console_output(void) * New messages may have been added directly to the ringbuffer * using vprintk_store(), so wake any waiters as well. */ - __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); + int val = PRINTK_PENDING_WAKEUP; + + /* + * Make sure that some context will print the messages when direct + * printing is allowed. This happens in situations when the kthreads + * may not be as reliable or perhaps unusable. + */ + if (allow_direct_printing()) + val |= PRINTK_PENDING_DIRECT_OUTPUT; + + __wake_up_klogd(val); } void printk_trigger_flush(void) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index ef0f9a2044da1..caac4de1ea59a 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -8,7 +8,9 @@ #include #include #include +#include #include +#include #include "internal.h" @@ -50,3 +52,33 @@ asmlinkage int vprintk(const char *fmt, va_list args) return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk); + +/** + * try_block_console_kthreads() - Try to block console kthreads and + * make the global console_lock() avaialble + * + * @timeout_ms: The maximum time (in ms) to wait. + * + * Prevent console kthreads from starting processing new messages. Wait + * until the global console_lock() become available. + * + * Context: Can be called in any context. + */ +void try_block_console_kthreads(int timeout_ms) +{ + block_console_kthreads = true; + + /* Do not wait when the console lock could not be safely taken. 
*/ + if (this_cpu_read(printk_context) || in_nmi()) + return; + + while (timeout_ms > 0) { + if (console_trylock()) { + console_unlock(); + return; + } + + udelay(1000); + timeout_ms -= 1; + } +} diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d8e1b270a065f..257cb6f5ea622 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2157,6 +2157,12 @@ static int rcutorture_booster_init(unsigned int cpu) WARN_ON_ONCE(!t); sp.sched_priority = 2; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); +#ifdef CONFIG_PREEMPT_RT + t = per_cpu(timersd, cpu); + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); +#endif } /* Don't allow time recalculation while creating a new task. */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index c3fbbcc09327f..195cad14742dd 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -643,6 +643,7 @@ static void print_cpu_stall(unsigned long gps) * See Documentation/RCU/stallwarn.rst for info on how to debug * RCU CPU stall warnings. */ + printk_prefer_direct_enter(); trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); @@ -677,6 +678,7 @@ static void print_cpu_stall(unsigned long gps) */ set_tsk_need_resched(current); set_preempt_need_resched(); + printk_prefer_direct_exit(); } static void check_cpu_stall(struct rcu_data *rdp) diff --git a/kernel/reboot.c b/kernel/reboot.c index 3c35445bf5ad3..80564ffafabff 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -82,6 +82,7 @@ void kernel_restart_prepare(char *cmd) { blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; + try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } @@ -270,6 +271,7 @@ static void kernel_shutdown_prepare(enum system_states state) blocking_notifier_call_chain(&reboot_notifier_list, (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); system_state = state; + try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } @@ -819,9 +821,11 @@ static int __orderly_reboot(void) ret = run_cmd(reboot_cmd); if (ret) { + printk_prefer_direct_enter(); pr_warn("Failed to start orderly reboot: forcing the issue\n"); emergency_sync(); kernel_restart(NULL); + printk_prefer_direct_exit(); } return ret; @@ -834,6 +838,7 @@ static int __orderly_poweroff(bool force) ret = run_cmd(poweroff_cmd); if (ret && force) { + printk_prefer_direct_enter(); pr_warn("Failed to start orderly shutdown: forcing the issue\n"); /* @@ -843,6 +848,7 @@ static int __orderly_poweroff(bool force) */ emergency_sync(); kernel_power_off(); + printk_prefer_direct_exit(); } return ret; @@ -900,6 +906,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot); */ static void hw_failure_emergency_poweroff_func(struct work_struct *work) { + printk_prefer_direct_enter(); + /* * We have reached here after the emergency shutdown waiting period has * expired. This means orderly_poweroff has not been able to shut off @@ -916,6 +924,8 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work) */ pr_emerg("Hardware protection shutdown failed. 
Trying emergency restart\n"); emergency_restart(); + + printk_prefer_direct_exit(); } static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, @@ -954,11 +964,13 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) { static atomic_t allow_proceed = ATOMIC_INIT(1); + printk_prefer_direct_enter(); + pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); /* Shutdown should be initiated only once. */ if (!atomic_dec_and_test(&allow_proceed)) - return; + goto out; /* * Queue a backup emergency shutdown in the event of @@ -966,6 +978,8 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) */ hw_failure_emergency_poweroff(ms_until_forced); orderly_poweroff(true); +out: + printk_prefer_direct_exit(); } EXPORT_SYMBOL_GPL(hw_protection_shutdown); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ee28253c9ac0c..2ce515d3e6f8d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1046,6 +1046,46 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } +#ifdef CONFIG_PREEMPT_LAZY + +static int tsk_is_polling(struct task_struct *p) +{ +#ifdef TIF_POLLING_NRFLAG + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); +#else + return 0; +#endif +} + +void resched_curr_lazy(struct rq *rq) +{ + struct task_struct *curr = rq->curr; + int cpu; + + if (!sched_feat(PREEMPT_LAZY)) { + resched_curr(rq); + return; + } + + if (test_tsk_need_resched(curr)) + return; + + if (test_tsk_need_resched_lazy(curr)) + return; + + set_tsk_need_resched_lazy(curr); + + cpu = cpu_of(rq); + if (cpu == smp_processor_id()) + return; + + /* NEED_RESCHED_LAZY must be visible before we test polling */ + smp_mb(); + if (!tsk_is_polling(curr)) + smp_send_reschedule(cpu); +} +#endif + void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -2227,6 +2267,7 @@ void migrate_disable(void) preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled = 1; + preempt_lazy_disable(); preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_disable); @@ -2258,6 +2299,7 @@ void migrate_enable(void) barrier(); p->migration_disabled = 0; this_rq()->nr_pinned--; + preempt_lazy_enable(); preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_enable); @@ -3251,6 +3293,70 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, } #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_PREEMPT_RT + +/* + * Consider: + * + * set_special_state(X); + * + * do_things() + * // Somewhere in there is an rtlock that can be contended: + * current_save_and_set_rtlock_wait_state(); + * [...] + * schedule_rtlock(); (A) + * [...] + * current_restore_rtlock_saved_state(); + * + * schedule(); (B) + * + * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an + * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its + * state to X. For things like ptrace (X=TASK_TRACED), the task could have more + * work to do upon acquiring the lock in do_things() before whoever called + * wait_task_inactive() should return. IOW, we have to wait for: + * + * p.saved_state = TASK_RUNNING + * p.__state = X + * + * which implies the task isn't blocked on an RT lock and got to schedule() (B). + * + * Also see comments in ttwu_state_match(). 
+ */ + +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) +{ + unsigned long flags; + bool mismatch; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + mismatch = READ_ONCE(p->__state) != match_state && + READ_ONCE(p->saved_state) != match_state; + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + return mismatch; +} +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, + bool *wait) +{ + if (READ_ONCE(p->__state) == match_state) + return true; + if (READ_ONCE(p->saved_state) != match_state) + return false; + *wait = true; + return true; +} +#else +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) +{ + return READ_ONCE(p->__state) != match_state; +} +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, + bool *wait) +{ + return READ_ONCE(p->__state) == match_state; +} +#endif + /* * wait_task_inactive - wait for a thread to unschedule. * @@ -3269,7 +3375,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, */ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) { - int running, queued; + bool running, wait; struct rq_flags rf; unsigned long ncsw; struct rq *rq; @@ -3295,7 +3401,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state * is actually now running somewhere else! */ while (task_running(rq, p)) { - if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) + if (match_state && state_mismatch(p, match_state)) return 0; cpu_relax(); } @@ -3308,10 +3414,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state rq = task_rq_lock(p, &rf); trace_sched_wait_task(p); running = task_running(rq, p); - queued = task_on_rq_queued(p); + wait = task_on_rq_queued(p); ncsw = 0; - if (!match_state || READ_ONCE(p->__state) == match_state) + + if (!match_state || state_match(p, match_state, &wait)) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + task_rq_unlock(rq, p, &rf); /* @@ -3340,7 +3448,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state * running right now), it's preempted, and we should * yield - it could be a while. */ - if (unlikely(queued)) { + if (unlikely(wait)) { ktime_t to = NSEC_PER_SEC / HZ; set_current_state(TASK_UNINTERRUPTIBLE); @@ -4589,6 +4697,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->on_cpu = 0; #endif init_task_preempt_count(p); +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(p)->preempt_lazy_count = 0; +#endif #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); @@ -6457,6 +6568,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); + clear_tsk_need_resched_lazy(prev); clear_preempt_need_resched(); #ifdef CONFIG_SCHED_DEBUG rq->last_seen_need_resched_ns = 0; @@ -6671,6 +6783,30 @@ static void __sched notrace preempt_schedule_common(void) } while (need_resched()); } +#ifdef CONFIG_PREEMPT_LAZY +/* + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since it is + * set by an RT task. Otherwise we try to avoid being scheduled out as long as + * the preempt_lazy_count counter is > 0. 
+ */ +static __always_inline int preemptible_lazy(void) +{ + if (test_thread_flag(TIF_NEED_RESCHED)) + return 1; + if (current_thread_info()->preempt_lazy_count) + return 0; + return 1; +} + +#else + +static inline int preemptible_lazy(void) +{ + return 1; +} + +#endif + #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption @@ -6684,6 +6820,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) */ if (likely(!preemptible())) return; + if (!preemptible_lazy()) + return; preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); @@ -6731,6 +6869,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) if (likely(!preemptible())) return; + if (!preemptible_lazy()) + return; + do { /* * Because the function tracer can trace preempt_count_sub() @@ -8988,7 +9129,9 @@ void __init init_idle(struct task_struct *idle, int cpu) /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); - +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(idle)->preempt_lazy_count = 0; +#endif /* * The idle tasks have their own, simple scheduling class: */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 914096c5b1ae1..3cb55e6ede337 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4576,7 +4576,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. @@ -4600,7 +4600,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); } static void @@ -4746,7 +4746,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. */ if (queued) { - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); return; } /* @@ -4895,7 +4895,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); } static __always_inline @@ -5646,7 +5646,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (task_current(rq, p)) - resched_curr(rq); + resched_curr_lazy(rq); return; } hrtick_start(rq, delta); @@ -7307,7 +7307,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: - resched_curr(rq); + resched_curr_lazy(rq); /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved @@ -11454,7 +11454,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. 
*/ swap(curr->vruntime, se->vruntime); - resched_curr(rq); + resched_curr_lazy(rq); } se->vruntime -= cfs_rq->min_vruntime; @@ -11481,7 +11481,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (task_current(rq, p)) { if (p->prio > oldprio) - resched_curr(rq); + resched_curr_lazy(rq); } else check_preempt_curr(rq, p, 0); } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index ee7f23c76bd33..e13090e33f3c4 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) #ifdef CONFIG_PREEMPT_RT SCHED_FEAT(TTWU_QUEUE, false) +# ifdef CONFIG_PREEMPT_LAZY +SCHED_FEAT(PREEMPT_LAZY, true) +# endif #else /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e26688d387aeb..5b889de29e3c9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2356,6 +2356,15 @@ extern void reweight_task(struct task_struct *p, int prio); extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); +#ifdef CONFIG_PREEMPT_LAZY +extern void resched_curr_lazy(struct rq *rq); +#else +static inline void resched_curr_lazy(struct rq *rq) +{ + resched_curr(rq); +} +#endif + extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); diff --git a/kernel/signal.c b/kernel/signal.c index 6f86fda5e432a..139b965e4fafc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2297,13 +2297,13 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, /* * Don't want to allow preemption here, because * sys_ptrace() needs this task to be inactive. - * - * XXX: implement read_unlock_no_resched(). */ - preempt_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); read_unlock(&tasklist_lock); cgroup_enter_frozen(); - preempt_enable_no_resched(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable_no_resched(); freezable_schedule(); cgroup_leave_frozen(true); diff --git a/kernel/softirq.c b/kernel/softirq.c index c8a6913c067d9..ab1fe34326bab 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -637,6 +637,24 @@ static inline void tick_irq_exit(void) #endif } +#ifdef CONFIG_PREEMPT_RT +DEFINE_PER_CPU(struct task_struct *, timersd); +DEFINE_PER_CPU(unsigned long, pending_timer_softirq); + +static void wake_timersd(void) +{ + struct task_struct *tsk = __this_cpu_read(timersd); + + if (tsk) + wake_up_process(tsk); +} + +#else + +static inline void wake_timersd(void) { } + +#endif + static inline void __irq_exit_rcu(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED @@ -646,8 +664,13 @@ static inline void __irq_exit_rcu(void) #endif account_hardirq_exit(current); preempt_count_sub(HARDIRQ_OFFSET); - if (!in_interrupt() && local_softirq_pending()) - invoke_softirq(); + if (!in_interrupt()) { + if (local_softirq_pending()) + invoke_softirq(); + + if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers()) + wake_timersd(); + } tick_irq_exit(); } @@ -976,12 +999,70 @@ static struct smp_hotplug_thread softirq_threads = { .thread_comm = "ksoftirqd/%u", }; +#ifdef CONFIG_PREEMPT_RT +static void timersd_setup(unsigned int cpu) +{ + sched_set_fifo_low(current); +} + +static int timersd_should_run(unsigned int cpu) +{ + return local_pending_timers(); +} + +static void run_timersd(unsigned int cpu) +{ + unsigned int timer_si; + + ksoftirqd_run_begin(); + + timer_si = local_pending_timers(); + __this_cpu_write(pending_timer_softirq, 0); + 
or_softirq_pending(timer_si); + + __do_softirq(); + + ksoftirqd_run_end(); +} + +static void raise_ktimers_thread(unsigned int nr) +{ + trace_softirq_raise(nr); + __this_cpu_or(pending_timer_softirq, 1 << nr); +} + +void raise_hrtimer_softirq(void) +{ + raise_ktimers_thread(HRTIMER_SOFTIRQ); +} + +void raise_timer_softirq(void) +{ + unsigned long flags; + + local_irq_save(flags); + raise_ktimers_thread(TIMER_SOFTIRQ); + wake_timersd(); + local_irq_restore(flags); +} + +static struct smp_hotplug_thread timer_threads = { + .store = &timersd, + .setup = timersd_setup, + .thread_should_run = timersd_should_run, + .thread_fn = run_timersd, + .thread_comm = "ktimers/%u", +}; +#endif + static __init int spawn_ksoftirqd(void) { cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, takeover_tasklets); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); - +#ifdef CONFIG_PREEMPT_RT + BUG_ON(smpboot_register_percpu_thread(&timer_threads)); +#endif return 0; } early_initcall(spawn_ksoftirqd); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 23af5eca11b14..b0b4e44dd0968 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; - raise_softirq_irqoff(HRTIMER_SOFTIRQ); + raise_hrtimer_softirq(); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); @@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void) if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; - raise_softirq_irqoff(HRTIMER_SOFTIRQ); + raise_hrtimer_softirq(); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b0e3c9205946f..133e4160ed54b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -779,7 +779,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static inline bool local_timer_softirq_pending(void) { - return local_softirq_pending() & BIT(TIMER_SOFTIRQ); + return local_pending_timers() & BIT(TIMER_SOFTIRQ); } static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 717fcb9fb14aa..e6219da89933d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1822,7 +1822,7 @@ static void run_local_timers(void) if (time_before(jiffies, base->next_expiry)) return; } - raise_softirq(TIMER_SOFTIRQ); + raise_timer_softirq(); } /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cc65887b31bd9..1d01756752676 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2640,11 +2640,19 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) trace_flags |= TRACE_FLAG_BH_OFF; - if (tif_need_resched()) + if (tif_need_resched_now()) trace_flags |= TRACE_FLAG_NEED_RESCHED; +#ifdef CONFIG_PREEMPT_LAZY + /* Run out of bits. 
Share the LAZY and PREEMPT_RESCHED */ + if (need_resched_lazy()) + trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; +#else if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; - return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | +#endif + + return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | + (preempt_lazy_count() & 0xff) << 16 | (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; } @@ -4230,15 +4238,17 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off/BH-disabled\n" - "# | / _----=> need-resched \n" - "# || / _---=> hardirq/softirq \n" - "# ||| / _--=> preempt-depth \n" - "# |||| / _-=> migrate-disable \n" - "# ||||| / delay \n" - "# cmd pid |||||| time | caller \n" - "# \\ / |||||| \\ | / \n"); + seq_puts(m, "# _--------=> CPU# \n" + "# / _-------=> irqs-off/BH-disabled\n" + "# | / _------=> need-resched \n" + "# || / _-----=> need-resched-lazy\n" + "# ||| / _----=> hardirq/softirq \n" + "# |||| / _---=> preempt-depth \n" + "# ||||| / _--=> preempt-lazy-depth\n" + "# |||||| / _-=> migrate-disable \n" + "# ||||||| / delay \n" + "# cmd pid |||||||| time | caller \n" + "# \\ / |||||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) @@ -4272,14 +4282,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); - seq_printf(m, "# %.*s|||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); + seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space); + seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space); + seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); + seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); + seq_printf(m, "# %.*s|||||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); } void diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0356cae0cf74e..585380a3db753 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -193,6 +193,7 @@ static int trace_define_common_fields(void) /* Holds both preempt_count and migrate_disable */ __common_field(unsigned char, preempt_count); __common_field(int, pid); + __common_field(unsigned char, preempt_lazy_count); return ret; } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 67f47ea27921d..de58eaaf1ac7a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { char hardsoft_irq; char need_resched; + char need_resched_lazy; char irqs_off; int hardirq; 
int softirq; @@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED)) { +#ifndef CONFIG_PREEMPT_LAZY case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: need_resched = 'N'; break; +#endif case TRACE_FLAG_NEED_RESCHED: need_resched = 'n'; break; +#ifndef CONFIG_PREEMPT_LAZY case TRACE_FLAG_PREEMPT_RESCHED: need_resched = 'p'; break; +#endif default: need_resched = '.'; break; } + need_resched_lazy = + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; + hardsoft_irq = (nmi && hardirq) ? 'Z' : nmi ? 'z' : @@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) softirq ? 's' : '.' ; - trace_seq_printf(s, "%c%c%c", - irqs_off, need_resched, hardsoft_irq); + trace_seq_printf(s, "%c%c%c%c", + irqs_off, need_resched, need_resched_lazy, + hardsoft_irq); if (entry->preempt_count & 0xf) trace_seq_printf(s, "%x", entry->preempt_count & 0xf); else trace_seq_putc(s, '.'); + if (entry->preempt_lazy_count) + trace_seq_printf(s, "%x", entry->preempt_lazy_count); + else + trace_seq_putc(s, '.'); + if (entry->preempt_count & 0xf0) trace_seq_printf(s, "%x", entry->preempt_count >> 4); else diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8e61f21e7e33e..41596c415111b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -424,6 +424,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* Start period for the next softlockup warning. */ update_report_ts(); + printk_prefer_direct_enter(); + pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -442,6 +444,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); + + printk_prefer_direct_exit(); } return HRTIMER_RESTART; diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b1582ca..701f35f0e2d44 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,6 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; + printk_prefer_direct_enter(); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); print_modules(); @@ -155,6 +157,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + printk_prefer_direct_exit(); + __this_cpu_write(hard_watchdog_warn, true); return; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cb131fad117cc..c65e69bf4eebb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -811,6 +811,9 @@ config ARCH_HAS_DEBUG_VM_PGTABLE An architecture should select this when it can successfully build and run DEBUG_VM_PGTABLE. 
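The tracing hunks above widen the saved context word so the lazy state fits: bits 0-3 keep the preempt depth, bits 4-7 the migrate-disable depth, bits 16-23 carry the new preempt-lazy depth, and the flag bits move up to bit 24, which is why the latency header gains the extra need-resched-lazy and preempt-lazy-depth columns. A minimal userspace sketch of that packing follows; the helper name pack_trace_ctx() is illustrative and not part of the patch.

#include <stdio.h>

/* Illustrative only: mirrors the bit layout returned by the modified
 * tracing_gen_ctx_irq_test() above. Capping the preempt and
 * migrate-disable depths at 0xf matches the min_t() calls in the patch. */
static unsigned int pack_trace_ctx(unsigned int trace_flags,
                                   unsigned int preempt_count,
                                   unsigned int lazy_count,
                                   unsigned int migrate_disable)
{
        unsigned int pc = preempt_count & 0xff;

        if (pc > 0xf)
                pc = 0xf;
        if (migrate_disable > 0xf)
                migrate_disable = 0xf;

        return (trace_flags << 24) | ((lazy_count & 0xff) << 16) |
               (migrate_disable << 4) | pc;
}

int main(void)
{
        unsigned int ctx = pack_trace_ctx(0x04, 1, 2, 0);  /* arbitrary sample values */

        printf("flags=%#x lazy=%u migrate=%u preempt=%u\n",
               ctx >> 24, (ctx >> 16) & 0xff, (ctx >> 4) & 0xf, ctx & 0xf);
        return 0;
}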
+config DEBUG_VM_IRQSOFF + def_bool DEBUG_VM && !PREEMPT_RT + config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 05cccbcf1661a..83332fefa6f42 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -70,6 +70,7 @@ bool fprop_new_period(struct fprop_global *p, int periods) */ if (events <= 1) return false; + preempt_disable_nested(); write_seqcount_begin(&p->sequence); if (periods < 64) events -= events >> periods; @@ -77,6 +78,7 @@ bool fprop_new_period(struct fprop_global *p, int periods) percpu_counter_add(&p->events, -events); p->period += periods; write_seqcount_end(&p->sequence); + preempt_enable_nested(); return true; } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3c1853a9d1c09..ffaba68e6a290 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -750,37 +750,42 @@ static int __init debug_boot_weak_hash_enable(char *str) } early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); -static DEFINE_STATIC_KEY_FALSE(filled_random_ptr_key); +static bool filled_random_ptr_key; +static siphash_key_t ptr_key __read_mostly; +static void fill_ptr_key_workfn(struct work_struct *work); +static DECLARE_DELAYED_WORK(fill_ptr_key_work, fill_ptr_key_workfn); -static void enable_ptr_key_workfn(struct work_struct *work) +static void fill_ptr_key_workfn(struct work_struct *work) { - static_branch_enable(&filled_random_ptr_key); + if (!rng_is_initialized()) { + queue_delayed_work(system_unbound_wq, &fill_ptr_key_work, HZ * 2); + return; + } + + get_random_bytes(&ptr_key, sizeof(ptr_key)); + + /* Pairs with smp_rmb() before reading ptr_key. */ + smp_wmb(); + WRITE_ONCE(filled_random_ptr_key, true); } +static int __init vsprintf_init_hashval(void) +{ + fill_ptr_key_workfn(NULL); + return 0; +} +subsys_initcall(vsprintf_init_hashval) + /* Maps a pointer to a 32 bit unique identifier. */ static inline int __ptr_to_hashval(const void *ptr, unsigned long *hashval_out) { - static siphash_key_t ptr_key __read_mostly; unsigned long hashval; - if (!static_branch_likely(&filled_random_ptr_key)) { - static bool filled = false; - static DEFINE_SPINLOCK(filling); - static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); - unsigned long flags; - - if (!system_unbound_wq || !rng_is_initialized() || - !spin_trylock_irqsave(&filling, flags)) - return -EAGAIN; - - if (!filled) { - get_random_bytes(&ptr_key, sizeof(ptr_key)); - queue_work(system_unbound_wq, &enable_ptr_key_work); - filled = true; - } - spin_unlock_irqrestore(&filling, flags); - } + if (!READ_ONCE(filled_random_ptr_key)) + return -EBUSY; + /* Pairs with smp_wmb() after writing ptr_key. */ + smp_rmb(); #ifdef CONFIG_64BIT hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); diff --git a/localversion-rt b/localversion-rt new file mode 100644 index 0000000000000..08b3e75841adc --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ +-rt14 diff --git a/mm/Kconfig b/mm/Kconfig index 0331f1461f81c..3897e924e40f2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -579,6 +579,12 @@ config COMPACTION it and then we would be really interested to hear about that at linux-mm@kvack.org. 
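The vsprintf.c hunk above drops the static branch and trylock dance in favour of a plain boolean published with an smp_wmb()/smp_rmb() pair: the work function fills ptr_key, issues the write barrier, then sets filled_random_ptr_key, while readers check the flag and only then issue the read barrier before using the key. A rough userspace analogue using C11 release/acquire atomics is sketched below; fill_key() and hash_ptr() are made-up names and the xor merely stands in for siphash.

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

static unsigned char key[16];
static atomic_bool key_filled;

/* Writer: initialize the key once, then publish it. The release store is the
 * rough counterpart of smp_wmb() + WRITE_ONCE(filled_random_ptr_key, true). */
static void fill_key(void)
{
        memset(key, 0xa5, sizeof(key));   /* stand-in for get_random_bytes() */
        atomic_store_explicit(&key_filled, 1, memory_order_release);
}

/* Reader: refuse to hash until the key is visible. The acquire load is the
 * rough counterpart of READ_ONCE(filled_random_ptr_key) + smp_rmb(). */
static int hash_ptr(const void *ptr, unsigned long *hashval)
{
        if (!atomic_load_explicit(&key_filled, memory_order_acquire))
                return -1;                /* like the -EBUSY case above */

        *hashval = (unsigned long)ptr ^ key[0];   /* stand-in for siphash */
        return 0;
}

int main(void)
{
        unsigned long h;

        if (hash_ptr(&h, &h) < 0)
                printf("key not ready yet\n");
        fill_key();
        if (hash_ptr(&h, &h) == 0)
                printf("hashed: %#lx\n", h);
        return 0;
}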
+config COMPACT_UNEVICTABLE_DEFAULT + int + depends on COMPACTION + default 0 if PREEMPT_RT + default 1 + # # support for free page reporting config PAGE_REPORTING diff --git a/mm/compaction.c b/mm/compaction.c index 640fa76228dd9..10561cb1aaad9 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1727,11 +1727,7 @@ typedef enum { * Allow userspace to control policy on scanning the unevictable LRU for * compactable pages. */ -#ifdef CONFIG_PREEMPT_RT -int sysctl_compact_unevictable_allowed __read_mostly = 0; -#else -int sysctl_compact_unevictable_allowed __read_mostly = 1; -#endif +int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT; static inline void update_fast_start_pfn(struct compact_control *cc, unsigned long pfn) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b69979c9ced5c..d35b6fa560f0a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -597,25 +597,18 @@ static u64 flush_next_time; */ static void memcg_stats_lock(void) { -#ifdef CONFIG_PREEMPT_RT - preempt_disable(); -#else - VM_BUG_ON(!irqs_disabled()); -#endif + preempt_disable_nested(); + VM_WARN_ON_IRQS_ENABLED(); } static void __memcg_stats_lock(void) { -#ifdef CONFIG_PREEMPT_RT - preempt_disable(); -#endif + preempt_disable_nested(); } static void memcg_stats_unlock(void) { -#ifdef CONFIG_PREEMPT_RT - preempt_enable(); -#endif + preempt_enable_nested(); } static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) @@ -715,7 +708,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, * interrupt context while other caller need to have disabled interrupt. */ __memcg_stats_lock(); - if (IS_ENABLED(CONFIG_DEBUG_VM) && !IS_ENABLED(CONFIG_PREEMPT_RT)) { + if (IS_ENABLED(CONFIG_DEBUG_VM)) { switch (idx) { case NR_ANON_MAPPED: case NR_FILE_MAPPED: @@ -725,7 +718,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, WARN_ON_ONCE(!in_task()); break; default: - WARN_ON_ONCE(!irqs_disabled()); + VM_WARN_ON_IRQS_ENABLED(); } } diff --git a/mm/slub.c b/mm/slub.c index 4b98dff9be8e3..59173fa5901a0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -50,7 +50,7 @@ * 1. slab_mutex (Global Mutex) * 2. node->list_lock (Spinlock) * 3. kmem_cache->cpu_slab->lock (Local lock) - * 4. slab_lock(slab) (Only on some arches or for debugging) + * 4. slab_lock(slab) (Only on some arches) * 5. object_map_lock (Only for debugging) * * slab_mutex @@ -64,8 +64,9 @@ * The slab_lock is a wrapper around the page lock, thus it is a bit * spinlock. * - * The slab_lock is only used for debugging and on arches that do not - * have the ability to do a cmpxchg_double. It only protects: + * The slab_lock is only used on arches that do not have the ability + * to do a cmpxchg_double. It only protects: + * * A. slab->freelist -> List of free objects in a slab * B. slab->inuse -> Number of objects in use * C. slab->objects -> Number of objects in slab @@ -94,15 +95,20 @@ * allocating a long series of objects that fill up slabs does not require * the list lock. * + * For debug caches, all allocations are forced to go through a list_lock + * protected region to serialize against concurrent validation. + * * cpu_slab->lock local lock * * This locks protect slowpath manipulation of all kmem_cache_cpu fields * except the stat counters. This is a percpu structure manipulated only by * the local cpu, so the lock protects against being preempted or interrupted * by an irq. Fast path operations rely on lockless operations instead. 
- * On PREEMPT_RT, the local lock does not actually disable irqs (and thus - * prevent the lockless operations), so fastpath operations also need to take - * the lock and are no longer lockless. + * + * On PREEMPT_RT, the local lock neither disables interrupts nor preemption + * which means the lockless fastpath cannot be used as it might interfere with + * an in-progress slow path operations. In this case the local lock is always + * taken but it still utilizes the freelist for the common operations. * * lockless fastpaths * @@ -163,8 +169,9 @@ * function call even on !PREEMPT_RT, use inline preempt_disable() there. */ #ifndef CONFIG_PREEMPT_RT -#define slub_get_cpu_ptr(var) get_cpu_ptr(var) -#define slub_put_cpu_ptr(var) put_cpu_ptr(var) +#define slub_get_cpu_ptr(var) get_cpu_ptr(var) +#define slub_put_cpu_ptr(var) put_cpu_ptr(var) +#define USE_LOCKLESS_FAST_PATH() (true) #else #define slub_get_cpu_ptr(var) \ ({ \ @@ -176,6 +183,7 @@ do { \ (void)(var); \ migrate_enable(); \ } while (0) +#define USE_LOCKLESS_FAST_PATH() (false) #endif #ifdef CONFIG_SLUB_DEBUG @@ -447,7 +455,7 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) /* * Per slab locking using the pagelock */ -static __always_inline void __slab_lock(struct slab *slab) +static __always_inline void slab_lock(struct slab *slab) { struct page *page = slab_page(slab); @@ -455,7 +463,7 @@ static __always_inline void __slab_lock(struct slab *slab) bit_spin_lock(PG_locked, &page->flags); } -static __always_inline void __slab_unlock(struct slab *slab) +static __always_inline void slab_unlock(struct slab *slab) { struct page *page = slab_page(slab); @@ -463,31 +471,19 @@ static __always_inline void __slab_unlock(struct slab *slab) __bit_spin_unlock(PG_locked, &page->flags); } -static __always_inline void slab_lock(struct slab *slab, unsigned long *flags) -{ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - local_irq_save(*flags); - __slab_lock(slab); -} - -static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags) -{ - __slab_unlock(slab); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - local_irq_restore(*flags); -} - /* * Interrupts must be disabled (for the fallback code to work right), typically - * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different - * so we disable interrupts as part of slab_[un]lock(). + * by an _irqsave() lock variant. Except on PREEMPT_RT where these variants do + * not actually disable interrupts. On the other hand the migrate_disable() + * done by bit_spin_lock() is sufficient on PREEMPT_RT thanks to its threaded + * interrupts. 
*/ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, void *freelist_old, unsigned long counters_old, void *freelist_new, unsigned long counters_new, const char *n) { - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + if (USE_LOCKLESS_FAST_PATH()) lockdep_assert_irqs_disabled(); #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) @@ -499,18 +495,15 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab } else #endif { - /* init to 0 to prevent spurious warnings */ - unsigned long flags = 0; - - slab_lock(slab, &flags); + slab_lock(slab); if (slab->freelist == freelist_old && slab->counters == counters_old) { slab->freelist = freelist_new; slab->counters = counters_new; - slab_unlock(slab, &flags); + slab_unlock(slab); return true; } - slab_unlock(slab, &flags); + slab_unlock(slab); } cpu_relax(); @@ -541,16 +534,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, unsigned long flags; local_irq_save(flags); - __slab_lock(slab); + slab_lock(slab); if (slab->freelist == freelist_old && slab->counters == counters_old) { slab->freelist = freelist_new; slab->counters = counters_new; - __slab_unlock(slab); + slab_unlock(slab); local_irq_restore(flags); return true; } - __slab_unlock(slab); + slab_unlock(slab); local_irq_restore(flags); } @@ -566,7 +559,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, #ifdef CONFIG_SLUB_DEBUG static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; -static DEFINE_RAW_SPINLOCK(object_map_lock); +static DEFINE_SPINLOCK(object_map_lock); static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, struct slab *slab) @@ -600,30 +593,6 @@ static bool slab_add_kunit_errors(void) static inline bool slab_add_kunit_errors(void) { return false; } #endif -/* - * Determine a map of objects in use in a slab. - * - * Node listlock must be held to guarantee that the slab does - * not vanish from under us. - */ -static unsigned long *get_map(struct kmem_cache *s, struct slab *slab) - __acquires(&object_map_lock) -{ - VM_BUG_ON(!irqs_disabled()); - - raw_spin_lock(&object_map_lock); - - __fill_map(object_map, s, slab); - - return object_map; -} - -static void put_map(unsigned long *map) __releases(&object_map_lock) -{ - VM_BUG_ON(map != object_map); - raw_spin_unlock(&object_map_lock); -} - static inline unsigned int size_from_object(struct kmem_cache *s) { if (s->flags & SLAB_RED_ZONE) @@ -1329,17 +1298,14 @@ static inline int alloc_consistency_checks(struct kmem_cache *s, } static noinline int alloc_debug_processing(struct kmem_cache *s, - struct slab *slab, - void *object, unsigned long addr) + struct slab *slab, void *object) { if (s->flags & SLAB_CONSISTENCY_CHECKS) { if (!alloc_consistency_checks(s, slab, object)) goto bad; } - /* Success perform special debug activities for allocs */ - if (s->flags & SLAB_STORE_USER) - set_track(s, object, TRACK_ALLOC, addr); + /* Success. 
Perform special debug activities for allocs */ trace(s, slab, object, 1); init_object(s, object, SLUB_RED_ACTIVE); return 1; @@ -1390,63 +1356,6 @@ static inline int free_consistency_checks(struct kmem_cache *s, return 1; } -/* Supports checking bulk free of a constructed freelist */ -static noinline int free_debug_processing( - struct kmem_cache *s, struct slab *slab, - void *head, void *tail, int bulk_cnt, - unsigned long addr) -{ - struct kmem_cache_node *n = get_node(s, slab_nid(slab)); - void *object = head; - int cnt = 0; - unsigned long flags, flags2; - int ret = 0; - depot_stack_handle_t handle = 0; - - if (s->flags & SLAB_STORE_USER) - handle = set_track_prepare(); - - spin_lock_irqsave(&n->list_lock, flags); - slab_lock(slab, &flags2); - - if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!check_slab(s, slab)) - goto out; - } - -next_object: - cnt++; - - if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!free_consistency_checks(s, slab, object, addr)) - goto out; - } - - if (s->flags & SLAB_STORE_USER) - set_track_update(s, object, TRACK_FREE, addr, handle); - trace(s, slab, object, 0); - /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ - init_object(s, object, SLUB_RED_INACTIVE); - - /* Reached end of constructed freelist yet? */ - if (object != tail) { - object = get_freepointer(s, object); - goto next_object; - } - ret = 1; - -out: - if (cnt != bulk_cnt) - slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n", - bulk_cnt, cnt); - - slab_unlock(slab, &flags2); - spin_unlock_irqrestore(&n->list_lock, flags); - if (!ret) - slab_fix(s, "Object at 0x%p not freed", object); - return ret; -} - /* * Parse a block of slub_debug options. Blocks are delimited by ';' * @@ -1666,16 +1575,18 @@ static inline void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {} static inline int alloc_debug_processing(struct kmem_cache *s, - struct slab *slab, void *object, unsigned long addr) { return 0; } + struct slab *slab, void *object) { return 0; } -static inline int free_debug_processing( +static inline void free_debug_processing( struct kmem_cache *s, struct slab *slab, void *head, void *tail, int bulk_cnt, - unsigned long addr) { return 0; } + unsigned long addr) {} static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {} static inline int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { return 1; } +static inline void set_track(struct kmem_cache *s, void *object, + enum track_item alloc, unsigned long addr) {} static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab) {} static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, @@ -1981,11 +1892,13 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) */ slab = alloc_slab_page(alloc_gfp, node, oo); if (unlikely(!slab)) - goto out; + return NULL; stat(s, ORDER_FALLBACK); } slab->objects = oo_objects(oo); + slab->inuse = 0; + slab->frozen = 0; account_slab(slab, oo_order(oo), s, flags); @@ -2012,15 +1925,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) set_freepointer(s, p, NULL); } - slab->inuse = slab->objects; - slab->frozen = 1; - -out: - if (!slab) - return NULL; - - inc_slabs_node(s, slab_nid(slab), slab->objects); - return slab; } @@ -2107,6 +2011,75 @@ static inline void remove_partial(struct kmem_cache_node *n, n->nr_partial--; } +/* + * Called only for kmem_cache_debug() caches instead of acquire_slab(), with a + * slab 
from the n->partial list. Remove only a single object from the slab, do + * the alloc_debug_processing() checks and leave the slab on the list, or move + * it to full list if it was the last free object. + */ +static void *alloc_single_from_partial(struct kmem_cache *s, + struct kmem_cache_node *n, struct slab *slab) +{ + void *object; + + lockdep_assert_held(&n->list_lock); + + object = slab->freelist; + slab->freelist = get_freepointer(s, object); + slab->inuse++; + + if (!alloc_debug_processing(s, slab, object)) { + remove_partial(n, slab); + return NULL; + } + + if (slab->inuse == slab->objects) { + remove_partial(n, slab); + add_full(s, n, slab); + } + + return object; +} + +/* + * Called only for kmem_cache_debug() caches to allocate from a freshly + * allocated slab. Allocate a single object instead of whole freelist + * and put the slab to the partial (or full) list. + */ +static void *alloc_single_from_new_slab(struct kmem_cache *s, + struct slab *slab) +{ + int nid = slab_nid(slab); + struct kmem_cache_node *n = get_node(s, nid); + unsigned long flags; + void *object; + + + object = slab->freelist; + slab->freelist = get_freepointer(s, object); + slab->inuse = 1; + + if (!alloc_debug_processing(s, slab, object)) + /* + * It's not really expected that this would fail on a + * freshly allocated slab, but a concurrent memory + * corruption in theory could cause that. + */ + return NULL; + + spin_lock_irqsave(&n->list_lock, flags); + + if (slab->inuse == slab->objects) + add_full(s, n, slab); + else + add_partial(n, slab, DEACTIVATE_TO_HEAD); + + inc_slabs_node(s, nid, slab->objects); + spin_unlock_irqrestore(&n->list_lock, flags); + + return object; +} + /* * Remove slab from the partial list, freeze it and * return the pointer to the freelist. @@ -2187,6 +2160,13 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, if (!pfmemalloc_match(slab, gfpflags)) continue; + if (kmem_cache_debug(s)) { + object = alloc_single_from_partial(s, n, slab); + if (object) + break; + continue; + } + t = acquire_slab(s, n, slab, object == NULL); if (!t) break; @@ -2793,6 +2773,109 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n) { return atomic_long_read(&n->total_objects); } + +/* Supports checking bulk free of a constructed freelist */ +static noinline void free_debug_processing( + struct kmem_cache *s, struct slab *slab, + void *head, void *tail, int bulk_cnt, + unsigned long addr) +{ + struct kmem_cache_node *n = get_node(s, slab_nid(slab)); + struct slab *slab_free = NULL; + void *object = head; + int cnt = 0; + unsigned long flags; + bool checks_ok = false; + depot_stack_handle_t handle = 0; + + if (s->flags & SLAB_STORE_USER) + handle = set_track_prepare(); + + spin_lock_irqsave(&n->list_lock, flags); + + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!check_slab(s, slab)) + goto out; + } + + if (slab->inuse < bulk_cnt) { + slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n", + slab->inuse, bulk_cnt); + goto out; + } + +next_object: + + if (++cnt > bulk_cnt) + goto out_cnt; + + if (s->flags & SLAB_CONSISTENCY_CHECKS) { + if (!free_consistency_checks(s, slab, object, addr)) + goto out; + } + + if (s->flags & SLAB_STORE_USER) + set_track_update(s, object, TRACK_FREE, addr, handle); + trace(s, slab, object, 0); + /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ + init_object(s, object, SLUB_RED_INACTIVE); + + /* Reached end of constructed freelist yet? 
*/ + if (object != tail) { + object = get_freepointer(s, object); + goto next_object; + } + checks_ok = true; + +out_cnt: + if (cnt != bulk_cnt) + slab_err(s, slab, "Bulk free expected %d objects but found %d\n", + bulk_cnt, cnt); + +out: + if (checks_ok) { + void *prior = slab->freelist; + + /* Perform the actual freeing while we still hold the locks */ + slab->inuse -= cnt; + set_freepointer(s, tail, prior); + slab->freelist = head; + + /* Do we need to remove the slab from full or partial list? */ + if (!prior) { + remove_full(s, n, slab); + } else if (slab->inuse == 0) { + remove_partial(n, slab); + stat(s, FREE_REMOVE_PARTIAL); + } + + /* Do we need to discard the slab or add to partial list? */ + if (slab->inuse == 0) { + slab_free = slab; + } else if (!prior) { + add_partial(n, slab, DEACTIVATE_TO_TAIL); + stat(s, FREE_ADD_PARTIAL); + } + } + + if (slab_free) { + /* + * Update the counters while still holding n->list_lock to + * prevent spurious validation warnings + */ + dec_slabs_node(s, slab_nid(slab_free), slab_free->objects); + } + + spin_unlock_irqrestore(&n->list_lock, flags); + + if (!checks_ok) + slab_fix(s, "Object at 0x%p not freed", object); + + if (slab_free) { + stat(s, FREE_SLAB); + free_slab(s, slab_free); + } +} #endif /* CONFIG_SLUB_DEBUG */ #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS) @@ -3041,36 +3124,52 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, return NULL; } + stat(s, ALLOC_SLAB); + + if (kmem_cache_debug(s)) { + freelist = alloc_single_from_new_slab(s, slab); + + if (unlikely(!freelist)) + goto new_objects; + + if (s->flags & SLAB_STORE_USER) + set_track(s, freelist, TRACK_ALLOC, addr); + + return freelist; + } + /* * No other reference to the slab yet so we can * muck around with it freely without cmpxchg */ freelist = slab->freelist; slab->freelist = NULL; + slab->inuse = slab->objects; + slab->frozen = 1; - stat(s, ALLOC_SLAB); + inc_slabs_node(s, slab_nid(slab), slab->objects); check_new_slab: if (kmem_cache_debug(s)) { - if (!alloc_debug_processing(s, slab, freelist, addr)) { - /* Slab failed checks. Next slab needed */ - goto new_slab; - } else { - /* - * For debug case, we don't load freelist so that all - * allocations go through alloc_debug_processing() - */ - goto return_single; - } + /* + * For debug caches here we had to go through + * alloc_single_from_partial() so just store the tracking info + * and return the object + */ + if (s->flags & SLAB_STORE_USER) + set_track(s, freelist, TRACK_ALLOC, addr); + return freelist; } - if (unlikely(!pfmemalloc_match(slab, gfpflags))) + if (unlikely(!pfmemalloc_match(slab, gfpflags))) { /* * For !pfmemalloc_match() case we don't load freelist so that * we don't make further mismatched allocations easier. */ - goto return_single; + deactivate_slab(s, slab, get_freepointer(s, freelist)); + return freelist; + } retry_load_slab: @@ -3094,11 +3193,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, c->slab = slab; goto load_freelist; - -return_single: - - deactivate_slab(s, slab, get_freepointer(s, freelist)); - return freelist; } /* @@ -3202,14 +3296,8 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_l object = c->freelist; slab = c->slab; - /* - * We cannot use the lockless fastpath on PREEMPT_RT because if a - * slowpath has taken the local_lock_irqsave(), it is not protected - * against a fast path operation in an irq handler. So we need to take - * the slow path which uses local_lock. 
It is still relatively fast if - * there is a suitable cpu freelist. - */ - if (IS_ENABLED(CONFIG_PREEMPT_RT) || + + if (!USE_LOCKLESS_FAST_PATH() || unlikely(!object || !slab || !node_match(slab, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); } else { @@ -3346,9 +3434,10 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, if (kfence_free(head)) return; - if (kmem_cache_debug(s) && - !free_debug_processing(s, slab, head, tail, cnt, addr)) + if (kmem_cache_debug(s)) { + free_debug_processing(s, slab, head, tail, cnt, addr); return; + } do { if (unlikely(n)) { @@ -3468,6 +3557,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s, void *tail_obj = tail ? : head; struct kmem_cache_cpu *c; unsigned long tid; + void **freelist; redo: /* @@ -3482,9 +3572,13 @@ static __always_inline void do_slab_free(struct kmem_cache *s, /* Same with comment on barrier() in slab_alloc_node() */ barrier(); - if (likely(slab == c->slab)) { -#ifndef CONFIG_PREEMPT_RT - void **freelist = READ_ONCE(c->freelist); + if (unlikely(slab != c->slab)) { + __slab_free(s, slab, head, tail_obj, cnt, addr); + return; + } + + if (USE_LOCKLESS_FAST_PATH()) { + freelist = READ_ONCE(c->freelist); set_freepointer(s, tail_obj, freelist); @@ -3496,16 +3590,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s, note_cmpxchg_failure("slab_free", s, tid); goto redo; } -#else /* CONFIG_PREEMPT_RT */ - /* - * We cannot use the lockless fastpath on PREEMPT_RT because if - * a slowpath has taken the local_lock_irqsave(), it is not - * protected against a fast path operation in an irq handler. So - * we need to take the local_lock. We shouldn't simply defer to - * __slab_free() as that wouldn't use the cpu freelist at all. - */ - void **freelist; - + } else { + /* Update the free list under the local lock */ local_lock(&s->cpu_slab->lock); c = this_cpu_ptr(s->cpu_slab); if (unlikely(slab != c->slab)) { @@ -3520,11 +3606,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s, c->tid = next_tid(tid); local_unlock(&s->cpu_slab->lock); -#endif - stat(s, FREE_FASTPATH); - } else - __slab_free(s, slab, head, tail_obj, cnt, addr); - + } + stat(s, FREE_FASTPATH); } static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab, @@ -3941,6 +4024,7 @@ static void early_kmem_cache_node_alloc(int node) slab = new_slab(kmem_cache_node, GFP_NOWAIT, node); BUG_ON(!slab); + inc_slabs_node(kmem_cache_node, slab_nid(slab), slab->objects); if (slab_nid(slab) != node) { pr_err("SLUB: Unable to allocate memory from node %d\n", node); pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n"); @@ -3955,7 +4039,6 @@ static void early_kmem_cache_node_alloc(int node) n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); slab->freelist = get_freepointer(kmem_cache_node, n); slab->inuse = 1; - slab->frozen = 0; kmem_cache_node->node[node] = n; init_kmem_cache_node(n); inc_slabs_node(kmem_cache_node, node, slab->objects); @@ -4242,23 +4325,21 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab, { #ifdef CONFIG_SLUB_DEBUG void *addr = slab_address(slab); - unsigned long flags; - unsigned long *map; void *p; slab_err(s, slab, text, s->name); - slab_lock(slab, &flags); - map = get_map(s, slab); + spin_lock(&object_map_lock); + __fill_map(object_map, s, slab); + for_each_object(p, s, addr, slab->objects) { - if (!test_bit(__obj_to_index(s, addr, p), map)) { + if (!test_bit(__obj_to_index(s, addr, p), object_map)) { pr_err("Object 
0x%p @offset=%tu\n", p, p - addr); print_tracking(s, p); } } - put_map(map); - slab_unlock(slab, &flags); + spin_unlock(&object_map_lock); #endif } @@ -4616,6 +4697,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) if (free == slab->objects) { list_move(&slab->slab_list, &discard); n->nr_partial--; + dec_slabs_node(s, node, slab->objects); } else if (free <= SHRINK_PROMOTE_MAX) list_move(&slab->slab_list, promote + free - 1); } @@ -4631,7 +4713,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) /* Release empty slabs */ list_for_each_entry_safe(slab, t, &discard, slab_list) - discard_slab(s, slab); + free_slab(s, slab); if (slabs_node(s, node)) ret = 1; @@ -4991,12 +5073,9 @@ static void validate_slab(struct kmem_cache *s, struct slab *slab, { void *p; void *addr = slab_address(slab); - unsigned long flags; - - slab_lock(slab, &flags); if (!check_slab(s, slab) || !on_freelist(s, slab, NULL)) - goto unlock; + return; /* Now we know that a valid freelist exists */ __fill_map(obj_map, s, slab); @@ -5007,8 +5086,6 @@ static void validate_slab(struct kmem_cache *s, struct slab *slab, if (!check_object(s, slab, p, val)) break; } -unlock: - slab_unlock(slab, &flags); } static int validate_slab_node(struct kmem_cache *s, @@ -5612,7 +5689,7 @@ static ssize_t validate_store(struct kmem_cache *s, { int ret = -EINVAL; - if (buf[0] == '1') { + if (buf[0] == '1' && kmem_cache_debug(s)) { ret = validate_slab_cache(s); if (ret >= 0) ret = length; diff --git a/mm/vmstat.c b/mm/vmstat.c index 90af9a8572f5a..7a2d73f152304 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -355,8 +355,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, * CPU migrations and preemption potentially corrupts a counter so * disable preemption. */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); x = delta + __this_cpu_read(*p); @@ -368,8 +367,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, } __this_cpu_write(*p, x); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable_nested(); } EXPORT_SYMBOL(__mod_zone_page_state); @@ -393,8 +391,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, } /* See __mod_node_page_state */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); x = delta + __this_cpu_read(*p); @@ -406,8 +403,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, } __this_cpu_write(*p, x); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable_nested(); } EXPORT_SYMBOL(__mod_node_page_state); @@ -441,8 +437,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) s8 v, t; /* See __mod_node_page_state */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); @@ -453,8 +448,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) __this_cpu_write(*p, -overstep); } - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable_nested(); } void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) @@ -466,8 +460,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); /* See __mod_node_page_state */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); @@ -478,8 
+471,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) __this_cpu_write(*p, -overstep); } - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable_nested(); } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) @@ -501,8 +493,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) s8 v, t; /* See __mod_node_page_state */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); @@ -513,8 +504,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) __this_cpu_write(*p, overstep); } - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable_nested(); } void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) @@ -526,8 +516,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); /* See __mod_node_page_state */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable_nested(); v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); @@ -538,8 +527,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) __this_cpu_write(*p, overstep); } - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable_nested(); } void __dec_zone_page_state(struct page *page, enum zone_stat_item item) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 035812b0461cc..ecdb47712d956 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -712,13 +712,13 @@ static void vlan_dev_get_stats64(struct net_device *dev, p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); rxpackets = u64_stats_read(&p->rx_packets); rxbytes = u64_stats_read(&p->rx_bytes); rxmulticast = u64_stats_read(&p->rx_multicast); txpackets = u64_stats_read(&p->tx_packets); txbytes = u64_stats_read(&p->tx_bytes); - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += rxpackets; stats->rx_bytes += rxbytes; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index db4f2641d1cd1..7e2a9fb5786c9 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4899,9 +4899,9 @@ void br_multicast_get_stats(const struct net_bridge *br, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries); mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 6e53dc9914094..f2fc284abab38 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1378,12 +1378,12 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, cpu_stats = per_cpu_ptr(v->stats, i); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); rxpackets = u64_stats_read(&cpu_stats->rx_packets); rxbytes = u64_stats_read(&cpu_stats->rx_bytes); txbytes = u64_stats_read(&cpu_stats->tx_bytes); txpackets = u64_stats_read(&cpu_stats->tx_packets); - } while 
(u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->rx_packets, rxpackets); u64_stats_add(&stats->rx_bytes, rxbytes); diff --git a/net/core/dev.c b/net/core/dev.c index 56c8b0921c9fd..d96506980d2f2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4582,15 +4582,6 @@ static void rps_trigger_softirq(void *data) #endif /* CONFIG_RPS */ -/* Called from hardirq (IPI) context */ -static void trigger_rx_softirq(void *data) -{ - struct softnet_data *sd = data; - - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - smp_store_release(&sd->defer_ipi_scheduled, 0); -} - /* * Check if this softnet_data structure is another cpu one * If yes, queue it to our IPI list and return 1 @@ -6661,6 +6652,30 @@ static void skb_defer_free_flush(struct softnet_data *sd) } } +#ifndef CONFIG_PREEMPT_RT +/* Called from hardirq (IPI) context */ +static void trigger_rx_softirq(void *data) +{ + struct softnet_data *sd = data; + + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + smp_store_release(&sd->defer_ipi_scheduled, 0); +} + +#else + +static void trigger_rx_softirq(struct work_struct *defer_work) +{ + struct softnet_data *sd; + + sd = container_of(defer_work, struct softnet_data, defer_work); + smp_store_release(&sd->defer_ipi_scheduled, 0); + local_bh_disable(); + skb_defer_free_flush(sd); + local_bh_enable(); +} +#endif + static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -10492,12 +10507,12 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, stats = per_cpu_ptr(netstats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); rx_packets = u64_stats_read(&stats->rx_packets); rx_bytes = u64_stats_read(&stats->rx_bytes); tx_packets = u64_stats_read(&stats->tx_packets); tx_bytes = u64_stats_read(&stats->tx_bytes); - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); s->rx_packets += rx_packets; s->rx_bytes += rx_bytes; @@ -11412,7 +11427,11 @@ static int __init net_dev_init(void) INIT_CSD(&sd->csd, rps_trigger_softirq, sd); sd->cpu = i; #endif +#ifndef CONFIG_PREEMPT_RT INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); +#else + INIT_WORK(&sd->defer_work, trigger_rx_softirq); +#endif spin_lock_init(&sd->defer_lock); init_gro_hash(&sd->backlog); diff --git a/net/core/devlink.c b/net/core/devlink.c index b50bcc18b8d9e..cfa6a099457ae 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8268,10 +8268,10 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, cpu_stats = per_cpu_ptr(trap_stats, i); do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); rx_packets = u64_stats_read(&cpu_stats->rx_packets); rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->rx_packets, rx_packets); u64_stats_add(&stats->rx_bytes, rx_bytes); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 75501e1bdd25b..dfcaf61d972c7 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1432,9 +1432,9 @@ static void net_dm_stats_read(struct net_dm_stats *stats) u64 dropped; do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); dropped = 
u64_stats_read(&cpu_stats->dropped); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->dropped, dropped); } @@ -1476,9 +1476,9 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats) u64 dropped; do { - start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + start = u64_stats_fetch_begin(&cpu_stats->syncp); dropped = u64_stats_read(&cpu_stats->dropped); - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); u64_stats_add(&stats->dropped, dropped); } diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index c8d137ef5980e..b71ccaec09914 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -135,10 +135,10 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, u64 bytes, packets; do { - start = u64_stats_fetch_begin_irq(&bcpu->syncp); + start = u64_stats_fetch_begin(&bcpu->syncp); bytes = u64_stats_read(&bcpu->bytes); packets = u64_stats_read(&bcpu->packets); - } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); + } while (u64_stats_fetch_retry(&bcpu->syncp, start)); t_bytes += bytes; t_packets += packets; @@ -162,10 +162,10 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, } do { if (running) - start = u64_stats_fetch_begin_irq(&b->syncp); + start = u64_stats_fetch_begin(&b->syncp); bytes = u64_stats_read(&b->bytes); packets = u64_stats_read(&b->packets); - } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); + } while (running && u64_stats_fetch_retry(&b->syncp, start)); _bstats_update(bstats, bytes, packets); } @@ -187,10 +187,10 @@ static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, u64 bytes, packets; do { - start = u64_stats_fetch_begin_irq(&bcpu->syncp); + start = u64_stats_fetch_begin(&bcpu->syncp); bytes = u64_stats_read(&bcpu->bytes); packets = u64_stats_read(&bcpu->packets); - } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); + } while (u64_stats_fetch_retry(&bcpu->syncp, start)); t_bytes += bytes; t_packets += packets; @@ -201,10 +201,10 @@ static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, } do { if (running) - start = u64_stats_fetch_begin_irq(&b->syncp); + start = u64_stats_fetch_begin(&b->syncp); *ret_bytes = u64_stats_read(&b->bytes); *ret_packets = u64_stats_read(&b->packets); - } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); + } while (running && u64_stats_fetch_retry(&b->syncp, start)); } static int diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 417463da4fac7..505c72a9b1534 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6555,6 +6555,11 @@ nodefer: __kfree_skb(skb); /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU * if we are unlucky enough (this seems very unlikely). 
*/ - if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) + if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { +#ifndef CONFIG_PREEMPT_RT smp_call_function_single_async(cpu, &sd->defer_csd); +#else + schedule_work_on(cpu, &sd->defer_work); +#endif + } } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1291c2431d440..dcc550b871623 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -934,12 +934,12 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, s = per_cpu_ptr(dev->tstats, i); do { - start = u64_stats_fetch_begin_irq(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); tx_packets = u64_stats_read(&s->tx_packets); tx_bytes = u64_stats_read(&s->tx_bytes); rx_packets = u64_stats_read(&s->rx_packets); rx_bytes = u64_stats_read(&s->rx_bytes); - } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); data[0] += tx_packets; data[1] += tx_bytes; data[2] += rx_packets; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3ca0cc4678862..dbae0c79d5cfb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1684,9 +1684,9 @@ u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt, bhptr = per_cpu_ptr(mib, cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); v = *(((u64 *)bhptr) + offt); - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); return v; } diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index b7de5e46fdd8f..f84da849819cc 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1508,13 +1508,13 @@ static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt) pcounters = per_cpu_ptr(slwt->pcpu_counters, i); do { - start = u64_stats_fetch_begin_irq(&pcounters->syncp); + start = u64_stats_fetch_begin(&pcounters->syncp); packets = u64_stats_read(&pcounters->packets); bytes = u64_stats_read(&pcounters->bytes); errors = u64_stats_read(&pcounters->errors); - } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start)); + } while (u64_stats_fetch_retry(&pcounters->syncp, start)); counters.packets += packets; counters.bytes += bytes; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 9d7b238a67372..965b9cb2ef3f2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2316,9 +2316,9 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, u64 value; do { - start = u64_stats_fetch_begin_irq(&rxstats->syncp); + start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->msdu[tid]; - } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } @@ -2384,9 +2384,9 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) u64 value; do { - start = u64_stats_fetch_begin_irq(&rxstats->syncp); + start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->bytes; - } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); + } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b52afe316dc41..35b5f806fdda1 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev, p = per_cpu_ptr(mdev->stats, i); do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); local 
= p->stats; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); stats->rx_packets += local.rx_packets; stats->rx_bytes += local.rx_bytes; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index efab2b06d3732..5a7349002508e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2296,13 +2296,13 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) u64 conns, inpkts, outpkts, inbytes, outbytes; do { - start = u64_stats_fetch_begin_irq(&u->syncp); + start = u64_stats_fetch_begin(&u->syncp); conns = u->cnt.conns; inpkts = u->cnt.inpkts; outpkts = u->cnt.outpkts; inbytes = u->cnt.inbytes; outbytes = u->cnt.outbytes; - } while (u64_stats_fetch_retry_irq(&u->syncp, start)); + } while (u64_stats_fetch_retry(&u->syncp, start)); seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", i, (u64)conns, (u64)inpkts, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 63c70141b3e5d..cde0d9f0d838e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1534,10 +1534,10 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); do { - seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + seq = u64_stats_fetch_begin(&cpu_stats->syncp); pkts = cpu_stats->pkts; bytes = cpu_stats->bytes; - } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + } while (u64_stats_fetch_retry(&cpu_stats->syncp, seq)); total.pkts += pkts; total.bytes += bytes; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 93c596e3b22b9..b05458c170484 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -715,9 +715,9 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, percpu_stats = per_cpu_ptr(dp->stats_percpu, i); do { - start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); + start = u64_stats_fetch_begin(&percpu_stats->syncp); local_stats = *percpu_stats; - } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); + } while (u64_stats_fetch_retry(&percpu_stats->syncp, start)); stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d4a2db0b22998..0a0e4c283f02e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -205,9 +205,9 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma) stats = per_cpu_ptr(ma->masks_usage_stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); counter = stats->usage_cntrs[i]; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); ma->masks_usage_zero_cntr[i] += counter; } @@ -1136,10 +1136,9 @@ void ovs_flow_masks_rebalance(struct flow_table *table) stats = per_cpu_ptr(ma->masks_usage_stats, cpu); do { - start = u64_stats_fetch_begin_irq(&stats->syncp); + start = u64_stats_fetch_begin(&stats->syncp); counter = stats->usage_cntrs[i]; - } while (u64_stats_fetch_retry_irq(&stats->syncp, - start)); + } while (u64_stats_fetch_retry(&stats->syncp, start)); masks_and_count[i].counter += counter; }
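The long run of u64_stats_fetch_begin_irq() -> u64_stats_fetch_begin() conversions above all share the same read side: sample a sequence count, copy the counters, and retry if the count changed, so the reader gets a consistent snapshot without disabling interrupts. A minimal userspace sketch of that read-retry pattern follows; the struct and helper names are invented, and plain seq_cst C11 atomics are used where the kernel helpers rely on finer-grained barriers.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_stats {
        atomic_uint seq;               /* odd while the writer is mid-update */
        atomic_uint_least64_t packets;
        atomic_uint_least64_t bytes;
};

static struct demo_stats st;

static unsigned int demo_fetch_begin(struct demo_stats *s)
{
        unsigned int seq;

        do {
                seq = atomic_load(&s->seq);
        } while (seq & 1);             /* writer in progress: wait for it */
        return seq;
}

static bool demo_fetch_retry(struct demo_stats *s, unsigned int seq)
{
        return atomic_load(&s->seq) != seq;
}

static void *writer(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                atomic_fetch_add(&st.seq, 1);      /* begin: seq goes odd */
                atomic_fetch_add(&st.packets, 1);
                atomic_fetch_add(&st.bytes, 64);
                atomic_fetch_add(&st.seq, 1);      /* end: seq goes even again */
        }
        return NULL;
}

int main(void)
{
        pthread_t t;
        uint_least64_t packets, bytes;
        unsigned int seq;

        pthread_create(&t, NULL, writer, NULL);

        do {                           /* same shape as the fetch/retry loops above */
                seq = demo_fetch_begin(&st);
                packets = atomic_load(&st.packets);
                bytes = atomic_load(&st.bytes);
        } while (demo_fetch_retry(&st, seq));

        pthread_join(t, NULL);
        printf("consistent snapshot: %ju packets, %ju bytes\n",
               (uintmax_t)packets, (uintmax_t)bytes);
        return 0;
}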