diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 67a21b2ef3e4..2fe0b85b693d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1917,6 +1917,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86] + nospectre_v1 [X86] nospectre_v2 [X86] spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86] @@ -2215,6 +2216,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. register save and restore. The kernel will only save legacy floating-point registers on task switch. + nospectre_v1 [X86] Disable mitigations for Spectre Variant 1 + (bounds check bypass). With this option data leaks are + possible in the system. + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 (indirect branch prediction) vulnerability. System may allow data leaks with this option, which is equivalent diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt new file mode 100644 index 000000000000..e8e6ddbbaab4 --- /dev/null +++ b/Documentation/siphash.txt @@ -0,0 +1,100 @@ + SipHash - a short input PRF +----------------------------------------------- +Written by Jason A. Donenfeld + +SipHash is a cryptographically secure PRF -- a keyed hash function -- that +performs very well for short inputs, hence the name. It was designed by +cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended +as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`, +and so forth. + +SipHash takes a secret key filled with randomly generated numbers and either +an input buffer or several input integers. It spits out an integer that is +indistinguishable from random. You may then use that integer as part of secure +sequence numbers, secure cookies, or mask it off for use in a hash table. + +1. Generating a key + +Keys should always be generated from a cryptographically secure source of +random numbers, either using get_random_bytes or get_random_once: + +siphash_key_t key; +get_random_bytes(&key, sizeof(key)); + +If you're not deriving your key from here, you're doing it wrong. + +2. Using the functions + +There are two variants of the function, one that takes a list of integers, and +one that takes a buffer: + +u64 siphash(const void *data, size_t len, const siphash_key_t *key); + +And: + +u64 siphash_1u64(u64, const siphash_key_t *key); +u64 siphash_2u64(u64, u64, const siphash_key_t *key); +u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key); +u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key); +u64 siphash_1u32(u32, const siphash_key_t *key); +u64 siphash_2u32(u32, u32, const siphash_key_t *key); +u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key); +u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key); + +If you pass the generic siphash function something of a constant length, it +will constant fold at compile-time and automatically choose one of the +optimized functions. + +3. Hashtable key function usage: + +struct some_hashtable { + DECLARE_HASHTABLE(hashtable, 8); + siphash_key_t key; +}; + +void init_hashtable(struct some_hashtable *table) +{ + get_random_bytes(&table->key, sizeof(table->key)); +} + +static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input) +{ + return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)]; +} + +You may then iterate like usual over the returned hash bucket. + +4. Security + +SipHash has a very high security margin, with its 128-bit key. So long as the +key is kept secret, it is impossible for an attacker to guess the outputs of +the function, even if being able to observe many outputs, since 2^128 outputs +is significant. + +Linux implements the "2-4" variant of SipHash. + +5. Struct-passing Pitfalls + +Often times the XuY functions will not be large enough, and instead you'll +want to pass a pre-filled struct to siphash. When doing this, it's important +to always ensure the struct has no padding holes. The easiest way to do this +is to simply arrange the members of the struct in descending order of size, +and to use offsetendof() instead of sizeof() for getting the size. For +performance reasons, if possible, it's probably a good thing to align the +struct to the right boundary. Here's an example: + +const struct { + struct in6_addr saddr; + u32 counter; + u16 dport; +} __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .counter = counter, + .dport = dport +}; +u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret); + +6. Resources + +Read the SipHash paper if you're interested in learning more: +https://131002.net/siphash/siphash.pdf diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index 1392b61d6ebe..3073ea800389 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -345,11 +345,15 @@ autosuspend the interface's device. When the usage counter is = 0 then the interface is considered to be idle, and the kernel may autosuspend the device. -Drivers need not be concerned about balancing changes to the usage -counter; the USB core will undo any remaining "get"s when a driver -is unbound from its interface. As a corollary, drivers must not call -any of the usb_autopm_* functions after their disconnect() routine has -returned. +Drivers must be careful to balance their overall changes to the usage +counter. Unbalanced "get"s will remain in effect when a driver is +unbound from its interface, preventing the device from going into +runtime suspend should the interface be bound to a driver again. On +the other hand, drivers are allowed to achieve this balance by calling +the usb_autopm_* functions even after their disconnect routine +has returned -- say from within a work-queue routine -- provided they +retain an active reference to the interface (via usb_get_intf and +usb_put_intf). Drivers using the async routines are responsible for their own synchronization and mutual exclusion. diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e86da4377402..fcafeed97244 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -13,7 +13,7 @@ of a virtual machine. The ioctls belong to three classes - VM ioctls: These query and set attributes that affect an entire virtual machine, for example memory layout. In addition a VM ioctl is used to - create virtual cpus (vcpus). + create virtual cpus (vcpus) and devices. Only run VM ioctls from the same process (address space) that was used to create the VM. @@ -24,6 +24,11 @@ of a virtual machine. The ioctls belong to three classes Only run vcpu ioctls from the same thread that was used to create the vcpu. + - device ioctls: These query and set attributes that control the operation + of a single device. + + device ioctls must be issued from the same process (address space) that + was used to create the VM. 2. File descriptors ------------------- @@ -32,10 +37,11 @@ The kvm API is centered around file descriptors. An initial open("/dev/kvm") obtains a handle to the kvm subsystem; this handle can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this handle will create a VM file descriptor which can be used to issue VM -ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu -and return a file descriptor pointing to it. Finally, ioctls on a vcpu -fd can be used to control the vcpu, including the important task of -actually running guest code. +ioctls. A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will +create a virtual cpu or device and return a file descriptor pointing to +the new resource. Finally, ioctls on a vcpu or device fd can be used +to control the vcpu or device. For vcpus, this includes the important +task of actually running guest code. In general file descriptors can be migrated among processes by means of fork() and the SCM_RIGHTS facility of unix domain socket. These diff --git a/MAINTAINERS b/MAINTAINERS index b2a5243e9d0b..61dbb398b540 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8188,6 +8188,13 @@ F: arch/arm/mach-s3c24xx/mach-bast.c F: arch/arm/mach-s3c24xx/bast-ide.c F: arch/arm/mach-s3c24xx/bast-irq.c +SIPHASH PRF ROUTINES +M: Jason A. Donenfeld +S: Maintained +F: lib/siphash.c +F: lib/test_siphash.c +F: include/linux/siphash.h + TI DAVINCI MACHINE SUPPORT M: Sekhar Nori M: Kevin Hilman diff --git a/Makefile b/Makefile index c2c6a3580e8a..e2d6e0b9f22d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 16 -SUBLEVEL = 71 +SUBLEVEL = 72 EXTRAVERSION = NAME = Museum of Fishiegoodies diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index 50c7718cb84e..0214e1199a06 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -302,6 +302,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + phy-reset-duration = <10>; /* in msecs */ phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; status = "disabled"; }; diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index 6bcae0479049..e338a6d23b77 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -14,30 +14,23 @@ #include "common.h" #include "cpuidle.h" -static atomic_t master = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(master_lock); +static int num_idle_cpus = 0; +static DEFINE_SPINLOCK(cpuidle_lock); static int imx6q_enter_wait(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - if (atomic_inc_return(&master) == num_online_cpus()) { - /* - * With this lock, we prevent other cpu to exit and enter - * this function again and become the master. - */ - if (!spin_trylock(&master_lock)) - goto idle; + spin_lock(&cpuidle_lock); + if (++num_idle_cpus == num_online_cpus()) imx6q_set_lpm(WAIT_UNCLOCKED); - cpu_do_idle(); - imx6q_set_lpm(WAIT_CLOCKED); - spin_unlock(&master_lock); - goto done; - } + spin_unlock(&cpuidle_lock); -idle: cpu_do_idle(); -done: - atomic_dec(&master); + + spin_lock(&cpuidle_lock); + if (num_idle_cpus-- == num_online_cpus()) + imx6q_set_lpm(WAIT_CLOCKED); + spin_unlock(&cpuidle_lock); return index; } diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 37361502d63b..ff9987fd5fd0 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -124,7 +124,7 @@ NESTED(handle_sys, PT_SIZE, sp) subu t1, v0, __NR_O32_Linux move a1, v0 bnez t1, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ + ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ .set pop 1: jal syscall_trace_enter diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index b73a8199f161..1e0ee59c8276 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -82,10 +82,10 @@ struct vdso_data { __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ __u32 icache_log_block_size; /* L1 i-cache log block size */ - __s32 wtom_clock_sec; /* Wall to monotonic clock */ - __s32 wtom_clock_nsec; - struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */ + __s64 wtom_clock_sec; /* Wall to monotonic clock sec */ + struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index e7e8c4db2651..90d1d10eff05 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1274,6 +1274,9 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, goto bad; if (MSR_TM_ACTIVE(msr_hi<<32)) { + /* Trying to start TM on non TM system */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto bad; /* We only recheckpoint on return if we're * transaction. */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 0bcae29336c0..5437dae4f12f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -702,6 +702,11 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; + + /* Trying to start TM on non TM system */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto badframe; + if (__get_user(uc_transact, &uc->uc_link)) goto badframe; if (restore_tm_sigcontexts(regs, &uc->uc_mcontext, diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 6b2b69616e77..8bacb8721961 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -98,7 +98,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * can be used, r7 contains NSEC_PER_SEC. */ - lwz r5,WTOM_CLOCK_SEC(r9) + lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) lwz r6,WTOM_CLOCK_NSEC(r9) /* We now have our offset in r5,r6. We create a fake dependency diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 382021324883..6a6a8495bd55 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -85,7 +85,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * At this point, r4,r5 contain our sec/nsec values. */ - lwa r6,WTOM_CLOCK_SEC(r3) + ld r6,WTOM_CLOCK_SEC(r3) lwa r9,WTOM_CLOCK_NSEC(r3) /* We now have our result in r6,r9. We create a fake dependency diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index cb4c73bfeb48..129e29721835 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -47,6 +47,7 @@ For 32-bit we have the following conventions - kernel is built with */ #include +#include #ifdef CONFIG_X86_64 @@ -195,6 +196,23 @@ For 32-bit we have the following conventions - kernel is built with .byte 0xf1 .endm +/* + * Mitigate Spectre v1 for conditional swapgs code paths. + * + * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to + * prevent a speculative swapgs when coming from kernel space. + * + * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path, + * to prevent the swapgs from getting speculatively skipped when coming from + * user space. + */ +.macro FENCE_SWAPGS_USER_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER +.endm +.macro FENCE_SWAPGS_KERNEL_ENTRY + ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL +.endm + #else /* CONFIG_X86_64 */ /* diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 99137cbe34b8..ba48ab887acf 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -177,29 +177,33 @@ #define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_XSAVEOPT ( 7*32+ 4) /* Optimized Xsave */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+4) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */ #define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_INVPCID_SINGLE ( 7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+11) /* "" Fill RSB on context switches */ -#define X86_FEATURE_USE_IBPB ( 7*32+12) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+13) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+14) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+15) /* "" AMD SSBD implementation */ -#define X86_FEATURE_IBRS ( 7*32+16) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+17) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+18) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+19) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+20) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_ZEN ( 7*32+21) /* "" CPU is AMD family 0x17 (Zen) */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+22) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+23) /* Enhanced IBRS */ -#define X86_FEATURE_RETPOLINE ( 7*32+29) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* "" AMD Retpoline mitigation for Spectre variant 2 */ -/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +#define X86_FEATURE_FENCE_SWAPGS_USER ( 7*32+10) /* "" LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL ( 7*32+11) /* "" LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ + +#define X86_FEATURE_XSAVEOPT ( 7*32+15) /* Optimized Xsave */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ + +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ + +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ /* Virtualization flags: Linux defined, word 8 */ @@ -274,5 +278,6 @@ #define X86_BUG_L1TF X86_BUG(9) /* CPU is affected by L1 Terminal Fault */ #define X86_BUG_MDS X86_BUG(10) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(11) /* CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(12) /* CPU is affected by speculation through SWAPGS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e9eae4fbdde2..92a9ba2595b3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -88,7 +88,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define IOPL_SHIFT 12 #define KVM_PERMILLE_MMU_PAGES 20 -#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 10 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 @@ -363,6 +363,7 @@ struct kvm_vcpu_arch { int mp_state; u64 ia32_misc_enable_msr; bool tpr_access_reporting; + u64 arch_capabilities; /* * Paging state of the vcpu @@ -551,9 +552,9 @@ struct kvm_apic_map { }; struct kvm_arch { - unsigned int n_used_mmu_pages; - unsigned int n_requested_mmu_pages; - unsigned int n_max_mmu_pages; + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; unsigned long mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; @@ -809,8 +810,8 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); +unsigned long kvm_mmu_calculate_mmu_pages(struct kvm *kvm); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index da45f9fc1913..5876b28f9331 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -215,6 +215,9 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + if (call >= PAGE_SIZE / sizeof(hypercall_page[0])) + return -EINVAL; + stac(); asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e325029ff01a..f9e23ee19bcb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -30,6 +30,7 @@ #include #include +static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); @@ -148,17 +149,11 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_STIBP)) x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper spectre mitigation before patching alternatives */ + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); - - /* - * Select proper mitigation for any exposure to the Speculative Store - * Bypass vulnerability. - */ ssb_select_mitigation(); - l1tf_select_mitigation(); - mds_select_mitigation(); arch_smt_update(); @@ -317,6 +312,98 @@ static int __init mds_cmdline(char *str) } early_param("mds", mds_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V1 : " fmt + +enum spectre_v1_mitigation { + SPECTRE_V1_MITIGATION_NONE, + SPECTRE_V1_MITIGATION_AUTO, +}; + +static enum spectre_v1_mitigation spectre_v1_mitigation = + SPECTRE_V1_MITIGATION_AUTO; + +static const char * const spectre_v1_strings[] = { + [SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers", + [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization", +}; + +/* + * Does SMAP provide full mitigation against speculative kernel access to + * userspace? + */ +static bool smap_works_speculatively(void) +{ + if (!boot_cpu_has(X86_FEATURE_SMAP)) + return false; + + /* + * On CPUs which are vulnerable to Meltdown, SMAP does not + * prevent speculative access to user data in the L1 cache. + * Consider SMAP to be non-functional as a mitigation on these + * CPUs. + */ + if (boot_cpu_has(X86_BUG_CPU_MELTDOWN)) + return false; + + return true; +} + +static void __init spectre_v1_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) { + spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; + return; + } + + if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) { + /* + * With Spectre v1, a user can speculatively control either + * path of a conditional swapgs with a user-controlled GS + * value. The mitigation is to add lfences to both code paths. + * + * If FSGSBASE is enabled, the user can put a kernel address in + * GS, in which case SMAP provides no protection. + * + * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the + * FSGSBASE enablement patches have been merged. ] + * + * If FSGSBASE is disabled, the user can only put a user space + * address in GS. That makes an attack harder, but still + * possible if there's no SMAP protection. + */ + if (!smap_works_speculatively()) { + /* + * Mitigation can be provided from SWAPGS itself or + * PTI as the CR3 write in the Meltdown mitigation + * is serializing. + * + * If neither is there, mitigate with an LFENCE to + * stop speculation through swapgs. + */ + if (boot_cpu_has_bug(X86_BUG_SWAPGS) && + !boot_cpu_has(X86_FEATURE_KAISER)) + setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER); + + /* + * Enable lfences in the kernel entry (non-swapgs) + * paths, to prevent user entry from speculatively + * skipping swapgs. + */ + setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL); + } + } + + pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]); +} + +static int __init nospectre_v1_cmdline(char *str) +{ + spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; + return 0; +} +early_param("nospectre_v1", nospectre_v1_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -1210,7 +1297,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr break; case X86_BUG_SPECTRE_V1: - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9d4638d437de..337fcac8c8a0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -813,6 +813,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_L1TF BIT(3) #define NO_MDS BIT(4) #define MSBDS_ONLY BIT(5) +#define NO_SWAPGS BIT(6) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -836,29 +837,37 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), VULNWL_INTEL(CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously + * being documented as such in the APM). But according to AMD, %gs is + * updated non-speculatively, and the issuing of %gs-relative memory + * operands will be blocked until the %gs update completes, which is + * good enough for our purposes. + */ /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), {} }; @@ -895,6 +904,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); } + if (!cpu_matches(NO_SWAPGS)) + setup_force_cpu_bug(X86_BUG_SWAPGS); + if (cpu_matches(NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 6e5feb342ee4..467069f38f21 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -265,14 +265,19 @@ ENDPROC(native_usergs_sysret64) testl $3, CS-RBP(%rsi) je 1f SWAPGS + FENCE_SWAPGS_USER_ENTRY SWITCH_KERNEL_CR3 + jmp 2f +1: + FENCE_SWAPGS_KERNEL_ENTRY +2: /* * irq_count is used to check if a CPU is already on an interrupt stack * or not. While this is essentially redundant with preempt_count it is * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ -1: incl PER_CPU_VAR(irq_count) + incl PER_CPU_VAR(irq_count) cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp CFI_DEF_CFA_REGISTER rsi @@ -337,6 +342,13 @@ ENTRY(save_paranoid) movq %rax, %cr3 2: #endif + /* + * The above doesn't do an unconditional CR3 write, even in the PTI + * case. So do an lfence to prevent GS speculation, regardless of + * whether PTI is enabled. + */ + FENCE_SWAPGS_KERNEL_ENTRY + ret CFI_ENDPROC END(save_paranoid) @@ -1445,10 +1457,27 @@ ENTRY(error_entry) */ SWITCH_KERNEL_CR3 testl $3,CS+8(%rsp) - je error_kernelspace -error_swapgs: + jz .Lerror_kernelspace + + /* + * We entered from user mode or we're pretending to have entered + * from user mode due to an IRET fault. + */ SWAPGS -error_sti: + FENCE_SWAPGS_USER_ENTRY + +.Lerror_entry_from_usermode_after_swapgs: + /* + * We need to tell lockdep that IRQs are off. We can't do this until + * we fix gsbase, and we should do it before enter_from_user_mode + * (which can take locks). + */ + TRACE_IRQS_OFF + ret + +.Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY +.Lerror_entry_done: TRACE_IRQS_OFF ret @@ -1458,28 +1487,46 @@ ENTRY(error_entry) * truncated RIP for IRET exceptions returning to compat mode. Check * for these here too. */ -error_kernelspace: +.Lerror_kernelspace: leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) - je error_bad_iret + je .Lerror_bad_iret movl %ecx,%eax /* zero extend */ cmpq %rax,RIP+8(%rsp) - je bstep_iret + je .Lbstep_iret cmpq $gs_change,RIP+8(%rsp) - je error_swapgs - jmp error_sti + jne .Lerror_entry_done_lfence + + /* + * hack: gs_change can fail with user gsbase. If this happens, fix up + * gsbase and proceed. We'll fix up the exception and land in + * gs_change's error handler with kernel gsbase. + */ + SWAPGS + FENCE_SWAPGS_USER_ENTRY + jmp .Lerror_entry_done -bstep_iret: +.Lbstep_iret: /* Fix truncated RIP */ movq %rcx,RIP+8(%rsp) /* fall through */ -error_bad_iret: +.Lerror_bad_iret: + /* + * We came from an IRET to user mode, so we have user gsbase. + * Switch to kernel gsbase: + */ SWAPGS + FENCE_SWAPGS_USER_ENTRY + + /* + * Pretend that the exception came from user mode: set up pt_regs + * as if we faulted immediately after IRET. + */ mov %rsp,%rdi call fixup_bad_iret mov %rax,%rsp - jmp error_sti + jmp .Lerror_entry_from_usermode_after_swapgs CFI_ENDPROC END(error_entry) @@ -1579,6 +1626,7 @@ ENTRY(nmi) * to switch CR3 here. */ cld + FENCE_SWAPGS_USER_ENTRY movq %rsp, %rdx movq PER_CPU_VAR(kernel_stack), %rsp addq $KERNEL_STACK_OFFSET, %rsp @@ -1624,6 +1672,7 @@ ENTRY(nmi) movq %rax, %cr3 2: #endif + FENCE_SWAPGS_KERNEL_ENTRY call do_nmi #ifdef CONFIG_PAGE_TABLE_ISOLATION diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 608f18c62412..cb6657f35f41 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -494,6 +494,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) unsigned long *sara = stack_addr(regs); ri->ret_addr = (kprobe_opcode_t *) *sara; + ri->fp = sara; /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; @@ -685,26 +686,48 @@ static void __used kretprobe_trampoline_holder(void) NOKPROBE_SYMBOL(kretprobe_trampoline_holder); NOKPROBE_SYMBOL(kretprobe_trampoline); +static struct kprobe kretprobe_kprobe = { + .addr = (void *)kretprobe_trampoline, +}; + /* * Called from kretprobe_trampoline */ __visible __used void *trampoline_handler(struct pt_regs *regs) { + struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; + void *frame_pointer; + bool skipped = false; + + preempt_disable(); + + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kcb = get_kprobe_ctlblk(); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; + /* On x86-64, we use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; #else regs->cs = __KERNEL_CS | get_kernel_rpl(); regs->gs = 0; + /* On x86-32, we use pt_regs->flags for return address holder. */ + frame_pointer = ®s->flags; #endif regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -726,8 +749,25 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + /* + * Return probes must be pushed on this hash list correct + * order (same as return order) so that it can be poped + * correctly. However, if we find it is pushed it incorrect + * order, this means we find a function which should not be + * probed, because the wrong order entry is pushed on the + * path of processing other kretprobe itself. + */ + if (ri->fp != frame_pointer) { + if (!skipped) + pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n"); + skipped = true; + continue; + } orig_ret_address = (unsigned long)ri->ret_addr; + if (skipped) + pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n", + ri->rp->kp.addr); if (orig_ret_address != trampoline_address) /* @@ -745,14 +785,15 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + if (ri->fp != frame_pointer) + continue; orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); } recycle_rp_inst(ri, &empty_rp); @@ -768,6 +809,9 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 074510b5ea77..feca78bf2a6b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -351,6 +351,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, u64 msr = x86_spec_ctrl_base; bool updmsr = false; + lockdep_assert_irqs_disabled(); + /* * If TIF_SSBD is different, select the proper mitigation * method. Note that if SSBD mitigation is disabled or permanentely @@ -402,10 +404,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) void speculation_ctrl_update(unsigned long tif) { + unsigned long flags; + /* Forced update. Make sure all relevant TIF flags are different */ - preempt_disable(); + local_irq_save(flags); __speculation_ctrl_update(~tif, tif); - preempt_enable(); + local_irq_restore(flags); } /* Called from seccomp/prctl update */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 95cd58cdee99..6531ffcb174b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -395,6 +395,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(TSC_ADJUST); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, 10); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->edx = 0; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c81ccc0d8da0..886fb53b4604 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1492,7 +1492,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); @@ -2207,7 +2207,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock */ -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) { LIST_HEAD(invalid_list); @@ -4505,10 +4505,10 @@ int kvm_mmu_module_init(void) /* * Caculate mmu pages needed for kvm. */ -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) +unsigned long kvm_mmu_calculate_mmu_pages(struct kvm *kvm) { - unsigned int nr_mmu_pages; - unsigned int nr_pages = 0; + unsigned long nr_mmu_pages; + unsigned long nr_pages = 0; struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -4518,8 +4518,7 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) nr_pages += memslot->npages; nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; - nr_mmu_pages = max(nr_mmu_pages, - (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); + nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES); return nr_mmu_pages; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 58b2f51fdfdc..f3dc8f614512 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -81,7 +81,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); -static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) +static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm) { if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) return kvm->arch.n_max_mmu_pages - diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bd6b883f3075..00dd9ea13c22 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -433,7 +433,6 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif - u64 arch_capabilities; u64 spec_ctrl; u32 vm_entry_controls_shadow; @@ -2481,12 +2480,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = to_vmx(vcpu)->spec_ctrl; break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated && - !guest_cpuid_has_arch_capabilities(vcpu)) - return 1; - msr_info->data = to_vmx(vcpu)->arch_capabilities; - break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -2636,11 +2629,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated) - return 1; - vmx->arch_capabilities = data; - break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -4583,9 +4571,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); - vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); /* 22.2.1, 20.8.1 */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cab3ca9d3f03..2c0f1ea119d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2089,6 +2089,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_F15H_EX_CFG: break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vcpu->arch.arch_capabilities = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2479,6 +2484,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UCODE_REV: msr_info->data = 0x100000000ULL; break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) + return 1; + msr_info->data = vcpu->arch.arch_capabilities; + break; case MSR_MTRRcap: msr_info->data = 0x500 | KVM_NR_VAR_MTRR; break; @@ -3518,7 +3529,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, } static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, - u32 kvm_nr_mmu_pages) + unsigned long kvm_nr_mmu_pages) { if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; @@ -3532,7 +3543,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return 0; } -static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) +static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) { return kvm->arch.n_max_mmu_pages; } @@ -6957,6 +6968,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { int r; + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, + vcpu->arch.arch_capabilities); vcpu->arch.mtrr_state.have_fixed = 1; r = vcpu_load(vcpu); if (r) diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 7d2c317bd98b..922525c0d25c 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -107,10 +107,14 @@ static int return_address_cb(struct stackframe *frame, void *data) return 1; } +/* + * level == 0 is for the return address from the caller of this function, + * not from this function itself. + */ unsigned long return_address(unsigned level) { struct return_addr_data r = { - .skip = level + 1, + .skip = level, }; walk_stackframe(stack_pointer(NULL), return_address_cb, &r); return r.addr; diff --git a/block/bio.c b/block/bio.c index 4218dab2bb47..3163dac6735d 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1216,8 +1216,11 @@ struct bio *bio_copy_user_iov(struct request_queue *q, } } - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) + if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { + if (!map_data) + __free_page(page); break; + } len -= bytes; offset = 0; diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c index f1ea8e56cd87..7abcfa6b055c 100644 --- a/drivers/acpi/acpica/nsobject.c +++ b/drivers/acpi/acpica/nsobject.c @@ -222,6 +222,10 @@ void acpi_ns_detach_object(struct acpi_namespace_node *node) } } + if (obj_desc->common.type == ACPI_TYPE_REGION) { + acpi_ut_remove_address_range(obj_desc->region.space_id, node); + } + /* Clear the Node entry in all cases */ node->object = NULL; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index df51673d6591..3999b8c6a09d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2113,6 +2113,9 @@ static void setup_format_params(int track) raw_cmd->kernel_data = floppy_track_buffer; raw_cmd->length = 4 * F_SECT_PER_TRACK; + if (!F_SECT_PER_TRACK) + return; + /* allow for about 30ms for data transport per track */ head_shift = (F_SECT_PER_TRACK + 5) / 6; @@ -3233,8 +3236,12 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g, int cnt; /* sanity checking for parameters. */ - if (g->sect <= 0 || - g->head <= 0 || + if ((int)g->sect <= 0 || + (int)g->head <= 0 || + /* check for overflow in max_sector */ + (int)(g->sect * g->head) <= 0 || + /* check for zero in F_SECT_PER_TRACK */ + (unsigned char)((g->sect << 2) >> FD_SIZECODE(g)) == 0 || g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || /* check if reserved bits are set */ (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) @@ -3378,6 +3385,24 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } +static bool valid_floppy_drive_params(const short autodetect[8], + int native_format) +{ + size_t floppy_type_size = ARRAY_SIZE(floppy_type); + size_t i = 0; + + for (i = 0; i < 8; ++i) { + if (autodetect[i] < 0 || + autodetect[i] >= floppy_type_size) + return false; + } + + if (native_format < 0 || native_format >= floppy_type_size) + return false; + + return true; +} + static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long param) { @@ -3504,6 +3529,9 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int SUPBOUND(size, strlen((const char *)outparam) + 1); break; case FDSETDRVPRM: + if (!valid_floppy_drive_params(inparam.dp.autodetect, + inparam.dp.native_format)) + return -EINVAL; *UDP = inparam.dp; break; case FDGETDRVPRM: diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index ab3ea62e5dfc..f98a9207ec7e 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1062,6 +1062,8 @@ static int ace_setup(struct ace_device *ace) return 0; err_read: + /* prevent double queue cleanup */ + ace->gd->queue = NULL; put_disk(ace->gd); err_alloc_disk: blk_cleanup_queue(ace->queue); diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index 0bc8a6a6a148..353ed68db711 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -112,6 +112,9 @@ static int ath_open(struct hci_uart *hu) BT_DBG("hu %p", hu); + if (!hci_uart_has_flow_control(hu)) + return -EOPNOTSUPP; + ath = kzalloc(sizeof(*ath), GFP_KERNEL); if (!ath) return -ENOMEM; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index e00f8f5b5c8e..9ad437053e22 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -261,6 +261,15 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return 0; } +/* Check the underlying device or tty has flow control support */ +bool hci_uart_has_flow_control(struct hci_uart *hu) +{ + if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset) + return true; + + return false; +} + /* ------ LDISC part ------ */ /* hci_uart_tty_open * diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 12df101ca942..58e0bb8c041b 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -90,6 +90,7 @@ int hci_uart_register_proto(struct hci_uart_proto *p); int hci_uart_unregister_proto(struct hci_uart_proto *p); int hci_uart_tx_wakeup(struct hci_uart *hu); int hci_uart_init_ready(struct hci_uart *hu); +bool hci_uart_has_flow_control(struct hci_uart *hu); #ifdef CONFIG_BT_HCIUART_H4 int h4_init(void); diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c index b2239d678d01..f2c092676e01 100644 --- a/drivers/gpio/gpio-adnp.c +++ b/drivers/gpio/gpio-adnp.c @@ -140,8 +140,10 @@ static int adnp_gpio_direction_input(struct gpio_chip *chip, unsigned offset) if (err < 0) goto out; - if (err & BIT(pos)) - err = -EACCES; + if (value & BIT(pos)) { + err = -EPERM; + goto out; + } err = 0; diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index dbcea7941059..3837cbdaf100 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -121,6 +121,7 @@ static int ad_sd_read_reg_raw(struct ad_sigma_delta *sigma_delta, if (sigma_delta->info->has_registers) { data[0] = reg << sigma_delta->info->addr_shift; data[0] |= sigma_delta->info->read_mask; + data[0] |= sigma_delta->comm; spi_message_add_tail(&t[0], &m); } spi_message_add_tail(&t[1], &m); diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 10d5ec213091..2f7ad2538cec 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -702,23 +702,29 @@ static int at91_adc_read_raw(struct iio_dev *idev, ret = wait_event_interruptible_timeout(st->wq_data_avail, st->done, msecs_to_jiffies(1000)); - if (ret == 0) - ret = -ETIMEDOUT; - if (ret < 0) { - mutex_unlock(&st->lock); - return ret; - } - - *val = st->last_value; + /* Disable interrupts, regardless if adc conversion was + * successful or not + */ at91_adc_writel(st, AT91_ADC_CHDR, AT91_ADC_CH(chan->channel)); at91_adc_writel(st, AT91_ADC_IDR, BIT(chan->channel)); - st->last_value = 0; - st->done = false; + if (ret > 0) { + /* a valid conversion took place */ + *val = st->last_value; + st->last_value = 0; + st->done = false; + ret = IIO_VAL_INT; + } else if (ret == 0) { + /* conversion timeout */ + dev_err(&idev->dev, "ADC Channel %d timeout.\n", + chan->channel); + ret = -ETIMEDOUT; + } + mutex_unlock(&st->lock); - return IIO_VAL_INT; + return ret; case IIO_CHAN_INFO_SCALE: *val = st->vref_mv; diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index b4dde8315210..89f695a958e9 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -86,6 +86,7 @@ static ssize_t mcp4725_store_eeprom(struct device *dev, return 0; inoutbuf[0] = 0x60; /* write EEPROM */ + inoutbuf[0] |= data->powerdown ? ((data->powerdown_mode + 1) << 1) : 0; inoutbuf[1] = data->dac_value >> 4; inoutbuf[2] = (data->dac_value & 0xf) << 4; diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 7562531ebf0e..77b19ca8b763 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -836,10 +836,8 @@ int iio_scan_mask_set(struct iio_dev *indio_dev, const unsigned long *mask; unsigned long *trialmask; - trialmask = kmalloc(sizeof(*trialmask)* - BITS_TO_LONGS(indio_dev->masklength), - GFP_KERNEL); - + trialmask = kcalloc(BITS_TO_LONGS(indio_dev->masklength), + sizeof(*trialmask), GFP_KERNEL); if (trialmask == NULL) return -ENOMEM; if (!indio_dev->masklength) { diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 4d1b400ed260..5ea56edcdf42 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1195,12 +1195,12 @@ EXPORT_SYMBOL(iio_device_register); **/ void iio_device_unregister(struct iio_dev *indio_dev) { - mutex_lock(&indio_dev->info_exist_lock); - device_del(&indio_dev->dev); if (indio_dev->chrdev.dev) cdev_del(&indio_dev->chrdev); + + mutex_lock(&indio_dev->info_exist_lock); iio_device_unregister_debugfs(indio_dev); iio_disable_all_buffers(indio_dev); diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0eb141c41416..fb60229bf191 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -579,8 +579,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) unsigned long flags; for (i = 0 ; i < dev->num_ports; i++) { - cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); det = &sriov->alias_guid.ports_guid[i]; + cancel_delayed_work_sync(&det->alias_guid_work); spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); while (!list_empty(&det->cb_list)) { cb_ctx = list_entry(det->cb_list.next, diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index fe1ab5067b5d..cf3af3a3297a 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -78,6 +78,7 @@ Scott Hill shill@gtcocalcomp.com /* Max size of a single report */ #define REPORT_MAX_SIZE 10 +#define MAX_COLLECTION_LEVELS 10 /* Bitmask whether pen is in range */ @@ -224,8 +225,7 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, char maintype = 'x'; char globtype[12]; int indent = 0; - char indentstr[10] = ""; - + char indentstr[MAX_COLLECTION_LEVELS + 1] = { 0 }; dev_dbg(ddev, "======>>>>>>PARSE<<<<<<======\n"); @@ -351,6 +351,13 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, case TAG_MAIN_COL_START: maintype = 'S'; + if (indent == MAX_COLLECTION_LEVELS) { + dev_err(ddev, "Collection level %d would exceed limit of %d\n", + indent + 1, + MAX_COLLECTION_LEVELS); + break; + } + if (data == 0) { dev_dbg(ddev, "======>>>>>> Physical\n"); strcpy(globtype, "Physical"); @@ -370,8 +377,15 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, break; case TAG_MAIN_COL_END: - dev_dbg(ddev, "<<<<<<======\n"); maintype = 'E'; + + if (indent == 0) { + dev_err(ddev, "Collection level already at zero\n"); + break; + } + + dev_dbg(ddev, "<<<<<<======\n"); + indent--; for (x = 0; x < indent; x++) indentstr[x] = '-'; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 48b726f4ad48..006f4f9a3170 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -293,7 +293,7 @@ static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) static void iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; - u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; + u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK; u64 entry; if (!iommu->exclusion_start) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b60eb1cca150..c2a44d2ca5b6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1394,6 +1394,9 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) u32 pmen; unsigned long flags; + if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) + return; + raw_spin_lock_irqsave(&iommu->register_lock, flags); pmen = readl(iommu->reg + DMAR_PMEN_REG); pmen &= ~DMA_PMEN_EPM; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f5f1837dfa5b..b010f9600d87 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1432,6 +1432,36 @@ static bool dm_table_supports_write_same(struct dm_table *t) return true; } +static int device_requires_stable_pages(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && bdi_cap_stable_pages_required(&q->backing_dev_info); +} + +/* + * If any underlying device requires stable pages, a table must require + * them as well. Only targets that support iterate_devices are considered: + * don't want error, zero, etc to require stable pages. + */ +static bool dm_table_requires_stable_pages(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) + return true; + } + + return false; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1473,6 +1503,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_set_integrity(t); + /* + * Some devices don't use blk_integrity but still want stable pages + * because they do their own checksumming. + */ + if (dm_table_requires_stable_pages(t)) + q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + else + q->backing_dev_info.capabilities &= ~BDI_CAP_STABLE_WRITES; + /* * Determine whether or not this queue's I/O timings contribute * to the entropy pool, Only request-based targets use this. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 043ad8d8d700..30ae713aecf8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -756,6 +756,15 @@ static void dec_pending(struct dm_io *io, int error) } } +static void disable_discard(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support DISCARD, disable it */ + limits->max_discard_sectors = 0; + queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue); +} + static void disable_write_same(struct mapped_device *md) { struct queue_limits *limits = dm_get_queue_limits(md); @@ -792,9 +801,14 @@ static void clone_endio(struct bio *bio, int error) } } - if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)) - disable_write_same(md); + if (unlikely(r == -EREMOTEIO)) { + if (bio->bi_rw & REQ_DISCARD && + !bdev_get_queue(bio->bi_bdev)->limits.max_discard_sectors) + disable_discard(md); + else if (bio->bi_rw & REQ_WRITE_SAME && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) + disable_write_same(md); + } free_tio(md, tio); dec_pending(io, error); @@ -996,9 +1010,14 @@ static void dm_done(struct request *clone, int error, bool mapped) r = rq_end_io(tio->ti, clone, error, &tio->info); } - if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) && - !clone->q->limits.max_write_same_sectors)) - disable_write_same(tio->md); + if (unlikely(r == -EREMOTEIO)) { + if (clone->cmd_flags & REQ_DISCARD && + !clone->q->limits.max_discard_sectors) + disable_discard(tio->md); + else if (clone->cmd_flags & REQ_WRITE_SAME && + !clone->q->limits.max_write_same_sectors) + disable_write_same(tio->md); + } if (r <= 0) /* The target wants to complete the I/O */ diff --git a/drivers/media/usb/tlg2300/pd-common.h b/drivers/media/usb/tlg2300/pd-common.h index 9e23ad32d2fe..562a58886ca2 100644 --- a/drivers/media/usb/tlg2300/pd-common.h +++ b/drivers/media/usb/tlg2300/pd-common.h @@ -257,7 +257,7 @@ void set_debug_mode(struct video_device *vfd, int debug_mode); #else #define in_hibernation(pd) (0) #endif -#define get_pm_count(p) (atomic_read(&(p)->interface->pm_usage_cnt)) +#define get_pm_count(p) (atomic_read(&(p)->interface->dev.power.usage_count)) #define log(a, ...) printk(KERN_DEBUG "\t[ %s : %.3d ] "a"\n", \ __func__, __LINE__, ## __VA_ARGS__) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 92303daf44dc..40047a2f4696 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -1538,7 +1538,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, continue; } - if (time_after(jiffies, timeo) && !chip_ready(map, adr)) + /* + * We check "time_after" and "!chip_good" before checking "chip_good" to avoid + * the failure due to scheduling. + */ + if (time_after(jiffies, timeo) && !chip_good(map, adr, datum)) break; if (chip_good(map, adr, datum)) { diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 94c656f5a05d..d4552e2c7189 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1524,7 +1524,7 @@ static void update_stats(int ioaddr, struct net_device *dev) static void set_rx_mode(struct net_device *dev) { int ioaddr = dev->base_addr; - short new_mode; + unsigned short new_mode; if (dev->flags & IFF_PROMISC) { if (corkscrew_debug > 3) diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 90e825e8abfe..02beb18942d2 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -153,11 +153,6 @@ static void dayna_block_input(struct net_device *dev, int count, static void dayna_block_output(struct net_device *dev, int count, const unsigned char *buf, int start_page); -#define memcpy_fromio(a, b, c) memcpy((a), (void *)(b), (c)) -#define memcpy_toio(a, b, c) memcpy((void *)(a), (b), (c)) - -#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c)) - /* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */ static void slow_sane_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page); @@ -244,19 +239,26 @@ static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) static enum mac8390_access __init mac8390_testio(volatile unsigned long membase) { - unsigned long outdata = 0xA5A0B5B0; - unsigned long indata = 0x00000000; + u32 outdata = 0xA5A0B5B0; + u32 indata = 0; + /* Try writing 32 bits */ - memcpy_toio(membase, &outdata, 4); - /* Now compare them */ - if (memcmp_withio(&outdata, membase, 4) == 0) + nubus_writel(outdata, membase); + /* Now read it back */ + indata = nubus_readl(membase); + if (outdata == indata) return ACCESS_32; + + outdata = 0xC5C0D5D0; + indata = 0; + /* Write 16 bit output */ word_memcpy_tocard(membase, &outdata, 4); /* Now read it back */ word_memcpy_fromcard(&indata, membase, 4); if (outdata == indata) return ACCESS_16; + return ACCESS_UNKNOWN; } @@ -742,7 +744,7 @@ static void sane_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page) { unsigned long hdr_start = (ring_page - WD_START_PG)<<8; - memcpy_fromio(hdr, dev->mem_start + hdr_start, 4); + memcpy_fromio(hdr, (void __iomem *)dev->mem_start + hdr_start, 4); /* Fix endianness */ hdr->count = swab16(hdr->count); } @@ -756,13 +758,16 @@ static void sane_block_input(struct net_device *dev, int count, if (xfer_start + count > ei_status.rmem_end) { /* We must wrap the input move. */ int semi_count = ei_status.rmem_end - xfer_start; - memcpy_fromio(skb->data, dev->mem_start + xfer_base, + memcpy_fromio(skb->data, + (void __iomem *)dev->mem_start + xfer_base, semi_count); count -= semi_count; - memcpy_fromio(skb->data + semi_count, ei_status.rmem_start, - count); + memcpy_fromio(skb->data + semi_count, + (void __iomem *)ei_status.rmem_start, count); } else { - memcpy_fromio(skb->data, dev->mem_start + xfer_base, count); + memcpy_fromio(skb->data, + (void __iomem *)dev->mem_start + xfer_base, + count); } } @@ -771,7 +776,7 @@ static void sane_block_output(struct net_device *dev, int count, { long shmem = (start_page - WD_START_PG)<<8; - memcpy_toio(dev->mem_start + shmem, buf, count); + memcpy_toio((void __iomem *)dev->mem_start + shmem, buf, count); } /* dayna block input/output */ diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 4332ebbd7162..39f4b38fd068 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -2381,6 +2381,7 @@ static void *__vxge_hw_blockpool_malloc(struct __vxge_hw_device *devh, u32 size, vxge_os_dma_free(devh->pdev, memblock, &dma_object->acc_handle); status = VXGE_HW_ERR_OUT_OF_MEMORY; + memblock = NULL; goto exit; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 47eba38ae526..75cc676a3239 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -571,8 +570,6 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; - if (skb->protocol == htons(ETH_P_IPV6)) - ipv6_proxy_select_ident(skb); break; default: return -EINVAL; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7a6d2f8c5201..77fcb1bd81bc 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1072,7 +1072,10 @@ int genphy_soft_reset(struct phy_device *phydev) { int ret; - ret = phy_write(phydev, MII_BMCR, BMCR_RESET); + ret = phy_read(phydev, MII_BMCR); + if (ret < 0) + return ret; + ret = phy_write(phydev, MII_BMCR, ret | BMCR_RESET); if (ret < 0) return ret; diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 5dd0fe1635b9..a97b20773508 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -284,7 +284,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) nf_reset(skb); skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(skb, NULL); + ip_select_ident(sock_net(sk), skb, NULL); ip_send_check(iph); ip_local_out(skb); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index b52eabc168a0..f8234b5439f5 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -153,7 +153,7 @@ slhc_init(int rslots, int tslots) void slhc_free(struct slcompress *comp) { - if ( comp == NULLSLCOMPR ) + if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index d6511041c766..8d408cd7c170 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1116,6 +1116,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return -EINVAL; } + if (netdev_has_upper_dev(dev, port_dev)) { + netdev_err(dev, "Device %s is already an upper device of the team interface\n", + portname); + return -EBUSY; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 48ac45f26fa3..1bbcb278e6df 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include @@ -1143,8 +1142,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; } - skb_reset_network_header(skb); - if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -1156,8 +1153,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; case VIRTIO_NET_HDR_GSO_UDP: skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - if (skb->protocol == htons(ETH_P_IPV6)) - ipv6_proxy_select_ident(skb); break; default: tun->dev->stats.rx_frame_errors++; @@ -1187,6 +1182,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } + skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); rxhash = skb_get_hash(skb); diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index d13f25cd70d5..91aebb03841e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -666,7 +666,6 @@ enum rt2x00_state_flags { CONFIG_CHANNEL_HT40, CONFIG_POWERSAVING, CONFIG_HT_DISABLED, - CONFIG_QOS_DISABLED, /* * Mark we currently are sequentially reading TX_STA_FIFO register diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 004dff9b962d..e7c152dd0bef 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -682,18 +682,8 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, rt2x00dev->intf_associated--; rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated); - - clear_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); } - /* - * Check for access point which do not support 802.11e . We have to - * generate data frames sequence number in S/W for such AP, because - * of H/W bug. - */ - if (changes & BSS_CHANGED_QOS && !bss_conf->qos) - set_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags); - /* * When the erp information has changed, we should perform * additional configuration steps. For all other changes we are done. diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 22d49d575d3f..8705092a052e 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -201,15 +201,18 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev, if (!test_bit(REQUIRE_SW_SEQNO, &rt2x00dev->cap_flags)) { /* * rt2800 has a H/W (or F/W) bug, device incorrectly increase - * seqno on retransmited data (non-QOS) frames. To workaround - * the problem let's generate seqno in software if QOS is - * disabled. + * seqno on retransmitted data (non-QOS) and management frames. + * To workaround the problem let's generate seqno in software. + * Except for beacons which are transmitted periodically by H/W + * hence hardware has to assign seqno for them. */ - if (test_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags)) - __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); - else + if (ieee80211_is_beacon(hdr->frame_control)) { + __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); /* H/W will generate sequence number */ return; + } + + __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags); } /* diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f5d1f51101cf..57cc4e7d2bbc 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3514,6 +3514,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9130, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9170, + quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c47 + c57 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9172, quirk_dma_func1_alias); diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 21a6f6d76bbb..3a62c049696e 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -652,6 +652,20 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) add_timer(&erp_action->timer); } +void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag) +{ + unsigned long flags; + struct zfcp_port *port; + + write_lock_irqsave(&adapter->erp_lock, flags); + read_lock(&adapter->port_list_lock); + list_for_each_entry(port, &adapter->port_list, list) + _zfcp_erp_port_forced_reopen(port, clear, dbftag); + read_unlock(&adapter->port_list_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter, int clear, char *id) { @@ -1313,6 +1327,9 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); int lun_status; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL) + continue; if (zsdev->port != port) continue; /* LUN under port of interest */ diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index c5023d6811a8..cfe2a3b2d3b2 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -68,6 +68,8 @@ extern void zfcp_erp_clear_port_status(struct zfcp_port *, u32); extern int zfcp_erp_port_reopen(struct zfcp_port *, int, char *); extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *); extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *); +extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag); extern void zfcp_erp_set_lun_status(struct scsi_device *, u32); extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32); extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index f213786fe96c..9e9f0ad00bef 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -347,6 +347,10 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) struct zfcp_adapter *adapter = zfcp_sdev->port->adapter; int ret = SUCCESS, fc_ret; + if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE)) { + zfcp_erp_port_forced_reopen_all(adapter, 0, "schrh_p"); + zfcp_erp_wait(adapter); + } zfcp_erp_adapter_reopen(adapter, 0, "schrh_1"); zfcp_erp_wait(adapter); fc_ret = fc_block_scsi_eh(scpnt); diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 7d5d66edae87..65826111d996 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -47,17 +47,16 @@ static void smp_task_timedout(unsigned long _task) unsigned long flags; spin_lock_irqsave(&task->task_state_lock, flags); - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->slow_task->completion); + } spin_unlock_irqrestore(&task->task_state_lock, flags); - - complete(&task->slow_task->completion); } static void smp_task_done(struct sas_task *task) { - if (!del_timer(&task->slow_task->timer)) - return; + del_timer(&task->slow_task->timer); complete(&task->slow_task->completion); } diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 0adf3cffddb0..87b6e1aa107a 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -757,10 +757,8 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) size = le16_to_cpu(devpriv->ep_tx->wMaxPacketSize); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); - if (!devpriv->usb_tx_buf) { - kfree(devpriv->usb_rx_buf); + if (!devpriv->usb_tx_buf) return -ENOMEM; - } return 0; } @@ -872,6 +870,8 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, devpriv->model = boardinfo->model; + sema_init(&devpriv->limit_sem, 8); + ret = vmk80xx_find_usb_endpoints(dev); if (ret) return ret; @@ -880,8 +880,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, if (ret) return ret; - sema_init(&devpriv->limit_sem, 8); - usb_set_intfdata(intf, devpriv); if (devpriv->model == VMK8061_MODEL) { diff --git a/drivers/staging/iio/meter/ade7854.c b/drivers/staging/iio/meter/ade7854.c index bcfdb650c2c5..c318e263c1bb 100644 --- a/drivers/staging/iio/meter/ade7854.c +++ b/drivers/staging/iio/meter/ade7854.c @@ -269,7 +269,7 @@ static IIO_DEV_ATTR_VPEAK(S_IWUSR | S_IRUGO, static IIO_DEV_ATTR_IPEAK(S_IWUSR | S_IRUGO, ade7854_read_32bit, ade7854_write_32bit, - ADE7854_VPEAK); + ADE7854_IPEAK); static IIO_DEV_ATTR_APHCAL(S_IWUSR | S_IRUGO, ade7854_read_16bit, ade7854_write_16bit, diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index 8ca7d7e68dca..2a0225900d89 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -155,19 +155,11 @@ static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf) static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf) { - u32 val; - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); struct readBB_parm *prdbbparm; struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; prdbbparm = (struct readBB_parm *)pcmd->parmbuf; - if (pcmd->rsp && pcmd->rspsz > 0) - memcpy(pcmd->rsp, (u8 *)&val, pcmd->rspsz); - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (pcmd_callback == NULL) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); + r8712_free_cmd_obj(pcmd); return H2C_SUCCESS; } diff --git a/drivers/staging/rtl8712/rtl8712_cmd.h b/drivers/staging/rtl8712/rtl8712_cmd.h index 039ab3e97172..efa2fc98907f 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.h +++ b/drivers/staging/rtl8712/rtl8712_cmd.h @@ -152,7 +152,7 @@ enum rtl8712_h2c_cmd { static struct _cmd_callback cmd_callback[] = { {GEN_CMD_CODE(_Read_MACREG), NULL}, /*0*/ {GEN_CMD_CODE(_Write_MACREG), NULL}, - {GEN_CMD_CODE(_Read_BBREG), &r8712_getbbrfreg_cmdrsp_callback}, + {GEN_CMD_CODE(_Read_BBREG), NULL}, {GEN_CMD_CODE(_Write_BBREG), NULL}, {GEN_CMD_CODE(_Read_RFREG), &r8712_getbbrfreg_cmdrsp_callback}, {GEN_CMD_CODE(_Write_RFREG), NULL}, /*5*/ diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c index 9ed726509261..c5bf6b1aa80e 100644 --- a/drivers/staging/speakup/speakup_soft.c +++ b/drivers/staging/speakup/speakup_soft.c @@ -213,10 +213,13 @@ static ssize_t softsynth_read(struct file *fp, char __user *buf, size_t count, DEFINE_WAIT(wait); spin_lock_irqsave(&speakup_info.spinlock, flags); + synth_soft.alive = 1; while (1) { prepare_to_wait(&speakup_event, &wait, TASK_INTERRUPTIBLE); - if (!synth_buffer_empty() || speakup_info.flushing) - break; + if (synth_current() == &synth_soft) { + if (!synth_buffer_empty() || speakup_info.flushing) + break; + } spin_unlock_irqrestore(&speakup_info.spinlock, flags); if (fp->f_flags & O_NONBLOCK) { finish_wait(&speakup_event, &wait); @@ -234,6 +237,8 @@ static ssize_t softsynth_read(struct file *fp, char __user *buf, size_t count, cp = buf; init = get_initstring(); while (chars_sent < count) { + if (synth_current() != &synth_soft) + break; if (speakup_info.flushing) { speakup_info.flushing = 0; ch = '\x18'; @@ -286,7 +291,8 @@ static unsigned int softsynth_poll(struct file *fp, poll_wait(fp, &speakup_event, wait); spin_lock_irqsave(&speakup_info.spinlock, flags); - if (!synth_buffer_empty() || speakup_info.flushing) + if (synth_current() == &synth_soft && + (!synth_buffer_empty() || speakup_info.flushing)) ret = POLLIN | POLLRDNORM; spin_unlock_irqrestore(&speakup_info.spinlock, flags); return ret; diff --git a/drivers/staging/speakup/spk_priv.h b/drivers/staging/speakup/spk_priv.h index 637ba6760ec0..b669021455dd 100644 --- a/drivers/staging/speakup/spk_priv.h +++ b/drivers/staging/speakup/spk_priv.h @@ -72,6 +72,7 @@ extern int synth_request_region(u_long, u_long); extern int synth_release_region(u_long, u_long); extern int synth_add(struct spk_synth *in_synth); extern void synth_remove(struct spk_synth *in_synth); +struct spk_synth *synth_current(void); extern struct speakup_info_t speakup_info; diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c index 172cf62b1aaf..1219089af4b5 100644 --- a/drivers/staging/speakup/synth.c +++ b/drivers/staging/speakup/synth.c @@ -475,4 +475,10 @@ void synth_remove(struct spk_synth *in_synth) } EXPORT_SYMBOL_GPL(synth_remove); +struct spk_synth *synth_current(void) +{ + return synth; +} +EXPORT_SYMBOL_GPL(synth_current); + short spk_punc_masks[] = { 0, SOME, MOST, PUNC, PUNC|B_SYM }; diff --git a/drivers/staging/usbip/stub_rx.c b/drivers/staging/usbip/stub_rx.c index 2ed1118d3d8b..7f10bc79a719 100644 --- a/drivers/staging/usbip/stub_rx.c +++ b/drivers/staging/usbip/stub_rx.c @@ -375,16 +375,10 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) } if (usb_endpoint_xfer_isoc(epd)) { - /* validate packet size and number of packets */ - unsigned int maxp, packets, bytes; - - maxp = usb_endpoint_maxp(epd); - maxp *= usb_endpoint_maxp_mult(epd); - bytes = pdu->u.cmd_submit.transfer_buffer_length; - packets = DIV_ROUND_UP(bytes, maxp); - + /* validate number of packets */ if (pdu->u.cmd_submit.number_of_packets < 0 || - pdu->u.cmd_submit.number_of_packets > packets) { + pdu->u.cmd_submit.number_of_packets > + USBIP_MAX_ISO_PACKETS) { dev_err(&sdev->udev->dev, "CMD_SUBMIT: isoc invalid num packets %d\n", pdu->u.cmd_submit.number_of_packets); diff --git a/drivers/staging/usbip/usbip_common.h b/drivers/staging/usbip/usbip_common.h index 58787c49fb68..8b358911563b 100644 --- a/drivers/staging/usbip/usbip_common.h +++ b/drivers/staging/usbip/usbip_common.h @@ -134,6 +134,13 @@ extern struct device_attribute dev_attr_usbip_debug; #define USBIP_DIR_OUT 0x00 #define USBIP_DIR_IN 0x01 +/* + * Arbitrary limit for the maximum number of isochronous packets in an URB, + * compare for example the uhci_submit_isochronous function in + * drivers/usb/host/uhci-q.c + */ +#define USBIP_MAX_ISO_PACKETS 1024 + /** * struct usbip_header_basic - data pertinent to every request * @command: the usbip request type diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index bc1c68c926bd..d8676716467b 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1045,6 +1045,10 @@ static int atmel_prepare_rx_dma(struct uart_port *port) sg_dma_len(&atmel_port->sg_rx)/2, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT); + if (!desc) { + dev_err(port->dev, "Preparing DMA cyclic failed\n"); + goto chan_err; + } desc->callback = atmel_complete_rx_dma; desc->callback_param = port; atmel_port->desc_rx = desc; diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index ba285cd45b59..8ccab7cf437c 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1324,6 +1324,8 @@ static int max310x_spi_probe(struct spi_device *spi) if (spi->dev.of_node) { const struct of_device_id *of_id = of_match_device(max310x_dt_ids, &spi->dev); + if (!of_id) + return -ENODEV; devtype = (struct max310x_devtype *)of_id->data; } else { diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 8f6d6b5a2eeb..acacce37ec66 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1075,6 +1075,10 @@ static int mxs_auart_probe(struct platform_device *pdev) s->port.mapbase = r->start; s->port.membase = ioremap(r->start, resource_size(r)); + if (!s->port.membase) { + ret = -ENOMEM; + goto out_free_clk; + } s->port.ops = &mxs_auart_ops; s->port.iotype = UPIO_MEM; s->port.fifosize = MXS_AUART_FIFO_SIZE; diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index e2224213111c..0dc1d1ac4a9a 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -633,19 +633,9 @@ static void sci_transmit_chars(struct uart_port *port) if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - if (uart_circ_empty(xmit)) { + if (uart_circ_empty(xmit)) sci_stop_tx(port); - } else { - ctrl = serial_port_in(port, SCSCR); - - if (port->type != PORT_SCI) { - serial_port_in(port, SCxSR); /* Dummy read */ - serial_port_out(port, SCxSR, SCxSR_TDxE_CLEAR(port)); - } - ctrl |= SCSCR_TIE; - serial_port_out(port, SCSCR, ctrl); - } } /* On SH3, SCIF may read end-of-break as a space->mark char */ diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index d002c23af6f4..71dfbff5839d 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -483,11 +483,6 @@ static int usb_unbind_interface(struct device *dev) pm_runtime_disable(dev); pm_runtime_set_suspended(dev); - /* Undo any residual pm_autopm_get_interface_* calls */ - for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r) - usb_autopm_put_interface_no_suspend(intf); - atomic_set(&intf->pm_usage_cnt, 0); - if (!error) usb_autosuspend_device(udev); @@ -1638,7 +1633,6 @@ void usb_autopm_put_interface(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put_sync(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1667,7 +1661,6 @@ void usb_autopm_put_interface_async(struct usb_interface *intf) int status; usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); status = pm_runtime_put(&intf->dev); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), @@ -1689,7 +1682,6 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_dec(&intf->pm_usage_cnt); pm_runtime_put_noidle(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); @@ -1720,8 +1712,6 @@ int usb_autopm_get_interface(struct usb_interface *intf) status = pm_runtime_get_sync(&intf->dev); if (status < 0) pm_runtime_put_sync(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1755,8 +1745,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf) status = pm_runtime_get(&intf->dev); if (status < 0 && status != -EINPROGRESS) pm_runtime_put_noidle(&intf->dev); - else - atomic_inc(&intf->pm_usage_cnt); dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", __func__, atomic_read(&intf->dev.power.usage_count), status); @@ -1780,7 +1768,6 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf) struct usb_device *udev = interface_to_usbdev(intf); usb_mark_last_busy(udev); - atomic_inc(&intf->pm_usage_cnt); pm_runtime_get_noresume(&intf->dev); } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 6af648c307b7..f7019c7e9bc5 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -822,9 +822,11 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; - if (size <= 0 || !buf || !index) + if (size <= 0 || !buf) return -EINVAL; buf[0] = 0; + if (index <= 0 || index >= 256) + return -EINVAL; tbuf = kmalloc(256, GFP_NOIO); if (!tbuf) return -ENOMEM; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index fd697bba07ff..dc3270e48dfb 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1199,20 +1199,25 @@ int xhci_bus_suspend(struct usb_hcd *hcd) port_index = max_ports; while (port_index--) { u32 t1, t2; - + int retries = 10; +retry: t1 = readl(port_array[port_index]); t2 = xhci_port_state_to_neutral(t1); portsc_buf[port_index] = 0; - /* Bail out if a USB3 port has a new device in link training */ - if ((hcd->speed >= HCD_USB3) && + /* + * Give a USB3 port in link training time to finish, but don't + * prevent suspend as port might be stuck + */ + if ((hcd->speed >= HCD_USB3) && retries-- && (t1 & PORT_PLS_MASK) == XDEV_POLLING) { - bus_state->bus_suspended = 0; spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg(xhci, "Bus suspend bailout, port in polling\n"); - return -EBUSY; + msleep(XHCI_PORT_POLLING_LFPS_TIME); + spin_lock_irqsave(&xhci->lock, flags); + xhci_dbg(xhci, "port %d polling in bus suspend, waiting\n", + port_index); + goto retry; } - /* suspend ports in U0, or bail out for new connect changes */ if ((t1 & PORT_PE) && (t1 & PORT_PLS_MASK) == XDEV_U0) { if ((t1 & PORT_CSC) && wake_enabled) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index feb702516470..bd191fbfdf40 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -413,6 +413,14 @@ struct xhci_op_regs { */ #define XHCI_DEFAULT_BESL 4 +/* + * USB3 specification define a 360ms tPollingLFPSTiemout for USB3 ports + * to complete link training. usually link trainig completes much faster + * so check status 10 times with 36ms sleep in places we need to wait for + * polling to complete. + */ +#define XHCI_PORT_POLLING_LFPS_TIME 36 + /** * struct xhci_intr_reg - Interrupt Register Set * @irq_pending: IMAN - Interrupt Management Register. Used to enable diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 3114c8d061e9..1c9d08157708 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -332,6 +332,7 @@ static void yurex_disconnect(struct usb_interface *interface) usb_deregister_dev(interface, &yurex_class); /* prevent more I/O from starting */ + usb_poison_urb(dev->urb); mutex_lock(&dev->io_mutex); dev->interface = NULL; mutex_unlock(&dev->io_mutex); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index b60dac48587c..3e22dd294879 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -76,6 +76,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME build-in converter */ { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ { USB_DEVICE(0x10C4, 0x8054) }, /* Enfora GSM2228 */ + { USB_DEVICE(0x10C4, 0x8056) }, /* Lorenz Messtechnik devices */ { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */ { USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */ { USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index aa2bec8687fd..3f89153bc122 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -617,6 +617,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index ecc2424eb8e0..5258cec99219 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -566,7 +566,9 @@ /* * NovaTech product ids (FTDI_VID) */ -#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLX_PLUS_PID 0x7c91 /* OrionLX+ Substation Automation Platform */ +#define FTDI_NT_ORION_IO_PID 0x7c92 /* Orion I/O */ /* * Synapse Wireless product ids (FTDI_VID) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 56c4f6d074ca..454898c4a137 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -362,8 +362,6 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, if (!urbtrack) return -ENOMEM; - kref_get(&mos_parport->ref_count); - urbtrack->mos_parport = mos_parport; urbtrack->urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urbtrack->urb) { kfree(urbtrack); @@ -384,6 +382,8 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, usb_sndctrlpipe(usbdev, 0), (unsigned char *)urbtrack->setup, NULL, 0, async_complete, urbtrack); + kref_get(&mos_parport->ref_count); + urbtrack->mos_parport = mos_parport; kref_init(&urbtrack->ref_count); INIT_LIST_HEAD(&urbtrack->urblist_entry); diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c index 281be56d5648..56bfef517f68 100644 --- a/drivers/usb/storage/realtek_cr.c +++ b/drivers/usb/storage/realtek_cr.c @@ -767,18 +767,16 @@ static void rts51x_suspend_timer_fn(unsigned long data) break; case RTS51X_STAT_IDLE: case RTS51X_STAT_SS: - usb_stor_dbg(us, "RTS51X_STAT_SS, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) > 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) { usb_stor_dbg(us, "Ready to enter SS state\n"); rts51x_set_stat(chip, RTS51X_STAT_SS); /* ignore mass storage interface's children */ pm_suspend_ignore_children(&us->pusb_intf->dev, true); usb_autopm_put_interface_async(us->pusb_intf); - usb_stor_dbg(us, "RTS51X_STAT_SS 01, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); } break; @@ -811,11 +809,10 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) int ret; if (working_scsi(srb)) { - usb_stor_dbg(us, "working scsi, intf->pm_usage_cnt:%d, power.usage:%d\n", - atomic_read(&us->pusb_intf->pm_usage_cnt), + usb_stor_dbg(us, "working scsi, power.usage:%d\n", atomic_read(&us->pusb_intf->dev.power.usage_count)); - if (atomic_read(&us->pusb_intf->pm_usage_cnt) <= 0) { + if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) { ret = usb_autopm_get_interface(us->pusb_intf); usb_stor_dbg(us, "working scsi, ret=%d\n", ret); } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f544cfaa0a10..3f16c299263f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -39,6 +39,12 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" * Using this limit prevents one virtqueue from starving others. */ #define VHOST_NET_WEIGHT 0x80000 +/* Max number of packets transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others with small + * pkts. + */ +#define VHOST_NET_PKT_WEIGHT 256 + /* MAX number of TX used buffers for outstanding zerocopy */ #define VHOST_MAX_PEND 128 #define VHOST_GOODCOPY_LEN 256 @@ -351,6 +357,7 @@ static void handle_tx(struct vhost_net *net) struct socket *sock; struct vhost_net_ubuf_ref *uninitialized_var(ubufs); bool zcopy, zcopy_used; + int sent_pkts = 0; mutex_lock(&vq->mutex); sock = vq->private_data; @@ -362,7 +369,7 @@ static void handle_tx(struct vhost_net *net) hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; - for (;;) { + do { /* Release DMAs done buffers first */ if (zcopy) vhost_zerocopy_signal_used(net, vq); @@ -450,11 +457,7 @@ static void handle_tx(struct vhost_net *net) vhost_zerocopy_signal_used(net, vq); total_len += len; vhost_net_tx_packet(net); - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { - vhost_poll_queue(&vq->poll); - break; - } - } + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); out: mutex_unlock(&vq->mutex); } @@ -575,6 +578,7 @@ static void handle_rx(struct vhost_net *net) size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; struct socket *sock; + int recv_pkts = 0; mutex_lock(&vq->mutex); sock = vq->private_data; @@ -589,7 +593,10 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = peek_head_len(sock->sk))) { + do { + sock_len = peek_head_len(sock->sk); + if (!sock_len) + break; sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, @@ -659,11 +666,8 @@ static void handle_rx(struct vhost_net *net) if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len); total_len += vhost_len; - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { - vhost_poll_queue(&vq->poll); - break; - } - } + } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); + out: mutex_unlock(&vq->mutex); } @@ -732,7 +736,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; } - vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); + vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0dfb3fd6e836..498de4bfcd60 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -60,6 +60,12 @@ #define TCM_VHOST_PREALLOC_UPAGES 2048 #define TCM_VHOST_PREALLOC_PROT_SGLS 512 +/* Max number of requests before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * request. + */ +#define VHOST_SCSI_WEIGHT 256 + struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ struct completion comp; @@ -992,7 +998,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) u64 tag; u32 exp_data_len, data_first, data_num, data_direction, prot_first; unsigned out, in, i; - int head, ret, data_niov, prot_niov, prot_bytes; + int head, ret, data_niov, prot_niov, prot_bytes, c = 0; size_t req_size; u16 lun; u8 *target, *lunp, task_attr; @@ -1010,7 +1016,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) vhost_disable_notify(&vs->dev, vq); - for (;;) { + do { head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); @@ -1213,7 +1219,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) */ INIT_WORK(&cmd->work, tcm_vhost_submission_work); queue_work(tcm_vhost_workqueue, &cmd->work); - } + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); mutex_unlock(&vq->mutex); return; @@ -1576,7 +1582,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vqs[i] = &vs->vqs[i].vq; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ); + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, + VHOST_SCSI_WEIGHT, 0); tcm_vhost_init_inflight(vs, NULL); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 337e1d04871f..48507ef0542a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -292,8 +292,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) vhost_vq_free_iovecs(dev->vqs[i]); } +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, + int pkts, int total_len) +{ + struct vhost_dev *dev = vq->dev; + + if ((dev->byte_weight && total_len >= dev->byte_weight) || + pkts >= dev->weight) { + vhost_poll_queue(&vq->poll); + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(vhost_exceeds_weight); + void vhost_dev_init(struct vhost_dev *dev, - struct vhost_virtqueue **vqs, int nvqs) + struct vhost_virtqueue **vqs, int nvqs, + int weight, int byte_weight) { struct vhost_virtqueue *vq; int i; @@ -308,6 +324,8 @@ void vhost_dev_init(struct vhost_dev *dev, spin_lock_init(&dev->work_lock); INIT_LIST_HEAD(&dev->work_list); dev->worker = NULL; + dev->weight = weight; + dev->byte_weight = byte_weight; for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 3eda654b8f5a..6c4c8aade233 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -123,9 +123,13 @@ struct vhost_dev { spinlock_t work_lock; struct list_head work_list; struct task_struct *worker; + int weight; + int byte_weight; }; -void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); +void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, + int nvqs, int weight, int byte_weight); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 176b88fa694c..ed420aa9216b 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -1041,15 +1041,15 @@ static int ds_probe(struct usb_interface *intf, /* alternative 3, 1ms interrupt (greatly speeds search), 64 byte bulk */ alt = 3; err = usb_set_interface(dev->udev, - intf->altsetting[alt].desc.bInterfaceNumber, alt); + intf->cur_altsetting->desc.bInterfaceNumber, alt); if (err) { dev_err(&dev->udev->dev, "Failed to set alternative setting %d " "for %d interface: err=%d.\n", alt, - intf->altsetting[alt].desc.bInterfaceNumber, err); + intf->cur_altsetting->desc.bInterfaceNumber, err); goto err_out_clear; } - iface_desc = &intf->altsetting[alt]; + iface_desc = intf->cur_altsetting; if (iface_desc->desc.bNumEndpoints != NUM_EP-1) { printk(KERN_INFO "Num endpoints=%d. It is not DS9490R.\n", iface_desc->desc.bNumEndpoints); err = -EINVAL; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 8db7abb6ef5a..cdb055207753 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -502,8 +502,15 @@ static void balloon_process(struct work_struct *work) state = reserve_additional_memory(credit); } - if (credit < 0) - state = decrease_reservation(-credit, GFP_BALLOON); + if (credit < 0) { + long n_pages; + + n_pages = min(-credit, si_mem_available()); + state = decrease_reservation(n_pages, GFP_BALLOON); + if (state == BP_DONE && n_pages != -credit && + n_pages < totalreserve_pages) + state = BP_EAGAIN; + } state = update_schedule(state); @@ -561,6 +568,9 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) enum bp_state st; if (page) balloon_append(page); + if (si_mem_available() < nr_pages) + return -ENOMEM; + st = decrease_reservation(nr_pages - pgno, highmem ? GFP_HIGHUSER : GFP_USER); if (st != BP_DONE) @@ -692,7 +702,7 @@ static int __init balloon_init(void) balloon_stats.schedule_delay = 1; balloon_stats.max_schedule_delay = 32; balloon_stats.retry_count = 1; - balloon_stats.max_retry_count = RETRY_UNLIMITED; + balloon_stats.max_retry_count = 4; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG balloon_stats.hotplug_pages = 0; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c2e930ec2888..696db173e26f 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1382,8 +1382,8 @@ static int afs_fs_setattr_size64(struct afs_server *server, struct key *key, xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ - *bp++ = 0; + *bp++ = htonl(attr->ia_size >> 32); /* position of start of write */ + *bp++ = htonl((u32) attr->ia_size); *bp++ = 0; /* size of write */ *bp++ = 0; *bp++ = htonl(attr->ia_size >> 32); /* new file length */ @@ -1433,7 +1433,7 @@ static int afs_fs_setattr_size(struct afs_server *server, struct key *key, xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ + *bp++ = htonl(attr->ia_size); /* position of start of write */ *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6db91cdbd92d..6a02cb3e5650 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -42,6 +42,8 @@ #include "extent_io.h" #include "extent_map.h" +static const char* const btrfs_compress_types[] = { "", "zlib", "lzo" }; + struct compressed_bio { /* number of bios pending for this compressed extent */ atomic_t pending_bios; @@ -81,6 +83,22 @@ struct compressed_bio { u32 sums; }; +bool btrfs_compress_is_valid_type(const char *str, size_t len) +{ + int i; + + for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) { + size_t comp_len = strlen(btrfs_compress_types[i]); + + if (len < comp_len) + continue; + + if (!strncmp(btrfs_compress_types[i], str, comp_len)) + return true; + } + return false; +} + static int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, struct bio_vec *bvec, int vcnt, size_t srclen); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index d181f70caae0..5f15b34d88f7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -80,4 +80,5 @@ struct btrfs_compress_op { extern struct btrfs_compress_op btrfs_zlib_compress; extern struct btrfs_compress_op btrfs_lzo_compress; +bool btrfs_compress_is_valid_type(const char *str, size_t len); #endif diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 129b1dd28527..8b041d83533a 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -22,6 +22,7 @@ #include "hash.h" #include "transaction.h" #include "xattr.h" +#include "compression.h" #define BTRFS_PROP_HANDLERS_HT_BITS 8 static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); @@ -378,9 +379,7 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans, static int prop_compression_validate(const char *value, size_t len) { - if (!strncmp("lzo", value, len)) - return 0; - else if (!strncmp("zlib", value, len)) + if (btrfs_compress_is_valid_type(value, len)) return 0; return -EINVAL; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5e2005030f54..bdda508cc85d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1327,6 +1327,7 @@ void ceph_dentry_lru_del(struct dentry *dn) unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) { struct ceph_inode_info *dci = ceph_inode(dir); + unsigned hash; switch (dci->i_dir_layout.dl_dir_hash) { case 0: /* for backward compat */ @@ -1334,8 +1335,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) return dn->d_name.hash; default: - return ceph_str_hash(dci->i_dir_layout.dl_dir_hash, + spin_lock(&dn->d_lock); + hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash, dn->d_name.name, dn->d_name.len); + spin_unlock(&dn->d_lock); + return hash; } } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 23f5cb1ee904..ff85c3129e9d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1092,6 +1092,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) } struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); #define CIFS_CACHE_READ_FLG 1 @@ -1579,6 +1580,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock; #endif /* CONFIG_CIFS_ACL */ void cifs_oplock_break(struct work_struct *work); +void cifs_queue_oplock_break(struct cifsFileInfo *cfile); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4b8870f889e3..9b3d28b79329 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -359,12 +359,30 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file) return cifs_file; } -/* - * Release a reference on the file private data. This may involve closing - * the filehandle out on the server. Must be called without holding - * tcon->open_file_lock and cifs_file->file_info_lock. +/** + * cifsFileInfo_put - release a reference of file priv data + * + * Always potentially wait for oplock handler. See _cifsFileInfo_put(). */ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) +{ + _cifsFileInfo_put(cifs_file, true); +} + +/** + * _cifsFileInfo_put - release a reference of file priv data + * + * This may involve closing the filehandle @cifs_file out on the + * server. Must be called without holding tcon->open_file_lock and + * cifs_file->file_info_lock. + * + * If @wait_for_oplock_handler is true and we are releasing the last + * reference, wait for any running oplock break handler of the file + * and cancel any pending one. If calling this function from the + * oplock break handler, you need to pass false. + * + */ +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) { struct inode *inode = cifs_file->dentry->d_inode; struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); @@ -412,7 +430,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) spin_unlock(&tcon->open_file_lock); - oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break); + oplock_break_cancelled = wait_oplock_handler ? + cancel_work_sync(&cifs_file->oplock_break) : false; if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; @@ -3701,6 +3720,7 @@ void cifs_oplock_break(struct work_struct *work) cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } + _cifsFileInfo_put(cfile, false /* do not wait for ourself */); cifs_done_oplock_break(cinode); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 09ca7c978700..f681091ec3db 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1627,6 +1627,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, if (rc == 0 || rc != -EBUSY) goto do_rename_exit; + /* Don't fall back to using SMB on SMB 2+ mount */ + if (server->vals->protocol_id != 0) + goto do_rename_exit; + /* open-file renames don't work across directories */ if (to_dentry->d_parent != from_dentry->d_parent) goto do_rename_exit; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index f03fecafc5d5..debc26b95cd7 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -477,8 +477,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &pCifsInode->flags); - queue_work(cifsoplockd_wq, - &netfile->oplock_break); + cifs_queue_oplock_break(netfile); netfile->oplock_break_cancelled = false; spin_unlock(&tcon->open_file_lock); @@ -610,6 +609,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode) spin_unlock(&cinode->writers_lock); } +/** + * cifs_queue_oplock_break - queue the oplock break handler for cfile + * + * This function is called from the demultiplex thread when it + * receives an oplock break for @cfile. + * + * Assumes the tcon->open_file_lock is held. + * Assumes cfile->file_info_lock is NOT held. + */ +void cifs_queue_oplock_break(struct cifsFileInfo *cfile) +{ + /* + * Bump the handle refcount now while we hold the + * open_file_lock to enforce the validity of it for the oplock + * break handler. The matching put is done at the end of the + * handler. + */ + cifsFileInfo_get(cfile); + + queue_work(cifsoplockd_wq, &cfile->oplock_break); +} + void cifs_done_oplock_break(struct cifsInodeInfo *cinode) { clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 1485ab8c2d65..f80fcc6921ee 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -458,7 +458,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); - queue_work(cifsoplockd_wq, &cfile->oplock_break); + cifs_queue_oplock_break(cfile); kfree(lw); return true; } @@ -602,8 +602,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); spin_unlock(&cfile->file_info_lock); - queue_work(cifsoplockd_wq, - &cfile->oplock_break); + + cifs_queue_oplock_break(cfile); spin_unlock(&tcon->open_file_lock); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 376ccd96127f..d95a547cf94e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -906,6 +906,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_buf); + if (!rc) + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (!rc || !err_buf) { kfree(utf16_path); return -ENOENT; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 40c86790109e..201f683bea89 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -79,7 +79,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) struct super_block *sb = inode->i_sb; int blockmask = sb->s_blocksize - 1; - if (pos >= i_size_read(inode)) + if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize)) return 0; if ((pos | iov_iter_alignment(from)) & blockmask) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 80c3f1ed1afa..63597cd0265c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -908,11 +908,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb, memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); n_group_desc[gdb_num] = gdb_bh; + + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (err) { + kvfree(n_group_desc); + brelse(gdb_bh); + return err; + } + EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; ext4_kvfree(o_group_desc); - BUFFER_TRACE(gdb_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gdb_bh); return err; } diff --git a/fs/lockd/host.c b/fs/lockd/host.c index b31117c12102..6f12147a2fea 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -288,12 +288,11 @@ void nlmclnt_release_host(struct nlm_host *host) WARN_ON_ONCE(host->h_server); - if (atomic_dec_and_test(&host->h_count)) { + if (atomic_dec_and_mutex_lock(&host->h_count, &nlm_host_mutex)) { WARN_ON_ONCE(!list_empty(&host->h_lockowners)); WARN_ON_ONCE(!list_empty(&host->h_granted)); WARN_ON_ONCE(!list_empty(&host->h_reclaim)); - mutex_lock(&nlm_host_mutex); nlm_destroy_host_locked(host); mutex_unlock(&nlm_host_mutex); } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 7445af0b1aa3..a0bc66039269 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -27,10 +27,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) struct vmalloc_info vmi; long cached; long available; - unsigned long pagecache; - unsigned long wmark_low = 0; unsigned long pages[NR_LRU_LISTS]; - struct zone *zone; int lru; /* @@ -51,36 +48,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) pages[lru] = global_page_state(NR_LRU_BASE + lru); - for_each_zone(zone) - wmark_low += zone->watermark[WMARK_LOW]; - - /* - * Estimate the amount of memory available for userspace allocations, - * without causing swapping. - * - * Free memory cannot be taken below the low watermark, before the - * system starts swapping. - */ - available = i.freeram - wmark_low; - - /* - * Not all the page cache can be freed, otherwise the system will - * start swapping. Assume at least half of the page cache, or the - * low watermark worth of cache, needs to stay. - */ - pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE]; - pagecache -= min(pagecache / 2, wmark_low); - available += pagecache; - - /* - * Part of the reclaimable slab consists of items that are in use, - * and cannot be freed. Cap this estimate at the low watermark. - */ - available += global_page_state(NR_SLAB_RECLAIMABLE) - - min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); - - if (available < 0) - available = 0; + available = si_mem_available(); /* * Tagged format, for easy grepping and expansion. diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index bfb8e8d588b8..eb7067de78db 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1550,8 +1550,11 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - put_links(header); - start_unregistering(header); + if (parent) { + put_links(header); + start_unregistering(header); + } + if (!--header->count) kfree_rcu(header, rcu); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 8a9657d7f7c6..8df2632a2966 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -261,6 +261,9 @@ void udf_truncate_extents(struct inode *inode) epos.block = eloc; epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, &eloc, 0)); + /* Error reading indirect block? */ + if (!epos.bh) + return; if (elen) indirect_ext_len = (elen + sb->s_blocksize - 1) >> diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 3f9463f8cf2f..f877d5cadd98 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -228,7 +228,7 @@ ufs_get_inode_gid(struct super_block *sb, struct ufs_inode *inode) case UFS_UID_44BSD: return fs32_to_cpu(sb, inode->ui_u3.ui_44.ui_gid); case UFS_UID_EFT: - if (inode->ui_u1.oldids.ui_suid == 0xFFFF) + if (inode->ui_u1.oldids.ui_sgid == 0xFFFF) return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_gid); /* Fall through */ default: diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 8c0ff78ff45a..83fac3e091c2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -197,6 +197,7 @@ struct kretprobe_instance { struct kretprobe *rp; kprobe_opcode_t *ret_addr; struct task_struct *task; + void *fp; char data[0]; }; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 64c7425afbce..f2bca5a77851 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -525,9 +525,24 @@ do { \ lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) + +#define lockdep_assert_irqs_enabled() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + !current->hardirqs_enabled, \ + "IRQs not enabled as expected\n"); \ + } while (0) + +#define lockdep_assert_irqs_disabled() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + current->hardirqs_enabled, \ + "IRQs not disabled as expected\n"); \ + } while (0) + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) +# define lockdep_assert_irqs_enabled() do { } while (0) +# define lockdep_assert_irqs_disabled() do { } while (0) #endif #ifdef CONFIG_PROVE_RCU diff --git a/include/linux/mm.h b/include/linux/mm.h index a576467cd4a5..e67e12641b63 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1699,6 +1699,7 @@ extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(unsigned int flags); +extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); diff --git a/include/linux/siphash.h b/include/linux/siphash.h new file mode 100644 index 000000000000..c8c7ae2e687b --- /dev/null +++ b/include/linux/siphash.h @@ -0,0 +1,90 @@ +/* Copyright (C) 2016 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4. + */ + +#ifndef _LINUX_SIPHASH_H +#define _LINUX_SIPHASH_H + +#include +#include + +#define SIPHASH_ALIGNMENT __alignof__(u64) +typedef struct { + u64 key[2]; +} siphash_key_t; + +static inline bool siphash_key_is_zero(const siphash_key_t *key) +{ + return !(key->key[0] | key->key[1]); +} + +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); +#endif + +u64 siphash_1u64(const u64 a, const siphash_key_t *key); +u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); +u64 siphash_3u64(const u64 a, const u64 b, const u64 c, + const siphash_key_t *key); +u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, + const siphash_key_t *key); +u64 siphash_1u32(const u32 a, const siphash_key_t *key); +u64 siphash_3u32(const u32 a, const u32 b, const u32 c, + const siphash_key_t *key); + +static inline u64 siphash_2u32(const u32 a, const u32 b, + const siphash_key_t *key) +{ + return siphash_1u64((u64)b << 32 | a, key); +} +static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, + const u32 d, const siphash_key_t *key) +{ + return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); +} + + +static inline u64 ___siphash_aligned(const __le64 *data, size_t len, + const siphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return siphash_1u32(le32_to_cpup((const __le32 *)data), key); + if (__builtin_constant_p(len) && len == 8) + return siphash_1u64(le64_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 16) + return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 24) + return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 32) + return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), le64_to_cpu(data[3]), + key); + return __siphash_aligned(data, len, key); +} + +/** + * siphash - compute 64-bit siphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the siphash key + */ +static inline u64 siphash(const void *data, size_t len, + const siphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + return __siphash_unaligned(data, len, key); +#endif + return ___siphash_aligned(data, len, key); +} + +#endif /* _LINUX_SIPHASH_H */ diff --git a/include/linux/string.h b/include/linux/string.h index dd71dd08a840..85491dd76903 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -113,6 +113,9 @@ extern void * memscan(void *,int,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCMP extern int memcmp(const void *,const void *,__kernel_size_t); #endif +#ifndef __HAVE_ARCH_BCMP +extern int bcmp(const void *,const void *,__kernel_size_t); +#endif #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif diff --git a/include/linux/usb.h b/include/linux/usb.h index a53e036b2252..64a39f02a5f6 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -125,7 +125,6 @@ enum usb_interface_condition { * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. - * @pm_usage_cnt: PM usage counter for this interface * @reset_ws: Used for scheduling resets from atomic context. * @reset_running: set to 1 if the interface is currently running a * queued reset so that usb_cancel_queued_reset() doesn't try to @@ -186,7 +185,6 @@ struct usb_interface { struct device dev; /* interface specific device info */ struct device *usb_dev; - atomic_t pm_usage_cnt; /* usage counter for autosuspend */ struct work_struct reset_ws; /* for resets in atomic context */ }; #define to_usb_interface(d) container_of(d, struct usb_interface, dev) diff --git a/include/net/ip.h b/include/net/ip.h index 27dd9826e05d..8ec53320c902 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -319,9 +319,10 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) } u32 ip_idents_reserve(u32 hash, int segs); -void __ip_select_ident(struct iphdr *iph, int segs); +void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); -static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) +static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, + struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); @@ -338,13 +339,14 @@ static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, in iph->id = 0; } } else { - __ip_select_ident(iph, segs); + __ip_select_ident(net, iph, segs); } } -static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) +static inline void ip_select_ident(struct net *net, struct sk_buff *skb, + struct sock *sk) { - ip_select_ident_segs(skb, sk, 1); + ip_select_ident_segs(net, skb, sk, 1); } static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c30ba46ef2f0..c4e455f4bfe6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -688,7 +688,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -void ipv6_proxy_select_ident(struct sk_buff *skb); +void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, + struct rt6_info *rt); +void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5c53572b5f0d..3b66e31a7819 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -289,6 +289,8 @@ void init_nf_conntrack_hash_rnd(void); void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl); +u32 nf_ct_get_id(const struct nf_conn *ct); + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80a1c572b9a0..213f53d4f6b2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -7,6 +7,7 @@ #include #include +#include struct tcpm_hash_bucket; struct ctl_table_header; @@ -98,5 +99,6 @@ struct netns_ipv4 { #endif #endif atomic_t rt_genid; + siphash_key_t ip_id_key; }; #endif diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 32ee65a30aff..1c6e6c0766ca 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -61,7 +61,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { - struct sctphdr *sh = sctp_hdr(skb); + struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); const struct skb_checksum_ops ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, diff --git a/kernel/events/core.c b/kernel/events/core.c index e8be52939ed1..3beed0ea98d9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5445,6 +5445,7 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_output_handle handle; struct perf_sample_data sample; int size = mmap_event->event_id.header.size; + u32 type = mmap_event->event_id.header.type; int ret; if (!perf_event_mmap_match(event, data)) @@ -5488,6 +5489,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_end(&handle); out: mmap_event->event_id.header.size = size; + mmap_event->event_id.header.type = type; } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) diff --git a/kernel/futex.c b/kernel/futex.c index 0ee2f54d74fb..99679c0040cc 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2909,6 +2909,10 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; + /* Futex address must be 32bit aligned */ + if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) + return -1; + retry: if (get_user(uval, uaddr)) return -1; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 328410652be6..f967ff776e5b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1503,6 +1503,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) if (p->last_task_numa_placement) { delta = runtime - p->last_sum_exec_runtime; *period = now - p->last_task_numa_placement; + + /* Avoid time going backwards, prevent potential divide error: */ + if (unlikely((s64)*period < 0)) + *period = 0; } else { delta = p->se.avg.runnable_avg_sum; *period = p->se.avg.runnable_avg_period; @@ -3704,6 +3708,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } +extern const u64 max_cfs_quota_period; + static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) { struct cfs_bandwidth *cfs_b = @@ -3711,6 +3717,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) ktime_t now; int overrun; int idle = 0; + int count = 0; raw_spin_lock(&cfs_b->lock); for (;;) { @@ -3720,6 +3727,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) if (!overrun) break; + if (++count > 3) { + u64 new, old = ktime_to_ns(cfs_b->period); + + new = (old * 147) / 128; /* ~115% */ + new = min(new, max_cfs_quota_period); + + cfs_b->period = ns_to_ktime(new); + + /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ + cfs_b->quota *= new; + cfs_b->quota = div64_u64(cfs_b->quota, old); + + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(new, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + + /* reset count so we don't come right back in here */ + count = 0; + } + idle = do_sched_cfs_period_timer(cfs_b, overrun); } raw_spin_unlock(&cfs_b->lock); @@ -5487,10 +5516,10 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) if (cfs_rq->last_h_load_update == now) return; - cfs_rq->h_load_next = NULL; + ACCESS_ONCE(cfs_rq->h_load_next) = NULL; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - cfs_rq->h_load_next = se; + ACCESS_ONCE(cfs_rq->h_load_next) = se; if (cfs_rq->last_h_load_update == now) break; } @@ -5500,7 +5529,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) cfs_rq->last_h_load_update = now; } - while ((se = cfs_rq->h_load_next) != NULL) { + while ((se = ACCESS_ONCE(cfs_rq->h_load_next)) != NULL) { load = cfs_rq->h_load; load = div64_ul(load * se->avg.load_avg_contrib, cfs_rq->runnable_load_avg + 1); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8ee705e1d472..c59d43d54d32 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -4508,7 +4509,7 @@ static struct ftrace_ops control_ops = { INIT_OPS_HASH(control_ops) }; -static inline void +static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) { @@ -4561,11 +4562,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, { __ftrace_ops_list_func(ip, parent_ip, NULL, regs); } +NOKPROBE_SYMBOL(ftrace_ops_list_func); #else static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } +NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif static void clear_ftrace_swapper(void) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 107e8ce1a87e..89d99ccb1617 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -729,7 +729,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) preempt_disable_notrace(); time = rb_time_stamp(buffer); - preempt_enable_no_resched_notrace(); + preempt_enable_notrace(); return time; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7a638aa3545b..3678fc124dc1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1550,6 +1550,16 @@ config TEST_STRING_HELPERS config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" +config TEST_HASH + tristate "Perform selftest on hash functions" + default n + help + Enable this option to test the kernel's siphash () + hash functions on boot (or module load). + + This is intended to help people writing architecture-specific + optimized versions. If unsure, say N. + endmenu # runtime tests config PROVIDE_OHCI1394_DMA_INIT diff --git a/lib/Makefile b/lib/Makefile index ba967a19edba..31a4389ff179 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,10 +26,11 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o hash.o + percpu-refcount.o percpu_ida.o hash.o siphash.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o +obj-$(CONFIG_TEST_HASH) += test_siphash.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_MODULE) += test_module.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o diff --git a/lib/siphash.c b/lib/siphash.c new file mode 100644 index 000000000000..c43cf406e71b --- /dev/null +++ b/lib/siphash.c @@ -0,0 +1,232 @@ +/* Copyright (C) 2016 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4. + */ + +#include +#include + +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 +#include +#include +#endif + +#define SIPROUND \ + do { \ + v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \ + v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \ + v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \ + v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \ + } while (0) + +#define PREAMBLE(len) \ + u64 v0 = 0x736f6d6570736575ULL; \ + u64 v1 = 0x646f72616e646f6dULL; \ + u64 v2 = 0x6c7967656e657261ULL; \ + u64 v3 = 0x7465646279746573ULL; \ + u64 b = ((u64)(len)) << 56; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define POSTAMBLE \ + v3 ^= b; \ + SIPROUND; \ + SIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + PREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = le64_to_cpup(data); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= le32_to_cpup(data); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } +#endif + POSTAMBLE +} +EXPORT_SYMBOL(__siphash_aligned); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + PREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = get_unaligned_le64(data); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= get_unaligned_le32(end); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } +#endif + POSTAMBLE +} +EXPORT_SYMBOL(__siphash_unaligned); +#endif + +/** + * siphash_1u64 - compute 64-bit siphash PRF value of a u64 + * @first: first u64 + * @key: the siphash key + */ +u64 siphash_1u64(const u64 first, const siphash_key_t *key) +{ + PREAMBLE(8) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_1u64); + +/** + * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64 + * @first: first u64 + * @second: second u64 + * @key: the siphash key + */ +u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) +{ + PREAMBLE(16) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_2u64); + +/** + * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64 + * @first: first u64 + * @second: second u64 + * @third: third u64 + * @key: the siphash key + */ +u64 siphash_3u64(const u64 first, const u64 second, const u64 third, + const siphash_key_t *key) +{ + PREAMBLE(24) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + v3 ^= third; + SIPROUND; + SIPROUND; + v0 ^= third; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_3u64); + +/** + * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64 + * @first: first u64 + * @second: second u64 + * @third: third u64 + * @forth: forth u64 + * @key: the siphash key + */ +u64 siphash_4u64(const u64 first, const u64 second, const u64 third, + const u64 forth, const siphash_key_t *key) +{ + PREAMBLE(32) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + v3 ^= third; + SIPROUND; + SIPROUND; + v0 ^= third; + v3 ^= forth; + SIPROUND; + SIPROUND; + v0 ^= forth; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_4u64); + +u64 siphash_1u32(const u32 first, const siphash_key_t *key) +{ + PREAMBLE(4) + b |= first; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_1u32); + +u64 siphash_3u32(const u32 first, const u32 second, const u32 third, + const siphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + PREAMBLE(12) + v3 ^= combined; + SIPROUND; + SIPROUND; + v0 ^= combined; + b |= third; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_3u32); diff --git a/lib/string.c b/lib/string.c index 80e8bdb60538..94928426f511 100644 --- a/lib/string.c +++ b/lib/string.c @@ -776,6 +776,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count) EXPORT_SYMBOL(memcmp); #endif +#ifndef __HAVE_ARCH_BCMP +/** + * bcmp - returns 0 if and only if the buffers have identical contents. + * @a: pointer to first buffer. + * @b: pointer to second buffer. + * @len: size of buffers. + * + * The sign or magnitude of a non-zero return value has no particular + * meaning, and architectures may implement their own more efficient bcmp(). So + * while this particular implementation is a simple (tail) call to memcmp, do + * not rely on anything but whether the return value is zero or non-zero. + */ +#undef bcmp +int bcmp(const void *a, const void *b, size_t len) +{ + return memcmp(a, b, len); +} +EXPORT_SYMBOL(bcmp); +#endif + #ifndef __HAVE_ARCH_MEMSCAN /** * memscan - Find a character in an area of memory. diff --git a/lib/test_siphash.c b/lib/test_siphash.c new file mode 100644 index 000000000000..d972acfc15e4 --- /dev/null +++ b/lib/test_siphash.c @@ -0,0 +1,131 @@ +/* Test cases for siphash.c + * + * Copyright (C) 2016 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +/* Test vectors taken from official reference source available at: + * https://131002.net/siphash/siphash24.c + */ + +static const siphash_key_t test_key_siphash = + {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }}; + +static const u64 test_vectors_siphash[64] = { + 0x726fdb47dd0e0e31ULL, 0x74f839c593dc67fdULL, 0x0d6c8009d9a94f5aULL, + 0x85676696d7fb7e2dULL, 0xcf2794e0277187b7ULL, 0x18765564cd99a68dULL, + 0xcbc9466e58fee3ceULL, 0xab0200f58b01d137ULL, 0x93f5f5799a932462ULL, + 0x9e0082df0ba9e4b0ULL, 0x7a5dbbc594ddb9f3ULL, 0xf4b32f46226bada7ULL, + 0x751e8fbc860ee5fbULL, 0x14ea5627c0843d90ULL, 0xf723ca908e7af2eeULL, + 0xa129ca6149be45e5ULL, 0x3f2acc7f57c29bdbULL, 0x699ae9f52cbe4794ULL, + 0x4bc1b3f0968dd39cULL, 0xbb6dc91da77961bdULL, 0xbed65cf21aa2ee98ULL, + 0xd0f2cbb02e3b67c7ULL, 0x93536795e3a33e88ULL, 0xa80c038ccd5ccec8ULL, + 0xb8ad50c6f649af94ULL, 0xbce192de8a85b8eaULL, 0x17d835b85bbb15f3ULL, + 0x2f2e6163076bcfadULL, 0xde4daaaca71dc9a5ULL, 0xa6a2506687956571ULL, + 0xad87a3535c49ef28ULL, 0x32d892fad841c342ULL, 0x7127512f72f27cceULL, + 0xa7f32346f95978e3ULL, 0x12e0b01abb051238ULL, 0x15e034d40fa197aeULL, + 0x314dffbe0815a3b4ULL, 0x027990f029623981ULL, 0xcadcd4e59ef40c4dULL, + 0x9abfd8766a33735cULL, 0x0e3ea96b5304a7d0ULL, 0xad0c42d6fc585992ULL, + 0x187306c89bc215a9ULL, 0xd4a60abcf3792b95ULL, 0xf935451de4f21df2ULL, + 0xa9538f0419755787ULL, 0xdb9acddff56ca510ULL, 0xd06c98cd5c0975ebULL, + 0xe612a3cb9ecba951ULL, 0xc766e62cfcadaf96ULL, 0xee64435a9752fe72ULL, + 0xa192d576b245165aULL, 0x0a8787bf8ecb74b2ULL, 0x81b3e73d20b49b6fULL, + 0x7fa8220ba3b2eceaULL, 0x245731c13ca42499ULL, 0xb78dbfaf3a8d83bdULL, + 0xea1ad565322a1a0bULL, 0x60e61c23a3795013ULL, 0x6606d7e446282b93ULL, + 0x6ca4ecb15c5f91e1ULL, 0x9f626da15c9625f3ULL, 0xe51b38608ef25f57ULL, + 0x958a324ceb064572ULL +}; + +static int __init siphash_test_init(void) +{ + u8 in[64] __aligned(SIPHASH_ALIGNMENT); + u8 in_unaligned[65] __aligned(SIPHASH_ALIGNMENT); + u8 i; + int ret = 0; + + for (i = 0; i < 64; ++i) { + in[i] = i; + in_unaligned[i + 1] = i; + if (siphash(in, i, &test_key_siphash) != + test_vectors_siphash[i]) { + pr_info("siphash self-test aligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } + if (siphash(in_unaligned + 1, i, &test_key_siphash) != + test_vectors_siphash[i]) { + pr_info("siphash self-test unaligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } + } + if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) != + test_vectors_siphash[8]) { + pr_info("siphash self-test 1u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + &test_key_siphash) != test_vectors_siphash[16]) { + pr_info("siphash self-test 2u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + 0x1716151413121110ULL, &test_key_siphash) != + test_vectors_siphash[24]) { + pr_info("siphash self-test 3u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL, + &test_key_siphash) != test_vectors_siphash[32]) { + pr_info("siphash self-test 4u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_1u32(0x03020100U, &test_key_siphash) != + test_vectors_siphash[4]) { + pr_info("siphash self-test 1u32: FAIL\n"); + ret = -EINVAL; + } + if (siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash) != + test_vectors_siphash[8]) { + pr_info("siphash self-test 2u32: FAIL\n"); + ret = -EINVAL; + } + if (siphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_siphash) != + test_vectors_siphash[12]) { + pr_info("siphash self-test 3u32: FAIL\n"); + ret = -EINVAL; + } + if (siphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash) != + test_vectors_siphash[16]) { + pr_info("siphash self-test 4u32: FAIL\n"); + ret = -EINVAL; + } + if (!ret) + pr_info("self-tests: pass\n"); + return ret; +} + +static void __exit siphash_test_exit(void) +{ +} + +module_init(siphash_test_init); +module_exit(siphash_test_exit); + +MODULE_AUTHOR("Jason A. Donenfeld "); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7a783dc67305..b491f75f7491 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3072,6 +3072,49 @@ static inline void show_node(struct zone *zone) printk("Node %d ", zone_to_nid(zone)); } +long si_mem_available(void) +{ + long available; + unsigned long pagecache; + unsigned long wmark_low = 0; + unsigned long pages[NR_LRU_LISTS]; + struct zone *zone; + int lru; + + for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) + pages[lru] = global_page_state(NR_LRU_BASE + lru); + + for_each_zone(zone) + wmark_low += zone->watermark[WMARK_LOW]; + + /* + * Estimate the amount of memory available for userspace allocations, + * without causing swapping. + */ + available = global_page_state(NR_FREE_PAGES) - totalreserve_pages; + + /* + * Not all the page cache can be freed, otherwise the system will + * start swapping. Assume at least half of the page cache, or the + * low watermark worth of cache, needs to stay. + */ + pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE]; + pagecache -= min(pagecache / 2, wmark_low); + available += pagecache; + + /* + * Part of the reclaimable slab consists of items that are in use, + * and cannot be freed. Cap this estimate at the low watermark. + */ + available += global_page_state(NR_SLAB_RECLAIMABLE) - + min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); + + if (available < 0) + available = 0; + return available; +} +EXPORT_SYMBOL_GPL(si_mem_available); + void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages; diff --git a/mm/vmstat.c b/mm/vmstat.c index ae3c911843fa..8272a99dce41 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -861,13 +861,8 @@ const char * const vmstat_text[] = { "thp_zero_page_alloc_failed", #endif #ifdef CONFIG_DEBUG_TLBFLUSH -#ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#else - "", /* nr_tlb_remote_flush */ - "", /* nr_tlb_remote_flush_received */ -#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", #endif /* CONFIG_DEBUG_TLBFLUSH */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 335401d6351c..2a1611e65904 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -677,6 +677,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const uint8_t *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; + struct batadv_bla_claim *claim_removed_entry; + struct hlist_node *claim_removed_node; ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; @@ -687,10 +689,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", mac, BATADV_PRINT_VID(vid)); - batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, - batadv_choose_claim, claim); - batadv_claim_free_ref(claim); /* reference from the hash is gone */ + claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash, + batadv_compare_claim, + batadv_choose_claim, claim); + if (!claim_removed_node) + goto free_claim; + /* reference from the hash is gone */ + claim_removed_entry = hlist_entry(claim_removed_node, + struct batadv_bla_claim, hash_entry); + batadv_claim_free_ref(claim_removed_entry); + +free_claim: /* don't need the reference from hash_find() anymore */ batadv_claim_free_ref(claim); } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index c9ac7d948a21..422d54f6a243 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -483,14 +483,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { + struct batadv_tt_global_entry *tt_removed_entry; + struct hlist_node *tt_removed_node; + batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, BATADV_PRINT_VID(tt_global->common.vid), message); - batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_tt, &tt_global->common); - batadv_tt_global_entry_free_ref(tt_global); + tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_global->common); + if (!tt_removed_node) + return; + + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_global_entry, + common.hash_entry); + batadv_tt_global_entry_free_ref(tt_removed_entry); } /** @@ -1021,9 +1033,10 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr, unsigned short vid, const char *message, bool roaming) { + struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; - void *tt_entry_exists; + struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1052,15 +1065,18 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); - if (!tt_entry_exists) + if (!tt_removed_node) goto out; - /* extra call to free the local tt entry */ - batadv_tt_local_entry_free_ref(tt_local_entry); + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_local_entry, + common.hash_entry); + batadv_tt_local_entry_free_ref(tt_removed_entry); out: if (tt_local_entry) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3c594185de36..7725dd99fdc6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2036,7 +2036,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -2048,6 +2049,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 2cf4e30ac41a..7ddb8b322556 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -659,6 +659,8 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, return NF_DROP; skb->protocol = htons(ETH_P_IPV6); + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); @@ -715,6 +717,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_DROP; store_orig_dstaddr(skb); skb->protocol = htons(ETH_P_IP); + skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 2df71bc7959d..75929f9bd4e6 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2011,7 +2011,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) + /* rule should have no remaining data after target */ + if (type == EBT_COMPAT_TARGET && size_left) return -EINVAL; match32 = (struct compat_ebt_entry_mwt *) buf; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index cd332d0fa930..7550d098e3b7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -788,6 +788,8 @@ static int rx_queue_add_kobject(struct net_device *net, int index) if (error) return error; + dev_hold(queue->dev); + if (net->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, net->sysfs_rx_queue_group); if (error) { @@ -797,7 +799,6 @@ static int rx_queue_add_kobject(struct net_device *net, int index) } kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return error; } @@ -1146,6 +1147,8 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) if (error) return error; + dev_hold(queue->dev); + #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error) { @@ -1155,7 +1158,6 @@ static int netdev_queue_add_kobject(struct net_device *net, int index) #endif kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return 0; } diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 9733ddbc96cb..fa99d53f29e5 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { + kfree(fval.sp.vec); + return -ENOMEM; + } + + return 0; } /** diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 30455bf91b18..c3ed865d262f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -491,8 +491,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 48444c4c3c51..680ce5f4fb65 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -395,7 +395,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -739,7 +739,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 109eddf0248a..2a05dfc9a35e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -150,7 +150,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, sk); + ip_select_ident(sock_net(sk), skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -432,7 +432,8 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); + ip_select_ident_segs(sock_net(sk), skb, sk, + skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? */ skb->priority = sk->sk_priority; @@ -1385,7 +1386,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, sk); + ip_select_ident(net, skb, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 88c386cf7d85..ce63ab21b6cd 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,8 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); + __ip_select_ident(dev_net(rt->dst.dev), iph, + skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 643ec0bb80a5..5859c0f5bd41 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1647,7 +1647,8 @@ static struct notifier_block ip_mr_notifier = { * important for multicast video. */ -static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) +static void ip_encap(struct net *net, struct sk_buff *skb, + __be32 saddr, __be32 daddr) { struct iphdr *iph; const struct iphdr *old_iph = ip_hdr(skb); @@ -1666,7 +1667,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1763,7 +1764,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * What do we do with netfilter? -- RR */ if (vif->flags & VIFF_TUNNEL) { - ip_encap(skb, vif->local, vif->remote); + ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ vif->dev->stats.tx_packets++; vif->dev->stats.tx_bytes += skb->len; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e43a585abb35..04700c745ca0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -399,7 +399,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 660848116761..858596c80e0e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -484,19 +484,19 @@ u32 ip_idents_reserve(u32 hash, int segs) } EXPORT_SYMBOL(ip_idents_reserve); -void __ip_select_ident(struct iphdr *iph, int segs) +void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { - static u32 ip_idents_hashrnd __read_mostly; - static u32 ip_idents_hashrnd_extra __read_mostly; u32 hash, id; - net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); - net_get_random_once(&ip_idents_hashrnd_extra, sizeof(ip_idents_hashrnd_extra)); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); - hash = jhash_3words((__force u32)iph->daddr, + hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol ^ ip_idents_hashrnd_extra, - ip_idents_hashrnd); + iph->protocol, + &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 91771a7c802f..35feda676464 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -63,7 +63,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; - ip_select_ident(skb, NULL); + ip_select_ident(dev_net(dst->dev), skb, NULL); return 0; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3a4c1f93023a..1b99e1406186 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -103,7 +103,8 @@ static void _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { const struct iphdr *iph = ip_hdr(skb); - u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + int ihl = iph->ihl; + u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; @@ -114,6 +115,11 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_mark = skb->mark; fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: @@ -123,7 +129,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports = (__be16 *)xprth; + __be16 *ports; + + xprth = skb_network_header(skb) + ihl * 4; + ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; fl4->fl4_dport = ports[!reverse]; @@ -131,8 +140,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ICMP: - if (pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp = xprth; + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { + u8 *icmp; + + xprth = skb_network_header(skb) + ihl * 4; + icmp = xprth; fl4->fl4_icmp_type = icmp[0]; fl4->fl4_icmp_code = icmp[1]; @@ -140,33 +153,50 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ESP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr = (__be32 *)xprth; + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be32 *ehdr; + + xprth = skb_network_header(skb) + ihl * 4; + ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; } break; case IPPROTO_AH: - if (pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr = (__be32 *)xprth; + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + ihl * 4; + ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; } break; case IPPROTO_COMP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr = (__be16 *)xprth; + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + ihl * 4; + ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; case IPPROTO_GRE: - if (pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags = (__be16 *)xprth; - __be32 *gre_hdr = (__be32 *)xprth; + if (xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) @@ -181,10 +211,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; } } - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; } static inline int xfrm4_garbage_collect(struct dst_ops *ops) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f40ba684d69b..d8e8008aa7a4 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,16 +94,22 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + release_net(fl->fl_net); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - release_net(fl->fl_net); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) @@ -630,9 +636,9 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bb98cde51476..1e68db035daf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -538,23 +538,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static u32 ip6_idents_hashrnd __read_mostly; - static u32 ip6_idents_hashrnd_extra __read_mostly; - u32 hash, id; - - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - net_get_random_once(&ip6_idents_hashrnd_extra, sizeof(ip6_idents_hashrnd_extra)); - - hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); - hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); - hash = jhash_1word(hash, ip6_idents_hashrnd_extra); - - id = ip_idents_reserve(hash, 1); - fhdr->identification = htonl(id); -} - int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -649,7 +632,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); @@ -802,7 +785,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { - ipv6_select_ident(fh, rt); + ipv6_select_ident(net, fh, rt); frag_id = fh->identification; } else fh->identification = frag_id; @@ -1096,7 +1079,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); + ipv6_select_ident(sock_net(sk), &fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; append: diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 42978998534c..5733b05558a5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1662,6 +1662,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (mrt == NULL) return -ENOENT; @@ -1673,9 +1677,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns switch (optname) { case MRT6_INIT: - if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; if (optlen < sizeof(int)) return -EINVAL; @@ -1812,6 +1813,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (mrt == NULL) return -ENOENT; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index e32c1ff35f78..a8274abad231 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,6 +9,36 @@ #include #include +static u32 __ipv6_select_ident(struct net *net, + struct in6_addr *dst, struct in6_addr *src) +{ + const struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + .src = *src, + }; + u32 hash, id; + + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); + + hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); + + /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, + * set the hight order instead thus minimizing possible future + * collisions. + */ + id = ip_idents_reserve(hash, 1); + if (unlikely(!id)) + id = 1 << 31; + + return id; +} + /* This function exists only for tap drivers that must support broken * clients requesting UFO without specifying an IPv6 fragment ID. * @@ -17,12 +47,11 @@ * * The network header must be set before calling this. */ -void ipv6_proxy_select_ident(struct sk_buff *skb) +void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { - static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; - u32 hash, id; + u32 id; addrs = skb_header_pointer(skb, skb_network_offset(skb) + @@ -31,17 +60,21 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) if (!addrs) return; - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); - hash = __ipv6_addr_jhash(&addrs[0], hash); - - id = ip_idents_reserve(hash, 1); + id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); +void ipv6_select_ident(struct net *net, struct frag_hdr *fhdr, + struct rt6_info *rt) +{ + u32 id; + + id = __ipv6_select_ident(net, &rt->rt6i_dst.addr, &rt->rt6i_src.addr); + fhdr->identification = htonl(id); +} +EXPORT_SYMBOL(ipv6_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { unsigned int offset = sizeof(struct ipv6hdr); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 70a0f92cbbc0..4370371e0aa6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1183,11 +1183,11 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; + newnp->rcv_flowinfo = 0; if (np->repflow) - newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); + newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index fa9ea1a72f99..28d02353743f 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -75,6 +75,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + /* Set the IPv6 fragment id if not set yet */ + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(dev_net(skb->dev), skb); + segs = NULL; goto out; } @@ -120,6 +124,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(dev_net(skb->dev), skb); fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 1c66465a42dd..f1491b0004cd 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -390,6 +390,10 @@ static void __exit xfrm6_tunnel_fini(void) xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + /* Someone maybe has gotten the xfrm6_tunnel_spi. + * So need to wait it. + */ + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 661252aeb7e0..c8b5c8c355b4 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -217,8 +217,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - l2tp_tunnel_inc_refcount(tunnel); + if (tunnel->tunnel_id == tunnel_id && + atomic_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; @@ -238,8 +238,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - l2tp_tunnel_inc_refcount(tunnel); + if (++count > nth && + atomic_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } @@ -997,7 +997,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_tunnel(sk); + tunnel = rcu_dereference_sk_user_data(sk); if (tunnel == NULL) goto pass_up; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 1ab5e1a51c98..607d8fdd5e68 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -735,7 +735,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) dir = sdata->vif.debugfs_dir; - if (!dir) + if (IS_ERR_OR_NULL(dir)) return; sprintf(buf, "netdev:%s", sdata->name); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 7abd786d9d89..44fa945ed6b4 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -813,7 +813,8 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct net *net = skb_net(skb); + struct netns_ipvs *ipvs = net_ipvs(net); struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -882,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; - ip_select_ident(skb, NULL); + ip_select_ident(net, skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cdd47388723b..170681b21d25 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,7 @@ #include #include #include +#include #define NF_CONNTRACK_VERSION "0.5.0" @@ -232,6 +234,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index faa736bae869..b9bf8b0f4ec1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -435,7 +436,9 @@ ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct) static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1047,8 +1050,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = nf_ct_tuplehash_to_ctrack(h); if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2321,6 +2325,25 @@ ctnetlink_exp_dump_mask(struct sk_buff *skb, static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2368,7 +2391,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -2664,7 +2687,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20a844788443..0777d98d3dec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2278,8 +2278,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen; @@ -2289,7 +2289,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2299,10 +2298,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -2435,7 +2437,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; @@ -2453,7 +2455,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2461,10 +2462,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -3027,19 +3031,28 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 344456206b70..d5199b25b821 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -16,15 +16,19 @@ #include static struct sk_buff_head loopback_queue; +#define ROSE_LOOPBACK_LIMIT 1000 static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); +static void rose_loopback_timer(unsigned long); void rose_loopback_init(void) { skb_queue_head_init(&loopback_queue); init_timer(&loopback_timer); + loopback_timer.data = 0; + loopback_timer.function = &rose_loopback_timer; } static int rose_loopback_running(void) @@ -34,33 +38,27 @@ static int rose_loopback_running(void) int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) { - struct sk_buff *skbn; - - skbn = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *skbn = NULL; - kfree_skb(skb); + if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT) + skbn = skb_clone(skb, GFP_ATOMIC); - if (skbn != NULL) { + if (skbn) { + consume_skb(skb); skb_queue_tail(&loopback_queue, skbn); if (!rose_loopback_running()) rose_set_loopback_timer(); + } else { + kfree_skb(skb); } return 1; } -static void rose_loopback_timer(unsigned long); - static void rose_set_loopback_timer(void) { - del_timer(&loopback_timer); - - loopback_timer.data = 0; - loopback_timer.function = &rose_loopback_timer; - loopback_timer.expires = jiffies + 10; - - add_timer(&loopback_timer); + mod_timer(&loopback_timer, jiffies + 10); } static void rose_loopback_timer(unsigned long param) @@ -71,8 +69,12 @@ static void rose_loopback_timer(unsigned long param) struct sock *sk; unsigned short frametype; unsigned int lci_i, lci_o; + int count; - while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) { + skb = skb_dequeue(&loopback_queue); + if (!skb) + return; if (skb->len < ROSE_MIN_LEN) { kfree_skb(skb); continue; @@ -109,6 +111,8 @@ static void rose_loopback_timer(unsigned long param) kfree_skb(skb); } } + if (!skb_queue_empty(&loopback_queue)) + mod_timer(&loopback_timer, jiffies + 1); } void __exit rose_loopback_clear(void) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index cdfa7f4c0a59..4a5598a42cd6 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -50,6 +50,7 @@ static void cache_init(struct cache_head *h) h->last_refresh = now; } +static inline int cache_is_valid(struct cache_head *h); static void cache_fresh_locked(struct cache_head *head, time_t expiry); static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); @@ -98,6 +99,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, *hp = tmp->next; tmp->next = NULL; detail->entries --; + if (cache_is_valid(tmp) == -EAGAIN) + set_bit(CACHE_NEGATIVE, &tmp->flags); cache_fresh_locked(tmp, 0); freeme = tmp; break; diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index f3fef93325a8..a4125b069833 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -37,6 +37,7 @@ #include +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -45,7 +46,8 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, {} }; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 62fbbdc58566..b0f84a040f32 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1255,7 +1255,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) ret = verify_policy_dir(p->dir); if (ret) return ret; - if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) return -EINVAL; return 0; diff --git a/security/device_cgroup.c b/security/device_cgroup.c index d9d69e6930ed..8fee5f7be4f1 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -568,7 +568,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root, devcg->behavior == DEVCG_DEFAULT_ALLOW) { rc = dev_exception_add(devcg, ex); if (rc) - break; + return rc; } else { /* * in the other possible cases: diff --git a/sound/core/init.c b/sound/core/init.c index 7bdfd19e24a8..e0d04b91d561 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -389,14 +389,7 @@ int snd_card_disconnect(struct snd_card *card) card->shutdown = 1; spin_unlock(&card->files_lock); - /* phase 1: disable fops (user space) operations for ALSA API */ - mutex_lock(&snd_card_mutex); - snd_cards[card->number] = NULL; - clear_bit(card->number, snd_cards_lock); - mutex_unlock(&snd_card_mutex); - - /* phase 2: replace file->f_op with special dummy operations */ - + /* replace file->f_op with special dummy operations */ spin_lock(&card->files_lock); list_for_each_entry(mfile, &card->files_list, list) { /* it's critical part, use endless loop */ @@ -412,7 +405,7 @@ int snd_card_disconnect(struct snd_card *card) } spin_unlock(&card->files_lock); - /* phase 3: notify all connected devices about disconnection */ + /* notify all connected devices about disconnection */ /* at this point, they cannot respond to any calls except release() */ #if IS_ENABLED(CONFIG_SND_MIXER_OSS) @@ -430,6 +423,13 @@ int snd_card_disconnect(struct snd_card *card) device_del(&card->card_dev); card->registered = false; } + + /* disable fops (user space) operations for ALSA API */ + mutex_lock(&snd_card_mutex); + snd_cards[card->number] = NULL; + clear_bit(card->number, snd_cards_lock); + mutex_unlock(&snd_card_mutex); + #ifdef CONFIG_PM wake_up(&card->power_sleep); #endif diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 41bae2baea71..f853b62befa1 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -951,6 +951,28 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) oss_frame_size = snd_pcm_format_physical_width(params_format(params)) * params_channels(params) / 8; + err = snd_pcm_oss_period_size(substream, params, sparams); + if (err < 0) + goto failure; + + n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); + if (err < 0) + goto failure; + + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, + runtime->oss.periods, NULL); + if (err < 0) + goto failure; + + snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); + + err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams); + if (err < 0) { + pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); + goto failure; + } + #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); if (!direct) { @@ -985,27 +1007,6 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) } #endif - err = snd_pcm_oss_period_size(substream, params, sparams); - if (err < 0) - goto failure; - - n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); - if (err < 0) - goto failure; - - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, - runtime->oss.periods, NULL); - if (err < 0) - goto failure; - - snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); - - if ((err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams)) < 0) { - pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); - goto failure; - } - memset(sw_params, 0, sizeof(*sw_params)); if (runtime->oss.trigger) { sw_params->start_threshold = 1; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 7c407ab2d82a..a0ee065a9f8c 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1063,8 +1063,15 @@ static int snd_pcm_pause(struct snd_pcm_substream *substream, int push) static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, int state) { struct snd_pcm_runtime *runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_SUSPENDED) + switch (runtime->status->state) { + case SNDRV_PCM_STATE_SUSPENDED: return -EBUSY; + /* unresumable PCM state; return -EBUSY for skipping suspend */ + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_DISCONNECTED: + return -EBUSY; + } runtime->trigger_master = substream; return 0; } diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 2bd5b8524181..09e05c0b59a1 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -601,6 +602,7 @@ static int __snd_rawmidi_info_select(struct snd_card *card, return -ENXIO; if (info->stream < 0 || info->stream > 1) return -EINVAL; + info->stream = array_index_nospec(info->stream, 2); pstr = &rmidi->streams[info->stream]; if (pstr->substream_count == 0) return -ENOENT; diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 8bf5335d953b..6ab2de7695d8 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -617,13 +617,14 @@ int snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_info *inf) { struct seq_oss_synth *rec; + struct seq_oss_synthinfo *info = get_synthinfo_nospec(dp, dev); - if (dev < 0 || dev >= dp->max_synthdev) + if (!info) return -ENXIO; - if (dp->synths[dev].is_midi) { + if (info->is_midi) { struct midi_info minf; - snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf); + snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf); inf->synth_type = SYNTH_TYPE_MIDI; inf->synth_subtype = 0; inf->nr_voices = 16; diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 60fb2c708d75..f6396e012a0f 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1249,7 +1249,7 @@ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, /* fill the info fields */ if (client_info.name[0]) - strlcpy(client->name, client_info.name, sizeof(client->name)); + strscpy(client->name, client_info.name, sizeof(client->name)); client->filter = client_info.filter; client->event_lost = client_info.event_lost; @@ -1564,7 +1564,7 @@ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, /* set queue name */ if (! info.name[0]) snprintf(info.name, sizeof(info.name), "Queue-%d", q->queue); - strlcpy(q->name, info.name, sizeof(q->name)); + strscpy(q->name, info.name, sizeof(q->name)); queuefree(q); if (copy_to_user(arg, &info, sizeof(info))) @@ -1642,7 +1642,7 @@ static int snd_seq_ioctl_set_queue_info(struct snd_seq_client *client, queuefree(q); return -EPERM; } - strlcpy(q->name, info.name, sizeof(q->name)); + strscpy(q->name, info.name, sizeof(q->name)); queuefree(q); return 0; diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 95b2df232d71..383aaaebad6c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2065,7 +2065,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff; if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val; if (strcmp(type, "s8") == 0) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index c81bd9a31db7..fd3a453a21cf 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -77,5 +77,6 @@ int test__perf_evsel__tp_sched_test(void) if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; + perf_evsel__delete(evsel); return ret; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ad0c85a63c80..cc0b4c5f5c72 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2240,6 +2240,9 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, { struct kvm_device *dev = filp->private_data; + if (dev->kvm->mm != current->mm) + return -EIO; + switch (ioctl) { case KVM_SET_DEVICE_ATTR: return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);