| author | Mike Pagano <mpagano@gentoo.org> | 2022-09-05 08:03:09 -0400 |
|---|---|---|
| committer | Mike Pagano <mpagano@gentoo.org> | 2022-09-05 08:03:09 -0400 |
| commit | 3ec134904b0cebc13533ac81b0841332b1dd2af1 (patch) | |
| tree | e628246bc60bfa1fc366d6bf8cde5e0d12dd6d44 | |
| parent | Linux patch 5.15.64 (diff) | |
Linux patch 5.15.65
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 |
-rw-r--r-- | 1064_linux-5.15.65.patch | 4302 |
2 files changed, 4306 insertions, 0 deletions
diff --git a/0000_README b/0000_README index 4b74f06a..079ad2a6 100644 --- a/0000_README +++ b/0000_README @@ -299,6 +299,10 @@ Patch: 1063_linux-5.15.64.patch From: http://www.kernel.org Desc: Linux 5.15.64 +Patch: 1064_linux-5.15.65.patch +From: http://www.kernel.org +Desc: Linux 5.15.65 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1064_linux-5.15.65.patch b/1064_linux-5.15.65.patch new file mode 100644 index 00000000..c209655f --- /dev/null +++ b/1064_linux-5.15.65.patch @@ -0,0 +1,4302 @@ +diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst +index 7c1750bcc5bd8..46644736e5835 100644 +--- a/Documentation/arm64/silicon-errata.rst ++++ b/Documentation/arm64/silicon-errata.rst +@@ -92,6 +92,8 @@ stable kernels. + +----------------+-----------------+-----------------+-----------------------------+ + | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | + +----------------+-----------------+-----------------+-----------------------------+ ++| ARM | Cortex-A510 | #2441009 | ARM64_ERRATUM_2441009 | +++----------------+-----------------+-----------------+-----------------------------+ + | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | + +----------------+-----------------+-----------------+-----------------------------+ + | ARM | Neoverse-N1 | #1349291 | N/A | +diff --git a/Makefile b/Makefile +index b2b65f7c168c5..9142dbf41f0d8 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 15 +-SUBLEVEL = 64 ++SUBLEVEL = 65 + EXTRAVERSION = + NAME = Trick or Treat + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 69e7e293f72e4..9d80c783142f9 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -666,6 +666,23 @@ config ARM64_ERRATUM_1508412 + + If unsure, say Y. + ++config ARM64_ERRATUM_2441009 ++ bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" ++ default y ++ select ARM64_WORKAROUND_REPEAT_TLBI ++ help ++ This option adds a workaround for ARM Cortex-A510 erratum #2441009. ++ ++ Under very rare circumstances, affected Cortex-A510 CPUs ++ may not handle a race between a break-before-make sequence on one ++ CPU, and another CPU accessing the same page. This could allow a ++ store to a page that has been unmapped. ++ ++ Work around this by adding the affected CPUs to the list that needs ++ TLB sequences to be done twice. ++ ++ If unsure, say Y. ++ + config CAVIUM_ERRATUM_22375 + bool "Cavium erratum 22375, 24313" + default y +diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c +index 292a3091b5dee..23c57e0a7fd14 100644 +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -213,6 +213,12 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { + /* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */ + ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe), + }, ++#endif ++#ifdef CONFIG_ARM64_ERRATUM_2441009 ++ { ++ /* Cortex-A510 r0p0 -> r1p1. 
Fixed in r1p2 */ ++ ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1), ++ }, + #endif + {}, + }; +@@ -429,7 +435,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { + #endif + #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI + { +- .desc = "Qualcomm erratum 1009, or ARM erratum 1286807", ++ .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009", + .capability = ARM64_WORKAROUND_REPEAT_TLBI, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = cpucap_multi_entry_cap_matches, +diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c +index f0bc4dc3e9bf0..6511d15ace45e 100644 +--- a/arch/s390/hypfs/hypfs_diag.c ++++ b/arch/s390/hypfs/hypfs_diag.c +@@ -437,7 +437,7 @@ __init int hypfs_diag_init(void) + int rc; + + if (diag204_probe()) { +- pr_err("The hardware system does not support hypfs\n"); ++ pr_info("The hardware system does not support hypfs\n"); + return -ENODATA; + } + +diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c +index 5c97f48cea91d..ee919bfc81867 100644 +--- a/arch/s390/hypfs/inode.c ++++ b/arch/s390/hypfs/inode.c +@@ -496,9 +496,9 @@ fail_hypfs_sprp_exit: + hypfs_vm_exit(); + fail_hypfs_diag_exit: + hypfs_diag_exit(); ++ pr_err("Initialization of hypfs failed with rc=%i\n", rc); + fail_dbfs_exit: + hypfs_dbfs_exit(); +- pr_err("Initialization of hypfs failed with rc=%i\n", rc); + return rc; + } + device_initcall(hypfs_init) +diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c +index 95105db642b98..155bbabcc6f5d 100644 +--- a/drivers/acpi/thermal.c ++++ b/drivers/acpi/thermal.c +@@ -1098,8 +1098,6 @@ static int acpi_thermal_resume(struct device *dev) + return -EINVAL; + + for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) { +- if (!(&tz->trips.active[i])) +- break; + if (!tz->trips.active[i].flags.valid) + break; + tz->trips.active[i].flags.enabled = 1; +diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c +index bd827533e7e83..f2d9587833d4b 100644 +--- a/drivers/android/binder_alloc.c ++++ b/drivers/android/binder_alloc.c +@@ -315,12 +315,19 @@ static inline void binder_alloc_set_vma(struct binder_alloc *alloc, + { + unsigned long vm_start = 0; + ++ /* ++ * Allow clearing the vma with holding just the read lock to allow ++ * munmapping downgrade of the write lock before freeing and closing the ++ * file using binder_alloc_vma_close(). 
++ */ + if (vma) { + vm_start = vma->vm_start; + alloc->vma_vm_mm = vma->vm_mm; ++ mmap_assert_write_locked(alloc->vma_vm_mm); ++ } else { ++ mmap_assert_locked(alloc->vma_vm_mm); + } + +- mmap_assert_write_locked(alloc->vma_vm_mm); + alloc->vma_addr = vm_start; + } + +diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c +index 9631f2fd2faf7..38e8767ec3715 100644 +--- a/drivers/dma-buf/udmabuf.c ++++ b/drivers/dma-buf/udmabuf.c +@@ -368,7 +368,23 @@ static struct miscdevice udmabuf_misc = { + + static int __init udmabuf_dev_init(void) + { +- return misc_register(&udmabuf_misc); ++ int ret; ++ ++ ret = misc_register(&udmabuf_misc); ++ if (ret < 0) { ++ pr_err("Could not initialize udmabuf device\n"); ++ return ret; ++ } ++ ++ ret = dma_coerce_mask_and_coherent(udmabuf_misc.this_device, ++ DMA_BIT_MASK(64)); ++ if (ret < 0) { ++ pr_err("Could not setup DMA mask for udmabuf device\n"); ++ misc_deregister(&udmabuf_misc); ++ return ret; ++ } ++ ++ return 0; + } + + static void __exit udmabuf_dev_exit(void) +diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c +index 5654c5e9862b1..037db21de510c 100644 +--- a/drivers/firmware/tegra/bpmp.c ++++ b/drivers/firmware/tegra/bpmp.c +@@ -201,7 +201,7 @@ static ssize_t __tegra_bpmp_channel_read(struct tegra_bpmp_channel *channel, + int err; + + if (data && size > 0) +- memcpy(data, channel->ib->data, size); ++ memcpy_fromio(data, channel->ib->data, size); + + err = tegra_bpmp_ack_response(channel); + if (err < 0) +@@ -245,7 +245,7 @@ static ssize_t __tegra_bpmp_channel_write(struct tegra_bpmp_channel *channel, + channel->ob->flags = flags; + + if (data && size > 0) +- memcpy(channel->ob->data, data, size); ++ memcpy_toio(channel->ob->data, data, size); + + return tegra_bpmp_post_request(channel); + } +@@ -420,7 +420,7 @@ void tegra_bpmp_mrq_return(struct tegra_bpmp_channel *channel, int code, + channel->ob->code = code; + + if (data && size > 0) +- memcpy(channel->ob->data, data, size); ++ memcpy_toio(channel->ob->data, data, size); + + err = tegra_bpmp_post_response(channel); + if (WARN_ON(err < 0)) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 5f95d03fd46a0..4f62f422bcb78 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -312,7 +312,7 @@ enum amdgpu_kiq_irq { + AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, + AMDGPU_CP_KIQ_IRQ_LAST + }; +- ++#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ + #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ + #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ + #define MAX_KIQ_REG_TRY 1000 +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +index 93a4da4284ede..9c07ec8b97327 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +@@ -414,6 +414,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint32_t seq; + uint16_t queried_pasid; + bool ret; ++ u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + +@@ -432,7 +433,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); +- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); ++ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); + if (r < 1) { + dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); + return -ETIME; +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +index 0e731016921be..70d24b522df8d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +@@ -863,6 +863,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint32_t seq; + uint16_t queried_pasid; + bool ret; ++ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + +@@ -902,7 +903,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); +- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); ++ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); + if (r < 1) { + dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); + up_read(&adev->reset_sem); +diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +index 054823d12403d..5f1b735da5063 100644 +--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c ++++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +@@ -545,9 +545,11 @@ static void dce112_get_pix_clk_dividers_helper ( + switch (pix_clk_params->color_depth) { + case COLOR_DEPTH_101010: + actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2; ++ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; + break; + case COLOR_DEPTH_121212: + actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2; ++ actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; + break; + case COLOR_DEPTH_161616: + actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2; +diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +index 11019c2c62ccb..8192f1967e924 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +@@ -126,6 +126,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) + while (tmp_mpcc != NULL) { + if (tmp_mpcc->dpp_id == dpp_id) + return tmp_mpcc; ++ ++ /* avoid circular linked list */ ++ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); ++ if (tmp_mpcc == tmp_mpcc->mpcc_bot) ++ break; ++ + tmp_mpcc = tmp_mpcc->mpcc_bot; + } + return NULL; +diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +index 37848f4577b18..92fee47278e5a 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +@@ -480,6 +480,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable) + OTG_CLOCK_ON, 1, + 1, 1000); + } else { ++ ++ //last chance to clear underflow, otherwise, it will always there due to clock is off. 
++ if (optc->funcs->is_optc_underflow_occurred(optc) == true) ++ optc->funcs->clear_optc_underflow(optc); ++ + REG_UPDATE_2(OTG_CLOCK_CONTROL, + OTG_CLOCK_GATE_DIS, 0, + OTG_CLOCK_EN, 0); +diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +index 947eb0df3f125..142fc0a3a536c 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +@@ -532,6 +532,12 @@ struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) + while (tmp_mpcc != NULL) { + if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id) + return tmp_mpcc; ++ ++ /* avoid circular linked list */ ++ ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); ++ if (tmp_mpcc == tmp_mpcc->mpcc_bot) ++ break; ++ + tmp_mpcc = tmp_mpcc->mpcc_bot; + } + return NULL; +diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +index 36044cb8ec834..1c0f56d8ba8bb 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +@@ -67,9 +67,15 @@ static uint32_t convert_and_clamp( + void dcn21_dchvm_init(struct hubbub *hubbub) + { + struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); +- uint32_t riommu_active; ++ uint32_t riommu_active, prefetch_done; + int i; + ++ REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done); ++ ++ if (prefetch_done) { ++ hubbub->riommu_active = true; ++ return; ++ } + //Init DCHVM block + REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1); + +diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +index f246125232482..33c2337c4edf3 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +@@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr( + VMID, address->vmid); + + if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { +- REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); ++ REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0); + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); + + } else { +diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +index b99aa232bd8b1..4bee6d018bfa9 100644 +--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c ++++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +@@ -567,10 +567,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr, + * Note: We should never go above the field rate of the mode timing set. + */ + infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000); +- +- /* FreeSync HDR */ +- infopacket->sb[9] = 0; +- infopacket->sb[10] = 0; + } + + static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, +@@ -638,10 +634,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, + + /* PB16 : Reserved bits 7:1, FixedRate bit 0 */ + infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 
1 : 0; +- +- //FreeSync HDR +- infopacket->sb[9] = 0; +- infopacket->sb[10] = 0; + } + + static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, +@@ -726,8 +718,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal, + /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */ + infopacket->hb2 = 0x09; + +- *payload_size = 0x0A; +- ++ *payload_size = 0x09; + } else if (dc_is_dp_signal(signal)) { + + /* HEADER */ +@@ -776,9 +767,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal, + infopacket->hb1 = version; + + /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */ +- *payload_size = 0x10; +- infopacket->hb2 = *payload_size - 1; //-1 for checksum ++ infopacket->hb2 = 0x10; + ++ *payload_size = 0x10; + } else if (dc_is_dp_signal(signal)) { + + /* HEADER */ +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +index 918d5c7c2328b..79976921dc46f 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +@@ -3915,6 +3915,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { + .dump_pptable = sienna_cichlid_dump_pptable, + .init_microcode = smu_v11_0_init_microcode, + .load_microcode = smu_v11_0_load_microcode, ++ .fini_microcode = smu_v11_0_fini_microcode, + .init_smc_tables = sienna_cichlid_init_smc_tables, + .fini_smc_tables = smu_v11_0_fini_smc_tables, + .init_power = smu_v11_0_init_power, +diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c +index 3a76000d15bfd..ed8ad3b263959 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt.c +@@ -949,6 +949,9 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + ++ if (intel_gt_is_wedged(gt)) ++ return; ++ + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); +diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig +index 345a5570a3da7..e2c147d4015ef 100644 +--- a/drivers/gpu/drm/vc4/Kconfig ++++ b/drivers/gpu/drm/vc4/Kconfig +@@ -5,6 +5,7 @@ config DRM_VC4 + depends on DRM + depends on SND && SND_SOC + depends on COMMON_CLK ++ depends on PM + select DRM_KMS_HELPER + select DRM_KMS_CMA_HELPER + select DRM_GEM_CMA_HELPER +diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c +index 10cf623d28303..9b3e642a08e1a 100644 +--- a/drivers/gpu/drm/vc4/vc4_hdmi.c ++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c +@@ -2122,7 +2122,7 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) + return 0; + } + +-static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev) ++static int vc4_hdmi_runtime_suspend(struct device *dev) + { + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + +@@ -2219,17 +2219,15 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) + if (ret) + goto err_put_ddc; + ++ pm_runtime_enable(dev); ++ + /* +- * We need to have the device powered up at this point to call +- * our reset hook and for the CEC init. ++ * We need to have the device powered up at this point to call ++ * our reset hook and for the CEC init. 
+ */ +- ret = vc4_hdmi_runtime_resume(dev); ++ ret = pm_runtime_resume_and_get(dev); + if (ret) +- goto err_put_ddc; +- +- pm_runtime_get_noresume(dev); +- pm_runtime_set_active(dev); +- pm_runtime_enable(dev); ++ goto err_disable_runtime_pm; + + if (vc4_hdmi->variant->reset) + vc4_hdmi->variant->reset(vc4_hdmi); +@@ -2278,6 +2276,7 @@ err_destroy_conn: + err_destroy_encoder: + drm_encoder_cleanup(encoder); + pm_runtime_put_sync(dev); ++err_disable_runtime_pm: + pm_runtime_disable(dev); + err_put_ddc: + put_device(&vc4_hdmi->ddc->dev); +diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +index 13a4db42cd7a7..f17f061aeb792 100644 +--- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c ++++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +@@ -281,11 +281,29 @@ static int amd_sfh_irq_init(struct amd_mp2_dev *privdata) + return 0; + } + ++static const struct dmi_system_id dmi_nodevs[] = { ++ { ++ /* ++ * Google Chromebooks use Chrome OS Embedded Controller Sensor ++ * Hub instead of Sensor Hub Fusion and leaves MP2 ++ * uninitialized, which disables all functionalities, even ++ * including the registers necessary for feature detections. ++ */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Google"), ++ }, ++ }, ++ { } ++}; ++ + static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) + { + struct amd_mp2_dev *privdata; + int rc; + ++ if (dmi_first_match(dmi_nodevs)) ++ return -ENODEV; ++ + privdata = devm_kzalloc(&pdev->dev, sizeof(*privdata), GFP_KERNEL); + if (!privdata) + return -ENOMEM; +diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c +index 08c9a9a60ae47..b59c3dafa6a48 100644 +--- a/drivers/hid/hid-asus.c ++++ b/drivers/hid/hid-asus.c +@@ -1212,6 +1212,13 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, + rdesc = new_rdesc; + } + ++ if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD && ++ *rsize == 331 && rdesc[190] == 0x85 && rdesc[191] == 0x5a && ++ rdesc[204] == 0x95 && rdesc[205] == 0x05) { ++ hid_info(hdev, "Fixing up Asus N-KEY keyb report descriptor\n"); ++ rdesc[205] = 0x01; ++ } ++ + return rdesc; + } + +diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h +index ceaa36fc429ef..cb2b48d6915ee 100644 +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -399,6 +399,7 @@ + #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 + #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A + #define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C ++#define I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN 0x279F + + #define USB_VENDOR_ID_ELECOM 0x056e + #define USB_DEVICE_ID_ELECOM_BM084 0x0061 +diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c +index 125043a28a35c..f197aed6444a5 100644 +--- a/drivers/hid/hid-input.c ++++ b/drivers/hid/hid-input.c +@@ -335,6 +335,8 @@ static const struct hid_device_id hid_battery_quirks[] = { + HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, ++ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_LENOVO_YOGA_C630_TOUCHSCREEN), ++ HID_BATTERY_QUIRK_IGNORE }, + {} + }; + +diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c +index a3b151b29bd71..fc616db4231bb 100644 +--- a/drivers/hid/hid-steam.c ++++ b/drivers/hid/hid-steam.c +@@ -134,6 +134,11 @@ static int steam_recv_report(struct steam_device *steam, + int ret; + + r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; ++ if (!r) { ++ hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to 
read\n"); ++ return -EINVAL; ++ } ++ + if (hid_report_len(r) < 64) + return -EINVAL; + +@@ -165,6 +170,11 @@ static int steam_send_report(struct steam_device *steam, + int ret; + + r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; ++ if (!r) { ++ hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n"); ++ return -EINVAL; ++ } ++ + if (hid_report_len(r) < 64) + return -EINVAL; + +diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c +index a28c3e5756506..2221bc26e611a 100644 +--- a/drivers/hid/hid-thrustmaster.c ++++ b/drivers/hid/hid-thrustmaster.c +@@ -67,12 +67,13 @@ static const struct tm_wheel_info tm_wheels_infos[] = { + {0x0200, 0x0005, "Thrustmaster T300RS (Missing Attachment)"}, + {0x0206, 0x0005, "Thrustmaster T300RS"}, + {0x0209, 0x0005, "Thrustmaster T300RS (Open Wheel Attachment)"}, ++ {0x020a, 0x0005, "Thrustmaster T300RS (Sparco R383 Mod)"}, + {0x0204, 0x0005, "Thrustmaster T300 Ferrari Alcantara Edition"}, + {0x0002, 0x0002, "Thrustmaster T500RS"} + //{0x0407, 0x0001, "Thrustmaster TMX"} + }; + +-static const uint8_t tm_wheels_infos_length = 4; ++static const uint8_t tm_wheels_infos_length = 7; + + /* + * This structs contains (in little endian) the response data +diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c +index 79faac87a06ff..11b0ed4f3f8cc 100644 +--- a/drivers/hid/hidraw.c ++++ b/drivers/hid/hidraw.c +@@ -346,10 +346,13 @@ static int hidraw_release(struct inode * inode, struct file * file) + unsigned int minor = iminor(inode); + struct hidraw_list *list = file->private_data; + unsigned long flags; ++ int i; + + mutex_lock(&minors_lock); + + spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); ++ for (i = list->tail; i < list->head; i++) ++ kfree(list->buffer[i].value); + list_del(&list->node); + spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); + kfree(list); +diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c +index 3cf334c46c312..3248b48f37f61 100644 +--- a/drivers/hv/hv_balloon.c ++++ b/drivers/hv/hv_balloon.c +@@ -17,6 +17,7 @@ + #include <linux/slab.h> + #include <linux/kthread.h> + #include <linux/completion.h> ++#include <linux/count_zeros.h> + #include <linux/memory_hotplug.h> + #include <linux/memory.h> + #include <linux/notifier.h> +@@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem_device *dm) + struct dm_status status; + unsigned long now = jiffies; + unsigned long last_post = last_post_time; ++ unsigned long num_pages_avail, num_pages_committed; + + if (pressure_report_delay > 0) { + --pressure_report_delay; +@@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem_device *dm) + * num_pages_onlined) as committed to the host, otherwise it can try + * asking us to balloon them out. + */ +- status.num_avail = si_mem_available(); +- status.num_committed = vm_memory_committed() + ++ num_pages_avail = si_mem_available(); ++ num_pages_committed = vm_memory_committed() + + dm->num_pages_ballooned + + (dm->num_pages_added > dm->num_pages_onlined ? + dm->num_pages_added - dm->num_pages_onlined : 0) + + compute_balloon_floor(); + +- trace_balloon_status(status.num_avail, status.num_committed, ++ trace_balloon_status(num_pages_avail, num_pages_committed, + vm_memory_committed(), dm->num_pages_ballooned, + dm->num_pages_added, dm->num_pages_onlined); ++ ++ /* Convert numbers of pages into numbers of HV_HYP_PAGEs. 
*/ ++ status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE; ++ status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE; ++ + /* + * If our transaction ID is no longer current, just don't + * send the status. This can happen if we were interrupted +diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +index fccd1798445d5..d22ce328a2797 100644 +--- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c ++++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +@@ -2610,6 +2610,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, + del_timer_sync(&hdw->encoder_run_timer); + del_timer_sync(&hdw->encoder_wait_timer); + flush_work(&hdw->workpoll); ++ v4l2_device_unregister(&hdw->v4l2_dev); + usb_free_urb(hdw->ctl_read_urb); + usb_free_urb(hdw->ctl_write_urb); + kfree(hdw->ctl_read_buffer); +diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c +index f9b2897569bb4..99d8881a7d6c2 100644 +--- a/drivers/mmc/host/mtk-sd.c ++++ b/drivers/mmc/host/mtk-sd.c +@@ -2345,6 +2345,9 @@ static void msdc_cqe_disable(struct mmc_host *mmc, bool recovery) + /* disable busy check */ + sdr_clr_bits(host->base + MSDC_PATCH_BIT1, MSDC_PB1_BUSY_CHECK_SEL); + ++ val = readl(host->base + MSDC_INT); ++ writel(val, host->base + MSDC_INT); ++ + if (recovery) { + sdr_set_field(host->base + MSDC_DMA_CTRL, + MSDC_DMA_CTRL_STOP, 1); +@@ -2785,11 +2788,14 @@ static int __maybe_unused msdc_suspend(struct device *dev) + { + struct mmc_host *mmc = dev_get_drvdata(dev); + int ret; ++ u32 val; + + if (mmc->caps2 & MMC_CAP2_CQE) { + ret = cqhci_suspend(mmc); + if (ret) + return ret; ++ val = readl(((struct msdc_host *)mmc_priv(mmc))->base + MSDC_INT); ++ writel(val, ((struct msdc_host *)mmc_priv(mmc))->base + MSDC_INT); + } + + return pm_runtime_force_suspend(dev); +diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c +index bac874ab0b33a..335c88fd849c4 100644 +--- a/drivers/mmc/host/sdhci-of-dwcmshc.c ++++ b/drivers/mmc/host/sdhci-of-dwcmshc.c +@@ -15,6 +15,7 @@ + #include <linux/module.h> + #include <linux/of.h> + #include <linux/of_device.h> ++#include <linux/reset.h> + #include <linux/sizes.h> + + #include "sdhci-pltfm.h" +@@ -55,14 +56,15 @@ + #define DLL_LOCK_WO_TMOUT(x) \ + ((((x) & DWCMSHC_EMMC_DLL_LOCKED) == DWCMSHC_EMMC_DLL_LOCKED) && \ + (((x) & DWCMSHC_EMMC_DLL_TIMEOUT) == 0)) +-#define RK3568_MAX_CLKS 3 ++#define RK35xx_MAX_CLKS 3 + + #define BOUNDARY_OK(addr, len) \ + ((addr | (SZ_128M - 1)) == ((addr + len - 1) | (SZ_128M - 1))) + +-struct rk3568_priv { ++struct rk35xx_priv { + /* Rockchip specified optional clocks */ +- struct clk_bulk_data rockchip_clks[RK3568_MAX_CLKS]; ++ struct clk_bulk_data rockchip_clks[RK35xx_MAX_CLKS]; ++ struct reset_control *reset; + u8 txclk_tapnum; + }; + +@@ -176,7 +178,7 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock + { + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct dwcmshc_priv *dwc_priv = sdhci_pltfm_priv(pltfm_host); +- struct rk3568_priv *priv = dwc_priv->priv; ++ struct rk35xx_priv *priv = dwc_priv->priv; + u8 txclk_tapnum = DLL_TXCLK_TAPNUM_DEFAULT; + u32 extra, reg; + int err; +@@ -255,6 +257,21 @@ static void dwcmshc_rk3568_set_clock(struct sdhci_host *host, unsigned int clock + sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN); + } + ++static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask) ++{ ++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); ++ struct dwcmshc_priv *dwc_priv = 
sdhci_pltfm_priv(pltfm_host); ++ struct rk35xx_priv *priv = dwc_priv->priv; ++ ++ if (mask & SDHCI_RESET_ALL && priv->reset) { ++ reset_control_assert(priv->reset); ++ udelay(1); ++ reset_control_deassert(priv->reset); ++ } ++ ++ sdhci_reset(host, mask); ++} ++ + static const struct sdhci_ops sdhci_dwcmshc_ops = { + .set_clock = sdhci_set_clock, + .set_bus_width = sdhci_set_bus_width, +@@ -264,12 +281,12 @@ static const struct sdhci_ops sdhci_dwcmshc_ops = { + .adma_write_desc = dwcmshc_adma_write_desc, + }; + +-static const struct sdhci_ops sdhci_dwcmshc_rk3568_ops = { ++static const struct sdhci_ops sdhci_dwcmshc_rk35xx_ops = { + .set_clock = dwcmshc_rk3568_set_clock, + .set_bus_width = sdhci_set_bus_width, + .set_uhs_signaling = dwcmshc_set_uhs_signaling, + .get_max_clock = sdhci_pltfm_clk_get_max_clock, +- .reset = sdhci_reset, ++ .reset = rk35xx_sdhci_reset, + .adma_write_desc = dwcmshc_adma_write_desc, + }; + +@@ -279,30 +296,46 @@ static const struct sdhci_pltfm_data sdhci_dwcmshc_pdata = { + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, + }; + +-static const struct sdhci_pltfm_data sdhci_dwcmshc_rk3568_pdata = { +- .ops = &sdhci_dwcmshc_rk3568_ops, ++#ifdef CONFIG_ACPI ++static const struct sdhci_pltfm_data sdhci_dwcmshc_bf3_pdata = { ++ .ops = &sdhci_dwcmshc_ops, ++ .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, ++ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | ++ SDHCI_QUIRK2_ACMD23_BROKEN, ++}; ++#endif ++ ++static const struct sdhci_pltfm_data sdhci_dwcmshc_rk35xx_pdata = { ++ .ops = &sdhci_dwcmshc_rk35xx_ops, + .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN | + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN, + }; + +-static int dwcmshc_rk3568_init(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv) ++static int dwcmshc_rk35xx_init(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv) + { + int err; +- struct rk3568_priv *priv = dwc_priv->priv; ++ struct rk35xx_priv *priv = dwc_priv->priv; ++ ++ priv->reset = devm_reset_control_array_get_optional_exclusive(mmc_dev(host->mmc)); ++ if (IS_ERR(priv->reset)) { ++ err = PTR_ERR(priv->reset); ++ dev_err(mmc_dev(host->mmc), "failed to get reset control %d\n", err); ++ return err; ++ } + + priv->rockchip_clks[0].id = "axi"; + priv->rockchip_clks[1].id = "block"; + priv->rockchip_clks[2].id = "timer"; +- err = devm_clk_bulk_get_optional(mmc_dev(host->mmc), RK3568_MAX_CLKS, ++ err = devm_clk_bulk_get_optional(mmc_dev(host->mmc), RK35xx_MAX_CLKS, + priv->rockchip_clks); + if (err) { + dev_err(mmc_dev(host->mmc), "failed to get clocks %d\n", err); + return err; + } + +- err = clk_bulk_prepare_enable(RK3568_MAX_CLKS, priv->rockchip_clks); ++ err = clk_bulk_prepare_enable(RK35xx_MAX_CLKS, priv->rockchip_clks); + if (err) { + dev_err(mmc_dev(host->mmc), "failed to enable clocks %d\n", err); + return err; +@@ -324,7 +357,7 @@ static int dwcmshc_rk3568_init(struct sdhci_host *host, struct dwcmshc_priv *dwc + static const struct of_device_id sdhci_dwcmshc_dt_ids[] = { + { + .compatible = "rockchip,rk3568-dwcmshc", +- .data = &sdhci_dwcmshc_rk3568_pdata, ++ .data = &sdhci_dwcmshc_rk35xx_pdata, + }, + { + .compatible = "snps,dwcmshc-sdhci", +@@ -336,7 +369,10 @@ MODULE_DEVICE_TABLE(of, sdhci_dwcmshc_dt_ids); + + #ifdef CONFIG_ACPI + static const struct acpi_device_id sdhci_dwcmshc_acpi_ids[] = { +- { .id = "MLNXBF30" }, ++ { ++ .id = "MLNXBF30", ++ .driver_data = (kernel_ulong_t)&sdhci_dwcmshc_bf3_pdata, ++ }, + {} + }; + #endif +@@ -347,12 +383,12 @@ static int 
dwcmshc_probe(struct platform_device *pdev) + struct sdhci_pltfm_host *pltfm_host; + struct sdhci_host *host; + struct dwcmshc_priv *priv; +- struct rk3568_priv *rk_priv = NULL; ++ struct rk35xx_priv *rk_priv = NULL; + const struct sdhci_pltfm_data *pltfm_data; + int err; + u32 extra; + +- pltfm_data = of_device_get_match_data(&pdev->dev); ++ pltfm_data = device_get_match_data(&pdev->dev); + if (!pltfm_data) { + dev_err(&pdev->dev, "Error: No device match data found\n"); + return -ENODEV; +@@ -402,8 +438,8 @@ static int dwcmshc_probe(struct platform_device *pdev) + host->mmc_host_ops.request = dwcmshc_request; + host->mmc_host_ops.hs400_enhanced_strobe = dwcmshc_hs400_enhanced_strobe; + +- if (pltfm_data == &sdhci_dwcmshc_rk3568_pdata) { +- rk_priv = devm_kzalloc(&pdev->dev, sizeof(struct rk3568_priv), GFP_KERNEL); ++ if (pltfm_data == &sdhci_dwcmshc_rk35xx_pdata) { ++ rk_priv = devm_kzalloc(&pdev->dev, sizeof(struct rk35xx_priv), GFP_KERNEL); + if (!rk_priv) { + err = -ENOMEM; + goto err_clk; +@@ -411,7 +447,7 @@ static int dwcmshc_probe(struct platform_device *pdev) + + priv->priv = rk_priv; + +- err = dwcmshc_rk3568_init(host, priv); ++ err = dwcmshc_rk35xx_init(host, priv); + if (err) + goto err_clk; + } +@@ -428,7 +464,7 @@ err_clk: + clk_disable_unprepare(pltfm_host->clk); + clk_disable_unprepare(priv->bus_clk); + if (rk_priv) +- clk_bulk_disable_unprepare(RK3568_MAX_CLKS, ++ clk_bulk_disable_unprepare(RK35xx_MAX_CLKS, + rk_priv->rockchip_clks); + free_pltfm: + sdhci_pltfm_free(pdev); +@@ -440,14 +476,14 @@ static int dwcmshc_remove(struct platform_device *pdev) + struct sdhci_host *host = platform_get_drvdata(pdev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host); +- struct rk3568_priv *rk_priv = priv->priv; ++ struct rk35xx_priv *rk_priv = priv->priv; + + sdhci_remove_host(host, 0); + + clk_disable_unprepare(pltfm_host->clk); + clk_disable_unprepare(priv->bus_clk); + if (rk_priv) +- clk_bulk_disable_unprepare(RK3568_MAX_CLKS, ++ clk_bulk_disable_unprepare(RK35xx_MAX_CLKS, + rk_priv->rockchip_clks); + sdhci_pltfm_free(pdev); + +@@ -460,7 +496,7 @@ static int dwcmshc_suspend(struct device *dev) + struct sdhci_host *host = dev_get_drvdata(dev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host); +- struct rk3568_priv *rk_priv = priv->priv; ++ struct rk35xx_priv *rk_priv = priv->priv; + int ret; + + ret = sdhci_suspend_host(host); +@@ -472,7 +508,7 @@ static int dwcmshc_suspend(struct device *dev) + clk_disable_unprepare(priv->bus_clk); + + if (rk_priv) +- clk_bulk_disable_unprepare(RK3568_MAX_CLKS, ++ clk_bulk_disable_unprepare(RK35xx_MAX_CLKS, + rk_priv->rockchip_clks); + + return ret; +@@ -483,7 +519,7 @@ static int dwcmshc_resume(struct device *dev) + struct sdhci_host *host = dev_get_drvdata(dev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host); +- struct rk3568_priv *rk_priv = priv->priv; ++ struct rk35xx_priv *rk_priv = priv->priv; + int ret; + + ret = clk_prepare_enable(pltfm_host->clk); +@@ -497,7 +533,7 @@ static int dwcmshc_resume(struct device *dev) + } + + if (rk_priv) { +- ret = clk_bulk_prepare_enable(RK3568_MAX_CLKS, ++ ret = clk_bulk_prepare_enable(RK35xx_MAX_CLKS, + rk_priv->rockchip_clks); + if (ret) + return ret; +diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c +index 1ac7fec47d6fb..604feeb84ee40 100644 +--- 
a/drivers/pci/pcie/portdrv_core.c ++++ b/drivers/pci/pcie/portdrv_core.c +@@ -222,8 +222,15 @@ static int get_port_device_capability(struct pci_dev *dev) + + #ifdef CONFIG_PCIEAER + if (dev->aer_cap && pci_aer_available() && +- (pcie_ports_native || host->native_aer)) ++ (pcie_ports_native || host->native_aer)) { + services |= PCIE_PORT_SERVICE_AER; ++ ++ /* ++ * Disable AER on this port in case it's been enabled by the ++ * BIOS (the AER service driver will enable it when necessary). ++ */ ++ pci_disable_pcie_error_reporting(dev); ++ } + #endif + + /* Root Ports and Root Complex Event Collectors may generate PMEs */ +diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c +index c68725eebee3b..cbcf112c88d30 100644 +--- a/drivers/video/fbdev/pm2fb.c ++++ b/drivers/video/fbdev/pm2fb.c +@@ -617,6 +617,11 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) + return -EINVAL; + } + ++ if (!var->pixclock) { ++ DPRINTK("pixclock is zero\n"); ++ return -EINVAL; ++ } ++ + if (PICOS2KHZ(var->pixclock) > PM2_MAX_PIXCLOCK) { + DPRINTK("pixclock too high (%ldKHz)\n", + PICOS2KHZ(var->pixclock)); +diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c +index 909cc00ef5ce3..474dcc0540a84 100644 +--- a/fs/btrfs/block-group.c ++++ b/fs/btrfs/block-group.c +@@ -418,39 +418,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, + btrfs_put_caching_control(caching_ctl); + } + +-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) ++static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache, ++ struct btrfs_caching_control *caching_ctl) ++{ ++ wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); ++ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0; ++} ++ ++static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) + { + struct btrfs_caching_control *caching_ctl; +- int ret = 0; ++ int ret; + + caching_ctl = btrfs_get_caching_control(cache); + if (!caching_ctl) + return (cache->cached == BTRFS_CACHE_ERROR) ? 
-EIO : 0; +- +- wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); +- if (cache->cached == BTRFS_CACHE_ERROR) +- ret = -EIO; ++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); + btrfs_put_caching_control(caching_ctl); + return ret; + } + +-static bool space_cache_v1_done(struct btrfs_block_group *cache) +-{ +- bool ret; +- +- spin_lock(&cache->lock); +- ret = cache->cached != BTRFS_CACHE_FAST; +- spin_unlock(&cache->lock); +- +- return ret; +-} +- +-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache, +- struct btrfs_caching_control *caching_ctl) +-{ +- wait_event(caching_ctl->wait, space_cache_v1_done(cache)); +-} +- + #ifdef CONFIG_BTRFS_DEBUG + static void fragment_free_space(struct btrfs_block_group *block_group) + { +@@ -727,9 +714,8 @@ done: + btrfs_put_block_group(block_group); + } + +-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only) ++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) + { +- DEFINE_WAIT(wait); + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_caching_control *caching_ctl = NULL; + int ret = 0; +@@ -762,10 +748,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only + } + WARN_ON(cache->caching_ctl); + cache->caching_ctl = caching_ctl; +- if (btrfs_test_opt(fs_info, SPACE_CACHE)) +- cache->cached = BTRFS_CACHE_FAST; +- else +- cache->cached = BTRFS_CACHE_STARTED; ++ cache->cached = BTRFS_CACHE_STARTED; + cache->has_caching_ctl = 1; + spin_unlock(&cache->lock); + +@@ -778,8 +761,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only + + btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); + out: +- if (load_cache_only && caching_ctl) +- btrfs_wait_space_cache_v1_finished(cache, caching_ctl); ++ if (wait && caching_ctl) ++ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); + if (caching_ctl) + btrfs_put_caching_control(caching_ctl); + +@@ -3200,7 +3183,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, + * space back to the block group, otherwise we will leak space. 
+ */ + if (!alloc && !btrfs_block_group_done(cache)) +- btrfs_cache_block_group(cache, 1); ++ btrfs_cache_block_group(cache, true); + + byte_in_group = bytenr - cache->start; + WARN_ON(byte_in_group > cache->length); +diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h +index d73db0dfacb26..a15868d607a92 100644 +--- a/fs/btrfs/block-group.h ++++ b/fs/btrfs/block-group.h +@@ -251,9 +251,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr); + void btrfs_wait_nocow_writers(struct btrfs_block_group *bg); + void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, + u64 num_bytes); +-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache); +-int btrfs_cache_block_group(struct btrfs_block_group *cache, +- int load_cache_only); ++int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); + void btrfs_put_caching_control(struct btrfs_caching_control *ctl); + struct btrfs_caching_control *btrfs_get_caching_control( + struct btrfs_block_group *cache); +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index 341ce90d24b15..fb7e331b69756 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -1938,6 +1938,9 @@ cow_done: + + if (!p->skip_locking) { + level = btrfs_header_level(b); ++ ++ btrfs_maybe_reset_lockdep_class(root, b); ++ + if (level <= write_lock_level) { + btrfs_tree_lock(b); + p->locks[level] = BTRFS_WRITE_LOCK; +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 1831135fef1ab..02d3ee6c7d9b0 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -454,7 +454,6 @@ struct btrfs_free_cluster { + enum btrfs_caching_type { + BTRFS_CACHE_NO, + BTRFS_CACHE_STARTED, +- BTRFS_CACHE_FAST, + BTRFS_CACHE_FINISHED, + BTRFS_CACHE_ERROR, + }; +@@ -1105,6 +1104,8 @@ enum { + BTRFS_ROOT_QGROUP_FLUSHING, + /* This root has a drop operation that was started previously. */ + BTRFS_ROOT_UNFINISHED_DROP, ++ /* This reloc root needs to have its buffers lockdep class reset. */ ++ BTRFS_ROOT_RESET_LOCKDEP_CLASS, + }; + + static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) +@@ -3166,7 +3167,6 @@ void __btrfs_del_delalloc_inode(struct btrfs_root *root, + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); + int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); + int btrfs_unlink_inode(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, + struct btrfs_inode *dir, struct btrfs_inode *inode, + const char *name, int name_len); + int btrfs_add_link(struct btrfs_trans_handle *trans, +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 247d7f9ced3b0..c76c360bece59 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -121,88 +121,6 @@ struct async_submit_bio { + blk_status_t status; + }; + +-/* +- * Lockdep class keys for extent_buffer->lock's in this root. For a given +- * eb, the lockdep key is determined by the btrfs_root it belongs to and +- * the level the eb occupies in the tree. +- * +- * Different roots are used for different purposes and may nest inside each +- * other and they require separate keysets. As lockdep keys should be +- * static, assign keysets according to the purpose of the root as indicated +- * by btrfs_root->root_key.objectid. This ensures that all special purpose +- * roots have separate keysets. +- * +- * Lock-nesting across peer nodes is always done with the immediate parent +- * node locked thus preventing deadlock. As lockdep doesn't know this, use +- * subclass to avoid triggering lockdep warning in such cases. 
+- * +- * The key is set by the readpage_end_io_hook after the buffer has passed +- * csum validation but before the pages are unlocked. It is also set by +- * btrfs_init_new_buffer on freshly allocated blocks. +- * +- * We also add a check to make sure the highest level of the tree is the +- * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code +- * needs update as well. +- */ +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# if BTRFS_MAX_LEVEL != 8 +-# error +-# endif +- +-#define DEFINE_LEVEL(stem, level) \ +- .names[level] = "btrfs-" stem "-0" #level, +- +-#define DEFINE_NAME(stem) \ +- DEFINE_LEVEL(stem, 0) \ +- DEFINE_LEVEL(stem, 1) \ +- DEFINE_LEVEL(stem, 2) \ +- DEFINE_LEVEL(stem, 3) \ +- DEFINE_LEVEL(stem, 4) \ +- DEFINE_LEVEL(stem, 5) \ +- DEFINE_LEVEL(stem, 6) \ +- DEFINE_LEVEL(stem, 7) +- +-static struct btrfs_lockdep_keyset { +- u64 id; /* root objectid */ +- /* Longest entry: btrfs-free-space-00 */ +- char names[BTRFS_MAX_LEVEL][20]; +- struct lock_class_key keys[BTRFS_MAX_LEVEL]; +-} btrfs_lockdep_keysets[] = { +- { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") }, +- { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") }, +- { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") }, +- { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") }, +- { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") }, +- { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") }, +- { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") }, +- { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") }, +- { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") }, +- { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") }, +- { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") }, +- { .id = 0, DEFINE_NAME("tree") }, +-}; +- +-#undef DEFINE_LEVEL +-#undef DEFINE_NAME +- +-void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, +- int level) +-{ +- struct btrfs_lockdep_keyset *ks; +- +- BUG_ON(level >= ARRAY_SIZE(ks->keys)); +- +- /* find the matching keyset, id 0 is the default entry */ +- for (ks = btrfs_lockdep_keysets; ks->id; ks++) +- if (ks->id == objectid) +- break; +- +- lockdep_set_class_and_name(&eb->lock, +- &ks->keys[level], ks->names[level]); +-} +- +-#endif +- + /* + * Compute the csum of a btree block and store the result to provided buffer. + */ +diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h +index 0e7e9526b6a83..1b8fd3deafc92 100644 +--- a/fs/btrfs/disk-io.h ++++ b/fs/btrfs/disk-io.h +@@ -140,14 +140,4 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root); + int __init btrfs_end_io_wq_init(void); + void __cold btrfs_end_io_wq_exit(void); + +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-void btrfs_set_buffer_lockdep_class(u64 objectid, +- struct extent_buffer *eb, int level); +-#else +-static inline void btrfs_set_buffer_lockdep_class(u64 objectid, +- struct extent_buffer *eb, int level) +-{ +-} +-#endif +- + #endif +diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c +index 248ea15c97346..401a425a587c4 100644 +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -2572,17 +2572,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + return -EINVAL; + + /* +- * pull in the free space cache (if any) so that our pin +- * removes the free space from the cache. We have load_only set +- * to one because the slow code to read in the free extents does check +- * the pinned extents. ++ * Fully cache the free space first so that our pin removes the free space ++ * from the cache. 
+ */ +- btrfs_cache_block_group(cache, 1); +- /* +- * Make sure we wait until the cache is completely built in case it is +- * missing or is invalid and therefore needs to be rebuilt. +- */ +- ret = btrfs_wait_block_group_cache_done(cache); ++ ret = btrfs_cache_block_group(cache, true); + if (ret) + goto out; + +@@ -2605,12 +2598,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, + if (!block_group) + return -EINVAL; + +- btrfs_cache_block_group(block_group, 1); +- /* +- * Make sure we wait until the cache is completely built in case it is +- * missing or is invalid and therefore needs to be rebuilt. +- */ +- ret = btrfs_wait_block_group_cache_done(block_group); ++ ret = btrfs_cache_block_group(block_group, true); + if (ret) + goto out; + +@@ -4324,7 +4312,7 @@ have_block_group: + ffe_ctl.cached = btrfs_block_group_done(block_group); + if (unlikely(!ffe_ctl.cached)) { + ffe_ctl.have_caching_bg = true; +- ret = btrfs_cache_block_group(block_group, 0); ++ ret = btrfs_cache_block_group(block_group, false); + + /* + * If we get ENOMEM here or something else we want to +@@ -4781,6 +4769,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, + { + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *buf; ++ u64 lockdep_owner = owner; + + buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level); + if (IS_ERR(buf)) +@@ -4799,12 +4788,27 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, + return ERR_PTR(-EUCLEAN); + } + ++ /* ++ * The reloc trees are just snapshots, so we need them to appear to be ++ * just like any other fs tree WRT lockdep. ++ * ++ * The exception however is in replace_path() in relocation, where we ++ * hold the lock on the original fs root and then search for the reloc ++ * root. At that point we need to make sure any reloc root buffers are ++ * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make ++ * lockdep happy. ++ */ ++ if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID && ++ !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state)) ++ lockdep_owner = BTRFS_FS_TREE_OBJECTID; ++ + /* + * This needs to stay, because we could allocate a freed block from an + * old tree into a new tree, so we need to make sure this new block is + * set to the appropriate level and owner. 
+ */ +- btrfs_set_buffer_lockdep_class(owner, buf, level); ++ btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level); ++ + __btrfs_tree_lock(buf, nest); + btrfs_clean_tree_block(buf); + clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); +@@ -6066,13 +6070,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) + + if (end - start >= range->minlen) { + if (!btrfs_block_group_done(cache)) { +- ret = btrfs_cache_block_group(cache, 0); +- if (ret) { +- bg_failed++; +- bg_ret = ret; +- continue; +- } +- ret = btrfs_wait_block_group_cache_done(cache); ++ ret = btrfs_cache_block_group(cache, true); + if (ret) { + bg_failed++; + bg_ret = ret; +diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c +index a72a8d4d4a72e..7bd704779a99b 100644 +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -6109,6 +6109,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, + struct extent_buffer *exists = NULL; + struct page *p; + struct address_space *mapping = fs_info->btree_inode->i_mapping; ++ u64 lockdep_owner = owner_root; + int uptodate = 1; + int ret; + +@@ -6143,7 +6144,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, + eb = __alloc_extent_buffer(fs_info, start, len); + if (!eb) + return ERR_PTR(-ENOMEM); +- btrfs_set_buffer_lockdep_class(owner_root, eb, level); ++ ++ /* ++ * The reloc trees are just snapshots, so we need them to appear to be ++ * just like any other fs tree WRT lockdep. ++ */ ++ if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID) ++ lockdep_owner = BTRFS_FS_TREE_OBJECTID; ++ ++ btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level); + + num_pages = num_extent_pages(eb); + for (i = 0; i < num_pages; i++, index++) { +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index 428a56f248bba..f8a01964a2169 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4097,11 +4097,11 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + * also drops the back refs in the inode to the directory + */ + static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, + struct btrfs_inode *dir, + struct btrfs_inode *inode, + const char *name, int name_len) + { ++ struct btrfs_root *root = dir->root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_path *path; + int ret = 0; +@@ -4201,15 +4201,14 @@ out: + } + + int btrfs_unlink_inode(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, + struct btrfs_inode *dir, struct btrfs_inode *inode, + const char *name, int name_len) + { + int ret; +- ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); ++ ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len); + if (!ret) { + drop_nlink(&inode->vfs_inode); +- ret = btrfs_update_inode(trans, root, inode); ++ ret = btrfs_update_inode(trans, inode->root, inode); + } + return ret; + } +@@ -4238,7 +4237,6 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) + + static int btrfs_unlink(struct inode *dir, struct dentry *dentry) + { +- struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_trans_handle *trans; + struct inode *inode = d_inode(dentry); + int ret; +@@ -4250,7 +4248,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) + btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), + 0); + +- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), ++ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), + BTRFS_I(d_inode(dentry)), dentry->d_name.name, + dentry->d_name.len); + if (ret) +@@ 
-4264,7 +4262,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) + + out: + btrfs_end_transaction(trans); +- btrfs_btree_balance_dirty(root->fs_info); ++ btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info); + return ret; + } + +@@ -4622,7 +4620,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + { + struct inode *inode = d_inode(dentry); + int err = 0; +- struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_trans_handle *trans; + u64 last_unlink_trans; + +@@ -4647,7 +4644,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; + + /* now the directory is empty */ +- err = btrfs_unlink_inode(trans, root, BTRFS_I(dir), ++ err = btrfs_unlink_inode(trans, BTRFS_I(dir), + BTRFS_I(d_inode(dentry)), dentry->d_name.name, + dentry->d_name.len); + if (!err) { +@@ -4668,7 +4665,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) + } + out: + btrfs_end_transaction(trans); +- btrfs_btree_balance_dirty(root->fs_info); ++ btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info); + + return err; + } +@@ -9571,7 +9568,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { + ret = btrfs_unlink_subvol(trans, old_dir, old_dentry); + } else { /* src is an inode */ +- ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir), ++ ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), + BTRFS_I(old_dentry->d_inode), + old_dentry->d_name.name, + old_dentry->d_name.len); +@@ -9587,7 +9584,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, + if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { + ret = btrfs_unlink_subvol(trans, new_dir, new_dentry); + } else { /* dest is an inode */ +- ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir), ++ ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(new_dentry->d_inode), + new_dentry->d_name.name, + new_dentry->d_name.len); +@@ -9862,7 +9859,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + */ + btrfs_pin_log_trans(root); + log_pinned = true; +- ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir), ++ ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir), + BTRFS_I(d_inode(old_dentry)), + old_dentry->d_name.name, + old_dentry->d_name.len); +@@ -9882,7 +9879,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, + ret = btrfs_unlink_subvol(trans, new_dir, new_dentry); + BUG_ON(new_inode->i_nlink == 0); + } else { +- ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir), ++ ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir), + BTRFS_I(d_inode(new_dentry)), + new_dentry->d_name.name, + new_dentry->d_name.len); +diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c +index 33461b4f9c8b5..9063072b399bd 100644 +--- a/fs/btrfs/locking.c ++++ b/fs/btrfs/locking.c +@@ -13,6 +13,93 @@ + #include "extent_io.h" + #include "locking.h" + ++/* ++ * Lockdep class keys for extent_buffer->lock's in this root. For a given ++ * eb, the lockdep key is determined by the btrfs_root it belongs to and ++ * the level the eb occupies in the tree. ++ * ++ * Different roots are used for different purposes and may nest inside each ++ * other and they require separate keysets. As lockdep keys should be ++ * static, assign keysets according to the purpose of the root as indicated ++ * by btrfs_root->root_key.objectid. This ensures that all special purpose ++ * roots have separate keysets. 
++ * ++ * Lock-nesting across peer nodes is always done with the immediate parent ++ * node locked thus preventing deadlock. As lockdep doesn't know this, use ++ * subclass to avoid triggering lockdep warning in such cases. ++ * ++ * The key is set by the readpage_end_io_hook after the buffer has passed ++ * csum validation but before the pages are unlocked. It is also set by ++ * btrfs_init_new_buffer on freshly allocated blocks. ++ * ++ * We also add a check to make sure the highest level of the tree is the ++ * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code ++ * needs update as well. ++ */ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++#if BTRFS_MAX_LEVEL != 8 ++#error ++#endif ++ ++#define DEFINE_LEVEL(stem, level) \ ++ .names[level] = "btrfs-" stem "-0" #level, ++ ++#define DEFINE_NAME(stem) \ ++ DEFINE_LEVEL(stem, 0) \ ++ DEFINE_LEVEL(stem, 1) \ ++ DEFINE_LEVEL(stem, 2) \ ++ DEFINE_LEVEL(stem, 3) \ ++ DEFINE_LEVEL(stem, 4) \ ++ DEFINE_LEVEL(stem, 5) \ ++ DEFINE_LEVEL(stem, 6) \ ++ DEFINE_LEVEL(stem, 7) ++ ++static struct btrfs_lockdep_keyset { ++ u64 id; /* root objectid */ ++ /* Longest entry: btrfs-free-space-00 */ ++ char names[BTRFS_MAX_LEVEL][20]; ++ struct lock_class_key keys[BTRFS_MAX_LEVEL]; ++} btrfs_lockdep_keysets[] = { ++ { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") }, ++ { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") }, ++ { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") }, ++ { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") }, ++ { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") }, ++ { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") }, ++ { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") }, ++ { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") }, ++ { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") }, ++ { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") }, ++ { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") }, ++ { .id = 0, DEFINE_NAME("tree") }, ++}; ++ ++#undef DEFINE_LEVEL ++#undef DEFINE_NAME ++ ++void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level) ++{ ++ struct btrfs_lockdep_keyset *ks; ++ ++ BUG_ON(level >= ARRAY_SIZE(ks->keys)); ++ ++ /* Find the matching keyset, id 0 is the default entry */ ++ for (ks = btrfs_lockdep_keysets; ks->id; ks++) ++ if (ks->id == objectid) ++ break; ++ ++ lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]); ++} ++ ++void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb) ++{ ++ if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state)) ++ btrfs_set_buffer_lockdep_class(root->root_key.objectid, ++ eb, btrfs_header_level(eb)); ++} ++ ++#endif ++ + /* + * Extent buffer locking + * ===================== +@@ -164,6 +251,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) + + while (1) { + eb = btrfs_root_node(root); ++ ++ btrfs_maybe_reset_lockdep_class(root, eb); + btrfs_tree_lock(eb); + if (eb == root->node) + break; +@@ -185,6 +274,8 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) + + while (1) { + eb = btrfs_root_node(root); ++ ++ btrfs_maybe_reset_lockdep_class(root, eb); + btrfs_tree_read_lock(eb); + if (eb == root->node) + break; +diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h +index a2e1f1f5c6e34..26a2f962c268e 100644 +--- a/fs/btrfs/locking.h ++++ b/fs/btrfs/locking.h +@@ -130,4 +130,18 @@ void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock); + void btrfs_drew_read_lock(struct btrfs_drew_lock 
*lock); + void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock); + ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level); ++void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb); ++#else ++static inline void btrfs_set_buffer_lockdep_class(u64 objectid, ++ struct extent_buffer *eb, int level) ++{ ++} ++static inline void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, ++ struct extent_buffer *eb) ++{ ++} ++#endif ++ + #endif +diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c +index 673e11fcf3fc9..becf3396d533d 100644 +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -1326,7 +1326,9 @@ again: + btrfs_release_path(path); + + path->lowest_level = level; ++ set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state); + ret = btrfs_search_slot(trans, src, &key, path, 0, 1); ++ clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state); + path->lowest_level = 0; + if (ret) { + if (ret > 0) +diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c +index 51382d2be3d44..a84d2d4895104 100644 +--- a/fs/btrfs/tree-checker.c ++++ b/fs/btrfs/tree-checker.c +@@ -1216,7 +1216,8 @@ static void extent_err(const struct extent_buffer *eb, int slot, + } + + static int check_extent_item(struct extent_buffer *leaf, +- struct btrfs_key *key, int slot) ++ struct btrfs_key *key, int slot, ++ struct btrfs_key *prev_key) + { + struct btrfs_fs_info *fs_info = leaf->fs_info; + struct btrfs_extent_item *ei; +@@ -1436,6 +1437,26 @@ static int check_extent_item(struct extent_buffer *leaf, + total_refs, inline_refs); + return -EUCLEAN; + } ++ ++ if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) || ++ (prev_key->type == BTRFS_METADATA_ITEM_KEY)) { ++ u64 prev_end = prev_key->objectid; ++ ++ if (prev_key->type == BTRFS_METADATA_ITEM_KEY) ++ prev_end += fs_info->nodesize; ++ else ++ prev_end += prev_key->offset; ++ ++ if (unlikely(prev_end > key->objectid)) { ++ extent_err(leaf, slot, ++ "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]", ++ prev_key->objectid, prev_key->type, ++ prev_key->offset, key->objectid, key->type, ++ key->offset); ++ return -EUCLEAN; ++ } ++ } ++ + return 0; + } + +@@ -1604,7 +1625,7 @@ static int check_leaf_item(struct extent_buffer *leaf, + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: +- ret = check_extent_item(leaf, key, slot); ++ ret = check_extent_item(leaf, key, slot, prev_key); + break; + case BTRFS_TREE_BLOCK_REF_KEY: + case BTRFS_SHARED_DATA_REF_KEY: +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index 1d7e9812f55e1..7272896587302 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -884,6 +884,26 @@ out: + return ret; + } + ++static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans, ++ struct btrfs_inode *dir, ++ struct btrfs_inode *inode, ++ const char *name, ++ int name_len) ++{ ++ int ret; ++ ++ ret = btrfs_unlink_inode(trans, dir, inode, name, name_len); ++ if (ret) ++ return ret; ++ /* ++ * Whenever we need to check if a name exists or not, we check the ++ * fs/subvolume tree. So after an unlink we must run delayed items, so ++ * that future checks for a name during log replay see that the name ++ * does not exists anymore. 
++ */ ++ return btrfs_run_delayed_items(trans); ++} ++ + /* + * when cleaning up conflicts between the directory names in the + * subvolume, directory names in the log and directory names in the +@@ -926,12 +946,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, + if (ret) + goto out; + +- ret = btrfs_unlink_inode(trans, root, dir, BTRFS_I(inode), name, ++ ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name, + name_len); +- if (ret) +- goto out; +- else +- ret = btrfs_run_delayed_items(trans); + out: + kfree(name); + iput(inode); +@@ -1091,12 +1107,9 @@ again: + inc_nlink(&inode->vfs_inode); + btrfs_release_path(path); + +- ret = btrfs_unlink_inode(trans, root, dir, inode, ++ ret = unlink_inode_for_log_replay(trans, dir, inode, + victim_name, victim_name_len); + kfree(victim_name); +- if (ret) +- return ret; +- ret = btrfs_run_delayed_items(trans); + if (ret) + return ret; + *search_done = 1; +@@ -1165,14 +1178,11 @@ again: + inc_nlink(&inode->vfs_inode); + btrfs_release_path(path); + +- ret = btrfs_unlink_inode(trans, root, ++ ret = unlink_inode_for_log_replay(trans, + BTRFS_I(victim_parent), + inode, + victim_name, + victim_name_len); +- if (!ret) +- ret = btrfs_run_delayed_items( +- trans); + } + iput(victim_parent); + kfree(victim_name); +@@ -1327,19 +1337,10 @@ again: + kfree(name); + goto out; + } +- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), ++ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), + inode, name, namelen); + kfree(name); + iput(dir); +- /* +- * Whenever we need to check if a name exists or not, we +- * check the subvolume tree. So after an unlink we must +- * run delayed items, so that future checks for a name +- * during log replay see that the name does not exists +- * anymore. +- */ +- if (!ret) +- ret = btrfs_run_delayed_items(trans); + if (ret) + goto out; + goto again; +@@ -1434,8 +1435,8 @@ static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root, + ret = -ENOENT; + goto out; + } +- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), BTRFS_I(other_inode), +- name, namelen); ++ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(other_inode), ++ name, namelen); + if (ret) + goto out; + /* +@@ -1443,11 +1444,7 @@ static int add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root, + * on the inode will not free it. We will fixup the link count later. + */ + if (other_inode->i_nlink == 0) +- inc_nlink(other_inode); +- +- ret = btrfs_run_delayed_items(trans); +- if (ret) +- goto out; ++ set_nlink(other_inode, 1); + add_link: + ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), + name, namelen, 0, ref_index); +@@ -1580,7 +1577,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + ret = btrfs_inode_ref_exists(inode, dir, key->type, + name, namelen); + if (ret > 0) { +- ret = btrfs_unlink_inode(trans, root, ++ ret = unlink_inode_for_log_replay(trans, + BTRFS_I(dir), + BTRFS_I(inode), + name, namelen); +@@ -1590,16 +1587,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, + * free it. We will fixup the link count later. + */ + if (!ret && inode->i_nlink == 0) +- inc_nlink(inode); +- /* +- * Whenever we need to check if a name exists or +- * not, we check the subvolume tree. So after an +- * unlink we must run delayed items, so that future +- * checks for a name during log replay see that the +- * name does not exists anymore. 
+- */ +- if (!ret) +- ret = btrfs_run_delayed_items(trans); ++ set_nlink(inode, 1); + } + if (ret < 0) + goto out; +@@ -2197,7 +2185,7 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, + */ + static noinline int find_dir_range(struct btrfs_root *root, + struct btrfs_path *path, +- u64 dirid, int key_type, ++ u64 dirid, + u64 *start_ret, u64 *end_ret) + { + struct btrfs_key key; +@@ -2210,7 +2198,7 @@ static noinline int find_dir_range(struct btrfs_root *root, + return 1; + + key.objectid = dirid; +- key.type = key_type; ++ key.type = BTRFS_DIR_LOG_INDEX_KEY; + key.offset = *start_ret; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +@@ -2224,7 +2212,7 @@ static noinline int find_dir_range(struct btrfs_root *root, + if (ret != 0) + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + +- if (key.type != key_type || key.objectid != dirid) { ++ if (key.type != BTRFS_DIR_LOG_INDEX_KEY || key.objectid != dirid) { + ret = 1; + goto next; + } +@@ -2251,7 +2239,7 @@ next: + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + +- if (key.type != key_type || key.objectid != dirid) { ++ if (key.type != BTRFS_DIR_LOG_INDEX_KEY || key.objectid != dirid) { + ret = 1; + goto out; + } +@@ -2282,95 +2270,75 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, + int ret; + struct extent_buffer *eb; + int slot; +- u32 item_size; + struct btrfs_dir_item *di; +- struct btrfs_dir_item *log_di; + int name_len; +- unsigned long ptr; +- unsigned long ptr_end; + char *name; +- struct inode *inode; ++ struct inode *inode = NULL; + struct btrfs_key location; + +-again: ++ /* ++ * Currenly we only log dir index keys. Even if we replay a log created ++ * by an older kernel that logged both dir index and dir item keys, all ++ * we need to do is process the dir index keys, we (and our caller) can ++ * safely ignore dir item keys (key type BTRFS_DIR_ITEM_KEY). 
++ */ ++ ASSERT(dir_key->type == BTRFS_DIR_INDEX_KEY); ++ + eb = path->nodes[0]; + slot = path->slots[0]; +- item_size = btrfs_item_size_nr(eb, slot); +- ptr = btrfs_item_ptr_offset(eb, slot); +- ptr_end = ptr + item_size; +- while (ptr < ptr_end) { +- di = (struct btrfs_dir_item *)ptr; +- name_len = btrfs_dir_name_len(eb, di); +- name = kmalloc(name_len, GFP_NOFS); +- if (!name) { +- ret = -ENOMEM; +- goto out; +- } +- read_extent_buffer(eb, name, (unsigned long)(di + 1), +- name_len); +- log_di = NULL; +- if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { +- log_di = btrfs_lookup_dir_item(trans, log, log_path, +- dir_key->objectid, +- name, name_len, 0); +- } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { +- log_di = btrfs_lookup_dir_index_item(trans, log, +- log_path, +- dir_key->objectid, +- dir_key->offset, +- name, name_len, 0); +- } +- if (!log_di) { +- btrfs_dir_item_key_to_cpu(eb, di, &location); +- btrfs_release_path(path); +- btrfs_release_path(log_path); +- inode = read_one_inode(root, location.objectid); +- if (!inode) { +- kfree(name); +- return -EIO; +- } ++ di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); ++ name_len = btrfs_dir_name_len(eb, di); ++ name = kmalloc(name_len, GFP_NOFS); ++ if (!name) { ++ ret = -ENOMEM; ++ goto out; ++ } + +- ret = link_to_fixup_dir(trans, root, +- path, location.objectid); +- if (ret) { +- kfree(name); +- iput(inode); +- goto out; +- } ++ read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len); + +- inc_nlink(inode); +- ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir), +- BTRFS_I(inode), name, name_len); +- if (!ret) +- ret = btrfs_run_delayed_items(trans); +- kfree(name); +- iput(inode); +- if (ret) +- goto out; ++ if (log) { ++ struct btrfs_dir_item *log_di; + +- /* there might still be more names under this key +- * check and repeat if required +- */ +- ret = btrfs_search_slot(NULL, root, dir_key, path, +- 0, 0); +- if (ret == 0) +- goto again; ++ log_di = btrfs_lookup_dir_index_item(trans, log, log_path, ++ dir_key->objectid, ++ dir_key->offset, ++ name, name_len, 0); ++ if (IS_ERR(log_di)) { ++ ret = PTR_ERR(log_di); ++ goto out; ++ } else if (log_di) { ++ /* The dentry exists in the log, we have nothing to do. */ + ret = 0; + goto out; +- } else if (IS_ERR(log_di)) { +- kfree(name); +- return PTR_ERR(log_di); + } +- btrfs_release_path(log_path); +- kfree(name); ++ } + +- ptr = (unsigned long)(di + 1); +- ptr += name_len; ++ btrfs_dir_item_key_to_cpu(eb, di, &location); ++ btrfs_release_path(path); ++ btrfs_release_path(log_path); ++ inode = read_one_inode(root, location.objectid); ++ if (!inode) { ++ ret = -EIO; ++ goto out; + } +- ret = 0; ++ ++ ret = link_to_fixup_dir(trans, root, path, location.objectid); ++ if (ret) ++ goto out; ++ ++ inc_nlink(inode); ++ ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode), ++ name, name_len); ++ /* ++ * Unlike dir item keys, dir index keys can only have one name (entry) in ++ * them, as there are no key collisions since each key has a unique offset ++ * (an index number), so we're done. 
++ */ + out: + btrfs_release_path(path); + btrfs_release_path(log_path); ++ kfree(name); ++ iput(inode); + return ret; + } + +@@ -2490,7 +2458,6 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, + { + u64 range_start; + u64 range_end; +- int key_type = BTRFS_DIR_LOG_ITEM_KEY; + int ret = 0; + struct btrfs_key dir_key; + struct btrfs_key found_key; +@@ -2498,7 +2465,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, + struct inode *dir; + + dir_key.objectid = dirid; +- dir_key.type = BTRFS_DIR_ITEM_KEY; ++ dir_key.type = BTRFS_DIR_INDEX_KEY; + log_path = btrfs_alloc_path(); + if (!log_path) + return -ENOMEM; +@@ -2512,14 +2479,14 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, + btrfs_free_path(log_path); + return 0; + } +-again: ++ + range_start = 0; + range_end = 0; + while (1) { + if (del_all) + range_end = (u64)-1; + else { +- ret = find_dir_range(log, path, dirid, key_type, ++ ret = find_dir_range(log, path, dirid, + &range_start, &range_end); + if (ret < 0) + goto out; +@@ -2546,8 +2513,10 @@ again: + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != dirid || +- found_key.type != dir_key.type) +- goto next_type; ++ found_key.type != dir_key.type) { ++ ret = 0; ++ goto out; ++ } + + if (found_key.offset > range_end) + break; +@@ -2566,15 +2535,7 @@ again: + break; + range_start = range_end + 1; + } +- +-next_type: + ret = 0; +- if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { +- key_type = BTRFS_DIR_LOG_INDEX_KEY; +- dir_key.type = BTRFS_DIR_INDEX_KEY; +- btrfs_release_path(path); +- goto again; +- } + out: + btrfs_release_path(path); + btrfs_free_path(log_path); +diff --git a/fs/io_uring.c b/fs/io_uring.c +index 2680e9756b1d4..ed6abd74f3865 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -486,8 +486,6 @@ struct io_poll_iocb { + struct file *file; + struct wait_queue_head *head; + __poll_t events; +- bool done; +- bool canceled; + struct wait_queue_entry wait; + }; + +@@ -885,6 +883,9 @@ struct io_kiocb { + + /* store used ubuf, so we can prevent reloading */ + struct io_mapped_ubuf *imu; ++ /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ ++ struct io_buffer *kbuf; ++ atomic_t poll_refs; + }; + + struct io_tctx_node { +@@ -1079,8 +1080,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + bool cancel_all); + static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); + +-static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags); ++static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags); ++ + static void io_put_req(struct io_kiocb *req); + static void io_put_req_deferred(struct io_kiocb *req); + static void io_dismantle_req(struct io_kiocb *req); +@@ -1154,12 +1155,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req) + return atomic_dec_and_test(&req->refs); + } + +-static inline void req_ref_put(struct io_kiocb *req) +-{ +- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); +- WARN_ON_ONCE(req_ref_put_and_test(req)); +-} +- + static inline void req_ref_get(struct io_kiocb *req) + { + WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); +@@ -1515,7 +1510,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status) + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + list_del_init(&req->timeout.list); +- io_cqring_fill_event(req->ctx, req->user_data, status, 0); ++ io_fill_cqe_req(req, status, 0); + 
io_put_req_deferred(req); + } + } +@@ -1763,7 +1758,7 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task) + } + + static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags) ++ s32 res, u32 cflags) + { + struct io_overflow_cqe *ocqe; + +@@ -1790,8 +1785,8 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, + return true; + } + +-static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags) ++static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data, ++ s32 res, u32 cflags) + { + struct io_uring_cqe *cqe; + +@@ -1812,20 +1807,25 @@ static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data + return io_cqring_event_overflow(ctx, user_data, res, cflags); + } + +-/* not as hot to bloat with inlining */ +-static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags) ++static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags) + { +- return __io_cqring_fill_event(ctx, user_data, res, cflags); ++ __io_fill_cqe(req->ctx, req->user_data, res, cflags); + } + +-static void io_req_complete_post(struct io_kiocb *req, long res, +- unsigned int cflags) ++static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, ++ s32 res, u32 cflags) ++{ ++ ctx->cq_extra++; ++ return __io_fill_cqe(ctx, user_data, res, cflags); ++} ++ ++static void io_req_complete_post(struct io_kiocb *req, s32 res, ++ u32 cflags) + { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock(&ctx->completion_lock); +- __io_cqring_fill_event(ctx, req->user_data, res, cflags); ++ __io_fill_cqe(ctx, req->user_data, res, cflags); + /* + * If we're the last reference to this request, add to our locked + * free_list cache. 
+@@ -1861,8 +1861,8 @@ static inline bool io_req_needs_clean(struct io_kiocb *req) + return req->flags & IO_REQ_CLEAN_FLAGS; + } + +-static void io_req_complete_state(struct io_kiocb *req, long res, +- unsigned int cflags) ++static inline void io_req_complete_state(struct io_kiocb *req, s32 res, ++ u32 cflags) + { + if (io_req_needs_clean(req)) + io_clean_op(req); +@@ -1872,7 +1872,7 @@ static void io_req_complete_state(struct io_kiocb *req, long res, + } + + static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, +- long res, unsigned cflags) ++ s32 res, u32 cflags) + { + if (issue_flags & IO_URING_F_COMPLETE_DEFER) + io_req_complete_state(req, res, cflags); +@@ -1880,12 +1880,12 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, + io_req_complete_post(req, res, cflags); + } + +-static inline void io_req_complete(struct io_kiocb *req, long res) ++static inline void io_req_complete(struct io_kiocb *req, s32 res) + { + __io_req_complete(req, 0, res, 0); + } + +-static void io_req_complete_failed(struct io_kiocb *req, long res) ++static void io_req_complete_failed(struct io_kiocb *req, s32 res) + { + req_set_fail(req); + io_req_complete_post(req, res, 0); +@@ -2051,8 +2051,7 @@ static bool io_kill_linked_timeout(struct io_kiocb *req) + link->timeout.head = NULL; + if (hrtimer_try_to_cancel(&io->timer) != -1) { + list_del(&link->timeout.list); +- io_cqring_fill_event(link->ctx, link->user_data, +- -ECANCELED, 0); ++ io_fill_cqe_req(link, -ECANCELED, 0); + io_put_req_deferred(link); + return true; + } +@@ -2076,7 +2075,7 @@ static void io_fail_links(struct io_kiocb *req) + link->link = NULL; + + trace_io_uring_fail_link(req, link); +- io_cqring_fill_event(link->ctx, link->user_data, res, 0); ++ io_fill_cqe_req(link, res, 0); + io_put_req_deferred(link); + link = nxt; + } +@@ -2093,8 +2092,7 @@ static bool io_disarm_next(struct io_kiocb *req) + req->flags &= ~REQ_F_ARM_LTIMEOUT; + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { + io_remove_next_linked(req); +- io_cqring_fill_event(link->ctx, link->user_data, +- -ECANCELED, 0); ++ io_fill_cqe_req(link, -ECANCELED, 0); + io_put_req_deferred(link); + posted = true; + } +@@ -2370,8 +2368,8 @@ static void io_submit_flush_completions(struct io_ring_ctx *ctx) + for (i = 0; i < nr; i++) { + struct io_kiocb *req = state->compl_reqs[i]; + +- __io_cqring_fill_event(ctx, req->user_data, req->result, +- req->compl.cflags); ++ __io_fill_cqe(ctx, req->user_data, req->result, ++ req->compl.cflags); + } + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); +@@ -2482,8 +2480,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, + req = list_first_entry(done, struct io_kiocb, inflight_entry); + list_del(&req->inflight_entry); + +- __io_cqring_fill_event(ctx, req->user_data, req->result, +- io_put_rw_kbuf(req)); ++ io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req)); + (*nr_events)++; + + if (req_ref_put_and_test(req)) +@@ -2707,7 +2704,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res) + static void io_req_task_complete(struct io_kiocb *req, bool *locked) + { + unsigned int cflags = io_put_rw_kbuf(req); +- long res = req->result; ++ int res = req->result; + + if (*locked) { + struct io_ring_ctx *ctx = req->ctx; +@@ -5316,52 +5313,23 @@ struct io_poll_table { + int error; + }; + +-static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, +- __poll_t mask, io_req_tw_func_t func) +-{ +- /* for instances that support it 
check for an event match first: */ +- if (mask && !(mask & poll->events)) +- return 0; +- +- trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); +- +- list_del_init(&poll->wait.entry); ++#define IO_POLL_CANCEL_FLAG BIT(31) ++#define IO_POLL_REF_MASK GENMASK(30, 0) + +- req->result = mask; +- req->io_task_work.func = func; +- +- /* +- * If this fails, then the task is exiting. When a task exits, the +- * work gets canceled, so just cancel this request as well instead +- * of executing it. We can't safely execute it anyway, as we may not +- * have the needed state needed for it anyway. +- */ +- io_req_task_work_add(req); +- return 1; ++/* ++ * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can ++ * bump it and acquire ownership. It's disallowed to modify requests while not ++ * owning it, that prevents from races for enqueueing task_work's and b/w ++ * arming poll and wakeups. ++ */ ++static inline bool io_poll_get_ownership(struct io_kiocb *req) ++{ ++ return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); + } + +-static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) +- __acquires(&req->ctx->completion_lock) ++static void io_poll_mark_cancelled(struct io_kiocb *req) + { +- struct io_ring_ctx *ctx = req->ctx; +- +- /* req->task == current here, checking PF_EXITING is safe */ +- if (unlikely(req->task->flags & PF_EXITING)) +- WRITE_ONCE(poll->canceled, true); +- +- if (!req->result && !READ_ONCE(poll->canceled)) { +- struct poll_table_struct pt = { ._key = poll->events }; +- +- req->result = vfs_poll(req->file, &pt) & poll->events; +- } +- +- spin_lock(&ctx->completion_lock); +- if (!req->result && !READ_ONCE(poll->canceled)) { +- add_wait_queue(poll->head, &poll->wait); +- return true; +- } +- +- return false; ++ atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs); + } + + static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) +@@ -5379,141 +5347,231 @@ static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) + return &req->apoll->poll; + } + +-static void io_poll_remove_double(struct io_kiocb *req) +- __must_hold(&req->ctx->completion_lock) ++static void io_poll_req_insert(struct io_kiocb *req) + { +- struct io_poll_iocb *poll = io_poll_get_double(req); ++ struct io_ring_ctx *ctx = req->ctx; ++ struct hlist_head *list; + +- lockdep_assert_held(&req->ctx->completion_lock); ++ list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; ++ hlist_add_head(&req->hash_node, list); ++} + +- if (poll && poll->head) { +- struct wait_queue_head *head = poll->head; ++static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, ++ wait_queue_func_t wake_func) ++{ ++ poll->head = NULL; ++#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) ++ /* mask in events that we always want/need */ ++ poll->events = events | IO_POLL_UNMASK; ++ INIT_LIST_HEAD(&poll->wait.entry); ++ init_waitqueue_func_entry(&poll->wait, wake_func); ++} + ++static inline void io_poll_remove_entry(struct io_poll_iocb *poll) ++{ ++ struct wait_queue_head *head = smp_load_acquire(&poll->head); ++ ++ if (head) { + spin_lock_irq(&head->lock); + list_del_init(&poll->wait.entry); +- if (poll->wait.private) +- req_ref_put(req); + poll->head = NULL; + spin_unlock_irq(&head->lock); + } + } + +-static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) +- __must_hold(&req->ctx->completion_lock) ++static void io_poll_remove_entries(struct io_kiocb *req) ++{ ++ struct io_poll_iocb *poll = 
io_poll_get_single(req); ++ struct io_poll_iocb *poll_double = io_poll_get_double(req); ++ ++ /* ++ * While we hold the waitqueue lock and the waitqueue is nonempty, ++ * wake_up_pollfree() will wait for us. However, taking the waitqueue ++ * lock in the first place can race with the waitqueue being freed. ++ * ++ * We solve this as eventpoll does: by taking advantage of the fact that ++ * all users of wake_up_pollfree() will RCU-delay the actual free. If ++ * we enter rcu_read_lock() and see that the pointer to the queue is ++ * non-NULL, we can then lock it without the memory being freed out from ++ * under us. ++ * ++ * Keep holding rcu_read_lock() as long as we hold the queue lock, in ++ * case the caller deletes the entry from the queue, leaving it empty. ++ * In that case, only RCU prevents the queue memory from being freed. ++ */ ++ rcu_read_lock(); ++ io_poll_remove_entry(poll); ++ if (poll_double) ++ io_poll_remove_entry(poll_double); ++ rcu_read_unlock(); ++} ++ ++/* ++ * All poll tw should go through this. Checks for poll events, manages ++ * references, does rewait, etc. ++ * ++ * Returns a negative error on failure. >0 when no action require, which is ++ * either spurious wakeup or multishot CQE is served. 0 when it's done with ++ * the request, then the mask is stored in req->result. ++ */ ++static int io_poll_check_events(struct io_kiocb *req) + { + struct io_ring_ctx *ctx = req->ctx; +- unsigned flags = IORING_CQE_F_MORE; +- int error; ++ struct io_poll_iocb *poll = io_poll_get_single(req); ++ int v; ++ ++ /* req->task == current here, checking PF_EXITING is safe */ ++ if (unlikely(req->task->flags & PF_EXITING)) ++ io_poll_mark_cancelled(req); ++ ++ do { ++ v = atomic_read(&req->poll_refs); ++ ++ /* tw handler should be the owner, and so have some references */ ++ if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) ++ return 0; ++ if (v & IO_POLL_CANCEL_FLAG) ++ return -ECANCELED; ++ ++ if (!req->result) { ++ struct poll_table_struct pt = { ._key = poll->events }; ++ ++ req->result = vfs_poll(req->file, &pt) & poll->events; ++ } ++ ++ /* multishot, just fill an CQE and proceed */ ++ if (req->result && !(poll->events & EPOLLONESHOT)) { ++ __poll_t mask = mangle_poll(req->result & poll->events); ++ bool filled; + +- if (READ_ONCE(req->poll.canceled)) { +- error = -ECANCELED; +- req->poll.events |= EPOLLONESHOT; ++ spin_lock(&ctx->completion_lock); ++ filled = io_fill_cqe_aux(ctx, req->user_data, mask, ++ IORING_CQE_F_MORE); ++ io_commit_cqring(ctx); ++ spin_unlock(&ctx->completion_lock); ++ if (unlikely(!filled)) ++ return -ECANCELED; ++ io_cqring_ev_posted(ctx); ++ } else if (req->result) { ++ return 0; ++ } ++ ++ /* ++ * Release all references, retry if someone tried to restart ++ * task_work while we were executing it. 
++ */ ++ } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); ++ ++ return 1; ++} ++ ++static void io_poll_task_func(struct io_kiocb *req, bool *locked) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ int ret; ++ ++ ret = io_poll_check_events(req); ++ if (ret > 0) ++ return; ++ ++ if (!ret) { ++ req->result = mangle_poll(req->result & req->poll.events); + } else { +- error = mangle_poll(mask); +- } +- if (req->poll.events & EPOLLONESHOT) +- flags = 0; +- if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { +- req->poll.events |= EPOLLONESHOT; +- flags = 0; ++ req->result = ret; ++ req_set_fail(req); + } +- if (flags & IORING_CQE_F_MORE) +- ctx->cq_extra++; + +- return !(flags & IORING_CQE_F_MORE); ++ io_poll_remove_entries(req); ++ spin_lock(&ctx->completion_lock); ++ hash_del(&req->hash_node); ++ spin_unlock(&ctx->completion_lock); ++ io_req_complete_post(req, req->result, 0); + } + +-static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask) +- __must_hold(&req->ctx->completion_lock) ++static void io_apoll_task_func(struct io_kiocb *req, bool *locked) + { +- bool done; ++ struct io_ring_ctx *ctx = req->ctx; ++ int ret; + +- done = __io_poll_complete(req, mask); +- io_commit_cqring(req->ctx); +- return done; ++ ret = io_poll_check_events(req); ++ if (ret > 0) ++ return; ++ ++ io_poll_remove_entries(req); ++ spin_lock(&ctx->completion_lock); ++ hash_del(&req->hash_node); ++ spin_unlock(&ctx->completion_lock); ++ ++ if (!ret) ++ io_req_task_submit(req, locked); ++ else ++ io_req_complete_failed(req, ret); + } + +-static void io_poll_task_func(struct io_kiocb *req, bool *locked) ++static void __io_poll_execute(struct io_kiocb *req, int mask) + { +- struct io_ring_ctx *ctx = req->ctx; +- struct io_kiocb *nxt; ++ req->result = mask; ++ if (req->opcode == IORING_OP_POLL_ADD) ++ req->io_task_work.func = io_poll_task_func; ++ else ++ req->io_task_work.func = io_apoll_task_func; + +- if (io_poll_rewait(req, &req->poll)) { +- spin_unlock(&ctx->completion_lock); +- } else { +- bool done; ++ trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); ++ io_req_task_work_add(req); ++} + +- if (req->poll.done) { +- spin_unlock(&ctx->completion_lock); +- return; +- } +- done = __io_poll_complete(req, req->result); +- if (done) { +- io_poll_remove_double(req); +- hash_del(&req->hash_node); +- req->poll.done = true; +- } else { +- req->result = 0; +- add_wait_queue(req->poll.head, &req->poll.wait); +- } +- io_commit_cqring(ctx); +- spin_unlock(&ctx->completion_lock); +- io_cqring_ev_posted(ctx); ++static inline void io_poll_execute(struct io_kiocb *req, int res) ++{ ++ if (io_poll_get_ownership(req)) ++ __io_poll_execute(req, res); ++} + +- if (done) { +- nxt = io_put_req_find_next(req); +- if (nxt) +- io_req_task_submit(nxt, locked); +- } +- } ++static void io_poll_cancel_req(struct io_kiocb *req) ++{ ++ io_poll_mark_cancelled(req); ++ /* kick tw, which should complete the request */ ++ io_poll_execute(req, 0); + } + +-static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, +- int sync, void *key) ++static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, ++ void *key) + { + struct io_kiocb *req = wait->private; +- struct io_poll_iocb *poll = io_poll_get_single(req); ++ struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb, ++ wait); + __poll_t mask = key_to_poll(key); +- unsigned long flags; + +- /* for instances that support it check for an event match first: */ +- if (mask && !(mask & 
poll->events)) +- return 0; +- if (!(poll->events & EPOLLONESHOT)) +- return poll->wait.func(&poll->wait, mode, sync, key); ++ if (unlikely(mask & POLLFREE)) { ++ io_poll_mark_cancelled(req); ++ /* we have to kick tw in case it's not already */ ++ io_poll_execute(req, 0); + +- list_del_init(&wait->entry); ++ /* ++ * If the waitqueue is being freed early but someone is already ++ * holds ownership over it, we have to tear down the request as ++ * best we can. That means immediately removing the request from ++ * its waitqueue and preventing all further accesses to the ++ * waitqueue via the request. ++ */ ++ list_del_init(&poll->wait.entry); + +- if (poll->head) { +- bool done; +- +- spin_lock_irqsave(&poll->head->lock, flags); +- done = list_empty(&poll->wait.entry); +- if (!done) +- list_del_init(&poll->wait.entry); +- /* make sure double remove sees this as being gone */ +- wait->private = NULL; +- spin_unlock_irqrestore(&poll->head->lock, flags); +- if (!done) { +- /* use wait func handler, so it matches the rq type */ +- poll->wait.func(&poll->wait, mode, sync, key); +- } ++ /* ++ * Careful: this *must* be the last step, since as soon ++ * as req->head is NULL'ed out, the request can be ++ * completed and freed, since aio_poll_complete_work() ++ * will no longer need to take the waitqueue lock. ++ */ ++ smp_store_release(&poll->head, NULL); ++ return 1; + } +- req_ref_put(req); +- return 1; +-} + +-static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, +- wait_queue_func_t wake_func) +-{ +- poll->head = NULL; +- poll->done = false; +- poll->canceled = false; +-#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) +- /* mask in events that we always want/need */ +- poll->events = events | IO_POLL_UNMASK; +- INIT_LIST_HEAD(&poll->wait.entry); +- init_waitqueue_func_entry(&poll->wait, wake_func); ++ /* for instances that support it check for an event match first */ ++ if (mask && !(mask & poll->events)) ++ return 0; ++ ++ if (io_poll_get_ownership(req)) ++ __io_poll_execute(req, mask); ++ return 1; + } + + static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, +@@ -5528,10 +5586,10 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, + * if this happens. + */ + if (unlikely(pt->nr_entries)) { +- struct io_poll_iocb *poll_one = poll; ++ struct io_poll_iocb *first = poll; + + /* double add on the same waitqueue head, ignore */ +- if (poll_one->head == head) ++ if (first->head == head) + return; + /* already have a 2nd entry, fail a third attempt */ + if (*poll_ptr) { +@@ -5540,25 +5598,19 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, + pt->error = -EINVAL; + return; + } +- /* +- * Can't handle multishot for double wait for now, turn it +- * into one-shot mode. 
+- */ +- if (!(poll_one->events & EPOLLONESHOT)) +- poll_one->events |= EPOLLONESHOT; ++ + poll = kmalloc(sizeof(*poll), GFP_ATOMIC); + if (!poll) { + pt->error = -ENOMEM; + return; + } +- io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake); +- req_ref_get(req); +- poll->wait.private = req; ++ io_init_poll_iocb(poll, first->events, first->wait.func); + *poll_ptr = poll; + } + + pt->nr_entries++; + poll->head = head; ++ poll->wait.private = req; + + if (poll->events & EPOLLEXCLUSIVE) + add_wait_queue_exclusive(head, &poll->wait); +@@ -5566,70 +5618,24 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, + add_wait_queue(head, &poll->wait); + } + +-static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, ++static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, + struct poll_table_struct *p) + { + struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); +- struct async_poll *apoll = pt->req->apoll; +- +- __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); +-} +- +-static void io_async_task_func(struct io_kiocb *req, bool *locked) +-{ +- struct async_poll *apoll = req->apoll; +- struct io_ring_ctx *ctx = req->ctx; +- +- trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data); + +- if (io_poll_rewait(req, &apoll->poll)) { +- spin_unlock(&ctx->completion_lock); +- return; +- } +- +- hash_del(&req->hash_node); +- io_poll_remove_double(req); +- apoll->poll.done = true; +- spin_unlock(&ctx->completion_lock); +- +- if (!READ_ONCE(apoll->poll.canceled)) +- io_req_task_submit(req, locked); +- else +- io_req_complete_failed(req, -ECANCELED); +-} +- +-static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync, +- void *key) +-{ +- struct io_kiocb *req = wait->private; +- struct io_poll_iocb *poll = &req->apoll->poll; +- +- trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data, +- key_to_poll(key)); +- +- return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func); ++ __io_queue_proc(&pt->req->poll, pt, head, ++ (struct io_poll_iocb **) &pt->req->async_data); + } + +-static void io_poll_req_insert(struct io_kiocb *req) ++static int __io_arm_poll_handler(struct io_kiocb *req, ++ struct io_poll_iocb *poll, ++ struct io_poll_table *ipt, __poll_t mask) + { + struct io_ring_ctx *ctx = req->ctx; +- struct hlist_head *list; +- +- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; +- hlist_add_head(&req->hash_node, list); +-} +- +-static __poll_t __io_arm_poll_handler(struct io_kiocb *req, +- struct io_poll_iocb *poll, +- struct io_poll_table *ipt, __poll_t mask, +- wait_queue_func_t wake_func) +- __acquires(&ctx->completion_lock) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- bool cancel = false; ++ int v; + + INIT_HLIST_NODE(&req->hash_node); +- io_init_poll_iocb(poll, mask, wake_func); ++ io_init_poll_iocb(poll, mask, io_poll_wake); + poll->file = req->file; + poll->wait.private = req; + +@@ -5638,31 +5644,56 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, + ipt->error = 0; + ipt->nr_entries = 0; + ++ /* ++ * Take the ownership to delay any tw execution up until we're done ++ * with poll arming. see io_poll_get_ownership(). 
++ */ ++ atomic_set(&req->poll_refs, 1); + mask = vfs_poll(req->file, &ipt->pt) & poll->events; +- if (unlikely(!ipt->nr_entries) && !ipt->error) +- ipt->error = -EINVAL; ++ ++ if (mask && (poll->events & EPOLLONESHOT)) { ++ io_poll_remove_entries(req); ++ /* no one else has access to the req, forget about the ref */ ++ return mask; ++ } ++ if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { ++ io_poll_remove_entries(req); ++ if (!ipt->error) ++ ipt->error = -EINVAL; ++ return 0; ++ } + + spin_lock(&ctx->completion_lock); +- if (ipt->error || (mask && (poll->events & EPOLLONESHOT))) +- io_poll_remove_double(req); +- if (likely(poll->head)) { +- spin_lock_irq(&poll->head->lock); +- if (unlikely(list_empty(&poll->wait.entry))) { +- if (ipt->error) +- cancel = true; ++ io_poll_req_insert(req); ++ spin_unlock(&ctx->completion_lock); ++ ++ if (mask) { ++ /* can't multishot if failed, just queue the event we've got */ ++ if (unlikely(ipt->error || !ipt->nr_entries)) { ++ poll->events |= EPOLLONESHOT; + ipt->error = 0; +- mask = 0; + } +- if ((mask && (poll->events & EPOLLONESHOT)) || ipt->error) +- list_del_init(&poll->wait.entry); +- else if (cancel) +- WRITE_ONCE(poll->canceled, true); +- else if (!poll->done) /* actually waiting for an event */ +- io_poll_req_insert(req); +- spin_unlock_irq(&poll->head->lock); ++ __io_poll_execute(req, mask); ++ return 0; + } + +- return mask; ++ /* ++ * Release ownership. If someone tried to queue a tw while it was ++ * locked, kick it off for them. ++ */ ++ v = atomic_dec_return(&req->poll_refs); ++ if (unlikely(v & IO_POLL_REF_MASK)) ++ __io_poll_execute(req, 0); ++ return 0; ++} ++ ++static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, ++ struct poll_table_struct *p) ++{ ++ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); ++ struct async_poll *apoll = pt->req->apoll; ++ ++ __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); + } + + enum { +@@ -5677,7 +5708,8 @@ static int io_arm_poll_handler(struct io_kiocb *req) + struct io_ring_ctx *ctx = req->ctx; + struct async_poll *apoll; + struct io_poll_table ipt; +- __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI; ++ __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI; ++ int ret; + + if (!req->file || !file_can_poll(req->file)) + return IO_APOLL_ABORTED; +@@ -5704,11 +5736,8 @@ static int io_arm_poll_handler(struct io_kiocb *req) + req->apoll = apoll; + req->flags |= REQ_F_POLLED; + ipt.pt._qproc = io_async_queue_proc; +- io_req_set_refcount(req); + +- ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, +- io_async_wake); +- spin_unlock(&ctx->completion_lock); ++ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask); + if (ret || ipt.error) + return ret ? 
IO_APOLL_READY : IO_APOLL_ABORTED; + +@@ -5717,43 +5746,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) + return IO_APOLL_OK; + } + +-static bool __io_poll_remove_one(struct io_kiocb *req, +- struct io_poll_iocb *poll, bool do_cancel) +- __must_hold(&req->ctx->completion_lock) +-{ +- bool do_complete = false; +- +- if (!poll->head) +- return false; +- spin_lock_irq(&poll->head->lock); +- if (do_cancel) +- WRITE_ONCE(poll->canceled, true); +- if (!list_empty(&poll->wait.entry)) { +- list_del_init(&poll->wait.entry); +- do_complete = true; +- } +- spin_unlock_irq(&poll->head->lock); +- hash_del(&req->hash_node); +- return do_complete; +-} +- +-static bool io_poll_remove_one(struct io_kiocb *req) +- __must_hold(&req->ctx->completion_lock) +-{ +- bool do_complete; +- +- io_poll_remove_double(req); +- do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true); +- +- if (do_complete) { +- io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0); +- io_commit_cqring(req->ctx); +- req_set_fail(req); +- io_put_req_deferred(req); +- } +- return do_complete; +-} +- + /* + * Returns true if we found and killed one or more poll requests + */ +@@ -5762,7 +5754,8 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, + { + struct hlist_node *tmp; + struct io_kiocb *req; +- int posted = 0, i; ++ bool found = false; ++ int i; + + spin_lock(&ctx->completion_lock); + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { +@@ -5770,16 +5763,15 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, + + list = &ctx->cancel_hash[i]; + hlist_for_each_entry_safe(req, tmp, list, hash_node) { +- if (io_match_task_safe(req, tsk, cancel_all)) +- posted += io_poll_remove_one(req); ++ if (io_match_task_safe(req, tsk, cancel_all)) { ++ hlist_del_init(&req->hash_node); ++ io_poll_cancel_req(req); ++ found = true; ++ } + } + } + spin_unlock(&ctx->completion_lock); +- +- if (posted) +- io_cqring_ev_posted(ctx); +- +- return posted != 0; ++ return found; + } + + static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, +@@ -5800,19 +5792,26 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, + return NULL; + } + ++static bool io_poll_disarm(struct io_kiocb *req) ++ __must_hold(&ctx->completion_lock) ++{ ++ if (!io_poll_get_ownership(req)) ++ return false; ++ io_poll_remove_entries(req); ++ hash_del(&req->hash_node); ++ return true; ++} ++ + static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr, + bool poll_only) + __must_hold(&ctx->completion_lock) + { +- struct io_kiocb *req; ++ struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only); + +- req = io_poll_find(ctx, sqe_addr, poll_only); + if (!req) + return -ENOENT; +- if (io_poll_remove_one(req)) +- return 0; +- +- return -EALREADY; ++ io_poll_cancel_req(req); ++ return 0; + } + + static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, +@@ -5862,23 +5861,6 @@ static int io_poll_update_prep(struct io_kiocb *req, + return 0; + } + +-static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, +- void *key) +-{ +- struct io_kiocb *req = wait->private; +- struct io_poll_iocb *poll = &req->poll; +- +- return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func); +-} +- +-static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, +- struct poll_table_struct *p) +-{ +- struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); +- +- 
__io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data); +-} +- + static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + { + struct io_poll_iocb *poll = &req->poll; +@@ -5900,90 +5882,57 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe + static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) + { + struct io_poll_iocb *poll = &req->poll; +- struct io_ring_ctx *ctx = req->ctx; + struct io_poll_table ipt; +- __poll_t mask; +- bool done; ++ int ret; + + ipt.pt._qproc = io_poll_queue_proc; + +- mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events, +- io_poll_wake); +- +- if (mask) { /* no async, we'd stolen it */ +- ipt.error = 0; +- done = io_poll_complete(req, mask); +- } +- spin_unlock(&ctx->completion_lock); +- +- if (mask) { +- io_cqring_ev_posted(ctx); +- if (done) +- io_put_req(req); +- } +- return ipt.error; ++ ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); ++ if (!ret && ipt.error) ++ req_set_fail(req); ++ ret = ret ?: ipt.error; ++ if (ret) ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; + } + + static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) + { + struct io_ring_ctx *ctx = req->ctx; + struct io_kiocb *preq; +- bool completing; +- int ret; ++ int ret2, ret = 0; + + spin_lock(&ctx->completion_lock); + preq = io_poll_find(ctx, req->poll_update.old_user_data, true); +- if (!preq) { +- ret = -ENOENT; +- goto err; ++ if (!preq || !io_poll_disarm(preq)) { ++ spin_unlock(&ctx->completion_lock); ++ ret = preq ? -EALREADY : -ENOENT; ++ goto out; + } ++ spin_unlock(&ctx->completion_lock); + +- if (!req->poll_update.update_events && !req->poll_update.update_user_data) { +- completing = true; +- ret = io_poll_remove_one(preq) ? 0 : -EALREADY; +- goto err; +- } ++ if (req->poll_update.update_events || req->poll_update.update_user_data) { ++ /* only mask one event flags, keep behavior flags */ ++ if (req->poll_update.update_events) { ++ preq->poll.events &= ~0xffff; ++ preq->poll.events |= req->poll_update.events & 0xffff; ++ preq->poll.events |= IO_POLL_UNMASK; ++ } ++ if (req->poll_update.update_user_data) ++ preq->user_data = req->poll_update.new_user_data; + +- /* +- * Don't allow racy completion with singleshot, as we cannot safely +- * update those. For multishot, if we're racing with completion, just +- * let completion re-add it. +- */ +- io_poll_remove_double(preq); +- completing = !__io_poll_remove_one(preq, &preq->poll, false); +- if (completing && (preq->poll.events & EPOLLONESHOT)) { +- ret = -EALREADY; +- goto err; ++ ret2 = io_poll_add(preq, issue_flags); ++ /* successfully updated, don't complete poll request */ ++ if (!ret2) ++ goto out; + } +- /* we now have a detached poll request. reissue. 
*/ +- ret = 0; +-err: +- if (ret < 0) { +- spin_unlock(&ctx->completion_lock); ++ req_set_fail(preq); ++ io_req_complete(preq, -ECANCELED); ++out: ++ if (ret < 0) + req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +- } +- /* only mask one event flags, keep behavior flags */ +- if (req->poll_update.update_events) { +- preq->poll.events &= ~0xffff; +- preq->poll.events |= req->poll_update.events & 0xffff; +- preq->poll.events |= IO_POLL_UNMASK; +- } +- if (req->poll_update.update_user_data) +- preq->user_data = req->poll_update.new_user_data; +- spin_unlock(&ctx->completion_lock); +- + /* complete update request, we're done with it */ + io_req_complete(req, ret); +- +- if (!completing) { +- ret = io_poll_add(preq, issue_flags); +- if (ret < 0) { +- req_set_fail(preq); +- io_req_complete(preq, ret); +- } +- } + return 0; + } + +@@ -6045,7 +5994,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) + return PTR_ERR(req); + + req_set_fail(req); +- io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0); ++ io_fill_cqe_req(req, -ECANCELED, 0); + io_put_req_deferred(req); + return 0; + } +@@ -8271,8 +8220,7 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) + + io_ring_submit_lock(ctx, lock_ring); + spin_lock(&ctx->completion_lock); +- io_cqring_fill_event(ctx, prsrc->tag, 0, 0); +- ctx->cq_extra++; ++ io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); +diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c +index 0d28e723a28c7..940385c6a9135 100644 +--- a/fs/ksmbd/mgmt/tree_connect.c ++++ b/fs/ksmbd/mgmt/tree_connect.c +@@ -18,7 +18,7 @@ + struct ksmbd_tree_conn_status + ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name) + { +- struct ksmbd_tree_conn_status status = {-EINVAL, NULL}; ++ struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; + struct ksmbd_tree_connect_response *resp = NULL; + struct ksmbd_share_config *sc; + struct ksmbd_tree_connect *tree_conn = NULL; +diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c +index 28b5d20c8766e..55ee639703ff0 100644 +--- a/fs/ksmbd/smb2pdu.c ++++ b/fs/ksmbd/smb2pdu.c +@@ -1932,8 +1932,9 @@ out_err1: + rsp->hdr.Status = STATUS_SUCCESS; + rc = 0; + break; ++ case -ENOENT: + case KSMBD_TREE_CONN_STATUS_NO_SHARE: +- rsp->hdr.Status = STATUS_BAD_NETWORK_PATH; ++ rsp->hdr.Status = STATUS_BAD_NETWORK_NAME; + break; + case -ENOMEM: + case KSMBD_TREE_CONN_STATUS_NOMEM: +@@ -2318,15 +2319,15 @@ static int smb2_remove_smb_xattrs(struct path *path) + name += strlen(name) + 1) { + ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); + +- if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && +- strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX, +- DOS_ATTRIBUTE_PREFIX_LEN) && +- strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) +- continue; +- +- err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name); +- if (err) +- ksmbd_debug(SMB, "remove xattr failed : %s\n", name); ++ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && ++ !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, ++ STREAM_PREFIX_LEN)) { ++ err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, ++ name); ++ if (err) ++ ksmbd_debug(SMB, "remove xattr failed : %s\n", ++ name); ++ } + } + out: + kvfree(xattr_list); +diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c +index e8bfa709270d1..4652b97969957 100644 +--- a/fs/ntfs3/xattr.c ++++ b/fs/ntfs3/xattr.c +@@ -118,7 +118,7 @@ static 
int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, + + run_init(&run); + +- err = attr_load_runs(attr_ea, ni, &run, NULL); ++ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &run, 0, size); + if (!err) + err = ntfs_read_run_nb(sbi, &run, 0, ea_p, size, NULL); + run_close(&run); +@@ -443,6 +443,11 @@ update_ea: + /* Delete xattr, ATTR_EA */ + ni_remove_attr_le(ni, attr, mi, le); + } else if (attr->non_res) { ++ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &ea_run, 0, ++ size); ++ if (err) ++ goto out; ++ + err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size, 0); + if (err) + goto out; +diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h +index 9cdbd209388ed..1648ce265cba0 100644 +--- a/include/drm/drm_bridge.h ++++ b/include/drm/drm_bridge.h +@@ -911,9 +911,20 @@ struct drm_bridge *devm_drm_panel_bridge_add(struct device *dev, + struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, + struct drm_panel *panel, + u32 connector_type); ++struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge); ++#endif ++ ++#if defined(CONFIG_OF) && defined(CONFIG_DRM_PANEL_BRIDGE) + struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node, + u32 port, u32 endpoint); +-struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge); ++#else ++static inline struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, ++ struct device_node *node, ++ u32 port, ++ u32 endpoint) ++{ ++ return ERR_PTR(-ENODEV); ++} + #endif + + #endif +diff --git a/include/linux/rmap.h b/include/linux/rmap.h +index c976cc6de2574..c29d9c13378b3 100644 +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -39,12 +39,15 @@ struct anon_vma { + atomic_t refcount; + + /* +- * Count of child anon_vmas and VMAs which points to this anon_vma. ++ * Count of child anon_vmas. Equals to the count of all anon_vmas that ++ * have ->parent pointing to this one, including itself. + * + * This counter is used for making decision about reusing anon_vma + * instead of forking new one. See comments in function anon_vma_clone. + */ +- unsigned degree; ++ unsigned long num_children; ++ /* Count of VMAs whose ->anon_vma pointer points to this object. 
*/ ++ unsigned long num_active_vmas; + + struct anon_vma *parent; /* Parent of this anon_vma */ + +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index cbd719e5329a4..ae598ed86b50b 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2328,6 +2328,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) + + #endif /* NET_SKBUFF_DATA_USES_OFFSET */ + ++static inline void skb_assert_len(struct sk_buff *skb) ++{ ++#ifdef CONFIG_DEBUG_NET ++ if (WARN_ONCE(!skb->len, "%s\n", __func__)) ++ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); ++#endif /* CONFIG_DEBUG_NET */ ++} ++ + /* + * Add data to an sk_buff + */ +diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h +index 73bedd128d529..0c742cdf413c0 100644 +--- a/include/linux/skmsg.h ++++ b/include/linux/skmsg.h +@@ -283,7 +283,8 @@ static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start) + + static inline struct sk_psock *sk_psock(const struct sock *sk) + { +- return rcu_dereference_sk_user_data(sk); ++ return __rcu_dereference_sk_user_data_with_flags(sk, ++ SK_USER_DATA_PSOCK); + } + + static inline void sk_psock_set_state(struct sk_psock *psock, +diff --git a/include/net/sock.h b/include/net/sock.h +index 49a6315d521ff..cb1a1bb64ed81 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -543,14 +543,26 @@ enum sk_pacing { + SK_PACING_FQ = 2, + }; + +-/* Pointer stored in sk_user_data might not be suitable for copying +- * when cloning the socket. For instance, it can point to a reference +- * counted object. sk_user_data bottom bit is set if pointer must not +- * be copied. ++/* flag bits in sk_user_data ++ * ++ * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might ++ * not be suitable for copying when cloning the socket. For instance, ++ * it can point to a reference counted object. sk_user_data bottom ++ * bit is set if pointer must not be copied. ++ * ++ * - SK_USER_DATA_BPF: Mark whether sk_user_data field is ++ * managed/owned by a BPF reuseport array. This bit should be set ++ * when sk_user_data's sk is added to the bpf's reuseport_array. ++ * ++ * - SK_USER_DATA_PSOCK: Mark whether pointer stored in ++ * sk_user_data points to psock type. This bit should be set ++ * when sk_user_data is assigned to a psock object. + */ + #define SK_USER_DATA_NOCOPY 1UL +-#define SK_USER_DATA_BPF 2UL /* Managed by BPF */ +-#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF) ++#define SK_USER_DATA_BPF 2UL ++#define SK_USER_DATA_PSOCK 4UL ++#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\ ++ SK_USER_DATA_PSOCK) + + /** + * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied +@@ -563,24 +575,40 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) + + #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) + ++/** ++ * __rcu_dereference_sk_user_data_with_flags - return the pointer ++ * only if argument flags all has been set in sk_user_data. 
Otherwise ++ * return NULL ++ * ++ * @sk: socket ++ * @flags: flag bits ++ */ ++static inline void * ++__rcu_dereference_sk_user_data_with_flags(const struct sock *sk, ++ uintptr_t flags) ++{ ++ uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk)); ++ ++ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); ++ ++ if ((sk_user_data & flags) == flags) ++ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); ++ return NULL; ++} ++ + #define rcu_dereference_sk_user_data(sk) \ ++ __rcu_dereference_sk_user_data_with_flags(sk, 0) ++#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \ + ({ \ +- void *__tmp = rcu_dereference(__sk_user_data((sk))); \ +- (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \ +-}) +-#define rcu_assign_sk_user_data(sk, ptr) \ +-({ \ +- uintptr_t __tmp = (uintptr_t)(ptr); \ +- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ +- rcu_assign_pointer(__sk_user_data((sk)), __tmp); \ +-}) +-#define rcu_assign_sk_user_data_nocopy(sk, ptr) \ +-({ \ +- uintptr_t __tmp = (uintptr_t)(ptr); \ +- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ ++ uintptr_t __tmp1 = (uintptr_t)(ptr), \ ++ __tmp2 = (uintptr_t)(flags); \ ++ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \ ++ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \ + rcu_assign_pointer(__sk_user_data((sk)), \ +- __tmp | SK_USER_DATA_NOCOPY); \ ++ __tmp1 | __tmp2); \ + }) ++#define rcu_assign_sk_user_data(sk, ptr) \ ++ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0) + + /* + * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK +diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h +index e1c4c732aabac..5416f1f1a77a8 100644 +--- a/include/uapi/linux/btrfs_tree.h ++++ b/include/uapi/linux/btrfs_tree.h +@@ -146,7 +146,9 @@ + + /* + * dir items are the name -> inode pointers in a directory. There is one +- * for every name in a directory. ++ * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used ++ * but it's still defined here for documentation purposes and to help avoid ++ * having its numerical value reused in the future. + */ + #define BTRFS_DIR_LOG_ITEM_KEY 60 + #define BTRFS_DIR_LOG_INDEX_KEY 72 +diff --git a/kernel/kprobes.c b/kernel/kprobes.c +index 3a3c0166bd1f3..ed3f24a81549c 100644 +--- a/kernel/kprobes.c ++++ b/kernel/kprobes.c +@@ -1705,11 +1705,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) + /* Try to disarm and disable this/parent probe */ + if (p == orig_p || aggr_kprobe_disabled(orig_p)) { + /* +- * If kprobes_all_disarmed is set, orig_p +- * should have already been disarmed, so +- * skip unneed disarming process. ++ * Don't be lazy here. Even if 'kprobes_all_disarmed' ++ * is false, 'orig_p' might not have been armed yet. ++ * Note arm_all_kprobes() __tries__ to arm all kprobes ++ * on the best effort basis. + */ +- if (!kprobes_all_disarmed) { ++ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) { + ret = disarm_kprobe(orig_p, true); + if (ret) { + p->flags &= ~KPROBE_FLAG_DISABLED; +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index e215a9c969711..e10cf1b548128 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -2901,6 +2901,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command) + + ftrace_startup_enable(command); + ++ /* ++ * If ftrace is in an undefined state, we just remove ops from list ++ * to prevent the NULL pointer, instead of totally rolling it back and ++ * free trampoline, because those actions could cause further damage. 
++ */ ++ if (unlikely(ftrace_disabled)) { ++ __unregister_ftrace_function(ops); ++ return -ENODEV; ++ } ++ + ops->flags &= ~FTRACE_OPS_FL_ADDING; + + return 0; +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig +index 5056663c2aff7..a29eff4f969e3 100644 +--- a/lib/crypto/Kconfig ++++ b/lib/crypto/Kconfig +@@ -33,7 +33,6 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA + + config CRYPTO_LIB_CHACHA_GENERIC + tristate +- select XOR_BLOCKS + help + This symbol can be depended upon by arch implementations of the + ChaCha library interface that require the generic code as a +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 405793b8cf0d2..d61b665c45d63 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5371,7 +5371,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, + if (!huge_pte_none(huge_ptep_get(dst_pte))) + goto out_release_unlock; + +- if (vm_shared) { ++ if (page_in_pagecache) { + page_dup_rmap(page, true); + } else { + ClearHPageRestoreReserve(page); +diff --git a/mm/mmap.c b/mm/mmap.c +index b63336f6984c9..cd1d2680ac585 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2643,6 +2643,18 @@ static void unmap_region(struct mm_struct *mm, + tlb_gather_mmu(&tlb, mm); + update_hiwater_rss(mm); + unmap_vmas(&tlb, vma, start, end); ++ ++ /* ++ * Ensure we have no stale TLB entries by the time this mapping is ++ * removed from the rmap. ++ * Note that we don't have to worry about nested flushes here because ++ * we're holding the mm semaphore for removing the mapping - so any ++ * concurrent flush in this region has to be coming through the rmap, ++ * and we synchronize against that using the rmap lock. ++ */ ++ if ((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) ++ tlb_flush_mmu(&tlb); ++ + free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, + next ? next->vm_start : USER_PGTABLES_CEILING); + tlb_finish_mmu(&tlb); +diff --git a/mm/rmap.c b/mm/rmap.c +index 3e340ee380cb9..330b361a460ea 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -90,7 +90,8 @@ static inline struct anon_vma *anon_vma_alloc(void) + anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); + if (anon_vma) { + atomic_set(&anon_vma->refcount, 1); +- anon_vma->degree = 1; /* Reference for first vma */ ++ anon_vma->num_children = 0; ++ anon_vma->num_active_vmas = 0; + anon_vma->parent = anon_vma; + /* + * Initialise the anon_vma root to point to itself. If called +@@ -198,6 +199,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) + anon_vma = anon_vma_alloc(); + if (unlikely(!anon_vma)) + goto out_enomem_free_avc; ++ anon_vma->num_children++; /* self-parent link for new root */ + allocated = anon_vma; + } + +@@ -207,8 +209,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma) + if (likely(!vma->anon_vma)) { + vma->anon_vma = anon_vma; + anon_vma_chain_link(vma, avc, anon_vma); +- /* vma reference or self-parent link for new root */ +- anon_vma->degree++; ++ anon_vma->num_active_vmas++; + allocated = NULL; + avc = NULL; + } +@@ -293,19 +294,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) + anon_vma_chain_link(dst, avc, anon_vma); + + /* +- * Reuse existing anon_vma if its degree lower than two, +- * that means it has no vma and only one anon_vma child. ++ * Reuse existing anon_vma if it has no vma and only one ++ * anon_vma child. + * +- * Do not chose parent anon_vma, otherwise first child +- * will always reuse it. Root anon_vma is never reused: ++ * Root anon_vma is never reused: + * it has self-parent reference and at least one child. 
+ */ + if (!dst->anon_vma && src->anon_vma && +- anon_vma != src->anon_vma && anon_vma->degree < 2) ++ anon_vma->num_children < 2 && ++ anon_vma->num_active_vmas == 0) + dst->anon_vma = anon_vma; + } + if (dst->anon_vma) +- dst->anon_vma->degree++; ++ dst->anon_vma->num_active_vmas++; + unlock_anon_vma_root(root); + return 0; + +@@ -355,6 +356,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) + anon_vma = anon_vma_alloc(); + if (!anon_vma) + goto out_error; ++ anon_vma->num_active_vmas++; + avc = anon_vma_chain_alloc(GFP_KERNEL); + if (!avc) + goto out_error_free_anon_vma; +@@ -375,7 +377,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) + vma->anon_vma = anon_vma; + anon_vma_lock_write(anon_vma); + anon_vma_chain_link(vma, avc, anon_vma); +- anon_vma->parent->degree++; ++ anon_vma->parent->num_children++; + anon_vma_unlock_write(anon_vma); + + return 0; +@@ -407,7 +409,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) + * to free them outside the lock. + */ + if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { +- anon_vma->parent->degree--; ++ anon_vma->parent->num_children--; + continue; + } + +@@ -415,7 +417,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) + anon_vma_chain_free(avc); + } + if (vma->anon_vma) { +- vma->anon_vma->degree--; ++ vma->anon_vma->num_active_vmas--; + + /* + * vma would still be needed after unlink, and anon_vma will be prepared +@@ -433,7 +435,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma) + list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { + struct anon_vma *anon_vma = avc->anon_vma; + +- VM_WARN_ON(anon_vma->degree); ++ VM_WARN_ON(anon_vma->num_children); ++ VM_WARN_ON(anon_vma->num_active_vmas); + put_anon_vma(anon_vma); + + list_del(&avc->same_vma); +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index a0e0c2bdbb49a..e8de1e7d6ff48 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -1992,11 +1992,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, + src_match = !bacmp(&c->src, src); + dst_match = !bacmp(&c->dst, dst); + if (src_match && dst_match) { +- c = l2cap_chan_hold_unless_zero(c); +- if (c) { +- read_unlock(&chan_list_lock); +- return c; +- } ++ if (!l2cap_chan_hold_unless_zero(c)) ++ continue; ++ ++ read_unlock(&chan_list_lock); ++ return c; + } + + /* Closest match */ +diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c +index 655ee0e2de86d..a9fb16b9c735a 100644 +--- a/net/bpf/test_run.c ++++ b/net/bpf/test_run.c +@@ -469,6 +469,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) + { + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + ++ if (!skb->len) ++ return -EINVAL; ++ + if (!__skb) + return 0; + +diff --git a/net/core/dev.c b/net/core/dev.c +index 276cca563325e..be51644e95dae 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4147,6 +4147,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) + bool again = false; + + skb_reset_mac_header(skb); ++ skb_assert_len(skb); + + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); +diff --git a/net/core/neighbour.c b/net/core/neighbour.c +index ff049733cceeb..b3556c5c1c08e 100644 +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -279,11 +279,26 @@ static int neigh_del_timer(struct neighbour *n) + return 0; + } + +-static void pneigh_queue_purge(struct sk_buff_head *list) 
++static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) + { ++ struct sk_buff_head tmp; ++ unsigned long flags; + struct sk_buff *skb; + +- while ((skb = skb_dequeue(list)) != NULL) { ++ skb_queue_head_init(&tmp); ++ spin_lock_irqsave(&list->lock, flags); ++ skb = skb_peek(list); ++ while (skb != NULL) { ++ struct sk_buff *skb_next = skb_peek_next(skb, list); ++ if (net == NULL || net_eq(dev_net(skb->dev), net)) { ++ __skb_unlink(skb, list); ++ __skb_queue_tail(&tmp, skb); ++ } ++ skb = skb_next; ++ } ++ spin_unlock_irqrestore(&list->lock, flags); ++ ++ while ((skb = __skb_dequeue(&tmp))) { + dev_put(skb->dev); + kfree_skb(skb); + } +@@ -357,9 +372,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev, skip_perm); + pneigh_ifdown_and_unlock(tbl, dev); +- +- del_timer_sync(&tbl->proxy_timer); +- pneigh_queue_purge(&tbl->proxy_queue); ++ pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); ++ if (skb_queue_empty_lockless(&tbl->proxy_queue)) ++ del_timer_sync(&tbl->proxy_timer); + return 0; + } + +@@ -1735,7 +1750,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) + /* It is not clean... Fix it to unload IPv6 module safely */ + cancel_delayed_work_sync(&tbl->gc_work); + del_timer_sync(&tbl->proxy_timer); +- pneigh_queue_purge(&tbl->proxy_queue); ++ pneigh_queue_purge(&tbl->proxy_queue, NULL); + neigh_ifdown(tbl, NULL); + if (atomic_read(&tbl->entries)) + pr_crit("neighbour leakage\n"); +diff --git a/net/core/skmsg.c b/net/core/skmsg.c +index f50f8d95b6283..4ddcfac344984 100644 +--- a/net/core/skmsg.c ++++ b/net/core/skmsg.c +@@ -731,7 +731,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) + sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); + refcount_set(&psock->refcnt, 1); + +- rcu_assign_sk_user_data_nocopy(sk, psock); ++ __rcu_assign_sk_user_data_with_flags(sk, psock, ++ SK_USER_DATA_NOCOPY | ++ SK_USER_DATA_PSOCK); + sock_hold(sk); + + out: +diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig +index 92a747896f808..4f645d51c2573 100644 +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -133,7 +133,6 @@ config NF_CONNTRACK_ZONES + + config NF_CONNTRACK_PROCFS + bool "Supply CT list in procfs (OBSOLETE)" +- default y + depends on PROC_FS + help + This option enables for the list of known conntrack entries +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index 88c3b5cf8d94c..968dac3fcf58a 100644 +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2989,8 +2989,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) + if (err) + goto out_free; + +- if (sock->type == SOCK_RAW && +- !dev_validate_header(dev, skb->data, len)) { ++ if ((sock->type == SOCK_RAW && ++ !dev_validate_header(dev, skb->data, len)) || !skb->len) { + err = -EINVAL; + goto out_free; + } +diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost +index 48585c4d04ade..0273bf7375e26 100644 +--- a/scripts/Makefile.modpost ++++ b/scripts/Makefile.modpost +@@ -87,8 +87,7 @@ obj := $(KBUILD_EXTMOD) + src := $(obj) + + # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS +-include $(if $(wildcard $(KBUILD_EXTMOD)/Kbuild), \ +- $(KBUILD_EXTMOD)/Kbuild, $(KBUILD_EXTMOD)/Makefile) ++include $(if $(wildcard $(src)/Kbuild), $(src)/Kbuild, $(src)/Makefile) + + # modpost option for external modules + MODPOST += -e +diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c +index 6d794eaaf4c39..2e33a1fa0a6f4 100644 +--- 
a/sound/soc/sh/rz-ssi.c ++++ b/sound/soc/sh/rz-ssi.c +@@ -1022,32 +1022,36 @@ static int rz_ssi_probe(struct platform_device *pdev) + + ssi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); + if (IS_ERR(ssi->rstc)) { +- rz_ssi_release_dma_channels(ssi); +- return PTR_ERR(ssi->rstc); ++ ret = PTR_ERR(ssi->rstc); ++ goto err_reset; + } + + reset_control_deassert(ssi->rstc); + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { +- rz_ssi_release_dma_channels(ssi); +- pm_runtime_disable(ssi->dev); +- reset_control_assert(ssi->rstc); +- return dev_err_probe(ssi->dev, ret, "pm_runtime_resume_and_get failed\n"); ++ dev_err(&pdev->dev, "pm_runtime_resume_and_get failed\n"); ++ goto err_pm; + } + + ret = devm_snd_soc_register_component(&pdev->dev, &rz_ssi_soc_component, + rz_ssi_soc_dai, + ARRAY_SIZE(rz_ssi_soc_dai)); + if (ret < 0) { +- rz_ssi_release_dma_channels(ssi); +- +- pm_runtime_put(ssi->dev); +- pm_runtime_disable(ssi->dev); +- reset_control_assert(ssi->rstc); + dev_err(&pdev->dev, "failed to register snd component\n"); ++ goto err_snd_soc; + } + ++ return 0; ++ ++err_snd_soc: ++ pm_runtime_put(ssi->dev); ++err_pm: ++ pm_runtime_disable(ssi->dev); ++ reset_control_assert(ssi->rstc); ++err_reset: ++ rz_ssi_release_dma_channels(ssi); ++ + return ret; + } + +diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c +index 168fd802d70bd..9bfead5efc4c1 100644 +--- a/sound/usb/quirks.c ++++ b/sound/usb/quirks.c +@@ -1903,6 +1903,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { + QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */ + QUIRK_FLAG_GET_SAMPLE_RATE), ++ DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */ ++ QUIRK_FLAG_SET_IFACE_FIRST), + DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */ + QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ +diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh +index d4ffebb989f88..c336e6c148d1f 100755 +--- a/tools/testing/selftests/netfilter/nft_flowtable.sh ++++ b/tools/testing/selftests/netfilter/nft_flowtable.sh +@@ -14,6 +14,11 @@ + # nft_flowtable.sh -o8000 -l1500 -r2000 + # + ++sfx=$(mktemp -u "XXXXXXXX") ++ns1="ns1-$sfx" ++ns2="ns2-$sfx" ++nsr1="nsr1-$sfx" ++nsr2="nsr2-$sfx" + + # Kselftest framework requirement - SKIP code is 4. 
+ ksft_skip=4 +@@ -36,18 +41,17 @@ checktool (){ + checktool "nft --version" "run test without nft tool" + checktool "ip -Version" "run test without ip tool" + checktool "which nc" "run test without nc (netcat)" +-checktool "ip netns add nsr1" "create net namespace" ++checktool "ip netns add $nsr1" "create net namespace $nsr1" + +-ip netns add ns1 +-ip netns add ns2 +- +-ip netns add nsr2 ++ip netns add $ns1 ++ip netns add $ns2 ++ip netns add $nsr2 + + cleanup() { +- for i in 1 2; do +- ip netns del ns$i +- ip netns del nsr$i +- done ++ ip netns del $ns1 ++ ip netns del $ns2 ++ ip netns del $nsr1 ++ ip netns del $nsr2 + + rm -f "$ns1in" "$ns1out" + rm -f "$ns2in" "$ns2out" +@@ -59,22 +63,21 @@ trap cleanup EXIT + + sysctl -q net.netfilter.nf_log_all_netns=1 + +-ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 +-ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 ++ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1 ++ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2 + +-ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 ++ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2 + + for dev in lo veth0 veth1; do +- for i in 1 2; do +- ip -net nsr$i link set $dev up +- done ++ ip -net $nsr1 link set $dev up ++ ip -net $nsr2 link set $dev up + done + +-ip -net nsr1 addr add 10.0.1.1/24 dev veth0 +-ip -net nsr1 addr add dead:1::1/64 dev veth0 ++ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 ++ip -net $nsr1 addr add dead:1::1/64 dev veth0 + +-ip -net nsr2 addr add 10.0.2.1/24 dev veth1 +-ip -net nsr2 addr add dead:2::1/64 dev veth1 ++ip -net $nsr2 addr add 10.0.2.1/24 dev veth1 ++ip -net $nsr2 addr add dead:2::1/64 dev veth1 + + # set different MTUs so we need to push packets coming from ns1 (large MTU) + # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), +@@ -106,49 +109,56 @@ do + esac + done + +-if ! ip -net nsr1 link set veth0 mtu $omtu; then ++if ! ip -net $nsr1 link set veth0 mtu $omtu; then + exit 1 + fi + +-ip -net ns1 link set eth0 mtu $omtu ++ip -net $ns1 link set eth0 mtu $omtu + +-if ! ip -net nsr2 link set veth1 mtu $rmtu; then ++if ! ip -net $nsr2 link set veth1 mtu $rmtu; then + exit 1 + fi + +-ip -net ns2 link set eth0 mtu $rmtu ++ip -net $ns2 link set eth0 mtu $rmtu + + # transfer-net between nsr1 and nsr2. + # these addresses are not used for connections. +-ip -net nsr1 addr add 192.168.10.1/24 dev veth1 +-ip -net nsr1 addr add fee1:2::1/64 dev veth1 +- +-ip -net nsr2 addr add 192.168.10.2/24 dev veth0 +-ip -net nsr2 addr add fee1:2::2/64 dev veth0 +- +-for i in 1 2; do +- ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +- ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +- +- ip -net ns$i link set lo up +- ip -net ns$i link set eth0 up +- ip -net ns$i addr add 10.0.$i.99/24 dev eth0 +- ip -net ns$i route add default via 10.0.$i.1 +- ip -net ns$i addr add dead:$i::99/64 dev eth0 +- ip -net ns$i route add default via dead:$i::1 +- if ! 
ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then ++ip -net $nsr1 addr add 192.168.10.1/24 dev veth1 ++ip -net $nsr1 addr add fee1:2::1/64 dev veth1 ++ ++ip -net $nsr2 addr add 192.168.10.2/24 dev veth0 ++ip -net $nsr2 addr add fee1:2::2/64 dev veth0 ++ ++for i in 0 1; do ++ ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null ++ ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null ++done ++ ++for ns in $ns1 $ns2;do ++ ip -net $ns link set lo up ++ ip -net $ns link set eth0 up ++ ++ if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then + echo "ERROR: Check Originator/Responder values (problem during address addition)" + exit 1 + fi +- + # don't set ip DF bit for first two tests +- ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null ++ ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null + done + +-ip -net nsr1 route add default via 192.168.10.2 +-ip -net nsr2 route add default via 192.168.10.1 ++ip -net $ns1 addr add 10.0.1.99/24 dev eth0 ++ip -net $ns2 addr add 10.0.2.99/24 dev eth0 ++ip -net $ns1 route add default via 10.0.1.1 ++ip -net $ns2 route add default via 10.0.2.1 ++ip -net $ns1 addr add dead:1::99/64 dev eth0 ++ip -net $ns2 addr add dead:2::99/64 dev eth0 ++ip -net $ns1 route add default via dead:1::1 ++ip -net $ns2 route add default via dead:2::1 ++ ++ip -net $nsr1 route add default via 192.168.10.2 ++ip -net $nsr2 route add default via 192.168.10.1 + +-ip netns exec nsr1 nft -f - <<EOF ++ip netns exec $nsr1 nft -f - <<EOF + table inet filter { + flowtable f1 { + hook ingress priority 0 +@@ -197,18 +207,18 @@ if [ $? -ne 0 ]; then + fi + + # test basic connectivity +-if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then +- echo "ERROR: ns1 cannot reach ns2" 1>&2 ++if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then ++ echo "ERROR: $ns1 cannot reach ns2" 1>&2 + exit 1 + fi + +-if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then +- echo "ERROR: ns2 cannot reach ns1" 1>&2 ++if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then ++ echo "ERROR: $ns2 cannot reach $ns1" 1>&2 + exit 1 + fi + + if [ $ret -eq 0 ];then +- echo "PASS: netns routing/connectivity: ns1 can reach ns2" ++ echo "PASS: netns routing/connectivity: $ns1 can reach $ns2" + fi + + ns1in=$(mktemp) +@@ -312,24 +322,24 @@ make_file "$ns2in" + + # First test: + # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. +-if test_tcp_forwarding ns1 ns2; then ++if test_tcp_forwarding $ns1 $ns2; then + echo "PASS: flow offloaded for ns1/ns2" + else + echo "FAIL: flow offload for ns1/ns2:" 1>&2 +- ip netns exec nsr1 nft list ruleset ++ ip netns exec $nsr1 nft list ruleset + ret=1 + fi + + # delete default route, i.e. ns2 won't be able to reach ns1 and + # will depend on ns1 being masqueraded in nsr1. + # expect ns1 has nsr1 address. +-ip -net ns2 route del default via 10.0.2.1 +-ip -net ns2 route del default via dead:2::1 +-ip -net ns2 route add 192.168.10.1 via 10.0.2.1 ++ip -net $ns2 route del default via 10.0.2.1 ++ip -net $ns2 route del default via dead:2::1 ++ip -net $ns2 route add 192.168.10.1 via 10.0.2.1 + + # Second test: + # Same, but with NAT enabled. 
+-ip netns exec nsr1 nft -f - <<EOF ++ip netns exec $nsr1 nft -f - <<EOF + table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; +@@ -343,47 +353,47 @@ table ip nat { + } + EOF + +-if test_tcp_forwarding_nat ns1 ns2; then ++if test_tcp_forwarding_nat $ns1 $ns2; then + echo "PASS: flow offloaded for ns1/ns2 with NAT" + else + echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 +- ip netns exec nsr1 nft list ruleset ++ ip netns exec $nsr1 nft list ruleset + ret=1 + fi + + # Third test: + # Same as second test, but with PMTU discovery enabled. +-handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) ++handle=$(ip netns exec $nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) + +-if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then ++if ! ip netns exec $nsr1 nft delete rule inet filter forward $handle; then + echo "FAIL: Could not delete large-packet accept rule" + exit 1 + fi + +-ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +-ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null ++ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null ++ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null + +-if test_tcp_forwarding_nat ns1 ns2; then ++if test_tcp_forwarding_nat $ns1 $ns2; then + echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" + else + echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 +- ip netns exec nsr1 nft list ruleset ++ ip netns exec $nsr1 nft list ruleset + fi + + # Another test: + # Add bridge interface br0 to Router1, with NAT enabled. +-ip -net nsr1 link add name br0 type bridge +-ip -net nsr1 addr flush dev veth0 +-ip -net nsr1 link set up dev veth0 +-ip -net nsr1 link set veth0 master br0 +-ip -net nsr1 addr add 10.0.1.1/24 dev br0 +-ip -net nsr1 addr add dead:1::1/64 dev br0 +-ip -net nsr1 link set up dev br0 ++ip -net $nsr1 link add name br0 type bridge ++ip -net $nsr1 addr flush dev veth0 ++ip -net $nsr1 link set up dev veth0 ++ip -net $nsr1 link set veth0 master br0 ++ip -net $nsr1 addr add 10.0.1.1/24 dev br0 ++ip -net $nsr1 addr add dead:1::1/64 dev br0 ++ip -net $nsr1 link set up dev br0 + +-ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null ++ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null + + # br0 with NAT enabled. +-ip netns exec nsr1 nft -f - <<EOF ++ip netns exec $nsr1 nft -f - <<EOF + flush table ip nat + table ip nat { + chain prerouting { +@@ -398,59 +408,59 @@ table ip nat { + } + EOF + +-if test_tcp_forwarding_nat ns1 ns2; then ++if test_tcp_forwarding_nat $ns1 $ns2; then + echo "PASS: flow offloaded for ns1/ns2 with bridge NAT" + else + echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 +- ip netns exec nsr1 nft list ruleset ++ ip netns exec $nsr1 nft list ruleset + ret=1 + fi + + # Another test: + # Add bridge interface br0 to Router1, with NAT and VLAN. 
+-ip -net nsr1 link set veth0 nomaster +-ip -net nsr1 link set down dev veth0 +-ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10 +-ip -net nsr1 link set up dev veth0 +-ip -net nsr1 link set up dev veth0.10 +-ip -net nsr1 link set veth0.10 master br0 +- +-ip -net ns1 addr flush dev eth0 +-ip -net ns1 link add link eth0 name eth0.10 type vlan id 10 +-ip -net ns1 link set eth0 up +-ip -net ns1 link set eth0.10 up +-ip -net ns1 addr add 10.0.1.99/24 dev eth0.10 +-ip -net ns1 route add default via 10.0.1.1 +-ip -net ns1 addr add dead:1::99/64 dev eth0.10 +- +-if test_tcp_forwarding_nat ns1 ns2; then ++ip -net $nsr1 link set veth0 nomaster ++ip -net $nsr1 link set down dev veth0 ++ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10 ++ip -net $nsr1 link set up dev veth0 ++ip -net $nsr1 link set up dev veth0.10 ++ip -net $nsr1 link set veth0.10 master br0 ++ ++ip -net $ns1 addr flush dev eth0 ++ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10 ++ip -net $ns1 link set eth0 up ++ip -net $ns1 link set eth0.10 up ++ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10 ++ip -net $ns1 route add default via 10.0.1.1 ++ip -net $ns1 addr add dead:1::99/64 dev eth0.10 ++ ++if test_tcp_forwarding_nat $ns1 $ns2; then + echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN" + else + echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 +- ip netns exec nsr1 nft list ruleset ++ ip netns exec $nsr1 nft list ruleset + ret=1 + fi + + # restore test topology (remove bridge and VLAN) +-ip -net nsr1 link set veth0 nomaster +-ip -net nsr1 link set veth0 down +-ip -net nsr1 link set veth0.10 down +-ip -net nsr1 link delete veth0.10 type vlan +-ip -net nsr1 link delete br0 type bridge +-ip -net ns1 addr flush dev eth0.10 +-ip -net ns1 link set eth0.10 down +-ip -net ns1 link set eth0 down +-ip -net ns1 link delete eth0.10 type vlan ++ip -net $nsr1 link set veth0 nomaster ++ip -net $nsr1 link set veth0 down ++ip -net $nsr1 link set veth0.10 down ++ip -net $nsr1 link delete veth0.10 type vlan ++ip -net $nsr1 link delete br0 type bridge ++ip -net $ns1 addr flush dev eth0.10 ++ip -net $ns1 link set eth0.10 down ++ip -net $ns1 link set eth0 down ++ip -net $ns1 link delete eth0.10 type vlan + + # restore address in ns1 and nsr1 +-ip -net ns1 link set eth0 up +-ip -net ns1 addr add 10.0.1.99/24 dev eth0 +-ip -net ns1 route add default via 10.0.1.1 +-ip -net ns1 addr add dead:1::99/64 dev eth0 +-ip -net ns1 route add default via dead:1::1 +-ip -net nsr1 addr add 10.0.1.1/24 dev veth0 +-ip -net nsr1 addr add dead:1::1/64 dev veth0 +-ip -net nsr1 link set up dev veth0 ++ip -net $ns1 link set eth0 up ++ip -net $ns1 addr add 10.0.1.99/24 dev eth0 ++ip -net $ns1 route add default via 10.0.1.1 ++ip -net $ns1 addr add dead:1::99/64 dev eth0 ++ip -net $ns1 route add default via dead:1::1 ++ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 ++ip -net $nsr1 addr add dead:1::1/64 dev veth0 ++ip -net $nsr1 link set up dev veth0 + + KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) + KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) +@@ -480,23 +490,23 @@ do_esp() { + + } + +-do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 ++do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 + +-do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 ++do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 + +-ip netns exec nsr1 nft delete table ip nat ++ip netns exec $nsr1 nft delete table ip nat + + # restore default 
routes +-ip -net ns2 route del 192.168.10.1 via 10.0.2.1 +-ip -net ns2 route add default via 10.0.2.1 +-ip -net ns2 route add default via dead:2::1 ++ip -net $ns2 route del 192.168.10.1 via 10.0.2.1 ++ip -net $ns2 route add default via 10.0.2.1 ++ip -net $ns2 route add default via dead:2::1 + +-if test_tcp_forwarding ns1 ns2; then ++if test_tcp_forwarding $ns1 $ns2; then + echo "PASS: ipsec tunnel mode for ns1/ns2" + else + echo "FAIL: ipsec tunnel mode for ns1/ns2" +- ip netns exec nsr1 nft list ruleset 1>&2 +- ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2 ++ ip netns exec $nsr1 nft list ruleset 1>&2 ++ ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2 + fi + + exit $ret |