77 files changed, 3510 insertions, 5304 deletions
diff --git a/0001-update-Xen-version-to-4.16.2-pre.patch b/0001-update-Xen-version-to-4.16.3-pre.patch index 2e62c21..6ae690c 100644 --- a/0001-update-Xen-version-to-4.16.2-pre.patch +++ b/0001-update-Xen-version-to-4.16.3-pre.patch @@ -1,25 +1,25 @@ -From 5be9edb482ab20cf3e7acb05b511465294d1e19b Mon Sep 17 00:00:00 2001 +From 4aa32912ebeda8cb94d1c3941e7f1f0a2d4f921b Mon Sep 17 00:00:00 2001 From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 13:55:17 +0200 -Subject: [PATCH 01/51] update Xen version to 4.16.2-pre +Date: Tue, 11 Oct 2022 14:49:41 +0200 +Subject: [PATCH 01/26] update Xen version to 4.16.3-pre --- xen/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xen/Makefile b/xen/Makefile -index 8abc71cf73aa..90a29782dbf4 100644 +index 76d0a3ff253f..8a403ee896cd 100644 --- a/xen/Makefile +++ b/xen/Makefile @@ -2,7 +2,7 @@ # All other places this is stored (eg. compile.h) should be autogenerated. export XEN_VERSION = 4 export XEN_SUBVERSION = 16 --export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION) -+export XEN_EXTRAVERSION ?= .2-pre$(XEN_VENDORVERSION) +-export XEN_EXTRAVERSION ?= .2$(XEN_VENDORVERSION) ++export XEN_EXTRAVERSION ?= .3-pre$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version -- -2.35.1 +2.37.3 diff --git a/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch b/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch deleted file mode 100644 index 0ba090e..0000000 --- a/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b58fb6e81bd55b6bd946abc3070770f7994c9ef9 Mon Sep 17 00:00:00 2001 -From: Jason Andryuk <jandryuk@gmail.com> -Date: Tue, 7 Jun 2022 13:55:39 +0200 -Subject: [PATCH 02/51] x86/irq: skip unmap_domain_pirq XSM during destruction -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -xsm_unmap_domain_irq was seen denying unmap_domain_pirq when called from -complete_domain_destroy as an RCU callback. The source context was an -unexpected, random domain. Since this is a xen-internal operation, -going through the XSM hook is inapproriate. - -Check d->is_dying and skip the XSM hook when set since this is a cleanup -operation for a domain being destroyed. - -Suggested-by: Roger Pau Monné <roger.pau@citrix.com> -Signed-off-by: Jason Andryuk <jandryuk@gmail.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 2e6f95a942d1927a53f077c301db0b799c54c05a -master date: 2022-04-08 14:51:52 +0200 ---- - xen/arch/x86/irq.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c -index 67cbf6b979dc..47b86af5dce9 100644 ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -2342,8 +2342,14 @@ int unmap_domain_pirq(struct domain *d, int pirq) - nr = msi_desc->msi.nvec; - } - -- ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, -- msi_desc ? msi_desc->dev : NULL); -+ /* -+ * When called by complete_domain_destroy via RCU, current is a random -+ * domain. Skip the XSM check since this is a Xen-initiated action. -+ */ -+ if ( !d->is_dying ) -+ ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, -+ msi_desc ? 
msi_desc->dev : NULL); -+ - if ( ret ) - goto done; - --- -2.35.1 - diff --git a/0002-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch b/0002-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch new file mode 100644 index 0000000..fecc260 --- /dev/null +++ b/0002-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch @@ -0,0 +1,62 @@ +From 8d9531a3421dad2b0012e09e6f41d5274e162064 Mon Sep 17 00:00:00 2001 +From: Julien Grall <jgrall@amazon.com> +Date: Tue, 11 Oct 2022 14:52:13 +0200 +Subject: [PATCH 02/26] xen/arm: p2m: Prevent adding mapping when domain is + dying + +During the domain destroy process, the domain will still be accessible +until it is fully destroyed. The same is true of the P2M, because we don't bail +out early if is_dying is non-zero. If a domain has permission to +modify another domain's P2M (i.e. dom0, or a stubdomain), then +foreign mappings can be added past relinquish_p2m_mapping(). + +Therefore, we need to prevent mappings from being added while the domain +is dying. This commit does so by adding a +d->is_dying check to p2m_set_entry(). It also enhances the +check in relinquish_p2m_mapping() to make sure that no mappings can +be added in the P2M after the P2M lock is released. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Julien Grall <jgrall@amazon.com> +Signed-off-by: Henry Wang <Henry.Wang@arm.com> +Tested-by: Henry Wang <Henry.Wang@arm.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> +master commit: 3ebe773293e3b945460a3d6f54f3b91915397bab +master date: 2022-10-11 14:20:18 +0200 +--- + xen/arch/arm/p2m.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index 3349b464a39e..1affdafadbeb 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -1093,6 +1093,15 @@ int p2m_set_entry(struct p2m_domain *p2m, + { + int rc = 0; + ++ /* ++ * Any reference taken by the P2M mappings (e.g. foreign mapping) will ++ * be dropped in relinquish_p2m_mapping(). As the P2M will still ++ * be accessible afterwards, we need to prevent mappings from being ++ * added while the domain is dying. ++ */ ++ if ( unlikely(p2m->domain->is_dying) ) ++ return -ENOMEM; ++ + while ( nr ) + { + unsigned long mask; +@@ -1610,6 +1619,8 @@ int relinquish_p2m_mapping(struct domain *d) + unsigned int order; + gfn_t start, end; + ++ BUG_ON(!d->is_dying); ++ /* No mappings can be added in the P2M after the P2M lock is released. */ + p2m_write_lock(p2m); + + start = p2m->lowest_mapped_gfn; +-- +2.37.3 + diff --git a/0003-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch b/0003-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch new file mode 100644 index 0000000..3190db8 --- /dev/null +++ b/0003-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch @@ -0,0 +1,167 @@ +From 937fdbad5180440888f1fcee46299103327efa90 Mon Sep 17 00:00:00 2001 +From: Julien Grall <jgrall@amazon.com> +Date: Tue, 11 Oct 2022 14:52:27 +0200 +Subject: [PATCH 03/26] xen/arm: p2m: Handle preemption when freeing + intermediate page tables + +At the moment the P2M page tables will be freed when the domain structure +is freed without any preemption. As the P2M is quite large, iterating +through this may take more time than is reasonable without intermediate +preemption (to run softirqs and perhaps the scheduler). + +Split p2m_teardown() into two parts: one preemptible and called when +relinquishing the resources, the other one non-preemptible and called +when freeing the domain structure. 
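In outline, the preemptible half added here follows the usual Xen teardown shape: free in bounded batches and let the toplevel hypercall restart when a preemption check fires. A minimal sketch of that shape (the function name here is illustrative; the real hunk below keeps the p2m_teardown() name and also handles locking and root clearing):

    int p2m_teardown_preemptible(struct p2m_domain *p2m)
    {
        struct page_info *pg;
        unsigned long count = 0;

        /* Drain the list of intermediate page tables in batches. */
        while ( (pg = page_list_remove_head(&p2m->pages)) )
        {
            free_domheap_page(pg);
            /* Arbitrary batch size: offer to preempt every 512 pages. */
            if ( !(++count % 512) && hypercall_preempt_check() )
                return -ERESTART; /* caller re-invokes the teardown later */
        }

        return 0;
    }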
+ +As we are now freeing the P2M pages early, we also need to prevent +further allocation if someone calls p2m_set_entry() past p2m_teardown() +(I wasn't able to prove this will never happen). This is done by +checking domain->is_dying, added to p2m_set_entry() by the previous patch. + +Similarly, we want to make sure that no one can access the freed +pages. Therefore the root is cleared before the pages are freed. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Julien Grall <jgrall@amazon.com> +Signed-off-by: Henry Wang <Henry.Wang@arm.com> +Tested-by: Henry Wang <Henry.Wang@arm.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> +master commit: 3202084566bba0ef0c45caf8c24302f83d92f9c8 +master date: 2022-10-11 14:20:56 +0200 +--- + xen/arch/arm/domain.c | 10 +++++++-- + xen/arch/arm/p2m.c | 47 ++++++++++++++++++++++++++++++++++++--- + xen/include/asm-arm/p2m.h | 13 +++++++++-- + 3 files changed, 63 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c +index 96e1b235501d..2694c39127c5 100644 +--- a/xen/arch/arm/domain.c ++++ b/xen/arch/arm/domain.c +@@ -789,10 +789,10 @@ fail: + void arch_domain_destroy(struct domain *d) + { + /* IOMMU page table is shared with P2M, always call +- * iommu_domain_destroy() before p2m_teardown(). ++ * iommu_domain_destroy() before p2m_final_teardown(). + */ + iommu_domain_destroy(d); +- p2m_teardown(d); ++ p2m_final_teardown(d); + domain_vgic_free(d); + domain_vuart_free(d); + free_xenheap_page(d->shared_info); +@@ -996,6 +996,7 @@ enum { + PROG_xen, + PROG_page, + PROG_mapping, ++ PROG_p2m, + PROG_done, + }; + +@@ -1056,6 +1057,11 @@ int domain_relinquish_resources(struct domain *d) + if ( ret ) + return ret; + ++ PROGRESS(p2m): ++ ret = p2m_teardown(d); ++ if ( ret ) ++ return ret; ++ + PROGRESS(done): + break; + +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index 1affdafadbeb..27418ee5ee98 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -1527,17 +1527,58 @@ static void p2m_free_vmid(struct domain *d) + spin_unlock(&vmid_alloc_lock); + } + +-void p2m_teardown(struct domain *d) ++int p2m_teardown(struct domain *d) + { + struct p2m_domain *p2m = p2m_get_hostp2m(d); ++ unsigned long count = 0; + struct page_info *pg; ++ unsigned int i; ++ int rc = 0; ++ ++ p2m_write_lock(p2m); ++ ++ /* ++ * We are about to free the intermediate page-tables, so clear the ++ * root to prevent any walk from using them. ++ */ ++ for ( i = 0; i < P2M_ROOT_PAGES; i++ ) ++ clear_and_clean_page(p2m->root + i); ++ ++ /* ++ * The domain will not be scheduled anymore, so in theory we should ++ * not need to flush the TLBs. Do it for safety purposes. ++ * ++ * Note that all the devices have already been de-assigned. So we don't ++ * need to flush the IOMMU TLB here. 
++ */ ++ p2m_force_tlb_flush_sync(p2m); ++ ++ while ( (pg = page_list_remove_head(&p2m->pages)) ) ++ { ++ free_domheap_page(pg); ++ count++; ++ /* Arbitrarily preempt every 512 iterations */ ++ if ( !(count % 512) && hypercall_preempt_check() ) ++ { ++ rc = -ERESTART; ++ break; ++ } ++ } ++ ++ p2m_write_unlock(p2m); ++ ++ return rc; ++} ++ ++void p2m_final_teardown(struct domain *d) ++{ ++ struct p2m_domain *p2m = p2m_get_hostp2m(d); + + /* p2m not actually initialized */ + if ( !p2m->domain ) + return; + +- while ( (pg = page_list_remove_head(&p2m->pages)) ) +- free_domheap_page(pg); ++ ASSERT(page_list_empty(&p2m->pages)); + + if ( p2m->root ) + free_domheap_pages(p2m->root, P2M_ROOT_ORDER); +diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h +index 8f11d9c97b5d..b3ba83283e11 100644 +--- a/xen/include/asm-arm/p2m.h ++++ b/xen/include/asm-arm/p2m.h +@@ -192,8 +192,17 @@ void setup_virt_paging(void); + /* Init the datastructures for later use by the p2m code */ + int p2m_init(struct domain *d); + +-/* Return all the p2m resources to Xen. */ +-void p2m_teardown(struct domain *d); ++/* ++ * The P2M resources are freed in two parts: ++ * - p2m_teardown() will be called when relinquishing the resources. It ++ * will free large resources (e.g. intermediate page-tables) that ++ * require preemption. ++ * - p2m_final_teardown() will be called when the domain struct is being ++ * freed. This *cannot* be preempted and therefore only small ++ * resources should be freed here. ++ */ ++int p2m_teardown(struct domain *d); ++void p2m_final_teardown(struct domain *d); + + /* + * Remove mapping refcount on each mapping page in the p2m +-- +2.37.3 + diff --git a/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch b/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch deleted file mode 100644 index fa1443c..0000000 --- a/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 6c6bbfdff9374ef41f84c4ebed7b8a7a40767ef6 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 13:56:54 +0200 -Subject: [PATCH 03/51] xen: fix XEN_DOMCTL_gdbsx_guestmemio crash - -A hypervisor built without CONFIG_GDBSX will crash in case the -XEN_DOMCTL_gdbsx_guestmemio domctl is being called, as the call will -end up in iommu_do_domctl() with d == NULL: - - (XEN) CPU: 6 - (XEN) RIP: e008:[<ffff82d040269984>] iommu_do_domctl+0x4/0x30 - (XEN) RFLAGS: 0000000000010202 CONTEXT: hypervisor (d0v0) - (XEN) rax: 00000000000003e8 rbx: ffff830856277ef8 rcx: ffff830856277fff - ... - (XEN) Xen call trace: - (XEN) [<ffff82d040269984>] R iommu_do_domctl+0x4/0x30 - (XEN) [<ffff82d04035cd5f>] S arch_do_domctl+0x7f/0x2330 - (XEN) [<ffff82d040239e46>] S do_domctl+0xe56/0x1930 - (XEN) [<ffff82d040238ff0>] S do_domctl+0/0x1930 - (XEN) [<ffff82d0402f8c59>] S pv_hypercall+0x99/0x110 - (XEN) [<ffff82d0402f5161>] S arch/x86/pv/domain.c#_toggle_guest_pt+0x11/0x90 - (XEN) [<ffff82d040366288>] S lstar_enter+0x128/0x130 - (XEN) - (XEN) Pagetable walk from 0000000000000144: - (XEN) L4[0x000] = 0000000000000000 ffffffffffffffff - (XEN) - (XEN) **************************************** - (XEN) Panic on CPU 6: - (XEN) FATAL PAGE FAULT - (XEN) [error_code=0000] - (XEN) Faulting linear address: 0000000000000144 - (XEN) **************************************** - -It used to be permitted to pass DOMID_IDLE to dbg_rw_mem(), which is why the -special case skipping the domid checks exists. Now that it is only permitted -to pass proper domids, remove the special case, making 'd' always valid. 
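The crash being fixed is a plain NULL dereference: the special case mentioned above leaves d == NULL for XEN_DOMCTL_gdbsx_guestmemio, and when CONFIG_GDBSX is off nothing consumes the sub-op before it reaches the IOMMU handler, which expects a real domain. Schematically (a sketch of the failure path consistent with the call trace above, not the actual code):

    struct domain *d = NULL;   /* special case: domid lookup was skipped */

    /* ... without CONFIG_GDBSX no handler claims the sub-op, so it falls
     * through to the default path in arch_do_domctl() ... */
    ret = iommu_do_domctl(op, d, u_domctl);  /* dereferences d: page fault */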
- -Reported-by: Cheyenne Wills <cheyenne.wills@gmail.com> -Fixes: e726a82ca0dc ("xen: make gdbsx support configurable") -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: f00daf1fb3213a9b0335d9dcd90fe9cb5c02b7a9 -master date: 2022-04-19 17:07:08 +0100 ---- - xen/common/domctl.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/xen/common/domctl.c b/xen/common/domctl.c -index 271862ae587f..419e4070f59d 100644 ---- a/xen/common/domctl.c -+++ b/xen/common/domctl.c -@@ -304,7 +304,6 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) - if ( op->domain == DOMID_INVALID ) - { - case XEN_DOMCTL_createdomain: -- case XEN_DOMCTL_gdbsx_guestmemio: - d = NULL; - break; - } --- -2.35.1 - diff --git a/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch b/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch deleted file mode 100644 index a4d229a..0000000 --- a/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch +++ /dev/null @@ -1,49 +0,0 @@ -From b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 13:58:16 +0200 -Subject: [PATCH 04/51] VT-d: refuse to use IOMMU with reserved CAP.ND value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The field taking the value 7 (resulting in 18-bit DIDs when using the -calculation in cap_ndoms(), when the DID fields are only 16 bits wide) -is reserved. Instead of misbehaving in case we would encounter such an -IOMMU, refuse to use it. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Kevin Tian <kevin.tian@intel.com> -master commit: a1545fbf45c689aff39ce76a6eaa609d32ef72a7 -master date: 2022-04-20 10:54:26 +0200 ---- - xen/drivers/passthrough/vtd/iommu.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 93dd8aa643aa..8975c1de61bc 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -1279,8 +1279,11 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) - - quirk_iommu_caps(iommu); - -+ nr_dom = cap_ndoms(iommu->cap); -+ - if ( cap_fault_reg_offset(iommu->cap) + - cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE || -+ ((nr_dom - 1) >> 16) /* I.e. 
cap.nd > 6 */ || - ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE ) - { - printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported\n"); -@@ -1305,7 +1308,6 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) - vtd_ops.sync_cache = sync_cache; - - /* allocate domain id bitmap */ -- nr_dom = cap_ndoms(iommu->cap); - iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom)); - if ( !iommu->domid_bitmap ) - return -ENOMEM; --- -2.35.1 - diff --git a/0004-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch b/0004-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch new file mode 100644 index 0000000..b3edbd9 --- /dev/null +++ b/0004-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch @@ -0,0 +1,138 @@ +From 8fc19c143b8aa563077f3d5c46fcc0a54dc04f35 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 14:52:39 +0200 +Subject: [PATCH 04/26] x86/p2m: add option to skip root pagetable removal in + p2m_teardown() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add a new parameter to p2m_teardown() in order to select whether the +root page table should also be freed. Note that all users are +adjusted to pass the parameter to remove the root page tables, so +behavior is not modified. + +No functional change intended. + +This is part of CVE-2022-33746 / XSA-410. + +Suggested-by: Julien Grall <julien@xen.org> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: 1df52a270225527ae27bfa2fc40347bf93b78357 +master date: 2022-10-11 14:21:23 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 6 +++--- + xen/arch/x86/mm/p2m.c | 20 ++++++++++++++++---- + xen/arch/x86/mm/shadow/common.c | 4 ++-- + xen/include/asm-x86/p2m.h | 2 +- + 4 files changed, 22 insertions(+), 10 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index 47a7487fa7a3..a8f5a19da917 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -541,18 +541,18 @@ void hap_final_teardown(struct domain *d) + } + + for ( i = 0; i < MAX_ALTP2M; i++ ) +- p2m_teardown(d->arch.altp2m_p2m[i]); ++ p2m_teardown(d->arch.altp2m_p2m[i], true); + } + + /* Destroy nestedp2m's first */ + for (i = 0; i < MAX_NESTEDP2M; i++) { +- p2m_teardown(d->arch.nested_p2m[i]); ++ p2m_teardown(d->arch.nested_p2m[i], true); + } + + if ( d->arch.paging.hap.total_pages != 0 ) + hap_teardown(d, NULL); + +- p2m_teardown(p2m_get_hostp2m(d)); ++ p2m_teardown(p2m_get_hostp2m(d), true); + /* Free any memory that the p2m teardown released */ + paging_lock(d); + hap_set_allocation(d, 0, NULL); +diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c +index def1695cf00b..aba4f17cbe12 100644 +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -749,11 +749,11 @@ int p2m_alloc_table(struct p2m_domain *p2m) + * hvm fixme: when adding support for pvh non-hardware domains, this path must + * cleanup any foreign p2m types (release refcnts on them). + */ +-void p2m_teardown(struct p2m_domain *p2m) ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root) + /* Return all the p2m pages to Xen. 
+ * We know we don't have any extra mappings to these pages */ + { +- struct page_info *pg; ++ struct page_info *pg, *root_pg = NULL; + struct domain *d; + + if (p2m == NULL) +@@ -763,10 +763,22 @@ void p2m_teardown(struct p2m_domain *p2m) + + p2m_lock(p2m); + ASSERT(atomic_read(&d->shr_pages) == 0); +- p2m->phys_table = pagetable_null(); ++ ++ if ( remove_root ) ++ p2m->phys_table = pagetable_null(); ++ else if ( !pagetable_is_null(p2m->phys_table) ) ++ { ++ root_pg = pagetable_get_page(p2m->phys_table); ++ clear_domain_page(pagetable_get_mfn(p2m->phys_table)); ++ } + + while ( (pg = page_list_remove_head(&p2m->pages)) ) +- d->arch.paging.free_page(d, pg); ++ if ( pg != root_pg ) ++ d->arch.paging.free_page(d, pg); ++ ++ if ( root_pg ) ++ page_list_add(root_pg, &p2m->pages); ++ + p2m_unlock(p2m); + } + +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 8c1b041f7135..8c5baba9544d 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -2701,7 +2701,7 @@ int shadow_enable(struct domain *d, u32 mode) + paging_unlock(d); + out_unlocked: + if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) +- p2m_teardown(p2m); ++ p2m_teardown(p2m, true); + if ( rv != 0 && pg != NULL ) + { + pg->count_info &= ~PGC_count_mask; +@@ -2866,7 +2866,7 @@ void shadow_final_teardown(struct domain *d) + shadow_teardown(d, NULL); + + /* It is now safe to pull down the p2m map. */ +- p2m_teardown(p2m_get_hostp2m(d)); ++ p2m_teardown(p2m_get_hostp2m(d), true); + /* Free any shadow memory that the p2m teardown released */ + paging_lock(d); + shadow_set_allocation(d, 0, NULL); +diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h +index f2af7a746ced..c3c16748e7d5 100644 +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -574,7 +574,7 @@ int p2m_init(struct domain *d); + int p2m_alloc_table(struct p2m_domain *p2m); + + /* Return all the p2m resources to Xen. */ +-void p2m_teardown(struct p2m_domain *p2m); ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root); + void p2m_final_teardown(struct domain *d); + + /* Add a page to a domain's p2m table */ +-- +2.37.3 + diff --git a/0005-x86-HAP-adjust-monitor-table-related-error-handling.patch b/0005-x86-HAP-adjust-monitor-table-related-error-handling.patch new file mode 100644 index 0000000..33ab1ad --- /dev/null +++ b/0005-x86-HAP-adjust-monitor-table-related-error-handling.patch @@ -0,0 +1,77 @@ +From 3422c19d85a3d23a9d798eafb739ffb8865522d2 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 14:52:59 +0200 +Subject: [PATCH 05/26] x86/HAP: adjust monitor table related error handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +hap_make_monitor_table() will return INVALID_MFN if it encounters an +error condition, but hap_update_paging_modes() wasn’t handling this +value, resulting in an inappropriate value being stored in +monitor_table. This would subsequently misguide at least +hap_vcpu_teardown(). Avoid this by bailing early. + +Further, when a domain has/was already crashed or (perhaps less +important as there's no such path known to lead here) is already dying, +avoid calling domain_crash() on it again - that's at best confusing. + +This is part of CVE-2022-33746 / XSA-410. 
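The fix treats INVALID_MFN as a first-class return value at each caller, before it can be converted into a pagetable reference. Distilled from the hunk below (labels simplified):

    mfn_t mmfn = hap_make_monitor_table(v);

    if ( mfn_eq(mmfn, INVALID_MFN) )
        goto unlock;                 /* bail before storing a bogus table */

    v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
    make_cr3(v, mmfn);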
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 5b44a61180f4f2e4f490a28400c884dd357ff45d +master date: 2022-10-11 14:21:56 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index a8f5a19da917..d75dc2b9ed3d 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -39,6 +39,7 @@ + #include <asm/domain.h> + #include <xen/numa.h> + #include <asm/hvm/nestedhvm.h> ++#include <public/sched.h> + + #include "private.h" + +@@ -405,8 +406,13 @@ static mfn_t hap_make_monitor_table(struct vcpu *v) + return m4mfn; + + oom: +- printk(XENLOG_G_ERR "out of memory building monitor pagetable\n"); +- domain_crash(d); ++ if ( !d->is_dying && ++ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) ++ { ++ printk(XENLOG_G_ERR "%pd: out of memory building monitor pagetable\n", ++ d); ++ domain_crash(d); ++ } + return INVALID_MFN; + } + +@@ -766,6 +772,9 @@ static void hap_update_paging_modes(struct vcpu *v) + if ( pagetable_is_null(v->arch.hvm.monitor_table) ) + { + mfn_t mmfn = hap_make_monitor_table(v); ++ ++ if ( mfn_eq(mmfn, INVALID_MFN) ) ++ goto unlock; + v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn); + make_cr3(v, mmfn); + hvm_update_host_cr3(v); +@@ -774,6 +783,7 @@ static void hap_update_paging_modes(struct vcpu *v) + /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */ + hap_update_cr3(v, 0, false); + ++ unlock: + paging_unlock(d); + put_gfn(d, cr3_gfn); + } +-- +2.37.3 + diff --git a/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch b/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch deleted file mode 100644 index 45a1825..0000000 --- a/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 7c003ab4a398ff4ddd54d15d4158cffb463134cc Mon Sep 17 00:00:00 2001 -From: David Vrabel <dvrabel@amazon.co.uk> -Date: Tue, 7 Jun 2022 13:59:31 +0200 -Subject: [PATCH 05/51] x86/mm: avoid inadvertently degrading a TLB flush to - local only -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -If the direct map is incorrectly modified with interrupts disabled, -the required TLB flushes are degraded to flushing the local CPU only. - -This could lead to very hard to diagnose problems as different CPUs will -end up with different views of memory. Although, no such issues have yet -been identified. - -Change the check in the flush_area() macro to look at system_state -instead. This defers the switch from local to all later in the boot -(see xen/arch/x86/setup.c:__start_xen()). This is fine because -additional PCPUs are not brought up until after the system state is -SYS_STATE_smp_boot. - -Signed-off-by: David Vrabel <dvrabel@amazon.co.uk> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - -x86/flushtlb: remove flush_area check on system state - -Booting with Shadow Stacks leads to the following assert on a debug -hypervisor: - -Assertion 'local_irq_is_enabled()' failed at arch/x86/smp.c:265 -----[ Xen-4.17.0-10.24-d x86_64 debug=y Not tainted ]---- -CPU: 0 -RIP: e008:[<ffff82d040345300>] flush_area_mask+0x40/0x13e -[...] 
-Xen call trace: - [<ffff82d040345300>] R flush_area_mask+0x40/0x13e - [<ffff82d040338a40>] F modify_xen_mappings+0xc5/0x958 - [<ffff82d0404474f9>] F arch/x86/alternative.c#_alternative_instructions+0xb7/0xb9 - [<ffff82d0404476cc>] F alternative_branches+0xf/0x12 - [<ffff82d04044e37d>] F __start_xen+0x1ef4/0x2776 - [<ffff82d040203344>] F __high_start+0x94/0xa0 - -This is due to SYS_STATE_smp_boot being set before calling -alternative_branches(), and the flush in modify_xen_mappings() then -using flush_area_all() with interrupts disabled. Note that -alternative_branches() is called before APs are started, so the flush -must be a local one (and indeed the cpumask passed to -flush_area_mask() just contains one CPU). - -Take the opportunity to simplify a bit the logic and make flush_area() -an alias of flush_area_all() in mm.c, taking into account that -cpu_online_map just contains the BSP before APs are started. This -requires widening the assert in flush_area_mask() to allow being -called with interrupts disabled as long as it's strictly a local only -flush. - -The overall result is that a conditional can be removed from -flush_area(). - -While there also introduce an ASSERT to check that a vCPU state flush -is not issued for the local CPU only. - -Fixes: 78e072bc37 ('x86/mm: avoid inadvertently degrading a TLB flush to local only') -Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 78e072bc375043e81691a59454e09f0b38241ddd -master date: 2022-04-20 10:55:01 +0200 -master commit: 9f735ee4903f1b9f1966bb4ba5b5616b03ae08b5 -master date: 2022-05-25 11:09:46 +0200 ---- - xen/arch/x86/mm.c | 10 ++-------- - xen/arch/x86/smp.c | 5 ++++- - 2 files changed, 6 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 4d799032dc82..e222d9aa98ee 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5051,14 +5051,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - #define l1f_to_lNf(f) (((f) & _PAGE_PRESENT) ? ((f) | _PAGE_PSE) : (f)) - #define lNf_to_l1f(f) (((f) & _PAGE_PRESENT) ? ((f) & ~_PAGE_PSE) : (f)) - --/* -- * map_pages_to_xen() can be called with interrupts disabled during -- * early bootstrap. In this case it is safe to use flush_area_local() -- * and avoid locking because only the local CPU is online. -- */ --#define flush_area(v,f) (!local_irq_is_enabled() ? \ -- flush_area_local((const void *)v, f) : \ -- flush_area_all((const void *)v, f)) -+/* flush_area_all() can be used prior to any other CPU being online. */ -+#define flush_area(v, f) flush_area_all((const void *)(v), f) - - #define L3T_INIT(page) (page) = ZERO_BLOCK_PTR - -diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c -index eef0f9c6cbf4..3556ec116608 100644 ---- a/xen/arch/x86/smp.c -+++ b/xen/arch/x86/smp.c -@@ -262,7 +262,10 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags) - { - unsigned int cpu = smp_processor_id(); - -- ASSERT(local_irq_is_enabled()); -+ /* Local flushes can be performed with interrupts disabled. */ -+ ASSERT(local_irq_is_enabled() || cpumask_subset(mask, cpumask_of(cpu))); -+ /* Exclude use of FLUSH_VCPU_STATE for the local CPU. 
*/ -+ ASSERT(!cpumask_test_cpu(cpu, mask) || !(flags & FLUSH_VCPU_STATE)); - - if ( (flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK)) && - cpumask_test_cpu(cpu, mask) ) --- -2.35.1 - diff --git a/0006-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch b/0006-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch new file mode 100644 index 0000000..bbae48b --- /dev/null +++ b/0006-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch @@ -0,0 +1,76 @@ +From 40e9daf6b56ae49bda3ba4e254ccf0e998e52a8c Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 14:53:12 +0200 +Subject: [PATCH 06/26] x86/shadow: tolerate failure of + sh_set_toplevel_shadow() + +Subsequently sh_set_toplevel_shadow() will be adjusted to install a +blank entry in case prealloc fails. There are, in fact, pre-existing +error paths which would put in place a blank entry. The 4- and 2-level +code in sh_update_cr3(), however, assume the top level entry to be +valid. + +Hence bail from the function in the unlikely event that it's not. Note +that 3-level logic works differently: In particular a guest is free to +supply a PDPTR pointing at 4 non-present (or otherwise deemed invalid) +entries. The guest will crash, but we already cope with that. + +Really mfn_valid() is likely wrong to use in sh_set_toplevel_shadow(), +and it should instead be !mfn_eq(gmfn, INVALID_MFN). Avoid such a change +in security context, but add a respective assertion. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: eac000978c1feb5a9ee3236ab0c0da9a477e5336 +master date: 2022-10-11 14:22:24 +0200 +--- + xen/arch/x86/mm/shadow/common.c | 1 + + xen/arch/x86/mm/shadow/multi.c | 10 ++++++++++ + 2 files changed, 11 insertions(+) + +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 8c5baba9544d..00e520cbd05b 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -2516,6 +2516,7 @@ void sh_set_toplevel_shadow(struct vcpu *v, + /* Now figure out the new contents: is this a valid guest MFN? 
*/ + if ( !mfn_valid(gmfn) ) + { ++ ASSERT(mfn_eq(gmfn, INVALID_MFN)); + new_entry = pagetable_null(); + goto install_new_entry; + } +diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c +index 7b8f4dd13b03..2ff78fe3362c 100644 +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -3312,6 +3312,11 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush) + if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 ) + guest_flush_tlb_mask(d, d->dirty_cpumask); + sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow); ++ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) ) ++ { ++ ASSERT(d->is_dying || d->is_shutting_down); ++ return; ++ } + if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) ) + { + mfn_t smfn = pagetable_get_mfn(v->arch.paging.shadow.shadow_table[0]); +@@ -3370,6 +3375,11 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush) + if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 ) + guest_flush_tlb_mask(d, d->dirty_cpumask); + sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow); ++ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) ) ++ { ++ ASSERT(d->is_dying || d->is_shutting_down); ++ return; ++ } + #else + #error This should never happen + #endif +-- +2.37.3 + diff --git a/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch b/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch deleted file mode 100644 index 7eb13cd..0000000 --- a/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 4bb8c34ba4241c2bf7845cd8b80c17530dbfb085 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 7 Jun 2022 14:00:09 +0200 -Subject: [PATCH 06/51] xen/build: Fix dependency for the MAP rule - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: e1e72198213b80b7a82bdc90f96ed05ae4f53e20 -master date: 2022-04-20 19:10:59 +0100 ---- - xen/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/Makefile b/xen/Makefile -index 90a29782dbf4..ce4eca3ee4d7 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -507,7 +507,7 @@ cscope: - cscope -k -b -q - - .PHONY: _MAP --_MAP: -+_MAP: $(TARGET) - $(NM) -n $(TARGET)-syms | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map - - %.o %.i %.s: %.c FORCE --- -2.35.1 - diff --git a/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch b/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch deleted file mode 100644 index ed98922..0000000 --- a/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 13a29f3756bc4cab96c59f46c3875b483553fb8f Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:00:31 +0200 -Subject: [PATCH 07/51] tools/libs/evtchn: don't set errno to negative values - -Setting errno to a negative value makes no sense. 
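The errno convention matters because every consumer assumes positive E* constants; strerror() and perror() turn a negated value into noise. A small illustration of the difference (hypothetical user-space test, not part of the patch; output shown for glibc on Linux, where EOPNOTSUPP is 95):

    #include <errno.h>
    #include <stdio.h>

    int main(void)
    {
        errno = EOPNOTSUPP;   /* correct usage */
        perror("restrict");   /* restrict: Operation not supported */

        errno = -EOPNOTSUPP;  /* the bug being removed */
        perror("restrict");   /* restrict: Unknown error -95 */
        return 0;
    }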
- -Fixes: 6b6500b3cbaa ("tools/libs/evtchn: Add support for restricting a handle") -Signed-off-by: Juergen Gross <jgross@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 60245b71c1cd001686fa7b7a26869cbcb80d074c -master date: 2022-04-22 20:39:34 +0100 ---- - tools/libs/evtchn/freebsd.c | 2 +- - tools/libs/evtchn/minios.c | 2 +- - tools/libs/evtchn/netbsd.c | 2 +- - tools/libs/evtchn/solaris.c | 2 +- - 4 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/tools/libs/evtchn/freebsd.c b/tools/libs/evtchn/freebsd.c -index 7427ab240860..fa17a0f8dbb5 100644 ---- a/tools/libs/evtchn/freebsd.c -+++ b/tools/libs/evtchn/freebsd.c -@@ -58,7 +58,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - - return -1; - } -diff --git a/tools/libs/evtchn/minios.c b/tools/libs/evtchn/minios.c -index e5dfdc5ef52e..c0bd5429eea2 100644 ---- a/tools/libs/evtchn/minios.c -+++ b/tools/libs/evtchn/minios.c -@@ -97,7 +97,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - - return -1; - } -diff --git a/tools/libs/evtchn/netbsd.c b/tools/libs/evtchn/netbsd.c -index 1cebc21ffce0..56409513bc23 100644 ---- a/tools/libs/evtchn/netbsd.c -+++ b/tools/libs/evtchn/netbsd.c -@@ -53,7 +53,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - - return -1; - } -diff --git a/tools/libs/evtchn/solaris.c b/tools/libs/evtchn/solaris.c -index df9579df1778..beaa7721425f 100644 ---- a/tools/libs/evtchn/solaris.c -+++ b/tools/libs/evtchn/solaris.c -@@ -53,7 +53,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) - - int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) - { -- errno = -EOPNOTSUPP; -+ errno = EOPNOTSUPP; - return -1; - } - --- -2.35.1 - diff --git a/0007-x86-shadow-tolerate-failure-in-shadow_prealloc.patch b/0007-x86-shadow-tolerate-failure-in-shadow_prealloc.patch new file mode 100644 index 0000000..5e2f8ab --- /dev/null +++ b/0007-x86-shadow-tolerate-failure-in-shadow_prealloc.patch @@ -0,0 +1,279 @@ +From 28d3f677ec97c98154311f64871ac48762cf980a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 14:53:27 +0200 +Subject: [PATCH 07/26] x86/shadow: tolerate failure in shadow_prealloc() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Prevent _shadow_prealloc() from calling BUG() when unable to fulfill +the pre-allocation and instead return true/false. Modify +shadow_prealloc() to crash the domain on allocation failure (if the +domain is not already dying), as shadow cannot operate normally after +that. Modify callers to also gracefully handle {_,}shadow_prealloc() +failing to fulfill the request. + +Note this in turn requires adjusting the callers of +sh_make_monitor_table() also to handle it returning INVALID_MFN. +sh_update_paging_modes() is also modified to add additional error +paths in case of allocation failure; some of those will return with +null monitor page tables (and the domain likely crashed). This is no +different from current error paths, but the newly introduced ones are +more likely to trigger. 
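The conversion uses a common hardening pattern: an infallible void helper becomes a __must_check bool, and each caller unwinds its own locks and references on failure. In outline (condensed from the hunks below; the domain_crash() on failure lives in the shadow_prealloc() wrapper itself):

    bool __must_check shadow_prealloc(struct domain *d, unsigned int type,
                                      unsigned int count);

    /* e.g. in the page-fault handler: */
    if ( !shadow_prealloc(d, SH_type_l1_shadow, count) )
    {
        paging_unlock(d);
        put_gfn(d, gfn_x(gfn));
        return 0;
    }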
+ +The now added failure points in sh_update_paging_modes() also require +that on some error return paths the previous structures are cleared, +and thus monitor table is null. + +While there adjust the 'type' parameter type of shadow_prealloc() to +unsigned int rather than u32. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: b7f93c6afb12b6061e2d19de2f39ea09b569ac68 +master date: 2022-10-11 14:22:53 +0200 +--- + xen/arch/x86/mm/shadow/common.c | 69 ++++++++++++++++++++++++-------- + xen/arch/x86/mm/shadow/hvm.c | 4 +- + xen/arch/x86/mm/shadow/multi.c | 11 +++-- + xen/arch/x86/mm/shadow/private.h | 3 +- + 4 files changed, 66 insertions(+), 21 deletions(-) + +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 00e520cbd05b..2067c7d16bb4 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -36,6 +36,7 @@ + #include <asm/flushtlb.h> + #include <asm/shadow.h> + #include <xen/numa.h> ++#include <public/sched.h> + #include "private.h" + + DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags); +@@ -928,14 +929,15 @@ static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn) + + /* Make sure there are at least count order-sized pages + * available in the shadow page pool. */ +-static void _shadow_prealloc(struct domain *d, unsigned int pages) ++static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + { + struct vcpu *v; + struct page_info *sp, *t; + mfn_t smfn; + int i; + +- if ( d->arch.paging.shadow.free_pages >= pages ) return; ++ if ( d->arch.paging.shadow.free_pages >= pages ) ++ return true; + + /* Shouldn't have enabled shadows if we've no vcpus. */ + ASSERT(d->vcpu && d->vcpu[0]); +@@ -951,7 +953,8 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + sh_unpin(d, smfn); + + /* See if that freed up enough space */ +- if ( d->arch.paging.shadow.free_pages >= pages ) return; ++ if ( d->arch.paging.shadow.free_pages >= pages ) ++ return true; + } + + /* Stage two: all shadow pages are in use in hierarchies that are +@@ -974,7 +977,7 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + if ( d->arch.paging.shadow.free_pages >= pages ) + { + guest_flush_tlb_mask(d, d->dirty_cpumask); +- return; ++ return true; + } + } + } +@@ -987,7 +990,12 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); +- BUG(); ++ ++ ASSERT(d->is_dying); ++ ++ guest_flush_tlb_mask(d, d->dirty_cpumask); ++ ++ return false; + } + + /* Make sure there are at least count pages of the order according to +@@ -995,9 +1003,19 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages) + * This must be called before any calls to shadow_alloc(). Since this + * will free existing shadows to make room, it must be called early enough + * to avoid freeing shadows that the caller is currently working on. 
*/ +-void shadow_prealloc(struct domain *d, u32 type, unsigned int count) ++bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count) + { +- return _shadow_prealloc(d, shadow_size(type) * count); ++ bool ret = _shadow_prealloc(d, shadow_size(type) * count); ++ ++ if ( !ret && !d->is_dying && ++ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) ++ /* ++ * Failing to allocate memory required for shadow usage can only result in ++ * a domain crash, do it here rather that relying on every caller to do it. ++ */ ++ domain_crash(d); ++ ++ return ret; + } + + /* Deliberately free all the memory we can: this will tear down all of +@@ -1218,7 +1236,7 @@ void shadow_free(struct domain *d, mfn_t smfn) + static struct page_info * + shadow_alloc_p2m_page(struct domain *d) + { +- struct page_info *pg; ++ struct page_info *pg = NULL; + + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). */ +@@ -1236,16 +1254,18 @@ shadow_alloc_p2m_page(struct domain *d) + d->arch.paging.shadow.p2m_pages, + shadow_min_acceptable_pages(d)); + } +- paging_unlock(d); +- return NULL; ++ goto out; + } + +- shadow_prealloc(d, SH_type_p2m_table, 1); ++ if ( !shadow_prealloc(d, SH_type_p2m_table, 1) ) ++ goto out; ++ + pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); + d->arch.paging.shadow.p2m_pages++; + d->arch.paging.shadow.total_pages--; + ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + ++ out: + paging_unlock(d); + + return pg; +@@ -1336,7 +1356,9 @@ int shadow_set_allocation(struct domain *d, unsigned int pages, bool *preempted) + else if ( d->arch.paging.shadow.total_pages > pages ) + { + /* Need to return memory to domheap */ +- _shadow_prealloc(d, 1); ++ if ( !_shadow_prealloc(d, 1) ) ++ return -ENOMEM; ++ + sp = page_list_remove_head(&d->arch.paging.shadow.freelist); + ASSERT(sp); + /* +@@ -2334,12 +2356,13 @@ static void sh_update_paging_modes(struct vcpu *v) + if ( mfn_eq(v->arch.paging.shadow.oos_snapshot[0], INVALID_MFN) ) + { + int i; ++ ++ if ( !shadow_prealloc(d, SH_type_oos_snapshot, SHADOW_OOS_PAGES) ) ++ return; ++ + for(i = 0; i < SHADOW_OOS_PAGES; i++) +- { +- shadow_prealloc(d, SH_type_oos_snapshot, 1); + v->arch.paging.shadow.oos_snapshot[i] = + shadow_alloc(d, SH_type_oos_snapshot, 0); +- } + } + #endif /* OOS */ + +@@ -2403,6 +2426,9 @@ static void sh_update_paging_modes(struct vcpu *v) + mfn_t mmfn = sh_make_monitor_table( + v, v->arch.paging.mode->shadow.shadow_levels); + ++ if ( mfn_eq(mmfn, INVALID_MFN) ) ++ return; ++ + v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn); + make_cr3(v, mmfn); + hvm_update_host_cr3(v); +@@ -2441,6 +2467,12 @@ static void sh_update_paging_modes(struct vcpu *v) + v->arch.hvm.monitor_table = pagetable_null(); + new_mfn = sh_make_monitor_table( + v, v->arch.paging.mode->shadow.shadow_levels); ++ if ( mfn_eq(new_mfn, INVALID_MFN) ) ++ { ++ sh_destroy_monitor_table(v, old_mfn, ++ old_mode->shadow.shadow_levels); ++ return; ++ } + v->arch.hvm.monitor_table = pagetable_from_mfn(new_mfn); + SHADOW_PRINTK("new monitor table %"PRI_mfn "\n", + mfn_x(new_mfn)); +@@ -2526,7 +2558,12 @@ void sh_set_toplevel_shadow(struct vcpu *v, + if ( !mfn_valid(smfn) ) + { + /* Make sure there's enough free shadow memory. */ +- shadow_prealloc(d, root_type, 1); ++ if ( !shadow_prealloc(d, root_type, 1) ) ++ { ++ new_entry = pagetable_null(); ++ goto install_new_entry; ++ } ++ + /* Shadow the page. 
*/ + smfn = make_shadow(v, gmfn, root_type); + } +diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c +index d5f42102a0bd..a0878d9ad71a 100644 +--- a/xen/arch/x86/mm/shadow/hvm.c ++++ b/xen/arch/x86/mm/shadow/hvm.c +@@ -700,7 +700,9 @@ mfn_t sh_make_monitor_table(const struct vcpu *v, unsigned int shadow_levels) + ASSERT(!pagetable_get_pfn(v->arch.hvm.monitor_table)); + + /* Guarantee we can get the memory we need */ +- shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS); ++ if ( !shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS) ) ++ return INVALID_MFN; ++ + m4mfn = shadow_alloc(d, SH_type_monitor_table, 0); + mfn_to_page(m4mfn)->shadow_flags = 4; + +diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c +index 2ff78fe3362c..c07af0bd99da 100644 +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -2440,9 +2440,14 @@ static int sh_page_fault(struct vcpu *v, + * Preallocate shadow pages *before* removing writable accesses + * otherwhise an OOS L1 might be demoted and promoted again with + * writable mappings. */ +- shadow_prealloc(d, +- SH_type_l1_shadow, +- GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1); ++ if ( !shadow_prealloc(d, SH_type_l1_shadow, ++ GUEST_PAGING_LEVELS < 4 ++ ? 1 : GUEST_PAGING_LEVELS - 1) ) ++ { ++ paging_unlock(d); ++ put_gfn(d, gfn_x(gfn)); ++ return 0; ++ } + + rc = gw_remove_write_accesses(v, va, &gw); + +diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h +index 35efb1b984fb..738214f75e8d 100644 +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -383,7 +383,8 @@ void shadow_promote(struct domain *d, mfn_t gmfn, u32 type); + void shadow_demote(struct domain *d, mfn_t gmfn, u32 type); + + /* Shadow page allocation functions */ +-void shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count); ++bool __must_check shadow_prealloc(struct domain *d, unsigned int shadow_type, ++ unsigned int count); + mfn_t shadow_alloc(struct domain *d, + u32 shadow_type, + unsigned long backpointer); +-- +2.37.3 + diff --git a/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch b/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch deleted file mode 100644 index 166f0ff..0000000 --- a/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch +++ /dev/null @@ -1,36 +0,0 @@ -From ba62afdbc31a8cfe897191efd25ed4449d9acd94 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:01:03 +0200 -Subject: [PATCH 08/51] tools/libs/ctrl: don't set errno to a negative value - -The claimed reason for setting errno to -1 is wrong. On x86 -xc_domain_pod_target() will set errno to a sane value in the error -case. 
- -Fixes: ff1745d5882b ("tools: libxl: do not set the PoD target on ARM") -Signed-off-by: Juergen Gross <jgross@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: a0fb7e0e73483ed042d5ca34861a891a51ad337b -master date: 2022-04-22 20:39:34 +0100 ---- - tools/libs/ctrl/xc_domain.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c -index b155d6afd2ef..9d675c8f21e1 100644 ---- a/tools/libs/ctrl/xc_domain.c -+++ b/tools/libs/ctrl/xc_domain.c -@@ -1297,9 +1297,7 @@ int xc_domain_get_pod_target(xc_interface *xch, - uint64_t *pod_cache_pages, - uint64_t *pod_entries) - { -- /* On x86 (above) xc_domain_pod_target will incorrectly return -1 -- * with errno==-1 on error. Do the same for least surprise. */ -- errno = -1; -+ errno = EOPNOTSUPP; - return -1; - } - #endif --- -2.35.1 - diff --git a/0008-x86-p2m-refuse-new-allocations-for-dying-domains.patch b/0008-x86-p2m-refuse-new-allocations-for-dying-domains.patch new file mode 100644 index 0000000..70b5cc9 --- /dev/null +++ b/0008-x86-p2m-refuse-new-allocations-for-dying-domains.patch @@ -0,0 +1,100 @@ +From 745e0b300dc3f5000e6d48c273b405d4bcc29ba7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 14:53:41 +0200 +Subject: [PATCH 08/26] x86/p2m: refuse new allocations for dying domains +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This will in particular prevent any attempts to add entries to the p2m, +once - in a subsequent change - non-root entries have been removed. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: ff600a8cf8e36f8ecbffecf96a035952e022ab87 +master date: 2022-10-11 14:23:22 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 5 ++++- + xen/arch/x86/mm/shadow/common.c | 18 ++++++++++++++---- + 2 files changed, 18 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index d75dc2b9ed3d..787991233e53 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -245,6 +245,9 @@ static struct page_info *hap_alloc(struct domain *d) + + ASSERT(paging_locked_by_me(d)); + ++ if ( unlikely(d->is_dying) ) ++ return NULL; ++ + pg = page_list_remove_head(&d->arch.paging.hap.freelist); + if ( unlikely(!pg) ) + return NULL; +@@ -281,7 +284,7 @@ static struct page_info *hap_alloc_p2m_page(struct domain *d) + d->arch.paging.hap.p2m_pages++; + ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + } +- else if ( !d->arch.paging.p2m_alloc_failed ) ++ else if ( !d->arch.paging.p2m_alloc_failed && !d->is_dying ) + { + d->arch.paging.p2m_alloc_failed = 1; + dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n", +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 2067c7d16bb4..9807f6ec6c00 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -939,6 +939,10 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + if ( d->arch.paging.shadow.free_pages >= pages ) + return true; + ++ if ( unlikely(d->is_dying) ) ++ /* No reclaim when the domain is dying, teardown will take care of it. */ ++ return false; ++ + /* Shouldn't have enabled shadows if we've no vcpus. 
*/ + ASSERT(d->vcpu && d->vcpu[0]); + +@@ -991,7 +995,7 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); + +- ASSERT(d->is_dying); ++ ASSERT_UNREACHABLE(); + + guest_flush_tlb_mask(d, d->dirty_cpumask); + +@@ -1005,10 +1009,13 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages) + * to avoid freeing shadows that the caller is currently working on. */ + bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count) + { +- bool ret = _shadow_prealloc(d, shadow_size(type) * count); ++ bool ret; + +- if ( !ret && !d->is_dying && +- (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) ++ if ( unlikely(d->is_dying) ) ++ return false; ++ ++ ret = _shadow_prealloc(d, shadow_size(type) * count); ++ if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) ) + /* + * Failing to allocate memory required for shadow usage can only result in + * a domain crash, do it here rather that relying on every caller to do it. +@@ -1238,6 +1245,9 @@ shadow_alloc_p2m_page(struct domain *d) + { + struct page_info *pg = NULL; + ++ if ( unlikely(d->is_dying) ) ++ return NULL; ++ + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). */ + paging_lock_recursive(d); +-- +2.37.3 + diff --git a/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch b/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch deleted file mode 100644 index 5d035f6..0000000 --- a/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch +++ /dev/null @@ -1,32 +0,0 @@ -From a2cf30eec08db5df974a9e8bb7366fee8fc7fcd9 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:01:27 +0200 -Subject: [PATCH 09/51] tools/libs/guest: don't set errno to a negative value - -Setting errno to a negative error value makes no sense. 
- -Fixes: cb99a64029c9 ("libxc: arm: allow passing a device tree blob to the guest") -Signed-off-by: Juergen Gross <jgross@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 438e96ab479495a932391a22e219ee62fa8c4f47 -master date: 2022-04-22 20:39:34 +0100 ---- - tools/libs/guest/xg_dom_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libs/guest/xg_dom_core.c b/tools/libs/guest/xg_dom_core.c -index 2e4c1330ea6b..65975a75da37 100644 ---- a/tools/libs/guest/xg_dom_core.c -+++ b/tools/libs/guest/xg_dom_core.c -@@ -856,7 +856,7 @@ int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename) - return -1; - return 0; - #else -- errno = -EINVAL; -+ errno = EINVAL; - return -1; - #endif - } --- -2.35.1 - diff --git a/0009-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch b/0009-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch new file mode 100644 index 0000000..07e63ac --- /dev/null +++ b/0009-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch @@ -0,0 +1,115 @@ +From 943635d8f8486209e4e48966507ad57963e96284 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 11 Oct 2022 14:54:00 +0200 +Subject: [PATCH 09/26] x86/p2m: truly free paging pool memory for dying + domains +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Modify {hap,shadow}_free to free the page immediately if the domain is +dying, so that pages don't accumulate in the pool when +{shadow,hap}_final_teardown() get called. This is to limit the amount of +work which needs to be done there (in a non-preemptable manner). + +Note the call to shadow_free() in shadow_free_p2m_page() is moved after +increasing total_pages, so that the decrease done in shadow_free() in +case the domain is dying doesn't underflow the counter, even if just for +a short interval. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: f50a2c0e1d057c00d6061f40ae24d068226052ad +master date: 2022-10-11 14:23:51 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 12 ++++++++++++ + xen/arch/x86/mm/shadow/common.c | 28 +++++++++++++++++++++++++--- + 2 files changed, 37 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index 787991233e53..aef2297450e1 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -265,6 +265,18 @@ static void hap_free(struct domain *d, mfn_t mfn) + + ASSERT(paging_locked_by_me(d)); + ++ /* ++ * For dying domains, actually free the memory here. This way less work is ++ * left to hap_final_teardown(), which cannot easily have preemption checks ++ * added. 
++ */ ++ if ( unlikely(d->is_dying) ) ++ { ++ free_domheap_page(pg); ++ d->arch.paging.hap.total_pages--; ++ return; ++ } ++ + d->arch.paging.hap.free_pages++; + page_list_add_tail(pg, &d->arch.paging.hap.freelist); + } +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 9807f6ec6c00..9eb33eafc7f7 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -1187,6 +1187,7 @@ mfn_t shadow_alloc(struct domain *d, + void shadow_free(struct domain *d, mfn_t smfn) + { + struct page_info *next = NULL, *sp = mfn_to_page(smfn); ++ bool dying = ACCESS_ONCE(d->is_dying); + struct page_list_head *pin_list; + unsigned int pages; + u32 shadow_type; +@@ -1229,11 +1230,32 @@ void shadow_free(struct domain *d, mfn_t smfn) + * just before the allocator hands the page out again. */ + page_set_tlbflush_timestamp(sp); + perfc_decr(shadow_alloc_count); +- page_list_add_tail(sp, &d->arch.paging.shadow.freelist); ++ ++ /* ++ * For dying domains, actually free the memory here. This way less ++ * work is left to shadow_final_teardown(), which cannot easily have ++ * preemption checks added. ++ */ ++ if ( unlikely(dying) ) ++ { ++ /* ++ * The backpointer field (sh.back) used by shadow code aliases the ++ * domain owner field, unconditionally clear it here to avoid ++ * free_domheap_page() attempting to parse it. ++ */ ++ page_set_owner(sp, NULL); ++ free_domheap_page(sp); ++ } ++ else ++ page_list_add_tail(sp, &d->arch.paging.shadow.freelist); ++ + sp = next; + } + +- d->arch.paging.shadow.free_pages += pages; ++ if ( unlikely(dying) ) ++ d->arch.paging.shadow.total_pages -= pages; ++ else ++ d->arch.paging.shadow.free_pages += pages; + } + + /* Divert a page from the pool to be used by the p2m mapping. +@@ -1303,9 +1325,9 @@ shadow_free_p2m_page(struct domain *d, struct page_info *pg) + * paging lock) and the log-dirty code (which always does). */ + paging_lock_recursive(d); + +- shadow_free(d, page_to_mfn(pg)); + d->arch.paging.shadow.p2m_pages--; + d->arch.paging.shadow.total_pages++; ++ shadow_free(d, page_to_mfn(pg)); + + paging_unlock(d); + } +-- +2.37.3 + diff --git a/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch b/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch deleted file mode 100644 index ac900ae..0000000 --- a/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 15391de8e2bb6153eadd483154c53044ab53d98d Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:01:44 +0200 -Subject: [PATCH 10/51] tools/libs/light: don't set errno to a negative value - -Setting errno to a negative value makes no sense. 
-
-Fixes: e78e8b9bb649 ("libxl: Add interface for querying hypervisor about PCI topology")
-Signed-off-by: Juergen Gross <jgross@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 2419a159fb943c24a6f2439604b9fdb1478fcd08
-master date: 2022-04-22 20:39:34 +0100
----
- tools/libs/light/libxl_linux.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/libs/light/libxl_linux.c b/tools/libs/light/libxl_linux.c
-index 8d62dfd255cb..27f2bce71837 100644
---- a/tools/libs/light/libxl_linux.c
-+++ b/tools/libs/light/libxl_linux.c
-@@ -288,7 +288,7 @@ int libxl__pci_topology_init(libxl__gc *gc,
-         if (i == num_devs) {
-             LOG(ERROR, "Too many devices");
-             err = ERROR_FAIL;
--            errno = -ENOSPC;
-+            errno = ENOSPC;
-             goto out;
-         }
- 
---
-2.35.1
-
diff --git a/0010-x86-p2m-free-the-paging-memory-pool-preemptively.patch b/0010-x86-p2m-free-the-paging-memory-pool-preemptively.patch
new file mode 100644
index 0000000..59c6940
--- /dev/null
+++ b/0010-x86-p2m-free-the-paging-memory-pool-preemptively.patch
@@ -0,0 +1,181 @@
+From f5959ed715e19cf2844656477dbf74c2f576c9d4 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 11 Oct 2022 14:54:21 +0200
+Subject: [PATCH 10/26] x86/p2m: free the paging memory pool preemptively
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The paging memory pool is currently freed in two different places:
+from {shadow,hap}_teardown() via domain_relinquish_resources() and
+from {shadow,hap}_final_teardown() via complete_domain_destroy().
+While the former does handle preemption, the latter doesn't.
+
+Attempt to move as much p2m-related freeing as possible to happen
+before the call to {shadow,hap}_teardown(), so that most memory can be
+freed in a preemptive way. In order to avoid causing issues to
+existing callers, leave the root p2m page tables set and free them in
+{hap,shadow}_final_teardown(). Also modify {hap,shadow}_free to free
+the page immediately if the domain is dying, so that pages don't
+accumulate in the pool when {shadow,hap}_final_teardown() get called.
+
+Move altp2m_vcpu_disable_ve() to be done in hap_teardown(), as that's
+the place where altp2m_active gets disabled now.
+
+This is part of CVE-2022-33746 / XSA-410.
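+
+As a rough editor's sketch (not part of the change; the helper name and
+call site are made up for illustration), the preemptible path is driven
+by returning -ERESTART so that the operation is retried via a hypercall
+continuation:
+
+    /* Illustration only: hypothetical caller of hap_teardown(). */
+    static int relinquish_paging(struct domain *d)
+    {
+        bool preempted = false;
+
+        hap_teardown(d, &preempted);
+
+        return preempted ? -ERESTART : 0; /* retried later if preempted */
+    }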
+ +Reported-by: Julien Grall <jgrall@amazon.com> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Tim Deegan <tim@xen.org> +master commit: e7aa55c0aab36d994bf627c92bd5386ae167e16e +master date: 2022-10-11 14:24:21 +0200 +--- + xen/arch/x86/domain.c | 7 ------ + xen/arch/x86/mm/hap/hap.c | 42 ++++++++++++++++++++------------- + xen/arch/x86/mm/shadow/common.c | 12 ++++++++++ + 3 files changed, 38 insertions(+), 23 deletions(-) + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 0d39981550ca..a4356893bdbc 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -38,7 +38,6 @@ + #include <xen/livepatch.h> + #include <public/sysctl.h> + #include <public/hvm/hvm_vcpu.h> +-#include <asm/altp2m.h> + #include <asm/regs.h> + #include <asm/mc146818rtc.h> + #include <asm/system.h> +@@ -2381,12 +2380,6 @@ int domain_relinquish_resources(struct domain *d) + vpmu_destroy(v); + } + +- if ( altp2m_active(d) ) +- { +- for_each_vcpu ( d, v ) +- altp2m_vcpu_disable_ve(v); +- } +- + if ( is_pv_domain(d) ) + { + for_each_vcpu ( d, v ) +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index aef2297450e1..a44fcfd95e1e 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -28,6 +28,7 @@ + #include <xen/domain_page.h> + #include <xen/guest_access.h> + #include <xen/keyhandler.h> ++#include <asm/altp2m.h> + #include <asm/event.h> + #include <asm/page.h> + #include <asm/current.h> +@@ -546,24 +547,8 @@ void hap_final_teardown(struct domain *d) + unsigned int i; + + if ( hvm_altp2m_supported() ) +- { +- d->arch.altp2m_active = 0; +- +- if ( d->arch.altp2m_eptp ) +- { +- free_xenheap_page(d->arch.altp2m_eptp); +- d->arch.altp2m_eptp = NULL; +- } +- +- if ( d->arch.altp2m_visible_eptp ) +- { +- free_xenheap_page(d->arch.altp2m_visible_eptp); +- d->arch.altp2m_visible_eptp = NULL; +- } +- + for ( i = 0; i < MAX_ALTP2M; i++ ) + p2m_teardown(d->arch.altp2m_p2m[i], true); +- } + + /* Destroy nestedp2m's first */ + for (i = 0; i < MAX_NESTEDP2M; i++) { +@@ -578,6 +563,8 @@ void hap_final_teardown(struct domain *d) + paging_lock(d); + hap_set_allocation(d, 0, NULL); + ASSERT(d->arch.paging.hap.p2m_pages == 0); ++ ASSERT(d->arch.paging.hap.free_pages == 0); ++ ASSERT(d->arch.paging.hap.total_pages == 0); + paging_unlock(d); + } + +@@ -603,6 +590,7 @@ void hap_vcpu_teardown(struct vcpu *v) + void hap_teardown(struct domain *d, bool *preempted) + { + struct vcpu *v; ++ unsigned int i; + + ASSERT(d->is_dying); + ASSERT(d != current->domain); +@@ -611,6 +599,28 @@ void hap_teardown(struct domain *d, bool *preempted) + for_each_vcpu ( d, v ) + hap_vcpu_teardown(v); + ++ /* Leave the root pt in case we get further attempts to modify the p2m. */ ++ if ( hvm_altp2m_supported() ) ++ { ++ if ( altp2m_active(d) ) ++ for_each_vcpu ( d, v ) ++ altp2m_vcpu_disable_ve(v); ++ ++ d->arch.altp2m_active = 0; ++ ++ FREE_XENHEAP_PAGE(d->arch.altp2m_eptp); ++ FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp); ++ ++ for ( i = 0; i < MAX_ALTP2M; i++ ) ++ p2m_teardown(d->arch.altp2m_p2m[i], false); ++ } ++ ++ /* Destroy nestedp2m's after altp2m. 
*/ ++ for ( i = 0; i < MAX_NESTEDP2M; i++ ) ++ p2m_teardown(d->arch.nested_p2m[i], false); ++ ++ p2m_teardown(p2m_get_hostp2m(d), false); ++ + paging_lock(d); /* Keep various asserts happy */ + + if ( d->arch.paging.hap.total_pages != 0 ) +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index 9eb33eafc7f7..ac9a1ae07808 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -2824,8 +2824,17 @@ void shadow_teardown(struct domain *d, bool *preempted) + for_each_vcpu ( d, v ) + shadow_vcpu_teardown(v); + ++ p2m_teardown(p2m_get_hostp2m(d), false); ++ + paging_lock(d); + ++ /* ++ * Reclaim all shadow memory so that shadow_set_allocation() doesn't find ++ * in-use pages, as _shadow_prealloc() will no longer try to reclaim pages ++ * because the domain is dying. ++ */ ++ shadow_blow_tables(d); ++ + #if (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) + /* Free the virtual-TLB array attached to each vcpu */ + for_each_vcpu(d, v) +@@ -2946,6 +2955,9 @@ void shadow_final_teardown(struct domain *d) + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); ++ ASSERT(!d->arch.paging.shadow.total_pages); ++ ASSERT(!d->arch.paging.shadow.free_pages); ++ ASSERT(!d->arch.paging.shadow.p2m_pages); + paging_unlock(d); + } + +-- +2.37.3 + diff --git a/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch b/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch deleted file mode 100644 index 3c60de4..0000000 --- a/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch +++ /dev/null @@ -1,112 +0,0 @@ -From a6c32abd144ec6443c6a433b5a2ac00e2615aa86 Mon Sep 17 00:00:00 2001 -From: Juergen Gross <jgross@suse.com> -Date: Tue, 7 Jun 2022 14:02:08 +0200 -Subject: [PATCH 11/51] xen/iommu: cleanup iommu related domctl handling - -Today iommu_do_domctl() is being called from arch_do_domctl() in the -"default:" case of a switch statement. This has led already to crashes -due to unvalidated parameters. - -Fix that by moving the call of iommu_do_domctl() to the main switch -statement of do_domctl(). 
- -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> # Arm -master commit: 9cd7e31b3f584e97a138a770cfb031a91a867936 -master date: 2022-04-26 10:23:58 +0200 ---- - xen/arch/arm/domctl.c | 11 +---------- - xen/arch/x86/domctl.c | 2 +- - xen/common/domctl.c | 7 +++++++ - xen/include/xen/iommu.h | 12 +++++++++--- - 4 files changed, 18 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c -index 6245af6d0bab..1baf25c3d98b 100644 ---- a/xen/arch/arm/domctl.c -+++ b/xen/arch/arm/domctl.c -@@ -176,16 +176,7 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d, - return rc; - } - default: -- { -- int rc; -- -- rc = subarch_do_domctl(domctl, d, u_domctl); -- -- if ( rc == -ENOSYS ) -- rc = iommu_do_domctl(domctl, d, u_domctl); -- -- return rc; -- } -+ return subarch_do_domctl(domctl, d, u_domctl); - } - } - -diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c -index 7d102e0647ec..0fa51f2ebd10 100644 ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -1380,7 +1380,7 @@ long arch_do_domctl( - break; - - default: -- ret = iommu_do_domctl(domctl, d, u_domctl); -+ ret = -ENOSYS; - break; - } - -diff --git a/xen/common/domctl.c b/xen/common/domctl.c -index 419e4070f59d..65d2a4588b71 100644 ---- a/xen/common/domctl.c -+++ b/xen/common/domctl.c -@@ -870,6 +870,13 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) - copyback = 1; - break; - -+ case XEN_DOMCTL_assign_device: -+ case XEN_DOMCTL_test_assign_device: -+ case XEN_DOMCTL_deassign_device: -+ case XEN_DOMCTL_get_device_group: -+ ret = iommu_do_domctl(op, d, u_domctl); -+ break; -+ - default: - ret = arch_do_domctl(op, d, u_domctl); - break; -diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h -index 92b2d23f0ba2..861579562e8a 100644 ---- a/xen/include/xen/iommu.h -+++ b/xen/include/xen/iommu.h -@@ -342,8 +342,17 @@ struct domain_iommu { - /* Does the IOMMU pagetable need to be kept synchronized with the P2M */ - #ifdef CONFIG_HAS_PASSTHROUGH - #define need_iommu_pt_sync(d) (dom_iommu(d)->need_sync) -+ -+int iommu_do_domctl(struct xen_domctl *domctl, struct domain *d, -+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl); - #else - #define need_iommu_pt_sync(d) ({ (void)(d); false; }) -+ -+static inline int iommu_do_domctl(struct xen_domctl *domctl, struct domain *d, -+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) -+{ -+ return -ENOSYS; -+} - #endif - - int __must_check iommu_suspend(void); -@@ -357,9 +366,6 @@ int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d, - XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); - #endif - --int iommu_do_domctl(struct xen_domctl *, struct domain *d, -- XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); -- - void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev); - - /* --- -2.35.1 - diff --git a/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch b/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch new file mode 100644 index 0000000..5520627 --- /dev/null +++ b/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch @@ -0,0 +1,197 @@ +From a603386b422f5cb4c5e2639a7e20a1d99dba2175 Mon Sep 17 00:00:00 2001 +From: Julien Grall <jgrall@amazon.com> +Date: Tue, 11 Oct 2022 14:54:44 +0200 +Subject: [PATCH 11/26] xen/x86: p2m: Add preemption in p2m_teardown() + +The list p2m->pages contain all the pages used by the P2M. 
On large
+instances this list can be quite long and the time spent calling
+d->arch.paging.free_page() can take more than 1ms for an 80GB guest
+on a Xen running in a nested environment on a c5.metal.
+
+By extrapolation, it would take > 100ms for an 8TB guest (what we
+currently security support). So add some preemption in p2m_teardown()
+and propagate it to the callers. Note there are 3 places where
+the preemption is not enabled:
+ - hap_final_teardown()/shadow_final_teardown(): We are
+   preventing updates to the P2M once the domain is dying (so
+   no more pages could be allocated) and most of the P2M pages
+   will be freed in a preemptive manner when relinquishing the
+   resources. So it is fine to disable preemption.
+ - shadow_enable(): This is fine because it will undo the allocation
+   that may have been made by p2m_alloc_table() (so only the root
+   page table).
+
+The preemption is arbitrarily checked every 1024 iterations.
+
+We now need to include <xen/event.h> in p2m-basic in order to
+import the definition for local_events_need_delivery() used by
+general_preempt_check(). Ideally, the inclusion should happen in
+xen/sched.h but it opened a can of worms.
+
+Note that with the current approach, Xen doesn't keep track of whether
+the alt/nested P2Ms have been cleared. So there is some redundant work.
+However, this is not expected to incur too much overhead (the P2M lock
+shouldn't be contended during teardown). So this optimization is
+left outside of the security event.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+master commit: 8a2111250b424edc49c65c4d41b276766d30635c
+master date: 2022-10-11 14:24:48 +0200
+---
+ xen/arch/x86/mm/hap/hap.c       | 22 ++++++++++++++++------
+ xen/arch/x86/mm/p2m.c           | 18 +++++++++++++++---
+ xen/arch/x86/mm/shadow/common.c | 12 +++++++++---
+ xen/include/asm-x86/p2m.h       |  2 +-
+ 4 files changed, 41 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index a44fcfd95e1e..1f9a157a0c34 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d)
+ 
+     if ( hvm_altp2m_supported() )
+         for ( i = 0; i < MAX_ALTP2M; i++ )
+-            p2m_teardown(d->arch.altp2m_p2m[i], true);
++            p2m_teardown(d->arch.altp2m_p2m[i], true, NULL);
+ 
+     /* Destroy nestedp2m's first */
+     for (i = 0; i < MAX_NESTEDP2M; i++) {
+-        p2m_teardown(d->arch.nested_p2m[i], true);
++        p2m_teardown(d->arch.nested_p2m[i], true, NULL);
+     }
+ 
+     if ( d->arch.paging.hap.total_pages != 0 )
+         hap_teardown(d, NULL);
+ 
+-    p2m_teardown(p2m_get_hostp2m(d), true);
++    p2m_teardown(p2m_get_hostp2m(d), true, NULL);
+     /* Free any memory that the p2m teardown released */
+     paging_lock(d);
+     hap_set_allocation(d, 0, NULL);
+@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool *preempted)
+         FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
+ 
+         for ( i = 0; i < MAX_ALTP2M; i++ )
+-            p2m_teardown(d->arch.altp2m_p2m[i], false);
++        {
++            p2m_teardown(d->arch.altp2m_p2m[i], false, preempted);
++            if ( preempted && *preempted )
++                return;
++        }
+     }
+ 
+     /* Destroy nestedp2m's after altp2m. 
*/ + for ( i = 0; i < MAX_NESTEDP2M; i++ ) +- p2m_teardown(d->arch.nested_p2m[i], false); ++ { ++ p2m_teardown(d->arch.nested_p2m[i], false, preempted); ++ if ( preempted && *preempted ) ++ return; ++ } + +- p2m_teardown(p2m_get_hostp2m(d), false); ++ p2m_teardown(p2m_get_hostp2m(d), false, preempted); ++ if ( preempted && *preempted ) ++ return; + + paging_lock(d); /* Keep various asserts happy */ + +diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c +index aba4f17cbe12..8781df9dda8d 100644 +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -749,12 +749,13 @@ int p2m_alloc_table(struct p2m_domain *p2m) + * hvm fixme: when adding support for pvh non-hardware domains, this path must + * cleanup any foreign p2m types (release refcnts on them). + */ +-void p2m_teardown(struct p2m_domain *p2m, bool remove_root) ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted) + /* Return all the p2m pages to Xen. + * We know we don't have any extra mappings to these pages */ + { + struct page_info *pg, *root_pg = NULL; + struct domain *d; ++ unsigned int i = 0; + + if (p2m == NULL) + return; +@@ -773,8 +774,19 @@ void p2m_teardown(struct p2m_domain *p2m, bool remove_root) + } + + while ( (pg = page_list_remove_head(&p2m->pages)) ) +- if ( pg != root_pg ) +- d->arch.paging.free_page(d, pg); ++ { ++ if ( pg == root_pg ) ++ continue; ++ ++ d->arch.paging.free_page(d, pg); ++ ++ /* Arbitrarily check preemption every 1024 iterations */ ++ if ( preempted && !(++i % 1024) && general_preempt_check() ) ++ { ++ *preempted = true; ++ break; ++ } ++ } + + if ( root_pg ) + page_list_add(root_pg, &p2m->pages); +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index ac9a1ae07808..3b0d781991b5 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -2770,8 +2770,12 @@ int shadow_enable(struct domain *d, u32 mode) + out_locked: + paging_unlock(d); + out_unlocked: ++ /* ++ * This is fine to ignore the preemption here because only the root ++ * will be allocated by p2m_alloc_table(). ++ */ + if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) +- p2m_teardown(p2m, true); ++ p2m_teardown(p2m, true, NULL); + if ( rv != 0 && pg != NULL ) + { + pg->count_info &= ~PGC_count_mask; +@@ -2824,7 +2828,9 @@ void shadow_teardown(struct domain *d, bool *preempted) + for_each_vcpu ( d, v ) + shadow_vcpu_teardown(v); + +- p2m_teardown(p2m_get_hostp2m(d), false); ++ p2m_teardown(p2m_get_hostp2m(d), false, preempted); ++ if ( preempted && *preempted ) ++ return; + + paging_lock(d); + +@@ -2945,7 +2951,7 @@ void shadow_final_teardown(struct domain *d) + shadow_teardown(d, NULL); + + /* It is now safe to pull down the p2m map. */ +- p2m_teardown(p2m_get_hostp2m(d), true); ++ p2m_teardown(p2m_get_hostp2m(d), true, NULL); + /* Free any shadow memory that the p2m teardown released */ + paging_lock(d); + shadow_set_allocation(d, 0, NULL); +diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h +index c3c16748e7d5..2db9ab0122f2 100644 +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -574,7 +574,7 @@ int p2m_init(struct domain *d); + int p2m_alloc_table(struct p2m_domain *p2m); + + /* Return all the p2m resources to Xen. 
*/
+-void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted);
+ void p2m_final_teardown(struct domain *d);
+ 
+ /* Add a page to a domain's p2m table */
+-- 
+2.37.3
+ 
diff --git a/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch b/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch
deleted file mode 100644
index 37b9005..0000000
--- a/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 4cf9a7c7bdb9d544fbac81105bbc1059ba3dd932 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 7 Jun 2022 14:02:30 +0200
-Subject: [PATCH 12/51] IOMMU: make domctl handler tolerate NULL domain
-
-Besides the reporter's issue of hitting a NULL deref when !CONFIG_GDBSX,
-XEN_DOMCTL_test_assign_device can legitimately end up having NULL passed
-here, when the domctl was passed DOMID_INVALID.
-
-Fixes: 71e617a6b8f6 ("use is_iommu_enabled() where appropriate...")
-Reported-by: Cheyenne Wills <cheyenne.wills@gmail.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-master commit: fa4d84e6dd3c3bfd23a525b75a5483d4ce15adbb
-master date: 2022-04-26 10:25:54 +0200
----
- xen/drivers/passthrough/iommu.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
-index caaba62c8865..287f63fc736f 100644
---- a/xen/drivers/passthrough/iommu.c
-+++ b/xen/drivers/passthrough/iommu.c
-@@ -535,7 +535,7 @@ int iommu_do_domctl(
- {
-     int ret = -ENODEV;
- 
--    if ( !is_iommu_enabled(d) )
-+    if ( !(d ? is_iommu_enabled(d) : iommu_enabled) )
-         return -EOPNOTSUPP;
- 
- #ifdef CONFIG_HAS_PCI
---
-2.35.1
-
diff --git a/0012-libxl-docs-Use-arch-specific-default-paging-memory.patch b/0012-libxl-docs-Use-arch-specific-default-paging-memory.patch
new file mode 100644
index 0000000..9390500
--- /dev/null
+++ b/0012-libxl-docs-Use-arch-specific-default-paging-memory.patch
@@ -0,0 +1,149 @@
+From 755a9b52844de3e1e47aa1fc9991a4240ccfbf35 Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 14:55:08 +0200
+Subject: [PATCH 12/26] libxl, docs: Use arch-specific default paging memory
+
+The default paging memory (described in the `shadow_memory` entry in xl
+config) in libxl is used to determine the memory pool size for xl
+guests. Currently this size is only used for x86, and contains a part
+of RAM to shadow the resident processes. Since there are no shadow mode
+guests on Arm, the part of RAM to shadow the resident processes is not
+necessary. Therefore, this commit splits the function
+`libxl_get_required_shadow_memory()` into arch-specific helpers and
+renames the helper to `libxl__arch_get_required_paging_memory()`.
+
+On x86, this helper returns the original value from
+`libxl_get_required_shadow_memory()`, so no functional change is
+intended.
+
+On Arm, this helper returns 1MB per vcpu plus 4KB per MiB of RAM
+for the P2M map and an additional 512KB.
+
+Also update the xl.cfg documentation to add the Arm documentation
+according to the code changes, and correct the comment style to follow
+the Xen coding style.
+
+This is part of CVE-2022-33747 / XSA-409.
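+
+As a worked example (editor's illustration, not part of the change):
+with the Arm helper introduced below, a guest with 4 vCPUs and 4 GiB
+(4194304 KiB) of RAM gets, in KiB:
+
+    4 * (256 * 4 + 4194304 / 1024 + 128)
+        = 4 * (1024 + 4096 + 128)
+        = 20992 KiB (20.5 MiB)
+
+i.e. 1 MiB per vCPU, 16 MiB for the P2M map and 512 KiB for the
+extended regions.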
+ +Suggested-by: Julien Grall <jgrall@amazon.com> +Signed-off-by: Henry Wang <Henry.Wang@arm.com> +Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> +master commit: 156a239ea288972425f967ac807b3cb5b5e14874 +master date: 2022-10-11 14:28:37 +0200 +--- + docs/man/xl.cfg.5.pod.in | 5 +++++ + tools/libs/light/libxl_arch.h | 4 ++++ + tools/libs/light/libxl_arm.c | 14 ++++++++++++++ + tools/libs/light/libxl_utils.c | 9 ++------- + tools/libs/light/libxl_x86.c | 13 +++++++++++++ + 5 files changed, 38 insertions(+), 7 deletions(-) + +diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in +index b98d1613987e..eda1e77ebd06 100644 +--- a/docs/man/xl.cfg.5.pod.in ++++ b/docs/man/xl.cfg.5.pod.in +@@ -1768,6 +1768,11 @@ are not using hardware assisted paging (i.e. you are using shadow + mode) and your guest workload consists of a very large number of + similar processes then increasing this value may improve performance. + ++On Arm, this field is used to determine the size of the guest P2M pages ++pool, and the default value is 1MB per vCPU plus 4KB per MB of RAM for ++the P2M map and additional 512KB for extended regions. Users should ++adjust this value if bigger P2M pool size is needed. ++ + =back + + =head3 Processor and Platform Features +diff --git a/tools/libs/light/libxl_arch.h b/tools/libs/light/libxl_arch.h +index 1522ecb97f72..5a060c2c3033 100644 +--- a/tools/libs/light/libxl_arch.h ++++ b/tools/libs/light/libxl_arch.h +@@ -90,6 +90,10 @@ void libxl__arch_update_domain_config(libxl__gc *gc, + libxl_domain_config *dst, + const libxl_domain_config *src); + ++_hidden ++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb, ++ unsigned int smp_cpus); ++ + #if defined(__i386__) || defined(__x86_64__) + + #define LAPIC_BASE_ADDRESS 0xfee00000 +diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c +index eef1de093914..73a95e83af24 100644 +--- a/tools/libs/light/libxl_arm.c ++++ b/tools/libs/light/libxl_arm.c +@@ -154,6 +154,20 @@ out: + return rc; + } + ++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb, ++ unsigned int smp_cpus) ++{ ++ /* ++ * 256 pages (1MB) per vcpu, ++ * plus 1 page per MiB of RAM for the P2M map, ++ * plus 1 page per MiB of extended region. This default value is 128 MiB ++ * which should be enough for domains that are not running backend. ++ * This is higher than the minimum that Xen would allocate if no value ++ * were given (but the Xen minimum is for safety, not performance). ++ */ ++ return 4 * (256 * smp_cpus + maxmem_kb / 1024 + 128); ++} ++ + static struct arch_info { + const char *guest_type; + const char *timer_compat; +diff --git a/tools/libs/light/libxl_utils.c b/tools/libs/light/libxl_utils.c +index 4699c4a0a36f..e276c0ee9cc3 100644 +--- a/tools/libs/light/libxl_utils.c ++++ b/tools/libs/light/libxl_utils.c +@@ -18,6 +18,7 @@ + #include <ctype.h> + + #include "libxl_internal.h" ++#include "libxl_arch.h" + #include "_paths.h" + + #ifndef LIBXL_HAVE_NONCONST_LIBXL_BASENAME_RETURN_VALUE +@@ -39,13 +40,7 @@ char *libxl_basename(const char *name) + + unsigned long libxl_get_required_shadow_memory(unsigned long maxmem_kb, unsigned int smp_cpus) + { +- /* 256 pages (1MB) per vcpu, +- plus 1 page per MiB of RAM for the P2M map, +- plus 1 page per MiB of RAM to shadow the resident processes. +- This is higher than the minimum that Xen would allocate if no value +- were given (but the Xen minimum is for safety, not performance). 
+- */ +- return 4 * (256 * smp_cpus + 2 * (maxmem_kb / 1024)); ++ return libxl__arch_get_required_paging_memory(maxmem_kb, smp_cpus); + } + + char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid) +diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c +index 1feadebb1852..51362893cf98 100644 +--- a/tools/libs/light/libxl_x86.c ++++ b/tools/libs/light/libxl_x86.c +@@ -882,6 +882,19 @@ void libxl__arch_update_domain_config(libxl__gc *gc, + libxl_defbool_val(src->b_info.arch_x86.msr_relaxed)); + } + ++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb, ++ unsigned int smp_cpus) ++{ ++ /* ++ * 256 pages (1MB) per vcpu, ++ * plus 1 page per MiB of RAM for the P2M map, ++ * plus 1 page per MiB of RAM to shadow the resident processes. ++ * This is higher than the minimum that Xen would allocate if no value ++ * were given (but the Xen minimum is for safety, not performance). ++ */ ++ return 4 * (256 * smp_cpus + 2 * (maxmem_kb / 1024)); ++} ++ + /* + * Local variables: + * mode: C +-- +2.37.3 + diff --git a/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch b/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch deleted file mode 100644 index 8416c96..0000000 --- a/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch +++ /dev/null @@ -1,229 +0,0 @@ -From 838f6c211f7f05f107e1acdfb0977ab61ec0bf2e Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:03:20 +0200 -Subject: [PATCH 13/51] IOMMU/x86: disallow device assignment to PoD guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -While it is okay for IOMMU page tables to be set up for guests starting -in PoD mode, actual device assignment may only occur once all PoD -entries have been removed from the P2M. So far this was enforced only -for boot-time assignment, and only in the tool stack. - -Also use the new function to replace p2m_pod_entry_count(): Its unlocked -access to p2m->pod.entry_count wasn't really okay (irrespective of the -result being stale by the time the caller gets to see it). Nor was the -use of that function in line with the immediately preceding comment: A -PoD guest isn't just one with a non-zero entry count, but also one with -a non-empty cache (e.g. prior to actually launching the guest). - -To allow the tool stack to see a consistent snapshot of PoD state, move -the tail of XENMEM_{get,set}_pod_target handling into a function, adding -proper locking there. - -In libxl take the liberty to use the new local variable r also for a -pre-existing call into libxc. 
- -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: ad4312d764e8b40a1e45b64aac6d840a60c59f13 -master date: 2022-05-02 08:48:02 +0200 ---- - xen/arch/x86/mm.c | 6 +--- - xen/arch/x86/mm/p2m-pod.c | 43 ++++++++++++++++++++++++++++- - xen/common/vm_event.c | 2 +- - xen/drivers/passthrough/x86/iommu.c | 3 +- - xen/include/asm-x86/p2m.h | 21 +++++++------- - 5 files changed, 57 insertions(+), 18 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index e222d9aa98ee..4ee2de11051d 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -4777,7 +4777,6 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - { - xen_pod_target_t target; - struct domain *d; -- struct p2m_domain *p2m; - - if ( copy_from_guest(&target, arg, 1) ) - return -EFAULT; -@@ -4812,10 +4811,7 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) - } - else if ( rc >= 0 ) - { -- p2m = p2m_get_hostp2m(d); -- target.tot_pages = domain_tot_pages(d); -- target.pod_cache_pages = p2m->pod.count; -- target.pod_entries = p2m->pod.entry_count; -+ p2m_pod_get_mem_target(d, &target); - - if ( __copy_to_guest(arg, &target, 1) ) - { -diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c -index d8d1a0ce7ed7..a3c9d8a97423 100644 ---- a/xen/arch/x86/mm/p2m-pod.c -+++ b/xen/arch/x86/mm/p2m-pod.c -@@ -20,6 +20,7 @@ - */ - - #include <xen/event.h> -+#include <xen/iocap.h> - #include <xen/ioreq.h> - #include <xen/mm.h> - #include <xen/sched.h> -@@ -362,7 +363,10 @@ p2m_pod_set_mem_target(struct domain *d, unsigned long target) - - ASSERT( pod_target >= p2m->pod.count ); - -- ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/); -+ if ( has_arch_pdevs(d) || cache_flush_permitted(d) ) -+ ret = -ENOTEMPTY; -+ else -+ ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/); - - out: - pod_unlock(p2m); -@@ -370,6 +374,23 @@ out: - return ret; - } - -+void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target) -+{ -+ struct p2m_domain *p2m = p2m_get_hostp2m(d); -+ -+ ASSERT(is_hvm_domain(d)); -+ -+ pod_lock(p2m); -+ lock_page_alloc(p2m); -+ -+ target->tot_pages = domain_tot_pages(d); -+ target->pod_cache_pages = p2m->pod.count; -+ target->pod_entries = p2m->pod.entry_count; -+ -+ unlock_page_alloc(p2m); -+ pod_unlock(p2m); -+} -+ - int p2m_pod_empty_cache(struct domain *d) - { - struct p2m_domain *p2m = p2m_get_hostp2m(d); -@@ -1387,6 +1408,9 @@ guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn, - if ( !paging_mode_translate(d) ) - return -EINVAL; - -+ if ( has_arch_pdevs(d) || cache_flush_permitted(d) ) -+ return -ENOTEMPTY; -+ - do { - rc = mark_populate_on_demand(d, gfn, chunk_order); - -@@ -1408,3 +1432,20 @@ void p2m_pod_init(struct p2m_domain *p2m) - for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i ) - p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN); - } -+ -+bool p2m_pod_active(const struct domain *d) -+{ -+ struct p2m_domain *p2m; -+ bool res; -+ -+ if ( !is_hvm_domain(d) ) -+ return false; -+ -+ p2m = p2m_get_hostp2m(d); -+ -+ pod_lock(p2m); -+ res = p2m->pod.entry_count | p2m->pod.count; -+ pod_unlock(p2m); -+ -+ return res; -+} -diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c -index 70ab3ba406ff..21d2f0edf727 100644 ---- a/xen/common/vm_event.c -+++ b/xen/common/vm_event.c -@@ -639,7 +639,7 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec) - - rc = -EXDEV; - /* Disallow paging in a PoD guest */ -- if ( 
p2m_pod_entry_count(p2m_get_hostp2m(d)) ) -+ if ( p2m_pod_active(d) ) - break; - - /* domain_pause() not required here, see XSA-99 */ -diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c -index a36a6bd4b249..dc9936e16930 100644 ---- a/xen/drivers/passthrough/x86/iommu.c -+++ b/xen/drivers/passthrough/x86/iommu.c -@@ -502,11 +502,12 @@ bool arch_iommu_use_permitted(const struct domain *d) - { - /* - * Prevent device assign if mem paging, mem sharing or log-dirty -- * have been enabled for this domain. -+ * have been enabled for this domain, or if PoD is still in active use. - */ - return d == dom_io || - (likely(!mem_sharing_enabled(d)) && - likely(!mem_paging_enabled(d)) && -+ likely(!p2m_pod_active(d)) && - likely(!p2m_get_hostp2m(d)->global_logdirty)); - } - -diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h -index 357a8087481e..f2af7a746ced 100644 ---- a/xen/include/asm-x86/p2m.h -+++ b/xen/include/asm-x86/p2m.h -@@ -661,6 +661,12 @@ int p2m_pod_empty_cache(struct domain *d); - * domain matches target */ - int p2m_pod_set_mem_target(struct domain *d, unsigned long target); - -+/* Obtain a consistent snapshot of PoD related domain state. */ -+void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target); -+ -+/* Check whether PoD is (still) active in a domain. */ -+bool p2m_pod_active(const struct domain *d); -+ - /* Scan pod cache when offline/broken page triggered */ - int - p2m_pod_offline_or_broken_hit(struct page_info *p); -@@ -669,11 +675,6 @@ p2m_pod_offline_or_broken_hit(struct page_info *p); - void - p2m_pod_offline_or_broken_replace(struct page_info *p); - --static inline long p2m_pod_entry_count(const struct p2m_domain *p2m) --{ -- return p2m->pod.entry_count; --} -- - void p2m_pod_init(struct p2m_domain *p2m); - - #else -@@ -689,6 +690,11 @@ static inline int p2m_pod_empty_cache(struct domain *d) - return 0; - } - -+static inline bool p2m_pod_active(const struct domain *d) -+{ -+ return false; -+} -+ - static inline int p2m_pod_offline_or_broken_hit(struct page_info *p) - { - return 0; -@@ -699,11 +705,6 @@ static inline void p2m_pod_offline_or_broken_replace(struct page_info *p) - ASSERT_UNREACHABLE(); - } - --static inline long p2m_pod_entry_count(const struct p2m_domain *p2m) --{ -- return 0; --} -- - static inline void p2m_pod_init(struct p2m_domain *p2m) {} - - #endif --- -2.35.1 - diff --git a/0013-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch b/0013-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch new file mode 100644 index 0000000..dee9d9c --- /dev/null +++ b/0013-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch @@ -0,0 +1,189 @@ +From 914fc8e8b4cc003e90d51bee0aef54687358530a Mon Sep 17 00:00:00 2001 +From: Henry Wang <Henry.Wang@arm.com> +Date: Tue, 11 Oct 2022 14:55:21 +0200 +Subject: [PATCH 13/26] xen/arm: Construct the P2M pages pool for guests + +This commit constructs the p2m pages pool for guests from the +data structure and helper perspective. + +This is implemented by: + +- Adding a `struct paging_domain` which contains a freelist, a +counter variable and a spinlock to `struct arch_domain` to +indicate the free p2m pages and the number of p2m total pages in +the p2m pages pool. + +- Adding a helper `p2m_get_allocation` to get the p2m pool size. + +- Adding a helper `p2m_set_allocation` to set the p2m pages pool +size. This helper should be called before allocating memory for +a guest. + +- Adding a helper `p2m_teardown_allocation` to free the p2m pages +pool. 
This helper should be called during xl domain destruction.
+
+This is part of CVE-2022-33747 / XSA-409.
+
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: 55914f7fc91a468649b8a3ec3f53ae1c4aca6670
+master date: 2022-10-11 14:28:39 +0200
+---
+ xen/arch/arm/p2m.c           | 88 ++++++++++++++++++++++++++++++++++++
+ xen/include/asm-arm/domain.h | 10 ++++
+ xen/include/asm-arm/p2m.h    |  4 ++
+ 3 files changed, 102 insertions(+)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 27418ee5ee98..d8957dd8727c 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -50,6 +50,92 @@ static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
+     return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
+ }
+ 
++/* Return the size of the pool, rounded up to the nearest MB */
++unsigned int p2m_get_allocation(struct domain *d)
++{
++    unsigned long nr_pages = ACCESS_ONCE(d->arch.paging.p2m_total_pages);
++
++    return ROUNDUP(nr_pages, 1 << (20 - PAGE_SHIFT)) >> (20 - PAGE_SHIFT);
++}
++
++/*
++ * Set the pool of pages to the required number of pages.
++ * Returns 0 for success, non-zero for failure.
++ * Call with d->arch.paging.lock held.
++ */
++int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
++{
++    struct page_info *pg;
++
++    ASSERT(spin_is_locked(&d->arch.paging.lock));
++
++    for ( ; ; )
++    {
++        if ( d->arch.paging.p2m_total_pages < pages )
++        {
++            /* Need to allocate more memory from domheap */
++            pg = alloc_domheap_page(NULL, 0);
++            if ( pg == NULL )
++            {
++                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
++                return -ENOMEM;
++            }
++            ACCESS_ONCE(d->arch.paging.p2m_total_pages) =
++                d->arch.paging.p2m_total_pages + 1;
++            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
++        }
++        else if ( d->arch.paging.p2m_total_pages > pages )
++        {
++            /* Need to return memory to domheap */
++            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
++            if( pg )
++            {
++                ACCESS_ONCE(d->arch.paging.p2m_total_pages) =
++                    d->arch.paging.p2m_total_pages - 1;
++                free_domheap_page(pg);
++            }
++            else
++            {
++                printk(XENLOG_ERR
++                       "Failed to free P2M pages, P2M freelist is empty.\n");
++                return -ENOMEM;
++            }
++        }
++        else
++            break;
++
++        /* Check to see if we need to yield and try again */
++        if ( preempted && general_preempt_check() )
++        {
++            *preempted = true;
++            return -ERESTART;
++        }
++    }
++
++    return 0;
++}
++
++int p2m_teardown_allocation(struct domain *d)
++{
++    int ret = 0;
++    bool preempted = false;
++
++    spin_lock(&d->arch.paging.lock);
++    if ( d->arch.paging.p2m_total_pages != 0 )
++    {
++        ret = p2m_set_allocation(d, 0, &preempted);
++        if ( preempted )
++        {
++            spin_unlock(&d->arch.paging.lock);
++            return -ERESTART;
++        }
++        ASSERT(d->arch.paging.p2m_total_pages == 0);
++    }
++    spin_unlock(&d->arch.paging.lock);
++
++    return ret;
++}
++
+ /* Unlock the flush and do a P2M TLB flush if necessary */
+ void p2m_write_unlock(struct p2m_domain *p2m)
+ {
+@@ -1599,7 +1685,9 @@ int p2m_init(struct domain *d)
+     unsigned int cpu;
+ 
+     rwlock_init(&p2m->lock);
++    spin_lock_init(&d->arch.paging.lock);
+     INIT_PAGE_LIST_HEAD(&p2m->pages);
++    INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);
+ 
+     p2m->vmid = INVALID_VMID;
+ 
+diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
+index 7f8ddd3f5c3b..2f31795ab96d 100644
+--- a/xen/include/asm-arm/domain.h
++++ b/xen/include/asm-arm/domain.h
+@@ -40,6 +40,14 @@ struct vtimer {
+     uint64_t cval;
+ };
+ 
++struct paging_domain {
++    spinlock_t lock;
++    /* Free 
P2M pages from the pre-allocated P2M pool */ ++ struct page_list_head p2m_freelist; ++ /* Number of pages from the pre-allocated P2M pool */ ++ unsigned long p2m_total_pages; ++}; ++ + struct arch_domain + { + #ifdef CONFIG_ARM_64 +@@ -51,6 +59,8 @@ struct arch_domain + + struct hvm_domain hvm; + ++ struct paging_domain paging; ++ + struct vmmio vmmio; + + /* Continuable domain_relinquish_resources(). */ +diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h +index b3ba83283e11..c9598740bd02 100644 +--- a/xen/include/asm-arm/p2m.h ++++ b/xen/include/asm-arm/p2m.h +@@ -218,6 +218,10 @@ void p2m_restore_state(struct vcpu *n); + /* Print debugging/statistial info about a domain's p2m */ + void p2m_dump_info(struct domain *d); + ++unsigned int p2m_get_allocation(struct domain *d); ++int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted); ++int p2m_teardown_allocation(struct domain *d); ++ + static inline void p2m_write_lock(struct p2m_domain *p2m) + { + write_lock(&p2m->lock); +-- +2.37.3 + diff --git a/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch b/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch deleted file mode 100644 index 69049f1..0000000 --- a/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 9ebe2ba83644ec6cd33a93c68dab5f551adcbea0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 7 Jun 2022 14:04:16 +0200 -Subject: [PATCH 14/51] x86/msr: handle reads to MSR_P5_MC_{ADDR,TYPE} -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Windows Server 2019 Essentials will unconditionally attempt to read -P5_MC_ADDR MSR at boot and throw a BSOD if injected a #GP. - -Fix this by mapping MSR_P5_MC_{ADDR,TYPE} to -MSR_IA32_MCi_{ADDR,STATUS}, as reported also done by hardware in Intel -SDM "Mapping of the Pentium Processor Machine-Check Errors to the -Machine-Check Architecture" section. 
- -Reported-by: Steffen Einsle <einsle@phptrix.de> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: ce59e472b581e4923f6892172dde62b88c8aa8b7 -master date: 2022-05-02 08:49:12 +0200 ---- - xen/arch/x86/cpu/mcheck/mce.h | 6 ++++++ - xen/arch/x86/cpu/mcheck/mce_intel.c | 19 +++++++++++++++++++ - xen/arch/x86/cpu/mcheck/vmce.c | 2 ++ - xen/arch/x86/msr.c | 2 ++ - xen/include/asm-x86/msr-index.h | 3 +++ - 5 files changed, 32 insertions(+) - -diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h -index 195362691904..192315ecfa3d 100644 ---- a/xen/arch/x86/cpu/mcheck/mce.h -+++ b/xen/arch/x86/cpu/mcheck/mce.h -@@ -169,6 +169,12 @@ static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr) - if (msr >= MSR_IA32_MC0_CTL2 && - msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) ) - return 1; -+ fallthrough; -+ -+ case X86_VENDOR_CENTAUR: -+ case X86_VENDOR_SHANGHAI: -+ if (msr == MSR_P5_MC_ADDR || msr == MSR_P5_MC_TYPE) -+ return 1; - break; - - case X86_VENDOR_AMD: -diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c -index bb9f3a3ff795..d364e9bf5ad1 100644 ---- a/xen/arch/x86/cpu/mcheck/mce_intel.c -+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c -@@ -1001,8 +1001,27 @@ int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) - - int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - { -+ const struct cpuid_policy *cp = v->domain->arch.cpuid; - unsigned int bank = msr - MSR_IA32_MC0_CTL2; - -+ switch ( msr ) -+ { -+ case MSR_P5_MC_ADDR: -+ /* -+ * Bank 0 is used for the 'bank 0 quirk' on older processors. -+ * See vcpu_fill_mc_msrs() for reference. -+ */ -+ *val = v->arch.vmce.bank[1].mci_addr; -+ return 1; -+ -+ case MSR_P5_MC_TYPE: -+ *val = v->arch.vmce.bank[1].mci_status; -+ return 1; -+ } -+ -+ if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ) -+ return 0; -+ - if ( bank < GUEST_MC_BANK_NUM ) - { - *val = v->arch.vmce.bank[bank].mci_ctl2; -diff --git a/xen/arch/x86/cpu/mcheck/vmce.c b/xen/arch/x86/cpu/mcheck/vmce.c -index eb6434a3ba20..0899df58bcbf 100644 ---- a/xen/arch/x86/cpu/mcheck/vmce.c -+++ b/xen/arch/x86/cpu/mcheck/vmce.c -@@ -150,6 +150,8 @@ static int bank_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) - default: - switch ( boot_cpu_data.x86_vendor ) - { -+ case X86_VENDOR_CENTAUR: -+ case X86_VENDOR_SHANGHAI: - case X86_VENDOR_INTEL: - ret = vmce_intel_rdmsr(v, msr, val); - break; -diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c -index aaedb2c31287..da305c7aa4c9 100644 ---- a/xen/arch/x86/msr.c -+++ b/xen/arch/x86/msr.c -@@ -282,6 +282,8 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) - *val = msrs->misc_features_enables.raw; - break; - -+ case MSR_P5_MC_ADDR: -+ case MSR_P5_MC_TYPE: - case MSR_IA32_MCG_CAP ... MSR_IA32_MCG_CTL: /* 0x179 -> 0x17b */ - case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */ - case MSR_IA32_MCx_CTL(0) ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */ -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 3e038db618ff..31964b88af7a 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -15,6 +15,9 @@ - * abbreviated name. Exceptions will be considered on a case-by-case basis. 
- */ - -+#define MSR_P5_MC_ADDR 0 -+#define MSR_P5_MC_TYPE 0x00000001 -+ - #define MSR_APIC_BASE 0x0000001b - #define APIC_BASE_BSP (_AC(1, ULL) << 8) - #define APIC_BASE_EXTD (_AC(1, ULL) << 10) --- -2.35.1 - diff --git a/0014-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch b/0014-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch new file mode 100644 index 0000000..fe24269 --- /dev/null +++ b/0014-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch @@ -0,0 +1,108 @@ +From 3a16da801e14b8ff996b6f7408391ce488abd925 Mon Sep 17 00:00:00 2001 +From: Henry Wang <Henry.Wang@arm.com> +Date: Tue, 11 Oct 2022 14:55:40 +0200 +Subject: [PATCH 14/26] xen/arm, libxl: Implement XEN_DOMCTL_shadow_op for Arm + +This commit implements the `XEN_DOMCTL_shadow_op` support in Xen +for Arm. The p2m pages pool size for xl guests is supposed to be +determined by `XEN_DOMCTL_shadow_op`. Hence, this commit: + +- Introduces a function `p2m_domctl` and implements the subops +`XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION` and +`XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION` of `XEN_DOMCTL_shadow_op`. + +- Adds the `XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION` support in libxl. + +Therefore enabling the setting of shadow memory pool size +when creating a guest from xl and getting shadow memory pool size +from Xen. + +Note that the `XEN_DOMCTL_shadow_op` added in this commit is only +a dummy op, and the functionality of setting/getting p2m memory pool +size for xl guests will be added in following commits. + +This is part of CVE-2022-33747 / XSA-409. + +Signed-off-by: Henry Wang <Henry.Wang@arm.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> +master commit: cf2a68d2ffbc3ce95e01449d46180bddb10d24a0 +master date: 2022-10-11 14:28:42 +0200 +--- + tools/libs/light/libxl_arm.c | 12 ++++++++++++ + xen/arch/arm/domctl.c | 32 ++++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c +index 73a95e83af24..22a0c561bbc6 100644 +--- a/tools/libs/light/libxl_arm.c ++++ b/tools/libs/light/libxl_arm.c +@@ -131,6 +131,18 @@ int libxl__arch_domain_create(libxl__gc *gc, + libxl__domain_build_state *state, + uint32_t domid) + { ++ libxl_ctx *ctx = libxl__gc_owner(gc); ++ unsigned int shadow_mb = DIV_ROUNDUP(d_config->b_info.shadow_memkb, 1024); ++ ++ int r = xc_shadow_control(ctx->xch, domid, ++ XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION, ++ &shadow_mb, 0); ++ if (r) { ++ LOGED(ERROR, domid, ++ "Failed to set %u MiB shadow allocation", shadow_mb); ++ return ERROR_FAIL; ++ } ++ + return 0; + } + +diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c +index 1baf25c3d98b..9bf72e693019 100644 +--- a/xen/arch/arm/domctl.c ++++ b/xen/arch/arm/domctl.c +@@ -47,11 +47,43 @@ static int handle_vuart_init(struct domain *d, + return rc; + } + ++static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc, ++ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) ++{ ++ if ( unlikely(d == current->domain) ) ++ { ++ printk(XENLOG_ERR "Tried to do a p2m domctl op on itself.\n"); ++ return -EINVAL; ++ } ++ ++ if ( unlikely(d->is_dying) ) ++ { ++ printk(XENLOG_ERR "Tried to do a p2m domctl op on dying domain %u\n", ++ d->domain_id); ++ return -EINVAL; ++ } ++ ++ switch ( sc->op ) ++ { ++ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: ++ return 0; ++ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: ++ return 0; ++ default: ++ { ++ printk(XENLOG_ERR "Bad p2m domctl op %u\n", sc->op); ++ return -EINVAL; ++ } ++ } ++} ++ + long arch_do_domctl(struct xen_domctl *domctl, 
struct domain *d,
+                     XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+ {
+     switch ( domctl->cmd )
+     {
++    case XEN_DOMCTL_shadow_op:
++        return p2m_domctl(d, &domctl->u.shadow_op, u_domctl);
+     case XEN_DOMCTL_cacheflush:
+     {
+         gfn_t s = _gfn(domctl->u.cacheflush.start_pfn);
+-- 
+2.37.3
+ 
diff --git a/0015-kconfig-detect-LD-implementation.patch b/0015-kconfig-detect-LD-implementation.patch
deleted file mode 100644
index 4507bc7..0000000
--- a/0015-kconfig-detect-LD-implementation.patch
+++ /dev/null
@@ -1,46 +0,0 @@
-From 3754bd128d1a6b3d5864d1a3ee5d27b67d35387a Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Tue, 7 Jun 2022 14:05:06 +0200
-Subject: [PATCH 15/51] kconfig: detect LD implementation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Detect GNU and LLVM ld implementations. This is required for further
-patches that will introduce diverging behaviour depending on the
-linker implementation in use.
-
-Note that LLVM ld returns "compatible with GNU linkers" as part of the
-version string, so be on the safe side and use '^' to only match at
-the start of the line in case LLVM ever decides to change the text to
-use "compatible with GNU ld" instead.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Michal Orzel <michal.orzel@arm.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
-master commit: c70c4b624f85f7d4e28c70a804a0a3f20d73092b
-master date: 2022-05-02 08:50:39 +0200
----
- xen/Kconfig | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/xen/Kconfig b/xen/Kconfig
-index bcbd2758e5d3..0c89afd50fcf 100644
---- a/xen/Kconfig
-+++ b/xen/Kconfig
-@@ -23,6 +23,12 @@ config CLANG_VERSION
- 	int
- 	default $(shell,$(BASEDIR)/scripts/clang-version.sh $(CC))
- 
-+config LD_IS_GNU
-+	def_bool $(success,$(LD) --version | head -n 1 | grep -q "^GNU ld")
-+
-+config LD_IS_LLVM
-+	def_bool $(success,$(LD) --version | head -n 1 | grep -q "^LLD")
-+
- # -fvisibility=hidden reduces -fpic cost, if it's available
- config CC_HAS_VISIBILITY_ATTRIBUTE
- 	def_bool $(cc-option,-fvisibility=hidden)
---
-2.35.1
-
diff --git a/0015-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch b/0015-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
new file mode 100644
index 0000000..704543a
--- /dev/null
+++ b/0015-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
@@ -0,0 +1,289 @@
+From 44e9dcc48b81bca202a5b31926125a6a59a4c72e Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 14:55:53 +0200
+Subject: [PATCH 15/26] xen/arm: Allocate and free P2M pages from the P2M pool
+
+This commit sets up and tears down the p2m pages pool for
+non-privileged Arm guests by calling `p2m_set_allocation` and
+`p2m_teardown_allocation`.
+
+- For dom0, P2M pages should come from the heap directly instead of
+the p2m pool, so that the kernel may take advantage of the extended
+regions.
+
+- For xl guests, the setting of the p2m pool is called in
+`XEN_DOMCTL_shadow_op` and the p2m pool is destroyed in
+`domain_relinquish_resources`. Note that domctl->u.shadow_op.mb is
+updated with the new size when setting the p2m pool.
+
+- For dom0less domUs, the setting of the p2m pool is called before
+allocating memory during domain creation. Users can specify the p2m
+pool size via the `xen,domain-p2m-mem-mb` dts property.
+
+To actually allocate/free pages from the p2m pool, this commit adds
+two helper functions, namely `p2m_alloc_page` and `p2m_free_page`, to
+`struct p2m_domain`. 
By replacing the `alloc_domheap_page` and +`free_domheap_page` with these two helper functions, p2m pages can +be added/removed from the list of p2m pool rather than from the heap. + +Since page from `p2m_alloc_page` is cleaned, take the opportunity +to remove the redundant `clean_page` in `p2m_create_table`. + +This is part of CVE-2022-33747 / XSA-409. + +Signed-off-by: Henry Wang <Henry.Wang@arm.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> +master commit: cbea5a1149ca7fd4b7cdbfa3ec2e4f109b601ff7 +master date: 2022-10-11 14:28:44 +0200 +--- + docs/misc/arm/device-tree/booting.txt | 8 ++++ + xen/arch/arm/domain.c | 6 +++ + xen/arch/arm/domain_build.c | 29 ++++++++++++++ + xen/arch/arm/domctl.c | 23 ++++++++++- + xen/arch/arm/p2m.c | 57 +++++++++++++++++++++++++-- + 5 files changed, 118 insertions(+), 5 deletions(-) + +diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt +index 71895663a4de..d92ccc56ffe0 100644 +--- a/docs/misc/arm/device-tree/booting.txt ++++ b/docs/misc/arm/device-tree/booting.txt +@@ -182,6 +182,14 @@ with the following properties: + Both #address-cells and #size-cells need to be specified because + both sub-nodes (described shortly) have reg properties. + ++- xen,domain-p2m-mem-mb ++ ++ Optional. A 32-bit integer specifying the amount of megabytes of RAM ++ used for the domain P2M pool. This is in-sync with the shadow_memory ++ option in xl.cfg. Leaving this field empty in device tree will lead to ++ the default size of domain P2M pool, i.e. 1MB per guest vCPU plus 4KB ++ per MB of guest RAM plus 512KB for guest extended regions. ++ + Under the "xen,domain" compatible node, one or more sub-nodes are present + for the DomU kernel and ramdisk. + +diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c +index 2694c39127c5..a818f33a1afa 100644 +--- a/xen/arch/arm/domain.c ++++ b/xen/arch/arm/domain.c +@@ -997,6 +997,7 @@ enum { + PROG_page, + PROG_mapping, + PROG_p2m, ++ PROG_p2m_pool, + PROG_done, + }; + +@@ -1062,6 +1063,11 @@ int domain_relinquish_resources(struct domain *d) + if ( ret ) + return ret; + ++ PROGRESS(p2m_pool): ++ ret = p2m_teardown_allocation(d); ++ if( ret ) ++ return ret; ++ + PROGRESS(done): + break; + +diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c +index d02bacbcd1ed..8aec3755ca5d 100644 +--- a/xen/arch/arm/domain_build.c ++++ b/xen/arch/arm/domain_build.c +@@ -2833,6 +2833,21 @@ static void __init find_gnttab_region(struct domain *d, + kinfo->gnttab_start, kinfo->gnttab_start + kinfo->gnttab_size); + } + ++static unsigned long __init domain_p2m_pages(unsigned long maxmem_kb, ++ unsigned int smp_cpus) ++{ ++ /* ++ * Keep in sync with libxl__get_required_paging_memory(). ++ * 256 pages (1MB) per vcpu, plus 1 page per MiB of RAM for the P2M map, ++ * plus 128 pages to cover extended regions. 
++ */ ++ unsigned long memkb = 4 * (256 * smp_cpus + (maxmem_kb / 1024) + 128); ++ ++ BUILD_BUG_ON(PAGE_SIZE != SZ_4K); ++ ++ return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT); ++} ++ + static int __init construct_domain(struct domain *d, struct kernel_info *kinfo) + { + unsigned int i; +@@ -2924,6 +2939,8 @@ static int __init construct_domU(struct domain *d, + struct kernel_info kinfo = {}; + int rc; + u64 mem; ++ u32 p2m_mem_mb; ++ unsigned long p2m_pages; + + rc = dt_property_read_u64(node, "memory", &mem); + if ( !rc ) +@@ -2933,6 +2950,18 @@ static int __init construct_domU(struct domain *d, + } + kinfo.unassigned_mem = (paddr_t)mem * SZ_1K; + ++ rc = dt_property_read_u32(node, "xen,domain-p2m-mem-mb", &p2m_mem_mb); ++ /* If xen,domain-p2m-mem-mb is not specified, use the default value. */ ++ p2m_pages = rc ? ++ p2m_mem_mb << (20 - PAGE_SHIFT) : ++ domain_p2m_pages(mem, d->max_vcpus); ++ ++ spin_lock(&d->arch.paging.lock); ++ rc = p2m_set_allocation(d, p2m_pages, NULL); ++ spin_unlock(&d->arch.paging.lock); ++ if ( rc != 0 ) ++ return rc; ++ + printk("*** LOADING DOMU cpus=%u memory=%"PRIx64"KB ***\n", d->max_vcpus, mem); + + kinfo.vpl011 = dt_property_read_bool(node, "vpl011"); +diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c +index 9bf72e693019..c8fdeb124084 100644 +--- a/xen/arch/arm/domctl.c ++++ b/xen/arch/arm/domctl.c +@@ -50,6 +50,9 @@ static int handle_vuart_init(struct domain *d, + static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + { ++ long rc; ++ bool preempted = false; ++ + if ( unlikely(d == current->domain) ) + { + printk(XENLOG_ERR "Tried to do a p2m domctl op on itself.\n"); +@@ -66,9 +69,27 @@ static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc, + switch ( sc->op ) + { + case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: +- return 0; ++ { ++ /* Allow and handle preemption */ ++ spin_lock(&d->arch.paging.lock); ++ rc = p2m_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); ++ spin_unlock(&d->arch.paging.lock); ++ ++ if ( preempted ) ++ /* Not finished. Set up to re-run the call. */ ++ rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", ++ u_domctl); ++ else ++ /* Finished. Return the new allocation. */ ++ sc->mb = p2m_get_allocation(d); ++ ++ return rc; ++ } + case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: ++ { ++ sc->mb = p2m_get_allocation(d); + return 0; ++ } + default: + { + printk(XENLOG_ERR "Bad p2m domctl op %u\n", sc->op); +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index d8957dd8727c..b2d856a801af 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -50,6 +50,54 @@ static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn) + return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48)); + } + ++static struct page_info *p2m_alloc_page(struct domain *d) ++{ ++ struct page_info *pg; ++ ++ spin_lock(&d->arch.paging.lock); ++ /* ++ * For hardware domain, there should be no limit in the number of pages that ++ * can be allocated, so that the kernel may take advantage of the extended ++ * regions. Hence, allocate p2m pages for hardware domains from heap. 
++ */ ++ if ( is_hardware_domain(d) ) ++ { ++ pg = alloc_domheap_page(NULL, 0); ++ if ( pg == NULL ) ++ { ++ printk(XENLOG_G_ERR "Failed to allocate P2M pages for hwdom.\n"); ++ spin_unlock(&d->arch.paging.lock); ++ return NULL; ++ } ++ } ++ else ++ { ++ pg = page_list_remove_head(&d->arch.paging.p2m_freelist); ++ if ( unlikely(!pg) ) ++ { ++ spin_unlock(&d->arch.paging.lock); ++ return NULL; ++ } ++ d->arch.paging.p2m_total_pages--; ++ } ++ spin_unlock(&d->arch.paging.lock); ++ ++ return pg; ++} ++ ++static void p2m_free_page(struct domain *d, struct page_info *pg) ++{ ++ spin_lock(&d->arch.paging.lock); ++ if ( is_hardware_domain(d) ) ++ free_domheap_page(pg); ++ else ++ { ++ d->arch.paging.p2m_total_pages++; ++ page_list_add_tail(pg, &d->arch.paging.p2m_freelist); ++ } ++ spin_unlock(&d->arch.paging.lock); ++} ++ + /* Return the size of the pool, rounded up to the nearest MB */ + unsigned int p2m_get_allocation(struct domain *d) + { +@@ -751,7 +799,7 @@ static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry) + + ASSERT(!p2m_is_valid(*entry)); + +- page = alloc_domheap_page(NULL, 0); ++ page = p2m_alloc_page(p2m->domain); + if ( page == NULL ) + return -ENOMEM; + +@@ -878,7 +926,7 @@ static void p2m_free_entry(struct p2m_domain *p2m, + pg = mfn_to_page(mfn); + + page_list_del(pg, &p2m->pages); +- free_domheap_page(pg); ++ p2m_free_page(p2m->domain, pg); + } + + static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry, +@@ -902,7 +950,7 @@ static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry, + ASSERT(level < target); + ASSERT(p2m_is_superpage(*entry, level)); + +- page = alloc_domheap_page(NULL, 0); ++ page = p2m_alloc_page(p2m->domain); + if ( !page ) + return false; + +@@ -1641,7 +1689,7 @@ int p2m_teardown(struct domain *d) + + while ( (pg = page_list_remove_head(&p2m->pages)) ) + { +- free_domheap_page(pg); ++ p2m_free_page(p2m->domain, pg); + count++; + /* Arbitrarily preempt every 512 iterations */ + if ( !(count % 512) && hypercall_preempt_check() ) +@@ -1665,6 +1713,7 @@ void p2m_final_teardown(struct domain *d) + return; + + ASSERT(page_list_empty(&p2m->pages)); ++ ASSERT(page_list_empty(&d->arch.paging.p2m_freelist)); + + if ( p2m->root ) + free_domheap_pages(p2m->root, P2M_ROOT_ORDER); +-- +2.37.3 + diff --git a/0016-gnttab-correct-locking-on-transitive-grant-copy-erro.patch b/0016-gnttab-correct-locking-on-transitive-grant-copy-erro.patch new file mode 100644 index 0000000..6283d47 --- /dev/null +++ b/0016-gnttab-correct-locking-on-transitive-grant-copy-erro.patch @@ -0,0 +1,66 @@ +From 32cb81501c8b858fe9a451650804ec3024a8b364 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 14:56:29 +0200 +Subject: [PATCH 16/26] gnttab: correct locking on transitive grant copy error + path + +While the comment next to the lock dropping in preparation of +recursively calling acquire_grant_for_copy() mistakenly talks about the +rd == td case (excluded a few lines further up), the same concerns apply +to the calling of release_grant_for_copy() on a subsequent error path. + +This is CVE-2022-33748 / XSA-411. 
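+
+In sketch form (editor's illustration condensing the hunk below), the
+error path now follows the same drop/re-acquire discipline as the
+acquire side:
+
+    active_entry_release(act);
+    grant_read_unlock(rgt);
+
+    /* Only now is it safe to call code that may lock another table. */
+    release_grant_for_copy(td, trans_gref, readonly);
+
+    grant_read_lock(rgt);
+    act = active_entry_acquire(rgt, gref);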
+ +Fixes: ad48fb963dbf ("gnttab: fix transitive grant handling") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +master commit: 6e3aab858eef614a21a782a3b73acc88e74690ea +master date: 2022-10-11 14:29:30 +0200 +--- + xen/common/grant_table.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c +index 4c742cd8fe81..d8ca645b96ff 100644 +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -2613,9 +2613,8 @@ acquire_grant_for_copy( + trans_domid); + + /* +- * acquire_grant_for_copy() could take the lock on the +- * remote table (if rd == td), so we have to drop the lock +- * here and reacquire. ++ * acquire_grant_for_copy() will take the lock on the remote table, ++ * so we have to drop the lock here and reacquire. + */ + active_entry_release(act); + grant_read_unlock(rgt); +@@ -2652,11 +2651,25 @@ acquire_grant_for_copy( + act->trans_gref != trans_gref || + !act->is_sub_page)) ) + { ++ /* ++ * Like above for acquire_grant_for_copy() we need to drop and then ++ * re-acquire the locks here to prevent lock order inversion issues. ++ * Unlike for acquire_grant_for_copy() we don't need to re-check ++ * anything, as release_grant_for_copy() doesn't depend on the grant ++ * table entry: It only updates internal state and the status flags. ++ */ ++ active_entry_release(act); ++ grant_read_unlock(rgt); ++ + release_grant_for_copy(td, trans_gref, readonly); + rcu_unlock_domain(td); ++ ++ grant_read_lock(rgt); ++ act = active_entry_acquire(rgt, gref); + reduce_status_for_pin(rd, act, status, readonly); + active_entry_release(act); + grant_read_unlock(rgt); ++ + put_page(*page); + *page = NULL; + return ERESTART; +-- +2.37.3 + diff --git a/0016-linker-lld-do-not-generate-quoted-section-names.patch b/0016-linker-lld-do-not-generate-quoted-section-names.patch deleted file mode 100644 index 5b3a8cd..0000000 --- a/0016-linker-lld-do-not-generate-quoted-section-names.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 88b653f73928117461dc250acd1e830a47a14c2b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 7 Jun 2022 14:05:24 +0200 -Subject: [PATCH 16/51] linker/lld: do not generate quoted section names -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -LLVM LD doesn't strip the quotes from the section names, and so the -resulting binary ends up with section names like: - - [ 1] ".text" PROGBITS ffff82d040200000 00008000 - 000000000018cbc1 0000000000000000 AX 0 0 4096 - -This confuses some tools (like gdb) and prevents proper parsing of the -binary. - -The issue has already been reported and is being fixed in LLD. In -order to workaround this issue and keep the GNU ld support define -different DECL_SECTION macros depending on the used ld -implementation. - -Drop the quotes from the definitions of the debug sections in -DECL_DEBUG{2}, as those quotes are not required for GNU ld either. 
-
-Fixes: 6254920587c3 ('x86: quote section names when defining them in linker script')
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 702c9a800eb3ecd4b8595998d37a769d470c5bb0
-master date: 2022-05-02 08:51:45 +0200
----
- xen/arch/x86/xen.lds.S | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
-index 4c58f3209c3d..bc9b9651b192 100644
---- a/xen/arch/x86/xen.lds.S
-+++ b/xen/arch/x86/xen.lds.S
-@@ -18,7 +18,11 @@ ENTRY(efi_start)
- #else /* !EFI */
-
- #define FORMAT "elf64-x86-64"
---#define DECL_SECTION(x) #x : AT(ADDR(#x) - __XEN_VIRT_START)
-+#ifdef CONFIG_LD_IS_GNU
-+# define DECL_SECTION(x) x : AT(ADDR(#x) - __XEN_VIRT_START)
-+#else
-+# define DECL_SECTION(x) x : AT(ADDR(x) - __XEN_VIRT_START)
-+#endif
-
- ENTRY(start_pa)
-
---
-2.35.1
-
diff --git a/0017-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch b/0017-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
new file mode 100644
index 0000000..ffbc311
--- /dev/null
+++ b/0017-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
@@ -0,0 +1,112 @@
+From e85e2a3c17b6cd38de041cdaf14d9efdcdabad1a Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Tue, 11 Oct 2022 14:59:10 +0200
+Subject: [PATCH 17/26] tools/libxl: Replace deprecated -soundhw on QEMU
+ command line
+
+-soundhw is deprecated since 825ff02911c9 ("audio: add soundhw
+deprecation notice"), QEMU v5.1, and has been removed for the upcoming
+v7.1 by 039a68373c45 ("introduce -audio as a replacement for -soundhw").
+
+Instead we can just add the sound card with "-device", for most options
+that "-soundhw" could handle. "-device" is an option that existed
+before QEMU 1.0, and could already be used to add audio hardware.
+
+The list of possible options for libxl's "soundhw" is taken from
+QEMU 7.0.
+
+The options for "soundhw" are listed in order of preference in
+the manual. The first three (hda, ac97, es1370) are PCI devices and
+easy to test on Linux, and the last four are ISA devices which don't
+seem to work out of the box on Linux.
+
+The sound card 'pcspk' isn't listed even if it used to be accepted by
+'-soundhw', because QEMU crashes when trying to add it to a Xen domain.
+Also, it wouldn't work with "-device"; it might need to be "-machine
+pcspk-audiodev=default" instead.
+
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
+master commit: 62ca138c2c052187783aca3957d3f47c4dcfd683
+master date: 2022-08-18 09:25:50 +0200
+---
+ docs/man/xl.cfg.5.pod.in | 6 +++---
+ tools/libs/light/libxl_dm.c | 19 ++++++++++++++++++-
+ tools/libs/light/libxl_types_internal.idl | 10 ++++++++++
+ 3 files changed, 31 insertions(+), 4 deletions(-)
+
+diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
+index eda1e77ebd06..ab7541f22c3e 100644
+--- a/docs/man/xl.cfg.5.pod.in
++++ b/docs/man/xl.cfg.5.pod.in
+@@ -2545,9 +2545,9 @@ The form serial=DEVICE is also accepted for backwards compatibility.
+
+ =item B<soundhw="DEVICE">
+
+-Select the virtual sound card to expose to the guest. The valid
+-devices are defined by the device model configuration, please see the
+-B<qemu(1)> manpage for details. The default is not to export any sound
++Select the virtual sound card to expose to the guest. The valid devices are
++B<hda>, B<ac97>, B<es1370>, B<adlib>, B<cs4231a>, B<gus>, B<sb16> if they are
++available with the device model QEMU. The default is not to export any sound
+ device.
+
+ =item B<vkb_device=BOOLEAN>
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index 04bf5d85632e..fc264a3a13a6 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -1204,6 +1204,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+     uint64_t ram_size;
+     const char *path, *chardev;
+     bool is_stubdom = libxl_defbool_val(b_info->device_model_stubdomain);
++    int rc;
+
+     dm_args = flexarray_make(gc, 16, 1);
+     dm_envs = flexarray_make(gc, 16, 1);
+@@ -1531,7 +1532,23 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+         }
+     }
+     if (b_info->u.hvm.soundhw) {
+-        flexarray_vappend(dm_args, "-soundhw", b_info->u.hvm.soundhw, NULL);
++        libxl__qemu_soundhw soundhw;
++
++        rc = libxl__qemu_soundhw_from_string(b_info->u.hvm.soundhw, &soundhw);
++        if (rc) {
++            LOGD(ERROR, guest_domid, "Unknown soundhw option '%s'", b_info->u.hvm.soundhw);
++            return ERROR_INVAL;
++        }
++
++        switch (soundhw) {
++        case LIBXL__QEMU_SOUNDHW_HDA:
++            flexarray_vappend(dm_args, "-device", "intel-hda",
++                              "-device", "hda-duplex", NULL);
++            break;
++        default:
++            flexarray_append_pair(dm_args, "-device",
++                                  (char*)libxl__qemu_soundhw_to_string(soundhw));
++        }
+     }
+     if (!libxl__acpi_defbool_val(b_info)) {
+         flexarray_append(dm_args, "-no-acpi");
+diff --git a/tools/libs/light/libxl_types_internal.idl b/tools/libs/light/libxl_types_internal.idl
+index 3593e21dbb64..caa08d3229cd 100644
+--- a/tools/libs/light/libxl_types_internal.idl
++++ b/tools/libs/light/libxl_types_internal.idl
+@@ -55,3 +55,13 @@ libxl__device_action = Enumeration("device_action", [
+     (1, "ADD"),
+     (2, "REMOVE"),
+     ])
++
++libxl__qemu_soundhw = Enumeration("qemu_soundhw", [
++    (1, "ac97"),
++    (2, "adlib"),
++    (3, "cs4231a"),
++    (4, "es1370"),
++    (5, "gus"),
++    (6, "hda"),
++    (7, "sb16"),
++    ])
+--
+2.37.3
+
diff --git a/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch b/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch
deleted file mode 100644
index bc48a84..0000000
--- a/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch
+++ /dev/null
@@ -1,142 +0,0 @@
-From 982a314bd3000a16c3128afadb36a8ff41029adc Mon Sep 17 00:00:00 2001
-From: Julien Grall <jgrall@amazon.com>
-Date: Tue, 7 Jun 2022 14:06:11 +0200
-Subject: [PATCH 17/51] xen: io: Fix race between sending an I/O and domain
- shutdown
-
-Xen provides hypercalls to shutdown (SCHEDOP_shutdown{,_code}) and
-resume a domain (XEN_DOMCTL_resumedomain). They can be used for
-checkpointing, where the expectation is that the domain continues
-afterwards as if nothing had happened.
-
-hvmemul_do_io() and handle_pio() will act differently if the return
-code of hvm_send_ioreq() (resp. hvmemul_do_pio_buffer()) is X86EMUL_RETRY.
-
-In this case, the I/O state will be reset to STATE_IOREQ_NONE (i.e
-no I/O is pending) and/or the PC will not be advanced.
-
-If the shutdown request happens right after the I/O was sent to the
-IOREQ server, then the emulation code will re-execute the instruction
-and therefore forward the same I/O again (at least when reading an IO port).
-
-This would be a problem if the access has a side effect. A dumb example
-is a device implementing a counter which is incremented by one on every
-access. When running shutdown/resume in a loop, the value read by the
-OS may not be the old value + 1.
- -Add an extra boolean in the structure hvm_vcpu_io to indicate whether -the I/O was suspended. This is then used in place of checking the domain -is shutting down in hvmemul_do_io() and handle_pio() as they should -act on suspend (i.e. vcpu_start_shutdown_deferral() returns false) rather -than shutdown. - -Signed-off-by: Julien Grall <jgrall@amazon.com> -Reviewed-by: Paul Durrant <paul@xen.org> -master commit: b7e0d8978810b534725e94a321736496928f00a5 -master date: 2022-05-06 17:16:22 +0100 ---- - xen/arch/arm/ioreq.c | 3 ++- - xen/arch/x86/hvm/emulate.c | 3 ++- - xen/arch/x86/hvm/io.c | 7 ++++--- - xen/common/ioreq.c | 4 ++++ - xen/include/xen/sched.h | 5 +++++ - 5 files changed, 17 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/arm/ioreq.c b/xen/arch/arm/ioreq.c -index 308650b40051..fbccef212bf1 100644 ---- a/xen/arch/arm/ioreq.c -+++ b/xen/arch/arm/ioreq.c -@@ -80,9 +80,10 @@ enum io_state try_fwd_ioserv(struct cpu_user_regs *regs, - return IO_ABORT; - - vio->req = p; -+ vio->suspended = false; - - rc = ioreq_send(s, &p, 0); -- if ( rc != IO_RETRY || v->domain->is_shutting_down ) -+ if ( rc != IO_RETRY || vio->suspended ) - vio->req.state = STATE_IOREQ_NONE; - else if ( !ioreq_needs_completion(&vio->req) ) - rc = IO_HANDLED; -diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c -index 76a2ccfafe23..7da348b5d486 100644 ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -239,6 +239,7 @@ static int hvmemul_do_io( - ASSERT(p.count); - - vio->req = p; -+ vio->suspended = false; - - rc = hvm_io_intercept(&p); - -@@ -334,7 +335,7 @@ static int hvmemul_do_io( - else - { - rc = ioreq_send(s, &p, 0); -- if ( rc != X86EMUL_RETRY || currd->is_shutting_down ) -+ if ( rc != X86EMUL_RETRY || vio->suspended ) - vio->req.state = STATE_IOREQ_NONE; - else if ( !ioreq_needs_completion(&vio->req) ) - rc = X86EMUL_OKAY; -diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c -index 93f1d1503fa6..80915f27e488 100644 ---- a/xen/arch/x86/hvm/io.c -+++ b/xen/arch/x86/hvm/io.c -@@ -138,10 +138,11 @@ bool handle_pio(uint16_t port, unsigned int size, int dir) - - case X86EMUL_RETRY: - /* -- * We should not advance RIP/EIP if the domain is shutting down or -- * if X86EMUL_RETRY has been returned by an internal handler. -+ * We should not advance RIP/EIP if the vio was suspended (e.g. -+ * because the domain is shutting down) or if X86EMUL_RETRY has -+ * been returned by an internal handler. 
- */ -- if ( curr->domain->is_shutting_down || !vcpu_ioreq_pending(curr) ) -+ if ( vio->suspended || !vcpu_ioreq_pending(curr) ) - return false; - break; - -diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c -index d732dc045df9..42414b750bef 100644 ---- a/xen/common/ioreq.c -+++ b/xen/common/ioreq.c -@@ -1256,6 +1256,7 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p, - struct vcpu *curr = current; - struct domain *d = curr->domain; - struct ioreq_vcpu *sv; -+ struct vcpu_io *vio = &curr->io; - - ASSERT(s); - -@@ -1263,7 +1264,10 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p, - return ioreq_send_buffered(s, proto_p); - - if ( unlikely(!vcpu_start_shutdown_deferral(curr)) ) -+ { -+ vio->suspended = true; - return IOREQ_STATUS_RETRY; -+ } - - list_for_each_entry ( sv, - &s->ioreq_vcpu_list, -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 28146ee404e6..9671062360ac 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -159,6 +159,11 @@ enum vio_completion { - struct vcpu_io { - /* I/O request in flight to device model. */ - enum vio_completion completion; -+ /* -+ * Indicate whether the I/O was not handled because the domain -+ * is about to be paused. -+ */ -+ bool suspended; - ioreq_t req; - }; - --- -2.35.1 - diff --git a/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch b/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch deleted file mode 100644 index b20a99a..0000000 --- a/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 4890031d224262a6cf43d3bef1af4a16c13db306 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:06:51 +0200 -Subject: [PATCH 18/51] build: suppress GNU ld warning about RWX load segments - -We cannot really avoid such and we're also not really at risk because of -them, as we control page table permissions ourselves rather than relying -on a loader of some sort. Present GNU ld master started warning about -such, and hence 2.39 is anticipated to have this warning. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Julien Grall <jgrall@amazon.com> -master commit: 68f5aac012b9ae36ce9b65d9ca9cc9f232191ad3 -master date: 2022-05-18 11:17:19 +0200 ---- - xen/Makefile | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/xen/Makefile b/xen/Makefile -index ce4eca3ee4d7..4d9abe704628 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -260,6 +260,8 @@ endif - - AFLAGS += -D__ASSEMBLY__ - -+LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments -+ - CFLAGS += $(CFLAGS-y) - # allow extra CFLAGS externally via EXTRA_CFLAGS_XEN_CORE - CFLAGS += $(EXTRA_CFLAGS_XEN_CORE) --- -2.35.1 - diff --git a/0018-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch b/0018-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch new file mode 100644 index 0000000..d6ade98 --- /dev/null +++ b/0018-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch @@ -0,0 +1,44 @@ +From e8882bcfe35520e950ba60acd6e67e65f1ce90a8 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 14:59:26 +0200 +Subject: [PATCH 18/26] x86/CPUID: surface suitable value in EBX of XSTATE + subleaf 1 + +While the SDM isn't very clear about this, our present behavior make +Linux 5.19 unhappy. 
As of commit 8ad7e8f69695 ("x86/fpu/xsave: Support +XSAVEC in the kernel") they're using this CPUID output also to size +the compacted area used by XSAVEC. Getting back zero there isn't really +liked, yet for PV that's the default on capable hardware: XSAVES isn't +exposed to PV domains. + +Considering that the size reported is that of the compacted save area, +I view Linux'es assumption as appropriate (short of the SDM properly +considering the case). Therefore we need to populate the field also when +only XSAVEC is supported for a guest. + +Fixes: 460b9a4b3630 ("x86/xsaves: enable xsaves/xrstors for hvm guest") +Fixes: 8d050ed1097c ("x86: don't expose XSAVES capability to PV guests") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: c3bd0b83ea5b7c0da6542687436042eeea1e7909 +master date: 2022-08-24 14:23:59 +0200 +--- + xen/arch/x86/cpuid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index ff335f16390d..a647331f4793 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -1060,7 +1060,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, + switch ( subleaf ) + { + case 1: +- if ( p->xstate.xsaves ) ++ if ( p->xstate.xsavec || p->xstate.xsaves ) + { + /* + * TODO: Figure out what to do for XSS state. VT-x manages +-- +2.37.3 + diff --git a/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch b/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch deleted file mode 100644 index e4d739b..0000000 --- a/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 1bc669a568a9f4bdab9e9ddb95823ba370dc0baf Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:07:11 +0200 -Subject: [PATCH 19/51] build: silence GNU ld warning about executable stacks - -While for C files the compiler is supposed to arrange for emitting -respective information, for assembly sources we're responsible ourselves. -Present GNU ld master started warning about such, and hence 2.39 is -anticipated to have this warning. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Julien Grall <jgrall@amazon.com> -master commit: 62d22296a95d259c934ca2f39ac511d729cfbb68 -master date: 2022-05-18 11:18:45 +0200 ---- - xen/Makefile | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/xen/Makefile b/xen/Makefile -index 4d9abe704628..971028eda240 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -260,6 +260,8 @@ endif - - AFLAGS += -D__ASSEMBLY__ - -+$(call cc-option-add,AFLAGS,CC,-Wa$(comma)--noexecstack) -+ - LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments - - CFLAGS += $(CFLAGS-y) --- -2.35.1 - diff --git a/0019-xen-sched-introduce-cpupool_update_node_affinity.patch b/0019-xen-sched-introduce-cpupool_update_node_affinity.patch new file mode 100644 index 0000000..957d0fe --- /dev/null +++ b/0019-xen-sched-introduce-cpupool_update_node_affinity.patch @@ -0,0 +1,257 @@ +From d4e971ad12dd27913dffcf96b5de378ea7b476e1 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 11 Oct 2022 14:59:40 +0200 +Subject: [PATCH 19/26] xen/sched: introduce cpupool_update_node_affinity() + +For updating the node affinities of all domains in a cpupool add a new +function cpupool_update_node_affinity(). 
+ +In order to avoid multiple allocations of cpumasks carve out memory +allocation and freeing from domain_update_node_affinity() into new +helpers, which can be used by cpupool_update_node_affinity(). + +Modify domain_update_node_affinity() to take an additional parameter +for passing the allocated memory in and to allocate and free the memory +via the new helpers in case NULL was passed. + +This will help later to pre-allocate the cpumasks in order to avoid +allocations in stop-machine context. + +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Tested-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: a83fa1e2b96ace65b45dde6954d67012633a082b +master date: 2022-09-05 11:42:30 +0100 +--- + xen/common/sched/core.c | 54 ++++++++++++++++++++++++++------------ + xen/common/sched/cpupool.c | 39 +++++++++++++++------------ + xen/common/sched/private.h | 7 +++++ + xen/include/xen/sched.h | 9 ++++++- + 4 files changed, 74 insertions(+), 35 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index f07bd2681fcb..065a83eca912 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -1824,9 +1824,28 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd, + return ret; + } + +-void domain_update_node_affinity(struct domain *d) ++bool alloc_affinity_masks(struct affinity_masks *affinity) + { +- cpumask_var_t dom_cpumask, dom_cpumask_soft; ++ if ( !alloc_cpumask_var(&affinity->hard) ) ++ return false; ++ if ( !alloc_cpumask_var(&affinity->soft) ) ++ { ++ free_cpumask_var(affinity->hard); ++ return false; ++ } ++ ++ return true; ++} ++ ++void free_affinity_masks(struct affinity_masks *affinity) ++{ ++ free_cpumask_var(affinity->soft); ++ free_cpumask_var(affinity->hard); ++} ++ ++void domain_update_node_aff(struct domain *d, struct affinity_masks *affinity) ++{ ++ struct affinity_masks masks; + cpumask_t *dom_affinity; + const cpumask_t *online; + struct sched_unit *unit; +@@ -1836,14 +1855,16 @@ void domain_update_node_affinity(struct domain *d) + if ( !d->vcpu || !d->vcpu[0] ) + return; + +- if ( !zalloc_cpumask_var(&dom_cpumask) ) +- return; +- if ( !zalloc_cpumask_var(&dom_cpumask_soft) ) ++ if ( !affinity ) + { +- free_cpumask_var(dom_cpumask); +- return; ++ affinity = &masks; ++ if ( !alloc_affinity_masks(affinity) ) ++ return; + } + ++ cpumask_clear(affinity->hard); ++ cpumask_clear(affinity->soft); ++ + online = cpupool_domain_master_cpumask(d); + + spin_lock(&d->node_affinity_lock); +@@ -1864,22 +1885,21 @@ void domain_update_node_affinity(struct domain *d) + */ + for_each_sched_unit ( d, unit ) + { +- cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity); +- cpumask_or(dom_cpumask_soft, dom_cpumask_soft, +- unit->cpu_soft_affinity); ++ cpumask_or(affinity->hard, affinity->hard, unit->cpu_hard_affinity); ++ cpumask_or(affinity->soft, affinity->soft, unit->cpu_soft_affinity); + } + /* Filter out non-online cpus */ +- cpumask_and(dom_cpumask, dom_cpumask, online); +- ASSERT(!cpumask_empty(dom_cpumask)); ++ cpumask_and(affinity->hard, affinity->hard, online); ++ ASSERT(!cpumask_empty(affinity->hard)); + /* And compute the intersection between hard, online and soft */ +- cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask); ++ cpumask_and(affinity->soft, affinity->soft, affinity->hard); + + /* + * If not empty, the intersection of hard, soft and online is the + * narrowest set we want. If empty, we fall back to hard&online. 
+ */ +- dom_affinity = cpumask_empty(dom_cpumask_soft) ? +- dom_cpumask : dom_cpumask_soft; ++ dom_affinity = cpumask_empty(affinity->soft) ? affinity->hard ++ : affinity->soft; + + nodes_clear(d->node_affinity); + for_each_cpu ( cpu, dom_affinity ) +@@ -1888,8 +1908,8 @@ void domain_update_node_affinity(struct domain *d) + + spin_unlock(&d->node_affinity_lock); + +- free_cpumask_var(dom_cpumask_soft); +- free_cpumask_var(dom_cpumask); ++ if ( affinity == &masks ) ++ free_affinity_masks(affinity); + } + + typedef long ret_t; +diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c +index 8c6e6eb9ccd5..45b6ff99561a 100644 +--- a/xen/common/sched/cpupool.c ++++ b/xen/common/sched/cpupool.c +@@ -401,6 +401,25 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c) + return ret; + } + ++/* Update affinities of all domains in a cpupool. */ ++static void cpupool_update_node_affinity(const struct cpupool *c) ++{ ++ struct affinity_masks masks; ++ struct domain *d; ++ ++ if ( !alloc_affinity_masks(&masks) ) ++ return; ++ ++ rcu_read_lock(&domlist_read_lock); ++ ++ for_each_domain_in_cpupool(d, c) ++ domain_update_node_aff(d, &masks); ++ ++ rcu_read_unlock(&domlist_read_lock); ++ ++ free_affinity_masks(&masks); ++} ++ + /* + * assign a specific cpu to a cpupool + * cpupool_lock must be held +@@ -408,7 +427,6 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c) + static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) + { + int ret; +- struct domain *d; + const cpumask_t *cpus; + + cpus = sched_get_opt_cpumask(c->gran, cpu); +@@ -433,12 +451,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) + + rcu_read_unlock(&sched_res_rculock); + +- rcu_read_lock(&domlist_read_lock); +- for_each_domain_in_cpupool(d, c) +- { +- domain_update_node_affinity(d); +- } +- rcu_read_unlock(&domlist_read_lock); ++ cpupool_update_node_affinity(c); + + return 0; + } +@@ -447,18 +460,14 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + { + int cpu = cpupool_moving_cpu; + const cpumask_t *cpus; +- struct domain *d; + int ret; + + if ( c != cpupool_cpu_moving ) + return -EADDRNOTAVAIL; + +- /* +- * We need this for scanning the domain list, both in +- * cpu_disable_scheduler(), and at the bottom of this function. 
+- */ + rcu_read_lock(&domlist_read_lock); + ret = cpu_disable_scheduler(cpu); ++ rcu_read_unlock(&domlist_read_lock); + + rcu_read_lock(&sched_res_rculock); + cpus = get_sched_res(cpu)->cpus; +@@ -485,11 +494,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + } + rcu_read_unlock(&sched_res_rculock); + +- for_each_domain_in_cpupool(d, c) +- { +- domain_update_node_affinity(d); +- } +- rcu_read_unlock(&domlist_read_lock); ++ cpupool_update_node_affinity(c); + + return ret; + } +diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h +index a870320146ef..2b04b01a0c0a 100644 +--- a/xen/common/sched/private.h ++++ b/xen/common/sched/private.h +@@ -593,6 +593,13 @@ affinity_balance_cpumask(const struct sched_unit *unit, int step, + cpumask_copy(mask, unit->cpu_hard_affinity); + } + ++struct affinity_masks { ++ cpumask_var_t hard; ++ cpumask_var_t soft; ++}; ++ ++bool alloc_affinity_masks(struct affinity_masks *affinity); ++void free_affinity_masks(struct affinity_masks *affinity); + void sched_rm_cpu(unsigned int cpu); + const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); + void schedule_dump(struct cpupool *c); +diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h +index 9671062360ac..3f4225738a40 100644 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -655,8 +655,15 @@ static inline void get_knownalive_domain(struct domain *d) + ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED)); + } + ++struct affinity_masks; ++ + int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity); +-void domain_update_node_affinity(struct domain *d); ++void domain_update_node_aff(struct domain *d, struct affinity_masks *affinity); ++ ++static inline void domain_update_node_affinity(struct domain *d) ++{ ++ domain_update_node_aff(d, NULL); ++} + + /* + * To be implemented by each architecture, sanity checking the configuration +-- +2.37.3 + diff --git a/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch b/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch deleted file mode 100644 index baa1e15..0000000 --- a/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch +++ /dev/null @@ -1,50 +0,0 @@ -From f1be0b62a03b90a40a03e21f965e4cbb89809bb1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - <marmarek@invisiblethingslab.com> -Date: Tue, 7 Jun 2022 14:07:34 +0200 -Subject: [PATCH 20/51] ns16550: use poll mode if INTERRUPT_LINE is 0xff -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Intel LPSS has INTERRUPT_LINE set to 0xff by default, that is declared -by the PCI Local Bus Specification Revision 3.0 (from 2004) as -"unknown"/"no connection". Fallback to poll mode in this case. -The 0xff handling is x86-specific, the surrounding code is guarded with -CONFIG_X86 anyway. 
- -Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 6a2ea1a2370a0c8a0210accac0ae62e68c185134 -master date: 2022-05-20 12:19:45 +0200 ---- - xen/drivers/char/ns16550.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c -index 30596d60d4ed..2d2bd2a02469 100644 ---- a/xen/drivers/char/ns16550.c -+++ b/xen/drivers/char/ns16550.c -@@ -1221,6 +1221,19 @@ pci_uart_config(struct ns16550 *uart, bool_t skip_amt, unsigned int idx) - pci_conf_read8(PCI_SBDF(0, b, d, f), - PCI_INTERRUPT_LINE) : 0; - -+#ifdef CONFIG_X86 -+ /* -+ * PCI Local Bus Specification Revision 3.0 defines 0xff value -+ * as special only for X86. -+ */ -+ if ( uart->irq == 0xff ) -+ uart->irq = 0; -+#endif -+ if ( !uart->irq ) -+ printk(XENLOG_INFO -+ "ns16550: %pp: no legacy IRQ, using poll mode\n", -+ &PCI_SBDF(0, b, d, f)); -+ - return 0; - } - } --- -2.35.1 - diff --git a/0020-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch b/0020-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch new file mode 100644 index 0000000..30784c3 --- /dev/null +++ b/0020-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch @@ -0,0 +1,263 @@ +From c377ceab0a007690a1e71c81a5232613c99e944d Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 11 Oct 2022 15:00:05 +0200 +Subject: [PATCH 20/26] xen/sched: carve out memory allocation and freeing from + schedule_cpu_rm() + +In order to prepare not allocating or freeing memory from +schedule_cpu_rm(), move this functionality to dedicated functions. + +For now call those functions from schedule_cpu_rm(). + +No change of behavior expected. + +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: d42be6f83480b3ada286dc18444331a816be88a3 +master date: 2022-09-05 11:42:30 +0100 +--- + xen/common/sched/core.c | 143 ++++++++++++++++++++++--------------- + xen/common/sched/private.h | 11 +++ + 2 files changed, 98 insertions(+), 56 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index 065a83eca912..2decb1161a63 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -3221,6 +3221,75 @@ out: + return ret; + } + ++/* ++ * Allocate all memory needed for free_cpu_rm_data(), as allocations cannot ++ * be made in stop_machine() context. ++ * ++ * Between alloc_cpu_rm_data() and the real cpu removal action the relevant ++ * contents of struct sched_resource can't change, as the cpu in question is ++ * locked against any other movement to or from cpupools, and the data copied ++ * by alloc_cpu_rm_data() is modified only in case the cpu in question is ++ * being moved from or to a cpupool. 
++ */ ++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) ++{ ++ struct cpu_rm_data *data; ++ const struct sched_resource *sr; ++ unsigned int idx; ++ ++ rcu_read_lock(&sched_res_rculock); ++ ++ sr = get_sched_res(cpu); ++ data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1); ++ if ( !data ) ++ goto out; ++ ++ data->old_ops = sr->scheduler; ++ data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv; ++ data->ppriv_old = sr->sched_priv; ++ ++ for ( idx = 0; idx < sr->granularity - 1; idx++ ) ++ { ++ data->sr[idx] = sched_alloc_res(); ++ if ( data->sr[idx] ) ++ { ++ data->sr[idx]->sched_unit_idle = sched_alloc_unit_mem(); ++ if ( !data->sr[idx]->sched_unit_idle ) ++ { ++ sched_res_free(&data->sr[idx]->rcu); ++ data->sr[idx] = NULL; ++ } ++ } ++ if ( !data->sr[idx] ) ++ { ++ while ( idx > 0 ) ++ sched_res_free(&data->sr[--idx]->rcu); ++ XFREE(data); ++ goto out; ++ } ++ ++ data->sr[idx]->curr = data->sr[idx]->sched_unit_idle; ++ data->sr[idx]->scheduler = &sched_idle_ops; ++ data->sr[idx]->granularity = 1; ++ ++ /* We want the lock not to change when replacing the resource. */ ++ data->sr[idx]->schedule_lock = sr->schedule_lock; ++ } ++ ++ out: ++ rcu_read_unlock(&sched_res_rculock); ++ ++ return data; ++} ++ ++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu) ++{ ++ sched_free_udata(mem->old_ops, mem->vpriv_old); ++ sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu); ++ ++ xfree(mem); ++} ++ + /* + * Remove a pCPU from its cpupool. Its scheduler becomes &sched_idle_ops + * (the idle scheduler). +@@ -3229,53 +3298,23 @@ out: + */ + int schedule_cpu_rm(unsigned int cpu) + { +- void *ppriv_old, *vpriv_old; +- struct sched_resource *sr, **sr_new = NULL; ++ struct sched_resource *sr; ++ struct cpu_rm_data *data; + struct sched_unit *unit; +- struct scheduler *old_ops; + spinlock_t *old_lock; + unsigned long flags; +- int idx, ret = -ENOMEM; ++ int idx = 0; + unsigned int cpu_iter; + ++ data = alloc_cpu_rm_data(cpu); ++ if ( !data ) ++ return -ENOMEM; ++ + rcu_read_lock(&sched_res_rculock); + + sr = get_sched_res(cpu); +- old_ops = sr->scheduler; + +- if ( sr->granularity > 1 ) +- { +- sr_new = xmalloc_array(struct sched_resource *, sr->granularity - 1); +- if ( !sr_new ) +- goto out; +- for ( idx = 0; idx < sr->granularity - 1; idx++ ) +- { +- sr_new[idx] = sched_alloc_res(); +- if ( sr_new[idx] ) +- { +- sr_new[idx]->sched_unit_idle = sched_alloc_unit_mem(); +- if ( !sr_new[idx]->sched_unit_idle ) +- { +- sched_res_free(&sr_new[idx]->rcu); +- sr_new[idx] = NULL; +- } +- } +- if ( !sr_new[idx] ) +- { +- for ( idx--; idx >= 0; idx-- ) +- sched_res_free(&sr_new[idx]->rcu); +- goto out; +- } +- sr_new[idx]->curr = sr_new[idx]->sched_unit_idle; +- sr_new[idx]->scheduler = &sched_idle_ops; +- sr_new[idx]->granularity = 1; +- +- /* We want the lock not to change when replacing the resource. */ +- sr_new[idx]->schedule_lock = sr->schedule_lock; +- } +- } +- +- ret = 0; ++ ASSERT(sr->granularity); + ASSERT(sr->cpupool != NULL); + ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus)); + ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid)); +@@ -3283,10 +3322,6 @@ int schedule_cpu_rm(unsigned int cpu) + /* See comment in schedule_cpu_add() regarding lock switching. 
*/ + old_lock = pcpu_schedule_lock_irqsave(cpu, &flags); + +- vpriv_old = idle_vcpu[cpu]->sched_unit->priv; +- ppriv_old = sr->sched_priv; +- +- idx = 0; + for_each_cpu ( cpu_iter, sr->cpus ) + { + per_cpu(sched_res_idx, cpu_iter) = 0; +@@ -3300,27 +3335,27 @@ int schedule_cpu_rm(unsigned int cpu) + else + { + /* Initialize unit. */ +- unit = sr_new[idx]->sched_unit_idle; +- unit->res = sr_new[idx]; ++ unit = data->sr[idx]->sched_unit_idle; ++ unit->res = data->sr[idx]; + unit->is_running = true; + sched_unit_add_vcpu(unit, idle_vcpu[cpu_iter]); + sched_domain_insert_unit(unit, idle_vcpu[cpu_iter]->domain); + + /* Adjust cpu masks of resources (old and new). */ + cpumask_clear_cpu(cpu_iter, sr->cpus); +- cpumask_set_cpu(cpu_iter, sr_new[idx]->cpus); ++ cpumask_set_cpu(cpu_iter, data->sr[idx]->cpus); + cpumask_set_cpu(cpu_iter, &sched_res_mask); + + /* Init timer. */ +- init_timer(&sr_new[idx]->s_timer, s_timer_fn, NULL, cpu_iter); ++ init_timer(&data->sr[idx]->s_timer, s_timer_fn, NULL, cpu_iter); + + /* Last resource initializations and insert resource pointer. */ +- sr_new[idx]->master_cpu = cpu_iter; +- set_sched_res(cpu_iter, sr_new[idx]); ++ data->sr[idx]->master_cpu = cpu_iter; ++ set_sched_res(cpu_iter, data->sr[idx]); + + /* Last action: set the new lock pointer. */ + smp_mb(); +- sr_new[idx]->schedule_lock = &sched_free_cpu_lock; ++ data->sr[idx]->schedule_lock = &sched_free_cpu_lock; + + idx++; + } +@@ -3336,16 +3371,12 @@ int schedule_cpu_rm(unsigned int cpu) + /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */ + spin_unlock_irqrestore(old_lock, flags); + +- sched_deinit_pdata(old_ops, ppriv_old, cpu); ++ sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu); + +- sched_free_udata(old_ops, vpriv_old); +- sched_free_pdata(old_ops, ppriv_old, cpu); +- +-out: + rcu_read_unlock(&sched_res_rculock); +- xfree(sr_new); ++ free_cpu_rm_data(data, cpu); + +- return ret; ++ return 0; + } + + struct scheduler *scheduler_get_default(void) +diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h +index 2b04b01a0c0a..e286849a1312 100644 +--- a/xen/common/sched/private.h ++++ b/xen/common/sched/private.h +@@ -600,6 +600,15 @@ struct affinity_masks { + + bool alloc_affinity_masks(struct affinity_masks *affinity); + void free_affinity_masks(struct affinity_masks *affinity); ++ ++/* Memory allocation related data for schedule_cpu_rm(). 
*/ ++struct cpu_rm_data { ++ const struct scheduler *old_ops; ++ void *ppriv_old; ++ void *vpriv_old; ++ struct sched_resource *sr[]; ++}; ++ + void sched_rm_cpu(unsigned int cpu); + const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu); + void schedule_dump(struct cpupool *c); +@@ -608,6 +617,8 @@ struct scheduler *scheduler_alloc(unsigned int sched_id); + void scheduler_free(struct scheduler *sched); + int cpu_disable_scheduler(unsigned int cpu); + int schedule_cpu_add(unsigned int cpu, struct cpupool *c); ++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu); ++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu); + int schedule_cpu_rm(unsigned int cpu); + int sched_move_domain(struct domain *d, struct cpupool *c); + struct cpupool *cpupool_get_by_id(unsigned int poolid); +-- +2.37.3 + diff --git a/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch b/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch deleted file mode 100644 index 1312bda..0000000 --- a/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 8e11ec8fbf6f933f8854f4bc54226653316903f2 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 7 Jun 2022 14:08:06 +0200 -Subject: [PATCH 21/51] PCI: don't allow "pci-phantom=" to mark real devices as - phantom functions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -IOMMU code mapping / unmapping devices and interrupts will misbehave if -a wrong command line option declared a function "phantom" when there's a -real device at that position. Warn about this and adjust the specified -stride (in the worst case ignoring the option altogether). - -Requested-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 444b555dc9e09fa3ce90f066e0c88dec9b47f422 -master date: 2022-05-20 12:20:35 +0200 ---- - xen/drivers/passthrough/pci.c | 19 ++++++++++++++++++- - 1 file changed, 18 insertions(+), 1 deletion(-) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 395958698e6a..e0491c908f10 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -382,7 +382,24 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn) - phantom_devs[i].slot == PCI_SLOT(devfn) && - phantom_devs[i].stride > PCI_FUNC(devfn) ) - { -- pdev->phantom_stride = phantom_devs[i].stride; -+ pci_sbdf_t sbdf = pdev->sbdf; -+ unsigned int stride = phantom_devs[i].stride; -+ -+ while ( (sbdf.fn += stride) > PCI_FUNC(devfn) ) -+ { -+ if ( pci_conf_read16(sbdf, PCI_VENDOR_ID) == 0xffff && -+ pci_conf_read16(sbdf, PCI_DEVICE_ID) == 0xffff ) -+ continue; -+ stride <<= 1; -+ printk(XENLOG_WARNING -+ "%pp looks to be a real device; bumping %04x:%02x:%02x stride to %u\n", -+ &sbdf, phantom_devs[i].seg, -+ phantom_devs[i].bus, phantom_devs[i].slot, -+ stride); -+ sbdf = pdev->sbdf; -+ } -+ if ( PCI_FUNC(stride) ) -+ pdev->phantom_stride = stride; - break; - } - } --- -2.35.1 - diff --git a/0021-xen-sched-fix-cpu-hotplug.patch b/0021-xen-sched-fix-cpu-hotplug.patch new file mode 100644 index 0000000..ea0b732 --- /dev/null +++ b/0021-xen-sched-fix-cpu-hotplug.patch @@ -0,0 +1,307 @@ +From 4f3204c2bc66db18c61600dd3e08bf1fd9584a1b Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 11 Oct 2022 15:00:19 +0200 +Subject: [PATCH 21/26] xen/sched: fix cpu hotplug 
+ +Cpu unplugging is calling schedule_cpu_rm() via stop_machine_run() with +interrupts disabled, thus any memory allocation or freeing must be +avoided. + +Since commit 5047cd1d5dea ("xen/common: Use enhanced +ASSERT_ALLOC_CONTEXT in xmalloc()") this restriction is being enforced +via an assertion, which will now fail. + +Fix this by allocating needed memory before entering stop_machine_run() +and freeing any memory only after having finished stop_machine_run(). + +Fixes: 1ec410112cdd ("xen/sched: support differing granularity in schedule_cpu_[add/rm]()") +Reported-by: Gao Ruifeng <ruifeng.gao@intel.com> +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Tested-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: d84473689611eed32fd90b27e614f28af767fa3f +master date: 2022-09-05 11:42:30 +0100 +--- + xen/common/sched/core.c | 25 +++++++++++--- + xen/common/sched/cpupool.c | 69 +++++++++++++++++++++++++++++--------- + xen/common/sched/private.h | 5 +-- + 3 files changed, 77 insertions(+), 22 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index 2decb1161a63..900aab8f66a7 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -3231,7 +3231,7 @@ out: + * by alloc_cpu_rm_data() is modified only in case the cpu in question is + * being moved from or to a cpupool. + */ +-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) ++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc) + { + struct cpu_rm_data *data; + const struct sched_resource *sr; +@@ -3244,6 +3244,17 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) + if ( !data ) + goto out; + ++ if ( aff_alloc ) ++ { ++ if ( !alloc_affinity_masks(&data->affinity) ) ++ { ++ XFREE(data); ++ goto out; ++ } ++ } ++ else ++ memset(&data->affinity, 0, sizeof(data->affinity)); ++ + data->old_ops = sr->scheduler; + data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv; + data->ppriv_old = sr->sched_priv; +@@ -3264,6 +3275,7 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu) + { + while ( idx > 0 ) + sched_res_free(&data->sr[--idx]->rcu); ++ free_affinity_masks(&data->affinity); + XFREE(data); + goto out; + } +@@ -3286,6 +3298,7 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu) + { + sched_free_udata(mem->old_ops, mem->vpriv_old); + sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu); ++ free_affinity_masks(&mem->affinity); + + xfree(mem); + } +@@ -3296,17 +3309,18 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu) + * The cpu is already marked as "free" and not valid any longer for its + * cpupool. 
+ */ +-int schedule_cpu_rm(unsigned int cpu) ++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *data) + { + struct sched_resource *sr; +- struct cpu_rm_data *data; + struct sched_unit *unit; + spinlock_t *old_lock; + unsigned long flags; + int idx = 0; + unsigned int cpu_iter; ++ bool free_data = !data; + +- data = alloc_cpu_rm_data(cpu); ++ if ( !data ) ++ data = alloc_cpu_rm_data(cpu, false); + if ( !data ) + return -ENOMEM; + +@@ -3374,7 +3388,8 @@ int schedule_cpu_rm(unsigned int cpu) + sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu); + + rcu_read_unlock(&sched_res_rculock); +- free_cpu_rm_data(data, cpu); ++ if ( free_data ) ++ free_cpu_rm_data(data, cpu); + + return 0; + } +diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c +index 45b6ff99561a..b5a948639aad 100644 +--- a/xen/common/sched/cpupool.c ++++ b/xen/common/sched/cpupool.c +@@ -402,22 +402,28 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c) + } + + /* Update affinities of all domains in a cpupool. */ +-static void cpupool_update_node_affinity(const struct cpupool *c) ++static void cpupool_update_node_affinity(const struct cpupool *c, ++ struct affinity_masks *masks) + { +- struct affinity_masks masks; ++ struct affinity_masks local_masks; + struct domain *d; + +- if ( !alloc_affinity_masks(&masks) ) +- return; ++ if ( !masks ) ++ { ++ if ( !alloc_affinity_masks(&local_masks) ) ++ return; ++ masks = &local_masks; ++ } + + rcu_read_lock(&domlist_read_lock); + + for_each_domain_in_cpupool(d, c) +- domain_update_node_aff(d, &masks); ++ domain_update_node_aff(d, masks); + + rcu_read_unlock(&domlist_read_lock); + +- free_affinity_masks(&masks); ++ if ( masks == &local_masks ) ++ free_affinity_masks(masks); + } + + /* +@@ -451,15 +457,17 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu) + + rcu_read_unlock(&sched_res_rculock); + +- cpupool_update_node_affinity(c); ++ cpupool_update_node_affinity(c, NULL); + + return 0; + } + +-static int cpupool_unassign_cpu_finish(struct cpupool *c) ++static int cpupool_unassign_cpu_finish(struct cpupool *c, ++ struct cpu_rm_data *mem) + { + int cpu = cpupool_moving_cpu; + const cpumask_t *cpus; ++ struct affinity_masks *masks = mem ? &mem->affinity : NULL; + int ret; + + if ( c != cpupool_cpu_moving ) +@@ -482,7 +490,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + */ + if ( !ret ) + { +- ret = schedule_cpu_rm(cpu); ++ ret = schedule_cpu_rm(cpu, mem); + if ( ret ) + cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus); + else +@@ -494,7 +502,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c) + } + rcu_read_unlock(&sched_res_rculock); + +- cpupool_update_node_affinity(c); ++ cpupool_update_node_affinity(c, masks); + + return ret; + } +@@ -558,7 +566,7 @@ static long cpupool_unassign_cpu_helper(void *info) + cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu); + spin_lock(&cpupool_lock); + +- ret = cpupool_unassign_cpu_finish(c); ++ ret = cpupool_unassign_cpu_finish(c, NULL); + + spin_unlock(&cpupool_lock); + debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret); +@@ -701,7 +709,7 @@ static int cpupool_cpu_add(unsigned int cpu) + * This function is called in stop_machine context, so we can be sure no + * non-idle vcpu is active on the system. 
+ */ +-static void cpupool_cpu_remove(unsigned int cpu) ++static void cpupool_cpu_remove(unsigned int cpu, struct cpu_rm_data *mem) + { + int ret; + +@@ -709,7 +717,7 @@ static void cpupool_cpu_remove(unsigned int cpu) + + if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) ) + { +- ret = cpupool_unassign_cpu_finish(cpupool0); ++ ret = cpupool_unassign_cpu_finish(cpupool0, mem); + BUG_ON(ret); + } + cpumask_clear_cpu(cpu, &cpupool_free_cpus); +@@ -775,7 +783,7 @@ static void cpupool_cpu_remove_forced(unsigned int cpu) + { + ret = cpupool_unassign_cpu_start(c, master_cpu); + BUG_ON(ret); +- ret = cpupool_unassign_cpu_finish(c); ++ ret = cpupool_unassign_cpu_finish(c, NULL); + BUG_ON(ret); + } + } +@@ -993,12 +1001,24 @@ void dump_runq(unsigned char key) + static int cpu_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) + { ++ static struct cpu_rm_data *mem; ++ + unsigned int cpu = (unsigned long)hcpu; + int rc = 0; + + switch ( action ) + { + case CPU_DOWN_FAILED: ++ if ( system_state <= SYS_STATE_active ) ++ { ++ if ( mem ) ++ { ++ free_cpu_rm_data(mem, cpu); ++ mem = NULL; ++ } ++ rc = cpupool_cpu_add(cpu); ++ } ++ break; + case CPU_ONLINE: + if ( system_state <= SYS_STATE_active ) + rc = cpupool_cpu_add(cpu); +@@ -1006,12 +1026,31 @@ static int cpu_callback( + case CPU_DOWN_PREPARE: + /* Suspend/Resume don't change assignments of cpus to cpupools. */ + if ( system_state <= SYS_STATE_active ) ++ { + rc = cpupool_cpu_remove_prologue(cpu); ++ if ( !rc ) ++ { ++ ASSERT(!mem); ++ mem = alloc_cpu_rm_data(cpu, true); ++ rc = mem ? 0 : -ENOMEM; ++ } ++ } + break; + case CPU_DYING: + /* Suspend/Resume don't change assignments of cpus to cpupools. */ + if ( system_state <= SYS_STATE_active ) +- cpupool_cpu_remove(cpu); ++ { ++ ASSERT(mem); ++ cpupool_cpu_remove(cpu, mem); ++ } ++ break; ++ case CPU_DEAD: ++ if ( system_state <= SYS_STATE_active ) ++ { ++ ASSERT(mem); ++ free_cpu_rm_data(mem, cpu); ++ mem = NULL; ++ } + break; + case CPU_RESUME_FAILED: + cpupool_cpu_remove_forced(cpu); +diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h +index e286849a1312..0126a4bb9ed3 100644 +--- a/xen/common/sched/private.h ++++ b/xen/common/sched/private.h +@@ -603,6 +603,7 @@ void free_affinity_masks(struct affinity_masks *affinity); + + /* Memory allocation related data for schedule_cpu_rm(). 
*/ + struct cpu_rm_data { ++ struct affinity_masks affinity; + const struct scheduler *old_ops; + void *ppriv_old; + void *vpriv_old; +@@ -617,9 +618,9 @@ struct scheduler *scheduler_alloc(unsigned int sched_id); + void scheduler_free(struct scheduler *sched); + int cpu_disable_scheduler(unsigned int cpu); + int schedule_cpu_add(unsigned int cpu, struct cpupool *c); +-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu); ++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc); + void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu); +-int schedule_cpu_rm(unsigned int cpu); ++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *mem); + int sched_move_domain(struct domain *d, struct cpupool *c); + struct cpupool *cpupool_get_by_id(unsigned int poolid); + void cpupool_put(struct cpupool *pool); +-- +2.37.3 + diff --git a/0022-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch b/0022-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch new file mode 100644 index 0000000..03f485a --- /dev/null +++ b/0022-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch @@ -0,0 +1,58 @@ +From 2b694dd2932be78431b14257f23b738f2fc8f6a1 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 11 Oct 2022 15:00:33 +0200 +Subject: [PATCH 22/26] Config.mk: correct PIE-related option(s) in + EMBEDDED_EXTRA_CFLAGS + +I haven't been able to find evidence of "-nopie" ever having been a +supported compiler option. The correct spelling is "-no-pie". +Furthermore like "-pie" this is an option which is solely passed to the +linker. The compiler only recognizes "-fpie" / "-fPIE" / "-fno-pie", and +it doesn't infer these options from "-pie" / "-no-pie". + +Add the compiler recognized form, but for the possible case of the +variable also being used somewhere for linking keep the linker option as +well (with corrected spelling). + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Julien Grall <jgrall@amazon.com> + +Build: Drop -no-pie from EMBEDDED_EXTRA_CFLAGS + +This breaks all Clang builds, as demostrated by Gitlab CI. + +Contrary to the description in ecd6b9759919, -no-pie is not even an option +passed to the linker. GCC's actual behaviour is to inhibit the passing of +-pie to the linker, as well as selecting different cr0 artefacts to be linked. + +EMBEDDED_EXTRA_CFLAGS is not used for $(CC)-doing-linking, and not liable to +gain such a usecase. 
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Jan Beulich <jbeulich@suse.com> +Tested-by: Stefano Stabellini <sstabellini@kernel.org> +Fixes: ecd6b9759919 ("Config.mk: correct PIE-related option(s) in EMBEDDED_EXTRA_CFLAGS") +master commit: ecd6b9759919fa6335b0be1b5fc5cce29a30c4f1 +master date: 2022-09-08 09:25:26 +0200 +master commit: 13a7c0074ac8fb31f6c0485429b7a20a1946cb22 +master date: 2022-09-27 15:40:42 -0700 +--- + Config.mk | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Config.mk b/Config.mk +index 46de3cd1e0e1..6f95067b8de6 100644 +--- a/Config.mk ++++ b/Config.mk +@@ -197,7 +197,7 @@ endif + APPEND_LDFLAGS += $(foreach i, $(APPEND_LIB), -L$(i)) + APPEND_CFLAGS += $(foreach i, $(APPEND_INCLUDES), -I$(i)) + +-EMBEDDED_EXTRA_CFLAGS := -nopie -fno-stack-protector -fno-stack-protector-all ++EMBEDDED_EXTRA_CFLAGS := -fno-pie -fno-stack-protector -fno-stack-protector-all + EMBEDDED_EXTRA_CFLAGS += -fno-exceptions -fno-asynchronous-unwind-tables + + XEN_EXTFILES_URL ?= http://xenbits.xen.org/xen-extfiles +-- +2.37.3 + diff --git a/0022-x86-pv-Clean-up-_get_page_type.patch b/0022-x86-pv-Clean-up-_get_page_type.patch deleted file mode 100644 index 0270beb..0000000 --- a/0022-x86-pv-Clean-up-_get_page_type.patch +++ /dev/null @@ -1,180 +0,0 @@ -From b152dfbc3ad71a788996440b18174d995c3bffc9 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:27:19 +0200 -Subject: [PATCH 22/51] x86/pv: Clean up _get_page_type() - -Various fixes for clarity, ahead of making complicated changes. - - * Split the overflow check out of the if/else chain for type handling, as - it's somewhat unrelated. - * Comment the main if/else chain to explain what is going on. Adjust one - ASSERT() and state the bit layout for validate-locked and partial states. - * Correct the comment about TLB flushing, as it's backwards. The problem - case is when writeable mappings are retained to a page becoming read-only, - as it allows the guest to bypass Xen's safety checks for updates. - * Reduce the scope of 'y'. It is an artefact of the cmpxchg loop and not - valid for use by subsequent logic. Switch to using ACCESS_ONCE() to treat - all reads as explicitly volatile. The only thing preventing the validated - wait-loop being infinite is the compiler barrier hidden in cpu_relax(). - * Replace one page_get_owner(page) with the already-calculated 'd' already in - scope. - -No functional change. - -This is part of XSA-401 / CVE-2022-26362. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -master commit: 9186e96b199e4f7e52e033b238f9fe869afb69c7 -master date: 2022-06-09 14:20:36 +0200 ---- - xen/arch/x86/mm.c | 72 +++++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 61 insertions(+), 11 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 4ee2de11051d..79ad7fdd2b82 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2906,16 +2906,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags, - static int _get_page_type(struct page_info *page, unsigned long type, - bool preemptible) - { -- unsigned long nx, x, y = page->u.inuse.type_info; -+ unsigned long nx, x; - int rc = 0; - - ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); - ASSERT(!in_irq()); - -- for ( ; ; ) -+ for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; ) - { - x = y; - nx = x + 1; -+ - if ( unlikely((nx & PGT_count_mask) == 0) ) - { - gdprintk(XENLOG_WARNING, -@@ -2923,8 +2924,15 @@ static int _get_page_type(struct page_info *page, unsigned long type, - mfn_x(page_to_mfn(page))); - return -EINVAL; - } -- else if ( unlikely((x & PGT_count_mask) == 0) ) -+ -+ if ( unlikely((x & PGT_count_mask) == 0) ) - { -+ /* -+ * Typeref 0 -> 1. -+ * -+ * Type changes are permitted when the typeref is 0. If the type -+ * actually changes, the page needs re-validating. -+ */ - struct domain *d = page_get_owner(page); - - if ( d && shadow_mode_enabled(d) ) -@@ -2935,8 +2943,8 @@ static int _get_page_type(struct page_info *page, unsigned long type, - { - /* - * On type change we check to flush stale TLB entries. It is -- * vital that no other CPUs are left with mappings of a frame -- * which is about to become writeable to the guest. -+ * vital that no other CPUs are left with writeable mappings -+ * to a frame which is intending to become pgtable/segdesc. - */ - cpumask_t *mask = this_cpu(scratch_cpumask); - -@@ -2948,7 +2956,7 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!cpumask_empty(mask)) && - /* Shadow mode: track only writable pages. */ -- (!shadow_mode_enabled(page_get_owner(page)) || -+ (!shadow_mode_enabled(d) || - ((nx & PGT_type_mask) == PGT_writable_page)) ) - { - perfc_incr(need_flush_tlb_flush); -@@ -2979,7 +2987,14 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) - { -- /* Don't log failure if it could be a recursive-mapping attempt. */ -+ /* -+ * else, we're trying to take a new reference, of the wrong type. -+ * -+ * This (being able to prohibit use of the wrong type) is what the -+ * typeref system exists for, but skip printing the failure if it -+ * looks like a recursive mapping, as subsequent logic might -+ * ultimately permit the attempt. -+ */ - if ( ((x & PGT_type_mask) == PGT_l2_page_table) && - (type == PGT_l1_page_table) ) - return -EINVAL; -@@ -2998,18 +3013,46 @@ static int _get_page_type(struct page_info *page, unsigned long type, - } - else if ( unlikely(!(x & PGT_validated)) ) - { -+ /* -+ * else, the count is non-zero, and we're grabbing the right type; -+ * but the page hasn't been validated yet. -+ * -+ * The page is in one of two states (depending on PGT_partial), -+ * and should have exactly one reference. 
-+ */
-+ ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1));
-+
- if ( !(x & PGT_partial) )
- {
-- /* Someone else is updating validation of this page. Wait... */
-+ /*
-+ * The page has been left in the "validate locked" state
-+ * (i.e. PGT_[type] | 1) which means that a concurrent caller
-+ * of _get_page_type() is in the middle of validation.
-+ *
-+ * Spin waiting for the concurrent user to complete (partial
-+ * or fully validated), then restart our attempt to acquire a
-+ * type reference.
-+ */
- do {
- if ( preemptible && hypercall_preempt_check() )
- return -EINTR;
- cpu_relax();
-- } while ( (y = page->u.inuse.type_info) == x );
-+ } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x );
- continue;
- }
-- /* Type ref count was left at 1 when PGT_partial got set. */
-- ASSERT((x & PGT_count_mask) == 1);
-+
-+ /*
-+ * The page has been left in the "partial" state
-+ * (i.e., PGT_[type] | PGT_partial | 1).
-+ *
-+ * Rather than bumping the type count, we need to try to grab the
-+ * validation lock; if we succeed, we need to validate the page,
-+ * then drop the general ref associated with the PGT_partial bit.
-+ *
-+ * We grab the validation lock by setting nx to (PGT_[type] | 1)
-+ * (i.e., non-zero type count, neither PGT_validated nor
-+ * PGT_partial set).
-+ */
- nx = x & ~PGT_partial;
- }
-
-@@ -3058,6 +3101,13 @@ static int _get_page_type(struct page_info *page, unsigned long type,
- }
-
- out:
-+ /*
-+ * Did we drop the PGT_partial bit when acquiring the typeref? If so,
-+ * drop the general reference that went along with it.
-+ *
-+ * N.B. validate_page() may have re-set PGT_partial, not reflected in
-+ * nx, but will have taken an extra ref when doing so.
-+ */
- if ( (x & PGT_partial) && !(nx & PGT_partial) )
- put_page(page);
-
---
-2.35.1
-
diff --git a/0023-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch b/0023-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
new file mode 100644
index 0000000..45f7509
--- /dev/null
+++ b/0023-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
@@ -0,0 +1,41 @@
+From 49510071ee93905378e54664778760ed3908d447 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:00:59 +0200
+Subject: [PATCH 23/26] tools/xenstore: minor fix of the migration stream doc
+
+Drop mentioning the non-existent read-only socket in the migration
+stream description document.
+
+The related record field was removed in commit 8868a0e3f674 ("docs:
+update the xenstore migration stream documentation").
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+master commit: ace1d2eff80d3d66c37ae765dae3e3cb5697e5a4
+master date: 2022-09-08 09:25:58 +0200
+---
+ docs/designs/xenstore-migration.md | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/docs/designs/xenstore-migration.md b/docs/designs/xenstore-migration.md
+index 5f1155273ec3..78530bbb0ef4 100644
+--- a/docs/designs/xenstore-migration.md
++++ b/docs/designs/xenstore-migration.md
+@@ -129,11 +129,9 @@ xenstored state that needs to be restored.
+ | `evtchn-fd` | The file descriptor used to communicate with |
+ | | the event channel driver |
+
+-xenstored will resume in the original process context. Hence `rw-socket-fd` and
+-`ro-socket-fd` simply specify the file descriptors of the sockets. Sockets
+-are not always used, however, and so -1 will be used to denote an unused
+-socket.
+-
++xenstored will resume in the original process context.
Hence `rw-socket-fd` ++simply specifies the file descriptor of the socket. Sockets are not always ++used, however, and so -1 will be used to denote an unused socket. + + \pagebreak + +-- +2.37.3 + diff --git a/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch b/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch deleted file mode 100644 index 1e3febd..0000000 --- a/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch +++ /dev/null @@ -1,201 +0,0 @@ -From 8dab3f79b122e69cbcdebca72cdc14f004ee2193 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:27:37 +0200 -Subject: [PATCH 23/51] x86/pv: Fix ABAC cmpxchg() race in _get_page_type() - -_get_page_type() suffers from a race condition where it incorrectly assumes -that because 'x' was read and a subsequent a cmpxchg() succeeds, the type -cannot have changed in-between. Consider: - -CPU A: - 1. Creates an L2e referencing pg - `-> _get_page_type(pg, PGT_l1_page_table), sees count 0, type PGT_writable_page - 2. Issues flush_tlb_mask() -CPU B: - 3. Creates a writeable mapping of pg - `-> _get_page_type(pg, PGT_writable_page), count increases to 1 - 4. Writes into new mapping, creating a TLB entry for pg - 5. Removes the writeable mapping of pg - `-> _put_page_type(pg), count goes back down to 0 -CPU A: - 7. Issues cmpxchg(), setting count 1, type PGT_l1_page_table - -CPU B now has a writeable mapping to pg, which Xen believes is a pagetable and -suitably protected (i.e. read-only). The TLB flush in step 2 must be deferred -until after the guest is prohibited from creating new writeable mappings, -which is after step 7. - -Defer all safety actions until after the cmpxchg() has successfully taken the -intended typeref, because that is what prevents concurrent users from using -the old type. - -Also remove the early validation for writeable and shared pages. This removes -race conditions where one half of a parallel mapping attempt can return -successfully before: - * The IOMMU pagetables are in sync with the new page type - * Writeable mappings to shared pages have been torn down - -This is part of XSA-401 / CVE-2022-26362. - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -master commit: 8cc5036bc385112a82f1faff27a0970e6440dfed -master date: 2022-06-09 14:21:04 +0200 ---- - xen/arch/x86/mm.c | 116 ++++++++++++++++++++++++++-------------------- - 1 file changed, 67 insertions(+), 49 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 79ad7fdd2b82..c6429b0f749a 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2933,56 +2933,12 @@ static int _get_page_type(struct page_info *page, unsigned long type, - * Type changes are permitted when the typeref is 0. If the type - * actually changes, the page needs re-validating. - */ -- struct domain *d = page_get_owner(page); -- -- if ( d && shadow_mode_enabled(d) ) -- shadow_prepare_page_type_change(d, page, type); - - ASSERT(!(x & PGT_pae_xen_l2)); - if ( (x & PGT_type_mask) != type ) - { -- /* -- * On type change we check to flush stale TLB entries. It is -- * vital that no other CPUs are left with writeable mappings -- * to a frame which is intending to become pgtable/segdesc. 
-- */ -- cpumask_t *mask = this_cpu(scratch_cpumask); -- -- BUG_ON(in_irq()); -- cpumask_copy(mask, d->dirty_cpumask); -- -- /* Don't flush if the timestamp is old enough */ -- tlbflush_filter(mask, page->tlbflush_timestamp); -- -- if ( unlikely(!cpumask_empty(mask)) && -- /* Shadow mode: track only writable pages. */ -- (!shadow_mode_enabled(d) || -- ((nx & PGT_type_mask) == PGT_writable_page)) ) -- { -- perfc_incr(need_flush_tlb_flush); -- /* -- * If page was a page table make sure the flush is -- * performed using an IPI in order to avoid changing the -- * type of a page table page under the feet of -- * spurious_page_fault(). -- */ -- flush_mask(mask, -- (x & PGT_type_mask) && -- (x & PGT_type_mask) <= PGT_root_page_table -- ? FLUSH_TLB | FLUSH_FORCE_IPI -- : FLUSH_TLB); -- } -- -- /* We lose existing type and validity. */ - nx &= ~(PGT_type_mask | PGT_validated); - nx |= type; -- -- /* -- * No special validation needed for writable pages. -- * Page tables and GDT/LDT need to be scanned for validity. -- */ -- if ( type == PGT_writable_page || type == PGT_shared_page ) -- nx |= PGT_validated; - } - } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) -@@ -3063,6 +3019,56 @@ static int _get_page_type(struct page_info *page, unsigned long type, - return -EINTR; - } - -+ /* -+ * One typeref has been taken and is now globally visible. -+ * -+ * The page is either in the "validate locked" state (PGT_[type] | 1) or -+ * fully validated (PGT_[type] | PGT_validated | >0). -+ */ -+ -+ if ( unlikely((x & PGT_count_mask) == 0) ) -+ { -+ struct domain *d = page_get_owner(page); -+ -+ if ( d && shadow_mode_enabled(d) ) -+ shadow_prepare_page_type_change(d, page, type); -+ -+ if ( (x & PGT_type_mask) != type ) -+ { -+ /* -+ * On type change we check to flush stale TLB entries. It is -+ * vital that no other CPUs are left with writeable mappings -+ * to a frame which is intending to become pgtable/segdesc. -+ */ -+ cpumask_t *mask = this_cpu(scratch_cpumask); -+ -+ BUG_ON(in_irq()); -+ cpumask_copy(mask, d->dirty_cpumask); -+ -+ /* Don't flush if the timestamp is old enough */ -+ tlbflush_filter(mask, page->tlbflush_timestamp); -+ -+ if ( unlikely(!cpumask_empty(mask)) && -+ /* Shadow mode: track only writable pages. */ -+ (!shadow_mode_enabled(d) || -+ ((nx & PGT_type_mask) == PGT_writable_page)) ) -+ { -+ perfc_incr(need_flush_tlb_flush); -+ /* -+ * If page was a page table make sure the flush is -+ * performed using an IPI in order to avoid changing the -+ * type of a page table page under the feet of -+ * spurious_page_fault(). -+ */ -+ flush_mask(mask, -+ (x & PGT_type_mask) && -+ (x & PGT_type_mask) <= PGT_root_page_table -+ ? FLUSH_TLB | FLUSH_FORCE_IPI -+ : FLUSH_TLB); -+ } -+ } -+ } -+ - if ( unlikely(((x & PGT_type_mask) == PGT_writable_page) != - (type == PGT_writable_page)) ) - { -@@ -3091,13 +3097,25 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!(nx & PGT_validated)) ) - { -- if ( !(x & PGT_partial) ) -+ /* -+ * No special validation needed for writable or shared pages. Page -+ * tables and GDT/LDT need to have their contents audited. -+ * -+ * per validate_page(), non-atomic updates are fine here. 
-+ */ -+ if ( type == PGT_writable_page || type == PGT_shared_page ) -+ page->u.inuse.type_info |= PGT_validated; -+ else - { -- page->nr_validated_ptes = 0; -- page->partial_flags = 0; -- page->linear_pt_count = 0; -+ if ( !(x & PGT_partial) ) -+ { -+ page->nr_validated_ptes = 0; -+ page->partial_flags = 0; -+ page->linear_pt_count = 0; -+ } -+ -+ rc = validate_page(page, type, preemptible); - } -- rc = validate_page(page, type, preemptible); - } - - out: --- -2.35.1 - diff --git a/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch b/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch deleted file mode 100644 index 409b72f..0000000 --- a/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 9cfd796ae05421ded8e4f70b2c55352491cfa841 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:27:53 +0200 -Subject: [PATCH 24/51] x86/page: Introduce _PAGE_* constants for memory types - -... rather than opencoding the PAT/PCD/PWT attributes in __PAGE_HYPERVISOR_* -constants. These are going to be needed by forthcoming logic. - -No functional change. - -This is part of XSA-402. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 1be8707c75bf4ba68447c74e1618b521dd432499 -master date: 2022-06-09 14:21:38 +0200 ---- - xen/include/asm-x86/page.h | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h -index 1d080cffbe84..2e542050f65a 100644 ---- a/xen/include/asm-x86/page.h -+++ b/xen/include/asm-x86/page.h -@@ -331,6 +331,14 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - - #define PAGE_CACHE_ATTRS (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) - -+/* Memory types, encoded under Xen's choice of MSR_PAT. */ -+#define _PAGE_WB ( 0) -+#define _PAGE_WT ( _PAGE_PWT) -+#define _PAGE_UCM ( _PAGE_PCD ) -+#define _PAGE_UC ( _PAGE_PCD | _PAGE_PWT) -+#define _PAGE_WC (_PAGE_PAT ) -+#define _PAGE_WP (_PAGE_PAT | _PAGE_PWT) -+ - /* - * Debug option: Ensure that granted mappings are not implicitly unmapped. 
- * WARNING: This will need to be disabled to run OSes that use the spare PTE -@@ -349,8 +357,8 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - #define __PAGE_HYPERVISOR_RX (_PAGE_PRESENT | _PAGE_ACCESSED) - #define __PAGE_HYPERVISOR (__PAGE_HYPERVISOR_RX | \ - _PAGE_DIRTY | _PAGE_RW) --#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_PCD) --#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_PCD | _PAGE_PWT) -+#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_UCM) -+#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_UC) - #define __PAGE_HYPERVISOR_SHSTK (__PAGE_HYPERVISOR_RO | _PAGE_DIRTY) - - #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages mappings */ --- -2.35.1 - diff --git a/0024-xen-gnttab-fix-gnttab_acquire_resource.patch b/0024-xen-gnttab-fix-gnttab_acquire_resource.patch new file mode 100644 index 0000000..898503f --- /dev/null +++ b/0024-xen-gnttab-fix-gnttab_acquire_resource.patch @@ -0,0 +1,69 @@ +From b9560762392c01b3ee84148c07be8017cb42dbc9 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 11 Oct 2022 15:01:22 +0200 +Subject: [PATCH 24/26] xen/gnttab: fix gnttab_acquire_resource() + +Commit 9dc46386d89d ("gnttab: work around "may be used uninitialized" +warning") was wrong, as vaddrs can legitimately be NULL in case +XENMEM_resource_grant_table_id_status was specified for a grant table +v1. This would result in crashes in debug builds due to +ASSERT_UNREACHABLE() triggering. + +Check vaddrs only to be NULL in the rc == 0 case. + +Expand the tests in tools/tests/resource to tickle this path, and verify that +using XENMEM_resource_grant_table_id_status on a v1 grant table fails. + +Fixes: 9dc46386d89d ("gnttab: work around "may be used uninitialized" warning") +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> # xen +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 52daa6a8483e4fbd6757c9d1b791e23931791608 +master date: 2022-09-09 16:28:38 +0100 +--- + tools/tests/resource/test-resource.c | 15 +++++++++++++++ + xen/common/grant_table.c | 2 +- + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c +index 0557f8a1b585..37dfff4dcd20 100644 +--- a/tools/tests/resource/test-resource.c ++++ b/tools/tests/resource/test-resource.c +@@ -106,6 +106,21 @@ static void test_gnttab(uint32_t domid, unsigned int nr_frames, + if ( rc ) + return fail(" Fail: Unmap grant table %d - %s\n", + errno, strerror(errno)); ++ ++ /* ++ * Verify that an attempt to map the status frames fails, as the domain is ++ * in gnttab v1 mode. ++ */ ++ res = xenforeignmemory_map_resource( ++ fh, domid, XENMEM_resource_grant_table, ++ XENMEM_resource_grant_table_id_status, 0, 1, ++ (void **)&gnttab, PROT_READ | PROT_WRITE, 0); ++ ++ if ( res ) ++ { ++ fail(" Fail: Managed to map gnttab v2 status frames in v1 mode\n"); ++ xenforeignmemory_unmap_resource(fh, res); ++ } + } + + static void test_domain_configurations(void) +diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c +index d8ca645b96ff..76272b3c8add 100644 +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -4142,7 +4142,7 @@ int gnttab_acquire_resource( + * on non-error paths, and hence it needs setting to NULL at the top of the + * function. Leave some runtime safety. 
+ */ +- if ( !vaddrs ) ++ if ( !rc && !vaddrs ) + { + ASSERT_UNREACHABLE(); + rc = -ENODATA; +-- +2.37.3 + diff --git a/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch b/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch deleted file mode 100644 index 0a24a0a..0000000 --- a/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch +++ /dev/null @@ -1,223 +0,0 @@ -From 74193f4292d9cfc2874866e941d9939d8f33fcef Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:28:23 +0200 -Subject: [PATCH 25/51] x86: Don't change the cacheability of the directmap - -Changeset 55f97f49b7ce ("x86: Change cache attributes of Xen 1:1 page mappings -in response to guest mapping requests") attempted to keep the cacheability -consistent between different mappings of the same page. - -The reason wasn't described in the changelog, but it is understood to be in -regards to a concern over machine check exceptions, owing to errata when using -mixed cacheabilities. It did this primarily by updating Xen's mapping of the -page in the direct map when the guest mapped a page with reduced cacheability. - -Unfortunately, the logic didn't actually prevent mixed cacheability from -occurring: - * A guest could map a page normally, and then map the same page with - different cacheability; nothing prevented this. - * The cacheability of the directmap was always latest-takes-precedence in - terms of guest requests. - * Grant-mapped frames with lesser cacheability didn't adjust the page's - cacheattr settings. - * The map_domain_page() function still unconditionally created WB mappings, - irrespective of the page's cacheattr settings. - -Additionally, update_xen_mappings() had a bug where the alias calculation was -wrong for mfn's which were .init content, which should have been treated as -fully guest pages, not Xen pages. - -Worse yet, the logic introduced a vulnerability whereby necessary -pagetable/segdesc adjustments made by Xen in the validation logic could become -non-coherent between the cache and main memory. The CPU could subsequently -operate on the stale value in the cache, rather than the safe value in main -memory. - -The directmap contains primarily mappings of RAM. PAT/MTRR conflict -resolution is asymmetric, and generally for MTRR=WB ranges, PAT of lesser -cacheability resolves to being coherent. The special case is WC mappings, -which are non-coherent against MTRR=WB regions (except for fully-coherent -CPUs). - -Xen must not have any WC cacheability in the directmap, to prevent Xen's -actions from creating non-coherency. (Guest actions creating non-coherency is -dealt with in subsequent patches.) As all memory types for MTRR=WB ranges -inter-operate coherently, so leave Xen's directmap mappings as WB. - -Only PV guests with access to devices can use reduced-cacheability mappings to -begin with, and they're trusted not to mount DoSs against the system anyway. - -Drop PGC_cacheattr_{base,mask} entirely, and the logic to manipulate them. -Shift the later PGC_* constants up, to gain 3 extra bits in the main reference -count. Retain the check in get_page_from_l1e() for special_pages() because a -guest has no business using reduced cacheability on these. - -This reverts changeset 55f97f49b7ce6c3520c555d19caac6cf3f9a5df0 - -This is CVE-2022-26363, part of XSA-402. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> -master commit: ae09597da34aee6bc5b76475c5eea6994457e854 -master date: 2022-06-09 14:22:08 +0200 ---- - xen/arch/x86/mm.c | 84 ++++------------------------------------ - xen/include/asm-x86/mm.h | 23 +++++------ - 2 files changed, 17 insertions(+), 90 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index c6429b0f749a..ab32d13a1a0d 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -783,28 +783,6 @@ bool is_iomem_page(mfn_t mfn) - return (page_get_owner(page) == dom_io); - } - --static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr) --{ -- int err = 0; -- bool alias = mfn >= PFN_DOWN(xen_phys_start) && -- mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START); -- unsigned long xen_va = -- XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT); -- -- if ( boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) ) -- return 0; -- -- if ( unlikely(alias) && cacheattr ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, 0); -- if ( !err ) -- err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), _mfn(mfn), 1, -- PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr)); -- if ( unlikely(alias) && !cacheattr && !err ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, PAGE_HYPERVISOR); -- -- return err; --} -- - #ifndef NDEBUG - struct mmio_emul_range_ctxt { - const struct domain *d; -@@ -1009,47 +987,14 @@ get_page_from_l1e( - goto could_not_pin; - } - -- if ( pte_flags_to_cacheattr(l1f) != -- ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) ) -+ if ( (l1f & PAGE_CACHE_ATTRS) != _PAGE_WB && is_special_page(page) ) - { -- unsigned long x, nx, y = page->count_info; -- unsigned long cacheattr = pte_flags_to_cacheattr(l1f); -- int err; -- -- if ( is_special_page(page) ) -- { -- if ( write ) -- put_page_type(page); -- put_page(page); -- gdprintk(XENLOG_WARNING, -- "Attempt to change cache attributes of Xen heap page\n"); -- return -EACCES; -- } -- -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base); -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- err = update_xen_mappings(mfn, cacheattr); -- if ( unlikely(err) ) -- { -- cacheattr = y & PGC_cacheattr_mask; -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | cacheattr; -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- if ( write ) -- put_page_type(page); -- put_page(page); -- -- gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn -- " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n", -- mfn, get_gpfn_from_mfn(mfn), -- l1e_get_intpte(l1e), l1e_owner->domain_id); -- return err; -- } -+ if ( write ) -+ put_page_type(page); -+ put_page(page); -+ gdprintk(XENLOG_WARNING, -+ "Attempt to change cache attributes of Xen heap page\n"); -+ return -EACCES; - } - - return 0; -@@ -2467,24 +2412,9 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, - */ - static int cleanup_page_mappings(struct page_info *page) - { -- unsigned int cacheattr = -- (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base; - int rc = 0; - unsigned long mfn = mfn_x(page_to_mfn(page)); - -- /* -- * If we've modified xen mappings as a result of guest cache -- * attributes, restore them to the "normal" state. 
-- */
-- if ( unlikely(cacheattr) )
-- {
-- page->count_info &= ~PGC_cacheattr_mask;
--
-- BUG_ON(is_special_page(page));
--
-- rc = update_xen_mappings(mfn, 0);
-- }
--
- /*
- * If this may be in a PV domain's IOMMU, remove it.
- *
-diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
-index cb9052749963..8a9a43bb0a9d 100644
---- a/xen/include/asm-x86/mm.h
-+++ b/xen/include/asm-x86/mm.h
-@@ -69,25 +69,22 @@
- /* Set when is using a page as a page table */
- #define _PGC_page_table PG_shift(3)
- #define PGC_page_table PG_mask(1, 3)
-- /* 3-bit PAT/PCD/PWT cache-attribute hint. */
--#define PGC_cacheattr_base PG_shift(6)
--#define PGC_cacheattr_mask PG_mask(7, 6)
- /* Page is broken? */
--#define _PGC_broken PG_shift(7)
--#define PGC_broken PG_mask(1, 7)
-+#define _PGC_broken PG_shift(4)
-+#define PGC_broken PG_mask(1, 4)
- /* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */
--#define PGC_state PG_mask(3, 9)
--#define PGC_state_inuse PG_mask(0, 9)
--#define PGC_state_offlining PG_mask(1, 9)
--#define PGC_state_offlined PG_mask(2, 9)
--#define PGC_state_free PG_mask(3, 9)
-+#define PGC_state PG_mask(3, 6)
-+#define PGC_state_inuse PG_mask(0, 6)
-+#define PGC_state_offlining PG_mask(1, 6)
-+#define PGC_state_offlined PG_mask(2, 6)
-+#define PGC_state_free PG_mask(3, 6)
- #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
- /* Page is not reference counted (see below for caveats) */
--#define _PGC_extra PG_shift(10)
--#define PGC_extra PG_mask(1, 10)
-+#define _PGC_extra PG_shift(7)
-+#define PGC_extra PG_mask(1, 7)
-
- /* Count of references to this frame. */
--#define PGC_count_width PG_shift(10)
-+#define PGC_count_width PG_shift(7)
- #define PGC_count_mask ((1UL<<PGC_count_width)-1)
-
- /*
---
-2.35.1
-
diff --git a/0025-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch b/0025-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch
new file mode 100644
index 0000000..849ef60
--- /dev/null
+++ b/0025-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch
@@ -0,0 +1,59 @@
+From 3f4da85ca8816f6617529c80850eaddd80ea0f1f Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:01:36 +0200
+Subject: [PATCH 25/26] x86: wire up VCPUOP_register_vcpu_time_memory_area for
+ 32-bit guests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Ever since its introduction, VCPUOP_register_vcpu_time_memory_area has
+been available only to native domains. Linux, for example, would attempt
+to use it irrespective of guest bitness (including in its so-called
+PVHVM mode) as long as it finds XEN_PVCLOCK_TSC_STABLE_BIT set (which we
+set only for clocksource=tsc, which in turn needs engaging via command
+line option).
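As context for the compat hunk below: this is the call a guest makes to register a secondary vcpu_time_info area. A hypothetical guest-side sketch (types per xen/include/public/vcpu.h; error handling omitted, and the hypercall wrapper name follows the Linux convention):

    static struct vcpu_time_info ti;

    /* Sketch: after this patch, a 32-bit guest can do this too. */
    static void register_secondary_time_area(int cpu)
    {
        struct vcpu_register_time_memory_area area = { .addr.v = &ti };

        HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, cpu, &area);
    }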
+ +Fixes: a5d39947cb89 ("Allow guests to register secondary vcpu_time_info") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: b726541d94bd0a80b5864d17a2cd2e6d73a3fe0a +master date: 2022-09-29 14:47:45 +0200 +--- + xen/arch/x86/x86_64/domain.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c +index c46dccc25a54..d51d99344796 100644 +--- a/xen/arch/x86/x86_64/domain.c ++++ b/xen/arch/x86/x86_64/domain.c +@@ -54,6 +54,26 @@ arch_compat_vcpu_op( + break; + } + ++ case VCPUOP_register_vcpu_time_memory_area: ++ { ++ struct compat_vcpu_register_time_memory_area area = { .addr.p = 0 }; ++ ++ rc = -EFAULT; ++ if ( copy_from_guest(&area.addr.h, arg, 1) ) ++ break; ++ ++ if ( area.addr.h.c != area.addr.p || ++ !compat_handle_okay(area.addr.h, 1) ) ++ break; ++ ++ rc = 0; ++ guest_from_compat_handle(v->arch.time_info_guest, area.addr.h); ++ ++ force_update_vcpu_system_time(v); ++ ++ break; ++ } ++ + case VCPUOP_get_physid: + rc = arch_do_vcpu_op(cmd, v, arg); + break; +-- +2.37.3 + diff --git a/0026-x86-Split-cache_flush-out-of-cache_writeback.patch b/0026-x86-Split-cache_flush-out-of-cache_writeback.patch deleted file mode 100644 index 50f70f4..0000000 --- a/0026-x86-Split-cache_flush-out-of-cache_writeback.patch +++ /dev/null @@ -1,294 +0,0 @@ -From 8eafa2d871ae51d461256e4a14175e24df330c70 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 9 Jun 2022 15:28:48 +0200 -Subject: [PATCH 26/51] x86: Split cache_flush() out of cache_writeback() - -Subsequent changes will want a fully flushing version. - -Use the new helper rather than opencoding it in flush_area_local(). This -resolves an outstanding issue where the conditional sfence is on the wrong -side of the clflushopt loop. clflushopt is ordered with respect to older -stores, not to younger stores. - -Rename gnttab_cache_flush()'s helper to avoid colliding in name. -grant_table.c can see the prototype from cache.h so the build fails -otherwise. - -This is part of XSA-402. 
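The ordering point made above is worth spelling out: CLFLUSHOPT is ordered only against older stores, so the serialising SFENCE must follow the flush loop rather than precede it. A minimal sketch (illustrative only; assumes an assembler that knows clflushopt):

    static void flush_range(const void *addr, size_t size, size_t line)
    {
        const void *end = addr + size;

        /* Align down to the start of the first cache line. */
        addr = (const void *)((unsigned long)addr & ~(line - 1));

        for ( ; addr < end; addr += line )
            asm volatile ( "clflushopt %0" :: "m" (*(const char *)addr) );

        /* The fence goes *after* the loop: a leading sfence would only
         * order the flushes against stores older than itself. */
        asm volatile ( "sfence" ::: "memory" );
    }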
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 9a67ffee3371506e1cbfdfff5b90658d4828f6a2 -master date: 2022-06-09 14:22:38 +0200 ---- - xen/arch/x86/flushtlb.c | 84 ++++++++++++++++++++++++--- - xen/common/grant_table.c | 4 +- - xen/drivers/passthrough/vtd/extern.h | 1 - - xen/drivers/passthrough/vtd/iommu.c | 53 +---------------- - xen/drivers/passthrough/vtd/x86/vtd.c | 5 -- - xen/include/asm-x86/cache.h | 7 +++ - 6 files changed, 88 insertions(+), 66 deletions(-) - -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 25798df50f54..0c912b8669f8 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -234,7 +234,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - if ( flags & FLUSH_CACHE ) - { - const struct cpuinfo_x86 *c = ¤t_cpu_data; -- unsigned long i, sz = 0; -+ unsigned long sz = 0; - - if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) - sz = 1UL << (order + PAGE_SHIFT); -@@ -244,13 +244,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - c->x86_clflush_size && c->x86_cache_size && sz && - ((sz >> 10) < c->x86_cache_size) ) - { -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -- for ( i = 0; i < sz; i += c->x86_clflush_size ) -- alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";" -- " clflush %0", -- "data16 clflush %0", /* clflushopt */ -- X86_FEATURE_CLFLUSHOPT, -- "m" (((const char *)va)[i])); -+ cache_flush(va, sz); - flags &= ~FLUSH_CACHE; - } - else -@@ -265,6 +259,80 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+void cache_flush(const void *addr, unsigned int size) -+{ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. -+ */ -+ unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ const void *end = addr + size; -+ -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+ /* -+ * Note regarding the "ds" prefix use: it's faster to do a clflush -+ * + prefix than a clflush + nop, and hence the prefix is added instead -+ * of letting the alternative framework fill the gap by appending nops. -+ */ -+ alternative_io("ds; clflush %[p]", -+ "data16 clflush %[p]", /* clflushopt */ -+ X86_FEATURE_CLFLUSHOPT, -+ /* no outputs */, -+ [p] "m" (*(const char *)(addr))); -+ } -+ -+ alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+} -+ -+void cache_writeback(const void *addr, unsigned int size) -+{ -+ unsigned int clflush_size; -+ const void *end = addr + size; -+ -+ /* Fall back to CLFLUSH{,OPT} when CLWB isn't available. */ -+ if ( !boot_cpu_has(X86_FEATURE_CLWB) ) -+ return cache_flush(addr, size); -+ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. -+ */ -+ clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+/* -+ * The arguments to a macro must not include preprocessor directives. Doing so -+ * results in undefined behavior, so we have to create some defines here in -+ * order to avoid it. 
-+ */ -+#if defined(HAVE_AS_CLWB) -+# define CLWB_ENCODING "clwb %[p]" -+#elif defined(HAVE_AS_XSAVEOPT) -+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ -+#else -+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ -+#endif -+ -+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) -+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) -+# define INPUT BASE_INPUT -+#else -+# define INPUT(addr) "a" (addr), BASE_INPUT(addr) -+#endif -+ -+ asm volatile (CLWB_ENCODING :: INPUT(addr)); -+ -+#undef INPUT -+#undef BASE_INPUT -+#undef CLWB_ENCODING -+ } -+ -+ asm volatile ("sfence" ::: "memory"); -+} -+ - unsigned int guest_flush_tlb_flags(const struct domain *d) - { - bool shadow = paging_mode_shadow(d); -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index 66f8ce71741c..4c742cd8fe81 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3431,7 +3431,7 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop, - return 0; - } - --static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) -+static int _cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - { - struct domain *d, *owner; - struct page_info *page; -@@ -3525,7 +3525,7 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop, - return -EFAULT; - for ( ; ; ) - { -- int ret = cache_flush(&op, cur_ref); -+ int ret = _cache_flush(&op, cur_ref); - - if ( ret < 0 ) - return ret; -diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h -index 01e010a10d61..401079299725 100644 ---- a/xen/drivers/passthrough/vtd/extern.h -+++ b/xen/drivers/passthrough/vtd/extern.h -@@ -76,7 +76,6 @@ int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu, - struct pci_dev *pdev, - u16 did, u16 size, u64 addr); - --unsigned int get_cache_line_size(void); - void flush_all_cache(void); - - uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node); -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 8975c1de61bc..bc377c9bcfa4 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -31,6 +31,7 @@ - #include <xen/pci.h> - #include <xen/pci_regs.h> - #include <xen/keyhandler.h> -+#include <asm/cache.h> - #include <asm/msi.h> - #include <asm/nops.h> - #include <asm/irq.h> -@@ -206,54 +207,6 @@ static void check_cleanup_domid_map(const struct domain *d, - } - } - --static void sync_cache(const void *addr, unsigned int size) --{ -- static unsigned long clflush_size = 0; -- const void *end = addr + size; -- -- if ( clflush_size == 0 ) -- clflush_size = get_cache_line_size(); -- -- addr -= (unsigned long)addr & (clflush_size - 1); -- for ( ; addr < end; addr += clflush_size ) --/* -- * The arguments to a macro must not include preprocessor directives. Doing so -- * results in undefined behavior, so we have to create some defines here in -- * order to avoid it. 
-- */
--#if defined(HAVE_AS_CLWB)
--# define CLWB_ENCODING "clwb %[p]"
--#elif defined(HAVE_AS_XSAVEOPT)
--# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
--#else
--# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
--#endif
--
--#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
--#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
--# define INPUT BASE_INPUT
--#else
--# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
--#endif
-- /*
-- * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
-- * + prefix than a clflush + nop, and hence the prefix is added instead
-- * of letting the alternative framework fill the gap by appending nops.
-- */
-- alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
-- "data16 clflush %[p]", /* clflushopt */
-- X86_FEATURE_CLFLUSHOPT,
-- CLWB_ENCODING,
-- X86_FEATURE_CLWB, /* no outputs */,
-- INPUT(addr));
--#undef INPUT
--#undef BASE_INPUT
--#undef CLWB_ENCODING
--
-- alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
-- "sfence", X86_FEATURE_CLWB);
--}
--
- /* Allocate page table, return its machine address */
- uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
- {
-@@ -273,7 +226,7 @@ uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
- clear_page(vaddr);
-
- if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
-- sync_cache(vaddr, PAGE_SIZE);
-+ cache_writeback(vaddr, PAGE_SIZE);
- unmap_domain_page(vaddr);
- cur_pg++;
- }
-@@ -1305,7 +1258,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
- iommu->nr_pt_levels = agaw_to_level(agaw);
-
- if ( !ecap_coherent(iommu->ecap) )
-- vtd_ops.sync_cache = sync_cache;
-+ vtd_ops.sync_cache = cache_writeback;
-
- /* allocate domain id bitmap */
- iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
-diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c
-index 6681dccd6970..55f0faa521cb 100644
---- a/xen/drivers/passthrough/vtd/x86/vtd.c
-+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
-@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *va)
- unmap_domain_page(va);
- }
-
--unsigned int get_cache_line_size(void)
--{
-- return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
--}
--
- void flush_all_cache()
- {
- wbinvd();
-diff --git a/xen/include/asm-x86/cache.h b/xen/include/asm-x86/cache.h
-index 1f7173d8c72c..e4770efb22b9 100644
---- a/xen/include/asm-x86/cache.h
-+++ b/xen/include/asm-x86/cache.h
-@@ -11,4 +11,11 @@
-
- #define __read_mostly __section(".data.read_mostly")
-
-+#ifndef __ASSEMBLY__
-+
-+void cache_flush(const void *addr, unsigned int size);
-+void cache_writeback(const void *addr, unsigned int size);
-+
-+#endif
-+
- #endif
---
-2.35.1
-
diff --git a/0026-x86-vpmu-Fix-race-condition-in-vpmu_load.patch b/0026-x86-vpmu-Fix-race-condition-in-vpmu_load.patch
new file mode 100644
index 0000000..0f33747
--- /dev/null
+++ b/0026-x86-vpmu-Fix-race-condition-in-vpmu_load.patch
@@ -0,0 +1,97 @@
+From 1bce7fb1f702da4f7a749c6f1457ecb20bf74fca Mon Sep 17 00:00:00 2001
+From: Tamas K Lengyel <tamas.lengyel@intel.com>
+Date: Tue, 11 Oct 2022 15:01:48 +0200
+Subject: [PATCH 26/26] x86/vpmu: Fix race-condition in vpmu_load
+
+The vPMU code-base attempts to perform an optimization on saving/reloading the
+PMU context by keeping track of what vCPU ran on each pCPU. When a pCPU is
+getting scheduled, it checks whether the previous vCPU is the current one, and
+if not it attempts a call to vpmu_save_force. Unfortunately, if the previous
+vCPU is already being scheduled to run on another pCPU, its state will already
+be runnable, which results in an ASSERT failure.
+
+Fix this by always performing a pmu context save in vpmu_save when called from
+vpmu_switch_from, and doing a vpmu_load when called from vpmu_switch_to.
+
+While this introduces a small overhead when the same vCPU is getting
+rescheduled on the same pCPU, the ASSERT failure is avoided and the code is a
+lot easier to reason about.
+
+Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: defa4e51d20a143bdd4395a075bf0933bb38a9a4
+master date: 2022-09-30 09:53:49 +0200
+---
+ xen/arch/x86/cpu/vpmu.c | 42 ++++-------------------------------------
+ 1 file changed, 4 insertions(+), 38 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
+index 16e91a3694fe..b6c2ec3cd047 100644
+--- a/xen/arch/x86/cpu/vpmu.c
++++ b/xen/arch/x86/cpu/vpmu.c
+@@ -368,58 +368,24 @@ void vpmu_save(struct vcpu *v)
+ vpmu->last_pcpu = pcpu;
+ per_cpu(last_vcpu, pcpu) = v;
+
++ vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
++
+ if ( vpmu->arch_vpmu_ops )
+ if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
+ vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+
++ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
++
+ apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+ }
+
+ int vpmu_load(struct vcpu *v, bool_t from_guest)
+ {
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+- int pcpu = smp_processor_id();
+- struct vcpu *prev = NULL;
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ return 0;
+
+- /* First time this VCPU is running here */
+- if ( vpmu->last_pcpu != pcpu )
+- {
+- /*
+- * Get the context from last pcpu that we ran on. Note that if another
+- * VCPU is running there it must have saved this VPCU's context before
+- * startig to run (see below).
+- * There should be no race since remote pcpu will disable interrupts
+- * before saving the context.
+- */
+- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+- {
+- on_selected_cpus(cpumask_of(vpmu->last_pcpu),
+- vpmu_save_force, (void *)v, 1);
+- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+- }
+- }
+-
+- /* Prevent forced context save from remote CPU */
+- local_irq_disable();
+-
+- prev = per_cpu(last_vcpu, pcpu);
+-
+- if ( prev != v && prev )
+- {
+- vpmu = vcpu_vpmu(prev);
+-
+- /* Someone ran here before us */
+- vpmu_save_force(prev);
+- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+-
+- vpmu = vcpu_vpmu(v);
+- }
+-
+- local_irq_enable();
+-
+ /* Only when PMU is counting, we load PMU context immediately. */
+ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+ (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
+--
+2.37.3
+
diff --git a/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch b/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch
deleted file mode 100644
index 060bc99..0000000
--- a/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From c4815be949aae6583a9a22897beb96b095b4f1a2 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 9 Jun 2022 15:29:13 +0200
-Subject: [PATCH 27/51] x86/amd: Work around CLFLUSH ordering on older parts
-
-On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything,
-including reads and writes to the address, and LFENCE/SFENCE instructions.
-
-This creates a multitude of problematic corner cases, laid out in the manual.
-Arrange to use MFENCE on both sides of the CLFLUSH to force proper ordering.
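To make the AMD corner case concrete: on the affected parts neither LFENCE nor SFENCE orders CLFLUSH, so the workaround brackets the flush loop with MFENCE on both sides. A hypothetical sketch (the patched Xen code does this via the alternatives framework instead):

    static void clflush_range_amd(const void *addr, size_t size, size_t line)
    {
        const void *end = addr + size;

        addr = (const void *)((unsigned long)addr & ~(line - 1));

        asm volatile ( "mfence" ::: "memory" );  /* order vs. older accesses */
        for ( ; addr < end; addr += line )
            asm volatile ( "clflush %0" :: "m" (*(const char *)addr) );
        asm volatile ( "mfence" ::: "memory" );  /* order vs. younger accesses */
    }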
- -This is part of XSA-402. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 062868a5a8b428b85db589fa9a6d6e43969ffeb9 -master date: 2022-06-09 14:23:07 +0200 ---- - xen/arch/x86/cpu/amd.c | 8 ++++++++ - xen/arch/x86/flushtlb.c | 13 ++++++++++++- - xen/include/asm-x86/cpufeatures.h | 1 + - 3 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index a8e37dbb1f5c..b3b9a0df5fed 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -812,6 +812,14 @@ static void init_amd(struct cpuinfo_x86 *c) - if (!cpu_has_lfence_dispatch) - __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); - -+ /* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with -+ * everything, including reads and writes to address, and -+ * LFENCE/SFENCE instructions. -+ */ -+ if (!cpu_has_clflushopt) -+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE); -+ - switch(c->x86) - { - case 0xf ... 0x11: -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 0c912b8669f8..dcbb4064012e 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -259,6 +259,13 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+/* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything, -+ * including reads and writes to address, and LFENCE/SFENCE instructions. -+ * -+ * This function only works safely after alternatives have run. Luckily, at -+ * the time of writing, we don't flush the caches that early. -+ */ - void cache_flush(const void *addr, unsigned int size) - { - /* -@@ -268,6 +275,8 @@ void cache_flush(const void *addr, unsigned int size) - unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; - const void *end = addr + size; - -+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE); -+ - addr -= (unsigned long)addr & (clflush_size - 1); - for ( ; addr < end; addr += clflush_size ) - { -@@ -283,7 +292,9 @@ void cache_flush(const void *addr, unsigned int size) - [p] "m" (*(const char *)(addr))); - } - -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+ alternative_2("", -+ "sfence", X86_FEATURE_CLFLUSHOPT, -+ "mfence", X86_BUG_CLFLUSH_MFENCE); - } - - void cache_writeback(const void *addr, unsigned int size) -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 7413febd7ad8..ff3157d52d13 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -47,6 +47,7 @@ XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch - - #define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */ - #define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */ -+#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ - - /* Total number of capability words, inc synth and bug words. 
*/
- #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
---
-2.35.1
-
diff --git a/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch b/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch
deleted file mode 100644
index af60348..0000000
--- a/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch
+++ /dev/null
@@ -1,160 +0,0 @@
-From dc020d8d1ba420e2dd0e7a40f5045db897f3c4f4 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 9 Jun 2022 15:29:38 +0200
-Subject: [PATCH 28/51] x86/pv: Track and flush non-coherent mappings of RAM
-
-There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with
-devices that make non-coherent writes. The Linux sound subsystem makes
-extensive use of this technique.
-
-For such use cases, the guest's DMA buffer is mapped and consistently used as
-WC, and Xen doesn't interact with the buffer.
-
-However, a mischievous guest can use WC mappings to deliberately create
-non-coherency between the cache and RAM, and use this to trick Xen into
-validating a pagetable which isn't actually safe.
-
-Allocate a new PGT_non_coherent to track the non-coherency of mappings. Set
-it whenever a non-coherent writeable mapping is created. If the page is used
-as anything other than PGT_writable_page, force a cache flush before
-validation. Also force a cache flush before the page is returned to the heap.
-
-This is CVE-2022-26364, part of XSA-402.
-
-Reported-by: Jann Horn <jannh@google.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: c1c9cae3a9633054b177c5de21ad7268162b2f2c
-master date: 2022-06-09 14:23:37 +0200
----
- xen/arch/x86/mm.c | 38 +++++++++++++++++++++++++++++++++++
- xen/arch/x86/pv/grant_table.c | 21 +++++++++++++++++++
- xen/include/asm-x86/mm.h | 6 +++++-
- 3 files changed, 64 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index ab32d13a1a0d..bab9624fabb7 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -997,6 +997,15 @@ get_page_from_l1e(
- return -EACCES;
- }
-
-+ /*
-+ * Track writeable non-coherent mappings to RAM pages, to trigger a cache
-+ * flush later if the target is used as anything but a PGT_writeable page.
-+ * We care about all writeable mappings, including foreign mappings.
-+ */
-+ if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) &&
-+ (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) )
-+ set_bit(_PGT_non_coherent, &page->u.inuse.type_info);
-+
- return 0;
-
- could_not_pin:
-@@ -2454,6 +2463,19 @@ static int cleanup_page_mappings(struct page_info *page)
- }
- }
-
-+ /*
-+ * Flush the cache if there were previously non-coherent writeable
-+ * mappings of this page. This forces the page to be coherent before it
-+ * is freed back to the heap.
-+ */
-+ if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) )
-+ {
-+ void *addr = __map_domain_page(page);
-+
-+ cache_flush(addr, PAGE_SIZE);
-+ unmap_domain_page(addr);
-+ }
-+
- return rc;
- }
-
-@@ -3027,6 +3049,22 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-
- if ( unlikely(!(nx & PGT_validated)) )
- {
-+ /*
-+ * Flush the cache if there were previously non-coherent mappings of
-+ * this page, and we're trying to use it as anything other than a
-+ * writeable page. This forces the page to be coherent before we
-+ * validate its contents for safety.
-+ */ -+ if ( (nx & PGT_non_coherent) && type != PGT_writable_page ) -+ { -+ void *addr = __map_domain_page(page); -+ -+ cache_flush(addr, PAGE_SIZE); -+ unmap_domain_page(addr); -+ -+ page->u.inuse.type_info &= ~PGT_non_coherent; -+ } -+ - /* - * No special validation needed for writable or shared pages. Page - * tables and GDT/LDT need to have their contents audited. -diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c -index 0325618c9883..81c72e61ed55 100644 ---- a/xen/arch/x86/pv/grant_table.c -+++ b/xen/arch/x86/pv/grant_table.c -@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame, - - ol1e = *pl1e; - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * We always create mappings in this path. However, our caller, -+ * map_grant_ref(), only passes potentially non-zero cache_flags for -+ * MMIO frames, so this path doesn't create non-coherent mappings of -+ * RAM frames and there's no need to calculate PGT_non_coherent. -+ */ -+ ASSERT(!cache_flags || is_iomem_page(frame)); -+ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame, - l1e_get_flags(ol1e), addr, grant_pte_flags); - - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * Generally, replace_grant_pv_mapping() is used to destroy mappings -+ * (n1le = l1e_empty()), but it can be a present mapping on the -+ * GNTABOP_unmap_and_replace path. -+ * -+ * In such cases, the PTE is fully transplanted from its old location -+ * via steal_linear_addr(), so we need not perform PGT_non_coherent -+ * checking here. -+ */ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 8a9a43bb0a9d..7464167ae192 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -53,8 +53,12 @@ - #define _PGT_partial PG_shift(8) - #define PGT_partial PG_mask(1, 8) - -+/* Has this page been mapped writeable with a non-coherent memory type? */ -+#define _PGT_non_coherent PG_shift(9) -+#define PGT_non_coherent PG_mask(1, 9) -+ - /* Count of uses of this frame as its current type. */ --#define PGT_count_width PG_shift(8) -+#define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL<<PGT_count_width)-1) - - /* Are the 'type mask' bits identical? */ --- -2.35.1 - diff --git a/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch b/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch deleted file mode 100644 index 90ce4cf..0000000 --- a/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 0b4e62847c5af1a59eea8d17093feccd550d1c26 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Fri, 10 Jun 2022 10:28:28 +0200 -Subject: [PATCH 29/51] x86/mm: account for PGT_pae_xen_l2 in recently added - assertion - -While PGT_pae_xen_l2 will be zapped once the type refcount of an L2 page -reaches zero, it'll be retained as long as the type refcount is non- -zero. Hence any checking against the requested type needs to either zap -the bit from the type or include it in the used mask. 
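A worked example of why the mask needed the extra bit (bit values symbolic):

    /*
     * Suppose a PAE L2 with Xen mappings:
     *   x    = PGT_l2_page_table | PGT_pae_xen_l2 | 1   (one ref, not yet validated)
     *   type = PGT_l2_page_table | PGT_pae_xen_l2       (caller's request)
     * Old check: x & (PGT_type_mask | PGT_count_mask) strips PGT_pae_xen_l2
     * from 'x', while (type | 1) keeps it, so the ASSERT fires spuriously.
     * Including PGT_pae_xen_l2 in the mask keeps both sides consistent.
     */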
- -Fixes: 9186e96b199e ("x86/pv: Clean up _get_page_type()") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: c2095ac76be0f4a1940346c9ffb49fb967345060 -master date: 2022-06-10 10:21:06 +0200 ---- - xen/arch/x86/mm.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index bab9624fabb7..c1b9a3bb102a 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2928,7 +2928,8 @@ static int _get_page_type(struct page_info *page, unsigned long type, - * The page is in one of two states (depending on PGT_partial), - * and should have exactly one reference. - */ -- ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1)); -+ ASSERT((x & (PGT_type_mask | PGT_pae_xen_l2 | PGT_count_mask)) == -+ (type | 1)); - - if ( !(x & PGT_partial) ) - { --- -2.35.1 - diff --git a/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch b/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch deleted file mode 100644 index af25b5c..0000000 --- a/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch +++ /dev/null @@ -1,258 +0,0 @@ -From 0e80f9f61168d4e4f008da75762cee0118f802ed Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 13 Jun 2022 16:19:01 +0100 -Subject: [PATCH 30/51] x86/spec-ctrl: Make VERW flushing runtime conditional -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently, VERW flushing to mitigate MDS is boot time conditional per domain -type. However, to provide mitigations for DRPW (CVE-2022-21166), we need to -conditionally use VERW based on the trustworthiness of the guest, and the -devices passed through. - -Remove the PV/HVM alternatives and instead issue a VERW on the return-to-guest -path depending on the SCF_verw bit in cpuinfo spec_ctrl_flags. - -Introduce spec_ctrl_init_domain() and d->arch.verw to calculate the VERW -disposition at domain creation time, and context switch the SCF_verw bit. - -For now, VERW flushing is used and controlled exactly as before, but later -patches will add per-domain cases too. - -No change in behaviour. - -This is part of XSA-404. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -(cherry picked from commit e06b95c1d44ab80da255219fc9f1e2fc423edcb6) ---- - docs/misc/xen-command-line.pandoc | 5 ++--- - xen/arch/x86/domain.c | 12 ++++++++++-- - xen/arch/x86/hvm/vmx/entry.S | 2 +- - xen/arch/x86/spec_ctrl.c | 30 +++++++++++++++++------------ - xen/include/asm-x86/cpufeatures.h | 3 +-- - xen/include/asm-x86/domain.h | 3 +++ - xen/include/asm-x86/spec_ctrl.h | 2 ++ - xen/include/asm-x86/spec_ctrl_asm.h | 16 +++++++++++++-- - 8 files changed, 51 insertions(+), 22 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 1d08fb7e9aa6..d5cb09f86541 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2258,9 +2258,8 @@ in place for guests to use. - Use of a positive boolean value for either of these options is invalid. - - The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine --grained control over the alternative blocks used by Xen. These impact Xen's --ability to protect itself, and Xen's ability to virtualise support for guests --to use. -+grained control over the primitives by Xen. 
These impact Xen's ability to -+protect itself, and Xen's ability to virtualise support for guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index ef1812dc1402..1fe6644a71ae 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -863,6 +863,8 @@ int arch_domain_create(struct domain *d, - - d->arch.msr_relaxed = config->arch.misc_flags & XEN_X86_MSR_RELAXED; - -+ spec_ctrl_init_domain(d); -+ - return 0; - - fail: -@@ -2017,14 +2019,15 @@ static void __context_switch(void) - void context_switch(struct vcpu *prev, struct vcpu *next) - { - unsigned int cpu = smp_processor_id(); -+ struct cpu_info *info = get_cpu_info(); - const struct domain *prevd = prev->domain, *nextd = next->domain; - unsigned int dirty_cpu = read_atomic(&next->dirty_cpu); - - ASSERT(prev != next); - ASSERT(local_irq_is_enabled()); - -- get_cpu_info()->use_pv_cr3 = false; -- get_cpu_info()->xen_cr3 = 0; -+ info->use_pv_cr3 = false; -+ info->xen_cr3 = 0; - - if ( unlikely(dirty_cpu != cpu) && dirty_cpu != VCPU_CPU_CLEAN ) - { -@@ -2088,6 +2091,11 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - *last_id = next_id; - } - } -+ -+ /* Update the top-of-stack block with the VERW disposition. */ -+ info->spec_ctrl_flags &= ~SCF_verw; -+ if ( nextd->arch.verw ) -+ info->spec_ctrl_flags |= SCF_verw; - } - - sched_context_switched(prev, next); -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 49651f3c435a..5f5de45a1309 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -87,7 +87,7 @@ UNLIKELY_END(realmode) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ - /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */ -- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM -+ DO_SPEC_CTRL_COND_VERW - - mov VCPU_hvm_guest_cr2(%rbx),%rax - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index c19464da70ce..21730aa03071 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -36,8 +36,8 @@ static bool __initdata opt_msr_sc_pv = true; - static bool __initdata opt_msr_sc_hvm = true; - static int8_t __initdata opt_rsb_pv = -1; - static bool __initdata opt_rsb_hvm = true; --static int8_t __initdata opt_md_clear_pv = -1; --static int8_t __initdata opt_md_clear_hvm = -1; -+static int8_t __read_mostly opt_md_clear_pv = -1; -+static int8_t __read_mostly opt_md_clear_hvm = -1; - - /* Cmdline controls for Xen's speculative settings. */ - static enum ind_thunk { -@@ -932,6 +932,13 @@ static __init void mds_calculations(uint64_t caps) - } - } - -+void spec_ctrl_init_domain(struct domain *d) -+{ -+ bool pv = is_pv_domain(d); -+ -+ d->arch.verw = pv ? opt_md_clear_pv : opt_md_clear_hvm; -+} -+ - void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; -@@ -1196,21 +1203,20 @@ void __init init_speculation_mitigations(void) - boot_cpu_has(X86_FEATURE_MD_CLEAR)); - - /* -- * Enable MDS defences as applicable. The PV blocks need using all the -- * time, and the Idle blocks need using if either PV or HVM defences are -- * used. -+ * Enable MDS defences as applicable. The Idle blocks need using if -+ * either PV or HVM defences are used. - * - * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with -- * equivelent semantics to avoid needing to perform both flushes on the -- * HVM path. 
The HVM blocks don't need activating if our hypervisor told -- * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves. -+ * equivalent semantics to avoid needing to perform both flushes on the -+ * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH. -+ * -+ * After calculating the appropriate idle setting, simplify -+ * opt_md_clear_hvm to mean just "should we VERW on the way into HVM -+ * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv ) -- setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV); - if ( opt_md_clear_pv || opt_md_clear_hvm ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush ) -- setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM); -+ opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index ff3157d52d13..bd45a144ee78 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -35,8 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ - XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ --XEN_CPUFEATURE(SC_VERW_PV, X86_SYNTH(23)) /* VERW used by Xen for PV */ --XEN_CPUFEATURE(SC_VERW_HVM, X86_SYNTH(24)) /* VERW used by Xen for HVM */ -+/* Bits 23,24 unused. */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ - XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 92d54de0b9a1..2398a1d99da9 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -319,6 +319,9 @@ struct arch_domain - uint32_t pci_cf8; - uint8_t cmos_idx; - -+ /* Use VERW on return-to-guest for its flushing side effect. */ -+ bool verw; -+ - union { - struct pv_domain pv; - struct hvm_domain hvm; -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index f76029523610..751355f471f4 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -24,6 +24,7 @@ - #define SCF_use_shadow (1 << 0) - #define SCF_ist_wrmsr (1 << 1) - #define SCF_ist_rsb (1 << 2) -+#define SCF_verw (1 << 3) - - #ifndef __ASSEMBLY__ - -@@ -32,6 +33,7 @@ - #include <asm/msr-index.h> - - void init_speculation_mitigations(void); -+void spec_ctrl_init_domain(struct domain *d); - - extern bool opt_ibpb; - extern bool opt_ssbd; -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 02b3b18ce69f..5a590bac44aa 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -136,6 +136,19 @@ - #endif - .endm - -+.macro DO_SPEC_CTRL_COND_VERW -+/* -+ * Requires %rsp=cpuinfo -+ * -+ * Issue a VERW for its flushing side effect, if indicated. This is a Spectre -+ * v1 gadget, but the IRET/VMEntry is serialising. 
-+ */ -+ testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) -+ jz .L\@_verw_skip -+ verw CPUINFO_verw_sel(%rsp) -+.L\@_verw_skip: -+.endm -+ - .macro DO_SPEC_CTRL_ENTRY maybexen:req - /* - * Requires %rsp=regs (also cpuinfo if !maybexen) -@@ -231,8 +244,7 @@ - #define SPEC_CTRL_EXIT_TO_PV \ - ALTERNATIVE "", \ - DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ -- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ -- X86_FEATURE_SC_VERW_PV -+ DO_SPEC_CTRL_COND_VERW - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. --- -2.35.1 - diff --git a/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch b/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch deleted file mode 100644 index 3b91fb5..0000000 --- a/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch +++ /dev/null @@ -1,98 +0,0 @@ -From a83108736db0ddaa5855f5abda6dcc8ae4fe25e9 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 20 Sep 2021 18:47:49 +0100 -Subject: [PATCH 31/51] x86/spec-ctrl: Enumeration for MMIO Stale Data controls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The three *_NO bits indicate non-susceptibility to the SSDP, FBSDP and PSDP -data movement primitives. - -FB_CLEAR indicates that the VERW instruction has re-gained it's Fill Buffer -flushing side effect. This is only enumerated on parts where VERW had -previously lost it's flushing side effect due to the MDS/TAA vulnerabilities -being fixed in hardware. - -FB_CLEAR_CTRL is available on a subset of FB_CLEAR parts where the Fill Buffer -clearing side effect of VERW can be turned off for performance reasons. - -This is part of XSA-404. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -(cherry picked from commit 2ebe8fe9b7e0d36e9ec3cfe4552b2b197ef0dcec) ---- - xen/arch/x86/spec_ctrl.c | 11 ++++++++--- - xen/include/asm-x86/msr-index.h | 6 ++++++ - 2 files changed, 14 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 21730aa03071..d285538bde9f 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -323,7 +323,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -332,13 +332,16 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "", - (caps & ARCH_CAPS_MDS_NO) ? " MDS_NO" : "", - (caps & ARCH_CAPS_TAA_NO) ? " TAA_NO" : "", -+ (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "", -+ (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "", -+ (caps & ARCH_CAPS_PSDP_NO) ? " PSDP_NO" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : ""); - - /* Hardware features which need driving to mitigate issues. 
*/ -- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", - (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || - (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || -@@ -353,7 +356,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "", - (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "", - (e8b & cpufeat_mask(X86_FEATURE_VIRT_SSBD)) ? " VIRT_SSBD" : "", -- (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : ""); -+ (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", -+ (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", -+ (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : ""); - - /* Compiled-in support which pertains to mitigations. */ - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 31964b88af7a..72bc32ba04ff 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -66,6 +66,11 @@ - #define ARCH_CAPS_IF_PSCHANGE_MC_NO (_AC(1, ULL) << 6) - #define ARCH_CAPS_TSX_CTRL (_AC(1, ULL) << 7) - #define ARCH_CAPS_TAA_NO (_AC(1, ULL) << 8) -+#define ARCH_CAPS_SBDR_SSDP_NO (_AC(1, ULL) << 13) -+#define ARCH_CAPS_FBSDP_NO (_AC(1, ULL) << 14) -+#define ARCH_CAPS_PSDP_NO (_AC(1, ULL) << 15) -+#define ARCH_CAPS_FB_CLEAR (_AC(1, ULL) << 17) -+#define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18) - - #define MSR_FLUSH_CMD 0x0000010b - #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) -@@ -83,6 +88,7 @@ - #define MCU_OPT_CTRL_RNGDS_MITG_DIS (_AC(1, ULL) << 0) - #define MCU_OPT_CTRL_RTM_ALLOW (_AC(1, ULL) << 1) - #define MCU_OPT_CTRL_RTM_LOCKED (_AC(1, ULL) << 2) -+#define MCU_OPT_CTRL_FB_CLEAR_DIS (_AC(1, ULL) << 3) - - #define MSR_RTIT_OUTPUT_BASE 0x00000560 - #define MSR_RTIT_OUTPUT_MASK 0x00000561 --- -2.35.1 - diff --git a/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch b/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch deleted file mode 100644 index c63891a..0000000 --- a/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch +++ /dev/null @@ -1,187 +0,0 @@ -From 2e82446cb252f6c8ac697e81f4155872c69afde4 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 13 Jun 2022 19:18:32 +0100 -Subject: [PATCH 32/51] x86/spec-ctrl: Add spec-ctrl=unpriv-mmio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Per Xen's support statement, PCI passthrough should be to trusted domains -because the overall system security depends on factors outside of Xen's -control. - -As such, Xen, in a supported configuration, is not vulnerable to DRPW/SBDR. - -However, users who have risk assessed their configuration may be happy with -the risk of DoS, but unhappy with the risk of cross-domain data leakage. Such -users should enable this option. - -On CPUs vulnerable to MDS, the existing mitigations are the best we can do to -mitigate MMIO cross-domain data leakage. - -On CPUs fixed to MDS but vulnerable MMIO stale data leakage, this option: - - * On CPUs susceptible to FBSDP, mitigates cross-domain fill buffer leakage - using FB_CLEAR. - * On CPUs susceptible to SBDR, mitigates RNG data recovery by engaging the - srb-lock, previously used to mitigate SRBDS. - -Both mitigations require microcode from IPU 2022.1, May 2022. - -This is part of XSA-404. 
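A hedged sketch of the policy this message describes, distilled from the hunks that follow rather than copied from them (ARCH_CAPS_FB_CLEAR is bit 17, per the msr-index.h hunk in the previous patch):

    #include <stdbool.h>
    #include <stdint.h>

    #define ARCH_CAPS_FB_CLEAR (1ULL << 17)  /* from the msr-index.h hunk above */

    /*
     * Engage FB_CLEAR-based VERW flushing only when the administrator
     * asserts that unprivileged domains have (or will have) MMIO mappings
     * and the CPU has regained the fill-buffer flushing side effect.
     */
    static bool want_fb_clear_mmio(bool unpriv_mmio, uint64_t caps)
    {
        return unpriv_mmio && (caps & ARCH_CAPS_FB_CLEAR);
    }

This mirrors the opt_fb_clear_mmio assignment, guarded by opt_unpriv_mmio, in the spec_ctrl.c hunk below.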
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -(cherry picked from commit 8c24b70fedcb52633b2370f834d8a2be3f7fa38e) ---- - docs/misc/xen-command-line.pandoc | 14 +++++++-- - xen/arch/x86/spec_ctrl.c | 48 ++++++++++++++++++++++++------- - 2 files changed, 48 insertions(+), 14 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index d5cb09f86541..a642e43476a2 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2235,7 +2235,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu, --> l1d-flush,branch-harden,srb-lock}=<bool> ]` -+> l1d-flush,branch-harden,srb-lock,unpriv-mmio}=<bool> ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2314,8 +2314,16 @@ Xen will enable this mitigation. - On hardware supporting SRBDS_CTRL, the `srb-lock=` option can be used to force - or prevent Xen from protect the Special Register Buffer from leaking stale - data. By default, Xen will enable this mitigation, except on parts where MDS --is fixed and TAA is fixed/mitigated (in which case, there is believed to be no --way for an attacker to obtain the stale data). -+is fixed and TAA is fixed/mitigated and there are no unprivileged MMIO -+mappings (in which case, there is believed to be no way for an attacker to -+obtain stale data). -+ -+The `unpriv-mmio=` boolean indicates whether the system has (or will have) -+less than fully privileged domains granted access to MMIO devices. By -+default, this option is disabled. If enabled, Xen will use the `FB_CLEAR` -+and/or `SRBDS_CTRL` functionality available in the Intel May 2022 microcode -+release to mitigate cross-domain leakage of data via the MMIO Stale Data -+vulnerabilities. - - ### sync_console - > `= <boolean>` -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index d285538bde9f..099113ba41e6 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -67,6 +67,8 @@ static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ - static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */ - - static int8_t __initdata opt_srb_lock = -1; -+static bool __initdata opt_unpriv_mmio; -+static bool __read_mostly opt_fb_clear_mmio; - - static int __init parse_spec_ctrl(const char *s) - { -@@ -184,6 +186,8 @@ static int __init parse_spec_ctrl(const char *s) - opt_branch_harden = val; - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; -+ else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) -+ opt_unpriv_mmio = val; - else - rc = -EINVAL; - -@@ -392,7 +396,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb ? " IBPB" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -- opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : "", -+ opt_md_clear_pv || opt_md_clear_hvm || -+ opt_fb_clear_mmio ? " VERW" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ -@@ -941,7 +946,9 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- d->arch.verw = pv ? 
opt_md_clear_pv : opt_md_clear_hvm; -+ d->arch.verw = -+ (pv ? opt_md_clear_pv : opt_md_clear_hvm) || -+ (opt_fb_clear_mmio && is_iommu_enabled(d)); - } - - void __init init_speculation_mitigations(void) -@@ -1195,6 +1202,18 @@ void __init init_speculation_mitigations(void) - - mds_calculations(caps); - -+ /* -+ * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have -+ * reintroduced the VERW fill buffer flushing side effect because of a -+ * susceptibility to FBSDP. -+ * -+ * If unprivileged guests have (or will have) MMIO mappings, we can -+ * mitigate cross-domain leakage of fill buffer data by issuing VERW on -+ * the return-to-guest path. -+ */ -+ if ( opt_unpriv_mmio ) -+ opt_fb_clear_mmio = caps & ARCH_CAPS_FB_CLEAR; -+ - /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. - * This will only be a token effort for MLPDS/MFBDS when HT is enabled, -@@ -1208,18 +1227,20 @@ void __init init_speculation_mitigations(void) - boot_cpu_has(X86_FEATURE_MD_CLEAR)); - - /* -- * Enable MDS defences as applicable. The Idle blocks need using if -- * either PV or HVM defences are used. -+ * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -+ * either the PV or HVM MDS defences are used, or if we may give MMIO -+ * access to untrusted guests. - * - * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with - * equivalent semantics to avoid needing to perform both flushes on the -- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH. -+ * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for -+ * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) - * - * After calculating the appropriate idle setting, simplify - * opt_md_clear_hvm to mean just "should we VERW on the way into HVM - * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv || opt_md_clear_hvm ) -+ if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); - opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; - -@@ -1284,14 +1305,19 @@ void __init init_speculation_mitigations(void) - * On some SRBDS-affected hardware, it may be safe to relax srb-lock by - * default. - * -- * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only known -- * way to access the Fill Buffer. If TSX isn't available (inc. SKU -- * reasons on some models), or TSX is explicitly disabled, then there is -- * no need for the extra overhead to protect RDRAND/RDSEED. -+ * All parts with SRBDS_CTRL suffer SSDP, the mechanism by which stale RNG -+ * data becomes available to other contexts. To recover the data, an -+ * attacker needs to use: -+ * - SBDS (MDS or TAA to sample the cores fill buffer) -+ * - SBDR (Architecturally retrieve stale transaction buffer contents) -+ * - DRPW (Architecturally latch stale fill buffer data) -+ * -+ * On MDS_NO parts, and with TAA_NO or TSX unavailable/disabled, and there -+ * is no unprivileged MMIO access, the RNG data doesn't need protecting. 
- */ - if ( cpu_has_srbds_ctrl ) - { -- if ( opt_srb_lock == -1 && -+ if ( opt_srb_lock == -1 && !opt_unpriv_mmio && - (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && - (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) - opt_srb_lock = 0; --- -2.35.1 - diff --git a/0033-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch b/0033-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch deleted file mode 100644 index 07f488d..0000000 --- a/0033-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 460b08d6c6c16b3f32aa138e772b759ae02a4479 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 12 Jul 2022 11:10:34 +0200 -Subject: [PATCH 33/51] IOMMU/x86: work around bogus gcc12 warning in - hvm_gsi_eoi() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -As per [1] the expansion of the pirq_dpci() macro causes a -Waddress -controlled warning (enabled implicitly in our builds, if not by default) -tying the middle part of the involved conditional expression to the -surrounding boolean context. Work around this by introducing a local -inline function in the affected source file. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Roger Pau Monné <roger.pau@citrix.com> - -[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102967 -master commit: 80ad8db8a4d9bb24952f0aea788ce6f47566fa76 -master date: 2022-06-15 10:19:32 +0200 ---- - xen/drivers/passthrough/x86/hvm.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/xen/drivers/passthrough/x86/hvm.c b/xen/drivers/passthrough/x86/hvm.c -index 0b37cd145b60..ba0f6c53d742 100644 ---- a/xen/drivers/passthrough/x86/hvm.c -+++ b/xen/drivers/passthrough/x86/hvm.c -@@ -25,6 +25,18 @@ - #include <asm/hvm/support.h> - #include <asm/io_apic.h> - -+/* -+ * Gcc12 takes issue with pirq_dpci() being used in boolean context (see gcc -+ * bug 102967). While we can't replace the macro definition in the header by an -+ * inline function, we can do so here. -+ */ -+static inline struct hvm_pirq_dpci *_pirq_dpci(struct pirq *pirq) -+{ -+ return pirq_dpci(pirq); -+} -+#undef pirq_dpci -+#define pirq_dpci(pirq) _pirq_dpci(pirq) -+ - static DEFINE_PER_CPU(struct list_head, dpci_list); - - /* --- -2.35.1 - diff --git a/0034-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch b/0034-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch deleted file mode 100644 index ac71ab8..0000000 --- a/0034-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 5cb8142076ce1ce53eafd7e00acb4d0eac4e7784 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - <marmarek@invisiblethingslab.com> -Date: Tue, 12 Jul 2022 11:11:35 +0200 -Subject: [PATCH 34/51] ehci-dbgp: fix selecting n-th ehci controller -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The ehci<n> number was parsed but ignored. 
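The bug class is easy to reproduce outside Xen; a minimal hedged sketch using the standard strtoul() in place of Xen's simple_strtoul():

    #include <stdlib.h>

    /*
     * Pre-fix shape: the conversion ran, but its result was discarded,
     * so "ehci<n>" always selected controller 0.
     */
    static unsigned int pick_controller(const char *opt_dbgp)
    {
        char *end;
        unsigned int num = 0;

        if ( opt_dbgp[4] )
            num = strtoul(opt_dbgp + 4, &end, 10);  /* the fix: keep the result */

        return num;
    }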
- -Fixes: 322ecbe4ac85 ("console: add EHCI debug port based serial console") -Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: d6d0cb659fda64430d4649f8680c5cead32da8fd -master date: 2022-06-16 14:23:37 +0100 ---- - xen/drivers/char/ehci-dbgp.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/drivers/char/ehci-dbgp.c b/xen/drivers/char/ehci-dbgp.c -index c893d246defa..66b4811af24a 100644 ---- a/xen/drivers/char/ehci-dbgp.c -+++ b/xen/drivers/char/ehci-dbgp.c -@@ -1478,7 +1478,7 @@ void __init ehci_dbgp_init(void) - unsigned int num = 0; - - if ( opt_dbgp[4] ) -- simple_strtoul(opt_dbgp + 4, &e, 10); -+ num = simple_strtoul(opt_dbgp + 4, &e, 10); - - dbgp->cap = find_dbgp(dbgp, num); - if ( !dbgp->cap ) --- -2.35.1 - diff --git a/0035-tools-xenstored-Harden-corrupt.patch b/0035-tools-xenstored-Harden-corrupt.patch deleted file mode 100644 index bb0f7f1..0000000 --- a/0035-tools-xenstored-Harden-corrupt.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 81ee3d08351be1ef2a14d371993604098d6a4673 Mon Sep 17 00:00:00 2001 -From: Julien Grall <jgrall@amazon.com> -Date: Tue, 12 Jul 2022 11:12:13 +0200 -Subject: [PATCH 35/51] tools/xenstored: Harden corrupt() - -At the moment, corrupt() is neither checking for allocation failure -nor freeing the allocated memory. - -Harden the code by printing ENOMEM if the allocation failed and -free 'str' after the last use. - -This is not considered to be a security issue because corrupt() should -only be called when Xenstored thinks the database is corrupted. Note -that the trigger (i.e. a guest reliably provoking the call) would be -a security issue. - -Fixes: 06d17943f0cd ("Added a basic integrity checker, and some basic ability to recover from store") -Signed-off-by: Julien Grall <jgrall@amazon.com> -Reviewed-by: Juergen Gross <jgross@suse.com> -master commit: db3382dd4f468c763512d6bf91c96773395058fb -master date: 2022-06-23 13:44:10 +0100 ---- - tools/xenstore/xenstored_core.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c -index 91d093a12ea6..0c8ee276f837 100644 ---- a/tools/xenstore/xenstored_core.c -+++ b/tools/xenstore/xenstored_core.c -@@ -2087,7 +2087,10 @@ void corrupt(struct connection *conn, const char *fmt, ...) - va_end(arglist); - - log("corruption detected by connection %i: err %s: %s", -- conn ? (int)conn->id : -1, strerror(saved_errno), str); -+ conn ? (int)conn->id : -1, strerror(saved_errno), -+ str ?: "ENOMEM"); -+ -+ talloc_free(str); - - check_store(); - } --- -2.35.1 - diff --git a/0036-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch b/0036-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch deleted file mode 100644 index 8bc0768..0000000 --- a/0036-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 09d533f4c80b7eaf9fb4e36ebba8259580857a9d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:12:46 +0200 -Subject: [PATCH 36/51] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with - legacy IBRS -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Back at the time of the original Spectre-v2 fixes, it was recommended to clear -MSR_SPEC_CTRL when going idle. 
This is because of the side effects on the -sibling thread caused by the microcode IBRS and STIBP implementations which -were retrofitted to existing CPUs. - -However, there are no relevant cross-thread impacts for the hardware -IBRS/STIBP implementations, so this logic should not be used on Intel CPUs -supporting eIBRS, or any AMD CPUs; doing so only adds unnecessary latency to -the idle path. - -Furthermore, there's no point playing with MSR_SPEC_CTRL in the idle paths if -SMT is disabled for other reasons. - -Fixes: 8d03080d2a33 ("x86/spec-ctrl: Cease using thunk=lfence on AMD") -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: ffc7694e0c99eea158c32aa164b7d1e1bb1dc46b -master date: 2022-06-30 18:07:13 +0100 ---- - xen/arch/x86/spec_ctrl.c | 10 ++++++++-- - xen/include/asm-x86/cpufeatures.h | 2 +- - xen/include/asm-x86/spec_ctrl.h | 5 +++-- - 3 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 099113ba41e6..1ed5ceda8b46 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1150,8 +1150,14 @@ void __init init_speculation_mitigations(void) - /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */ - init_shadow_spec_ctrl_state(); - -- /* If Xen is using any MSR_SPEC_CTRL settings, adjust the idle path. */ -- if ( default_xen_spec_ctrl ) -+ /* -+ * For microcoded IBRS only (i.e. Intel, pre eIBRS), it is recommended to -+ * clear MSR_SPEC_CTRL before going idle, to avoid impacting sibling -+ * threads. Activate this if SMT is enabled, and Xen is using a non-zero -+ * MSR_SPEC_CTRL setting. -+ */ -+ if ( boot_cpu_has(X86_FEATURE_IBRSB) && !(caps & ARCH_CAPS_IBRS_ALL) && -+ hw_smt_enabled && default_xen_spec_ctrl ) - setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE); - - xpti_init_default(caps); -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index bd45a144ee78..493d338a085e 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -33,7 +33,7 @@ XEN_CPUFEATURE(SC_MSR_HVM, X86_SYNTH(17)) /* MSR_SPEC_CTRL used by Xen fo - XEN_CPUFEATURE(SC_RSB_PV, X86_SYNTH(18)) /* RSB overwrite needed for PV */ - XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM */ - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ --XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ -+XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ - /* Bits 23,24 unused. */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 751355f471f4..7e83e0179fb9 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -78,7 +78,8 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) - uint32_t val = 0; - - /* -- * Branch Target Injection: -+ * It is recommended in some cases to clear MSR_SPEC_CTRL when going idle, -+ * to avoid impacting sibling threads. - * - * Latch the new shadow value, then enable shadowing, then update the MSR. - * There are no SMP issues here; only local processor ordering concerns. 
-@@ -114,7 +115,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) - uint32_t val = info->xen_spec_ctrl; - - /* -- * Branch Target Injection: -+ * Restore MSR_SPEC_CTRL on exit from idle. - * - * Disable shadowing before updating the MSR. There are no SMP issues - * here; only local processor ordering concerns. --- -2.35.1 - diff --git a/0037-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch b/0037-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch deleted file mode 100644 index 156aa58..0000000 --- a/0037-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch +++ /dev/null @@ -1,234 +0,0 @@ -From db6ca8176ccc4ff7dfe3c06969af9ebfab0d7b04 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:13:33 +0200 -Subject: [PATCH 37/51] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow - hardware STIBP hint -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -STIBP and PSFD are slightly weird bits, because they're both implied by other -bits in MSR_SPEC_CTRL. Add fine grain controls for them, and take the -implications into account when setting IBRS/SSBD. - -Rearrange the IBPB text/variables/logic to keep all the MSR_SPEC_CTRL bits -together, for consistency. - -However, AMD have a hardware hint CPUID bit recommending that STIBP be set -unilaterally. This is advertised on Zen3, so follow the recommendation. -Furthermore, in such cases, set STIBP behind the guest's back for now. This -has negligible overhead for the guest, but saves a WRMSR on vmentry. This is -the only default change. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: fef244b179c06fcdfa581f7d57fa6e578c49ff50 -master date: 2022-06-30 18:07:13 +0100 ---- - docs/misc/xen-command-line.pandoc | 21 +++++++--- - xen/arch/x86/hvm/svm/vmcb.c | 9 +++++ - xen/arch/x86/spec_ctrl.c | 67 ++++++++++++++++++++++++++----- - 3 files changed, 82 insertions(+), 15 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index a642e43476a2..46e9c58d35cd 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2234,8 +2234,9 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>, --> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu, --> l1d-flush,branch-harden,srb-lock,unpriv-mmio}=<bool> ]` -+> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, -+> eager-fpu,l1d-flush,branch-harden,srb-lock, -+> unpriv-mmio}=<bool> ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2285,9 +2286,10 @@ On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - If Xen is not using IBRS itself, functionality is still set up so IBRS can be - virtualised for guests. - --On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` --option can be used to force (the default) or prevent Xen from issuing branch --prediction barriers on vcpu context switches. -+On hardware supporting STIBP (Single Thread Indirect Branch Predictors), the -+`stibp=` option can be used to force or prevent Xen using the feature itself. 
-+By default, Xen will use STIBP when IBRS is in use (IBRS implies STIBP), and -+when hardware hints recommend using it as a blanket setting. - - On hardware supporting SSBD (Speculative Store Bypass Disable), the `ssbd=` - option can be used to force or prevent Xen using the feature itself. On AMD -@@ -2295,6 +2297,15 @@ hardware, this is a global option applied at boot, and not virtualised for - guest use. On Intel hardware, the feature is virtualised for guests, - independently of Xen's choice of setting. - -+On hardware supporting PSFD (Predictive Store Forwarding Disable), the `psfd=` -+option can be used to force or prevent Xen using the feature itself. By -+default, Xen will not use PSFD. PSFD is implied by SSBD, and SSBD is off by -+default. -+ -+On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=` -+option can be used to force (the default) or prevent Xen from issuing branch -+prediction barriers on vcpu context switches. -+ - On all hardware, the `eager-fpu=` option can be used to force or prevent Xen - from using fully eager FPU context switches. This is currently implemented as - a global control. By default, Xen will choose to use fully eager context -diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c -index 565e997155f2..ef7224eb5dd7 100644 ---- a/xen/arch/x86/hvm/svm/vmcb.c -+++ b/xen/arch/x86/hvm/svm/vmcb.c -@@ -29,6 +29,7 @@ - #include <asm/hvm/support.h> - #include <asm/hvm/svm/svm.h> - #include <asm/hvm/svm/svmdebug.h> -+#include <asm/spec_ctrl.h> - - struct vmcb_struct *alloc_vmcb(void) - { -@@ -176,6 +177,14 @@ static int construct_vmcb(struct vcpu *v) - vmcb->_pause_filter_thresh = SVM_PAUSETHRESH_INIT; - } - -+ /* -+ * When default_xen_spec_ctrl simply SPEC_CTRL_STIBP, default this behind -+ * the back of the VM too. Our SMT topology isn't accurate, the overhead -+ * is neglegable, and doing this saves a WRMSR on the vmentry path. -+ */ -+ if ( default_xen_spec_ctrl == SPEC_CTRL_STIBP ) -+ v->arch.msrs->spec_ctrl.raw = SPEC_CTRL_STIBP; -+ - return 0; - } - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 1ed5ceda8b46..dfdd45c358c4 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -48,9 +48,13 @@ static enum ind_thunk { - THUNK_LFENCE, - THUNK_JMP, - } opt_thunk __initdata = THUNK_DEFAULT; -+ - static int8_t __initdata opt_ibrs = -1; -+int8_t __initdata opt_stibp = -1; -+bool __read_mostly opt_ssbd; -+int8_t __initdata opt_psfd = -1; -+ - bool __read_mostly opt_ibpb = true; --bool __read_mostly opt_ssbd = false; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = true; -@@ -172,12 +176,20 @@ static int __init parse_spec_ctrl(const char *s) - else - rc = -EINVAL; - } -+ -+ /* Bits in MSR_SPEC_CTRL. */ - else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 ) - opt_ibrs = val; -- else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -- opt_ibpb = val; -+ else if ( (val = parse_boolean("stibp", s, ss)) >= 0 ) -+ opt_stibp = val; - else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 ) - opt_ssbd = val; -+ else if ( (val = parse_boolean("psfd", s, ss)) >= 0 ) -+ opt_psfd = val; -+ -+ /* Misc settings. 
*/ -+ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -+ opt_ibpb = val; - else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 ) - opt_eager_fpu = val; - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) -@@ -376,7 +388,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s, Other:%s%s%s%s%s\n", -+ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n", - thunk == THUNK_NONE ? "N/A" : - thunk == THUNK_RETPOLINE ? "RETPOLINE" : - thunk == THUNK_LFENCE ? "LFENCE" : -@@ -390,6 +402,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (!boot_cpu_has(X86_FEATURE_SSBD) && - !boot_cpu_has(X86_FEATURE_AMD_SSBD)) ? "" : - (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-", -+ (!boot_cpu_has(X86_FEATURE_PSFD) && -+ !boot_cpu_has(X86_FEATURE_INTEL_PSFD)) ? "" : -+ (default_xen_spec_ctrl & SPEC_CTRL_PSFD) ? " PSFD+" : " PSFD-", - !(caps & ARCH_CAPS_TSX_CTRL) ? "" : - (opt_tsx & 1) ? " TSX+" : " TSX-", - !cpu_has_srbds_ctrl ? "" : -@@ -979,10 +994,7 @@ void __init init_speculation_mitigations(void) - if ( !has_spec_ctrl ) - printk(XENLOG_WARNING "?!? CET active, but no MSR_SPEC_CTRL?\n"); - else if ( opt_ibrs == -1 ) -- { - opt_ibrs = ibrs = true; -- default_xen_spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_STIBP; -- } - - if ( opt_thunk == THUNK_DEFAULT || opt_thunk == THUNK_RETPOLINE ) - thunk = THUNK_JMP; -@@ -1086,14 +1098,49 @@ void __init init_speculation_mitigations(void) - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } - -- /* If we have IBRS available, see whether we should use it. */ -+ /* Figure out default_xen_spec_ctrl. */ - if ( has_spec_ctrl && ibrs ) -- default_xen_spec_ctrl |= SPEC_CTRL_IBRS; -+ { -+ /* IBRS implies STIBP. */ -+ if ( opt_stibp == -1 ) -+ opt_stibp = 1; -+ -+ default_xen_spec_ctrl |= SPEC_CTRL_IBRS; -+ } -+ -+ /* -+ * Use STIBP by default if the hardware hint is set. Otherwise, leave it -+ * off as it a severe performance pentalty on pre-eIBRS Intel hardware -+ * where it was retrofitted in microcode. -+ */ -+ if ( opt_stibp == -1 ) -+ opt_stibp = !!boot_cpu_has(X86_FEATURE_STIBP_ALWAYS); -+ -+ if ( opt_stibp && (boot_cpu_has(X86_FEATURE_STIBP) || -+ boot_cpu_has(X86_FEATURE_AMD_STIBP)) ) -+ default_xen_spec_ctrl |= SPEC_CTRL_STIBP; - -- /* If we have SSBD available, see whether we should use it. */ - if ( opt_ssbd && (boot_cpu_has(X86_FEATURE_SSBD) || - boot_cpu_has(X86_FEATURE_AMD_SSBD)) ) -+ { -+ /* SSBD implies PSFD */ -+ if ( opt_psfd == -1 ) -+ opt_psfd = 1; -+ - default_xen_spec_ctrl |= SPEC_CTRL_SSBD; -+ } -+ -+ /* -+ * Don't use PSFD by default. AMD designed the predictor to -+ * auto-clear on privilege change. PSFD is implied by SSBD, which is -+ * off by default. 
-+ */ -+ if ( opt_psfd == -1 ) -+ opt_psfd = 0; -+ -+ if ( opt_psfd && (boot_cpu_has(X86_FEATURE_PSFD) || -+ boot_cpu_has(X86_FEATURE_INTEL_PSFD)) ) -+ default_xen_spec_ctrl |= SPEC_CTRL_PSFD; - - /* - * PV guests can create RSB entries for any linear address they control, --- -2.35.1 - diff --git a/0038-libxc-fix-compilation-error-with-gcc13.patch b/0038-libxc-fix-compilation-error-with-gcc13.patch deleted file mode 100644 index 8056742..0000000 --- a/0038-libxc-fix-compilation-error-with-gcc13.patch +++ /dev/null @@ -1,33 +0,0 @@ -From cd3d6b4cd46cd05590805b4a6c0b6654af60106e Mon Sep 17 00:00:00 2001 -From: Charles Arnold <carnold@suse.com> -Date: Tue, 12 Jul 2022 11:14:07 +0200 -Subject: [PATCH 38/51] libxc: fix compilation error with gcc13 - -xc_psr.c:161:5: error: conflicting types for 'xc_psr_cmt_get_data' -due to enum/integer mismatch; - -Signed-off-by: Charles Arnold <carnold@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: 8eeae8c2b4efefda8e946461e86cf2ae9c18e5a9 -master date: 2022-07-06 13:06:40 +0200 ---- - tools/include/xenctrl.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h -index 07b96e6671a5..893ae39e4a95 100644 ---- a/tools/include/xenctrl.h -+++ b/tools/include/xenctrl.h -@@ -2516,7 +2516,7 @@ int xc_psr_cmt_get_l3_event_mask(xc_interface *xch, uint32_t *event_mask); - int xc_psr_cmt_get_l3_cache_size(xc_interface *xch, uint32_t cpu, - uint32_t *l3_cache_size); - int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, uint32_t cpu, -- uint32_t psr_cmt_type, uint64_t *monitor_data, -+ xc_psr_cmt_type type, uint64_t *monitor_data, - uint64_t *tsc); - int xc_psr_cmt_enabled(xc_interface *xch); - --- -2.35.1 - diff --git a/0039-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch b/0039-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch deleted file mode 100644 index 1797a8f..0000000 --- a/0039-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 61b9c2ceeb94b0cdaff01023cc5523b1f13e66e2 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:14:34 +0200 -Subject: [PATCH 39/51] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio - sub-option - -This was an oversight from when unpriv-mmio was introduced. 
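The contract being restored, sketched with the option names used in this series: a bare `spec-ctrl=0` must force every sub-option off, so each newly added knob needs a line in parse_spec_ctrl()'s global-disable branch:

    /* Hedged sketch of the global-disable branch; neighbouring options
     * abbreviated to those visible in this series. */
    if ( !val )                    /* spec-ctrl=0 / spec-ctrl=no */
    {
        opt_l1d_flush = 0;
        opt_branch_harden = false;
        opt_srb_lock = 0;
        opt_unpriv_mmio = false;   /* the line this patch adds */
    }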
- -Fixes: 8c24b70fedcb ("x86/spec-ctrl: Add spec-ctrl=unpriv-mmio") -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 4cdb519d797c19ebb8fadc5938cdb47479d5a21b -master date: 2022-07-11 15:21:35 +0100 ---- - xen/arch/x86/spec_ctrl.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index dfdd45c358c4..ae74943c1053 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -122,6 +122,7 @@ static int __init parse_spec_ctrl(const char *s) - opt_l1d_flush = 0; - opt_branch_harden = false; - opt_srb_lock = 0; -+ opt_unpriv_mmio = false; - } - else if ( val > 0 ) - rc = -EINVAL; --- -2.35.1 - diff --git a/0040-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch b/0040-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch deleted file mode 100644 index 3512590..0000000 --- a/0040-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch +++ /dev/null @@ -1,87 +0,0 @@ -From eec5b02403a9df2523527caad24f17af5060fbe7 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:15:03 +0200 -Subject: [PATCH 40/51] xen/cmdline: Extend parse_boolean() to signal a name - match - -This will help parsing a sub-option which has boolean and non-boolean options -available. - -First, rework 'int val' into 'bool has_neg_prefix'. This inverts it's value, -but the resulting logic is far easier to follow. - -Second, reject anything of the form 'no-$FOO=' which excludes ambiguous -constructs such as 'no-$foo=yes' which have never been valid. - -This just leaves the case where everything is otherwise fine, but parse_bool() -can't interpret the provided string. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 382326cac528dd1eb0d04efd5c05363c453e29f4 -master date: 2022-07-11 15:21:35 +0100 ---- - xen/common/kernel.c | 20 ++++++++++++++++---- - xen/include/xen/lib.h | 3 ++- - 2 files changed, 18 insertions(+), 5 deletions(-) - -diff --git a/xen/common/kernel.c b/xen/common/kernel.c -index e119e5401f9d..7ed96521f97a 100644 ---- a/xen/common/kernel.c -+++ b/xen/common/kernel.c -@@ -272,9 +272,9 @@ int parse_bool(const char *s, const char *e) - int parse_boolean(const char *name, const char *s, const char *e) - { - size_t slen, nlen; -- int val = !!strncmp(s, "no-", 3); -+ bool has_neg_prefix = !strncmp(s, "no-", 3); - -- if ( !val ) -+ if ( has_neg_prefix ) - s += 3; - - slen = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s); -@@ -286,11 +286,23 @@ int parse_boolean(const char *name, const char *s, const char *e) - - /* Exact, unadorned name? Result depends on the 'no-' prefix. */ - if ( slen == nlen ) -- return val; -+ return !has_neg_prefix; -+ -+ /* Inexact match with a 'no-' prefix? Not valid. */ -+ if ( has_neg_prefix ) -+ return -1; - - /* =$SOMETHING? Defer to the regular boolean parsing. */ - if ( s[nlen] == '=' ) -- return parse_bool(&s[nlen + 1], e); -+ { -+ int b = parse_bool(&s[nlen + 1], e); -+ -+ if ( b >= 0 ) -+ return b; -+ -+ /* Not a boolean, but the name matched. Signal specially. */ -+ return -2; -+ } - - /* Unrecognised. Give up. 
*/ - return -1; -diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h -index c6987973bf88..2296044caf79 100644 ---- a/xen/include/xen/lib.h -+++ b/xen/include/xen/lib.h -@@ -80,7 +80,8 @@ int parse_bool(const char *s, const char *e); - /** - * Given a specific name, parses a string of the form: - * [no-]$NAME[=...] -- * returning 0 or 1 for a recognised boolean, or -1 for an error. -+ * returning 0 or 1 for a recognised boolean. Returns -1 for general errors, -+ * and -2 for "not a boolean, but $NAME= matches". - */ - int parse_boolean(const char *name, const char *s, const char *e); - --- -2.35.1 - diff --git a/0041-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch b/0041-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch deleted file mode 100644 index 9964bb9..0000000 --- a/0041-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch +++ /dev/null @@ -1,137 +0,0 @@ -From f066c8bb3e5686141cef6fa1dc86ea9f37c5388a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 12 Jul 2022 11:15:37 +0200 -Subject: [PATCH 41/51] x86/spec-ctrl: Add fine-grained cmdline suboptions for - primitives - -Support controling the PV/HVM suboption of msr-sc/rsb/md-clear, which -previously wasn't possible. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 27357c394ba6e1571a89105b840ce1c6f026485c -master date: 2022-07-11 15:21:35 +0100 ---- - docs/misc/xen-command-line.pandoc | 12 ++++-- - xen/arch/x86/spec_ctrl.c | 66 ++++++++++++++++++++++++++----- - 2 files changed, 66 insertions(+), 12 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 46e9c58d35cd..1bbdb55129cc 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2233,7 +2233,8 @@ not be able to control the state of the mitigation. - By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) --> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>, -+> `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>, -+> {msr-sc,rsb,md-clear}=<bool>|{pv,hvm}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio}=<bool> ]` -@@ -2258,12 +2259,17 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. - --The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` options offer fine - grained control over the primitives by Xen. These impact Xen's ability to --protect itself, and Xen's ability to virtualise support for guests to use. -+protect itself, and/or Xen's ability to virtualise support for guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. -+* Each other option can be used either as a plain boolean -+ (e.g. `spec-ctrl=rsb` to control both the PV and HVM sub-options), or with -+ `pv=` or `hvm=` subsuboptions (e.g. `spec-ctrl=rsb=no-hvm` to disable HVM -+ RSB only). -+ - * `msr-sc=` offers control over Xen's support for manipulating `MSR_SPEC_CTRL` - on entry and exit. These blocks are necessary to virtualise support for - guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. 
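As a hedged usage note, constructed from the parsing logic below rather than taken from the patch itself, the new fine-grained forms compose like this:

    spec-ctrl=msr-sc=no-hvm    # keep PV MSR_SPEC_CTRL handling, drop it for HVM
    spec-ctrl=rsb=pv           # RSB stuffing for PV guests only
    spec-ctrl=md-clear=no      # plain boolean still controls both guest types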
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index ae74943c1053..9507e5da60a9 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -147,20 +147,68 @@ static int __init parse_spec_ctrl(const char *s) - opt_rsb_hvm = val; - opt_md_clear_hvm = val; - } -- else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) - { -- opt_msr_sc_pv = val; -- opt_msr_sc_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_msr_sc_pv = opt_msr_sc_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("msr-sc="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_msr_sc_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_msr_sc_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } -- else if ( (val = parse_boolean("rsb", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("rsb", s, ss)) != -1 ) - { -- opt_rsb_pv = val; -- opt_rsb_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_rsb_pv = opt_rsb_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("rsb="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_rsb_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_rsb_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } -- else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) - { -- opt_md_clear_pv = val; -- opt_md_clear_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_md_clear_pv = opt_md_clear_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("md-clear="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_md_clear_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_md_clear_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } - - /* Xen's speculative sidechannel mitigation settings. */ --- -2.35.1 - diff --git a/0042-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch b/0042-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch deleted file mode 100644 index eea790a..0000000 --- a/0042-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 14fd97e3de939a63a6e467f240efb49fe226a5dc Mon Sep 17 00:00:00 2001 -From: Anthony PERARD <anthony.perard@citrix.com> -Date: Tue, 12 Jul 2022 11:16:10 +0200 -Subject: [PATCH 42/51] tools/helpers: fix build of xen-init-dom0 with -Werror - -Missing prototype of asprintf() without _GNU_SOURCE. 
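A hedged standalone reproduction, not part of the patch: asprintf() is a GNU extension, so glibc only declares it when _GNU_SOURCE is defined before the first libc header, and with -Werror the missing prototype becomes a hard build failure:

    #define _GNU_SOURCE   /* must precede every libc include */
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        char *path;

        /* Without the define above, -Werror builds fail here with an
         * implicit-declaration error. */
        if ( asprintf(&path, "/local/domain/%u", 0u) < 0 )
            return 1;

        free(path);
        return 0;
    }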
- -Signed-off-by: Anthony PERARD <anthony.perard@citrix.com> -Reviewed-by: Henry Wang <Henry.Wang@arm.com> -master commit: d693b22733044d68e9974766b5c9e6259c9b1708 -master date: 2022-07-12 08:38:35 +0200 ---- - tools/helpers/xen-init-dom0.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/helpers/xen-init-dom0.c b/tools/helpers/xen-init-dom0.c -index c99224a4b607..b4861c9e8041 100644 ---- a/tools/helpers/xen-init-dom0.c -+++ b/tools/helpers/xen-init-dom0.c -@@ -1,3 +1,5 @@ -+#define _GNU_SOURCE -+ - #include <stdlib.h> - #include <stdint.h> - #include <string.h> --- -2.35.1 - diff --git a/0043-libxl-check-return-value-of-libxl__xs_directory-in-n.patch b/0043-libxl-check-return-value-of-libxl__xs_directory-in-n.patch deleted file mode 100644 index 0c2470a..0000000 --- a/0043-libxl-check-return-value-of-libxl__xs_directory-in-n.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 744accad1b73223b3261e3e678e16e030d83b179 Mon Sep 17 00:00:00 2001 -From: Anthony PERARD <anthony.perard@citrix.com> -Date: Tue, 12 Jul 2022 11:16:30 +0200 -Subject: [PATCH 43/51] libxl: check return value of libxl__xs_directory in - name2bdf - -libxl__xs_directory() can potentially return NULL without setting `n`. -As `n` isn't initialised, we need to check libxl__xs_directory() -return value before checking `n`. Otherwise, `n` might be non-zero -with `bdfs` NULL which would lead to a segv. - -Fixes: 57bff091f4 ("libxl: add 'name' field to 'libxl_device_pci' in the IDL...") -Reported-by: "G.R." <firemeteor@users.sourceforge.net> -Signed-off-by: Anthony PERARD <anthony.perard@citrix.com> -Reviewed-by: Juergen Gross <jgross@suse.com> -Tested-by: "G.R." <firemeteor@users.sourceforge.net> -master commit: d778089ac70e5b8e3bdea0c85fc8c0b9ed0eaf2f -master date: 2022-07-12 08:38:51 +0200 ---- - tools/libs/light/libxl_pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/libs/light/libxl_pci.c b/tools/libs/light/libxl_pci.c -index 4bbbfe9f168f..ce3bf7c0ae81 100644 ---- a/tools/libs/light/libxl_pci.c -+++ b/tools/libs/light/libxl_pci.c -@@ -859,7 +859,7 @@ static int name2bdf(libxl__gc *gc, libxl_device_pci *pci) - int rc = ERROR_NOTFOUND; - - bdfs = libxl__xs_directory(gc, XBT_NULL, PCI_INFO_PATH, &n); -- if (!n) -+ if (!bdfs || !n) - goto out; - - for (i = 0; i < n; i++) { --- -2.35.1 - diff --git a/0044-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch b/0044-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch deleted file mode 100644 index d8517f8..0000000 --- a/0044-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 3a280cbae7022b83af91c27a8e2211ba3b1234f5 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 1 Jul 2022 15:59:40 +0100 -Subject: [PATCH 44/51] x86/spec-ctrl: Rework spec_ctrl_flags context switching - -We are shortly going to need to context switch new bits in both the vcpu and -S3 paths. Introduce SCF_IST_MASK and SCF_DOM_MASK, and rework d->arch.verw -into d->arch.spec_ctrl_flags to accommodate. - -No functional change. - -This is part of XSA-407. 
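The heart of the rework, restated as a sketch using the names from the hunks below: per-domain bits are grouped under SCF_DOM_MASK and spliced into the top-of-stack block at context switch, while SCF_IST_MASK collects the bits the S3 resume path must suppress until microcode is reloaded:

    #define SCF_ist_wrmsr  (1 << 1)
    #define SCF_verw       (1 << 3)

    #define SCF_IST_MASK   (SCF_ist_wrmsr)   /* inhibited across S3 resume */
    #define SCF_DOM_MASK   (SCF_verw)        /* merged per domain */

    /* In context_switch(): keep the fixed/IST bits, splice in the next
     * domain's policy. */
    info->spec_ctrl_flags =
        (info->spec_ctrl_flags & ~SCF_DOM_MASK) |
        (nextd->arch.spec_ctrl_flags & SCF_DOM_MASK);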
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 5796912f7279d9348a3166655588d30eae9f72cc) ---- - xen/arch/x86/acpi/power.c | 8 ++++---- - xen/arch/x86/domain.c | 8 ++++---- - xen/arch/x86/spec_ctrl.c | 9 ++++++--- - xen/include/asm-x86/domain.h | 3 +-- - xen/include/asm-x86/spec_ctrl.h | 30 ++++++++++++++++++++++++++++- - xen/include/asm-x86/spec_ctrl_asm.h | 3 --- - 6 files changed, 44 insertions(+), 17 deletions(-) - -diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c -index 5eaa77f66a28..dd397f713067 100644 ---- a/xen/arch/x86/acpi/power.c -+++ b/xen/arch/x86/acpi/power.c -@@ -248,8 +248,8 @@ static int enter_state(u32 state) - error = 0; - - ci = get_cpu_info(); -- /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */ -- ci->spec_ctrl_flags &= ~SCF_ist_wrmsr; -+ /* Avoid NMI/#MC using unsafe MSRs until we've reloaded microcode. */ -+ ci->spec_ctrl_flags &= ~SCF_IST_MASK; - - ACPI_FLUSH_CPU_CACHE(); - -@@ -292,8 +292,8 @@ static int enter_state(u32 state) - if ( !recheck_cpu_features(0) ) - panic("Missing previously available feature(s)\n"); - -- /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */ -- ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr); -+ /* Re-enabled default NMI/#MC use of MSRs now microcode is loaded. */ -+ ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_IST_MASK); - - if ( boot_cpu_has(X86_FEATURE_IBRSB) || boot_cpu_has(X86_FEATURE_IBRS) ) - { -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 1fe6644a71ae..82a0b73cf6ef 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2092,10 +2092,10 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - } - } - -- /* Update the top-of-stack block with the VERW disposition. */ -- info->spec_ctrl_flags &= ~SCF_verw; -- if ( nextd->arch.verw ) -- info->spec_ctrl_flags |= SCF_verw; -+ /* Update the top-of-stack block with the new spec_ctrl settings. */ -+ info->spec_ctrl_flags = -+ (info->spec_ctrl_flags & ~SCF_DOM_MASK) | -+ (nextd->arch.spec_ctrl_flags & SCF_DOM_MASK); - } - - sched_context_switched(prev, next); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 9507e5da60a9..7e646680f1c7 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1010,9 +1010,12 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- d->arch.verw = -- (pv ? opt_md_clear_pv : opt_md_clear_hvm) || -- (opt_fb_clear_mmio && is_iommu_enabled(d)); -+ bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || -+ (opt_fb_clear_mmio && is_iommu_enabled(d))); -+ -+ d->arch.spec_ctrl_flags = -+ (verw ? SCF_verw : 0) | -+ 0; - } - - void __init init_speculation_mitigations(void) -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 2398a1d99da9..e4c099262cb7 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -319,8 +319,7 @@ struct arch_domain - uint32_t pci_cf8; - uint8_t cmos_idx; - -- /* Use VERW on return-to-guest for its flushing side effect. 
*/ -- bool verw; -+ uint8_t spec_ctrl_flags; /* See SCF_DOM_MASK */ - - union { - struct pv_domain pv; -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 7e83e0179fb9..3cd72e40305f 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -20,12 +20,40 @@ - #ifndef __X86_SPEC_CTRL_H__ - #define __X86_SPEC_CTRL_H__ - --/* Encoding of cpuinfo.spec_ctrl_flags */ -+/* -+ * Encoding of: -+ * cpuinfo.spec_ctrl_flags -+ * default_spec_ctrl_flags -+ * domain.spec_ctrl_flags -+ * -+ * Live settings are in the top-of-stack block, because they need to be -+ * accessable when XPTI is active. Some settings are fixed from boot, some -+ * context switched per domain, and some inhibited in the S3 path. -+ */ - #define SCF_use_shadow (1 << 0) - #define SCF_ist_wrmsr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) - -+/* -+ * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some -+ * functionality requires updated microcode to work. -+ * -+ * On boot, this is easy; we load microcode before figuring out which -+ * speculative protections to apply. However, on the S3 resume path, we must -+ * be able to disable the configured mitigations until microcode is reloaded. -+ * -+ * These are the controls to inhibit on the S3 resume path until microcode has -+ * been reloaded. -+ */ -+#define SCF_IST_MASK (SCF_ist_wrmsr) -+ -+/* -+ * Some speculative protections are per-domain. These settings are merged -+ * into the top-of-stack block in the context switch path. -+ */ -+#define SCF_DOM_MASK (SCF_verw) -+ - #ifndef __ASSEMBLY__ - - #include <asm/alternative.h> -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 5a590bac44aa..66b00d511fc6 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -248,9 +248,6 @@ - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. -- * Fine grain control of SCF_ist_wrmsr is needed for safety in the S3 resume -- * path to avoid using MSR_SPEC_CTRL before the microcode introducing it has -- * been reloaded. - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* --- -2.35.1 - diff --git a/0045-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch b/0045-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch deleted file mode 100644 index 5b841a6..0000000 --- a/0045-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 31aa2a20bfefc3a8a200da54a56471bf99f9630e Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 28 Jun 2022 14:36:56 +0100 -Subject: [PATCH 45/51] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr - -We are about to introduce SCF_ist_ibpb, at which point SCF_ist_wrmsr becomes -ambiguous. - -No functional change. - -This is part of XSA-407. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 76d6a36f645dfdbad8830559d4d52caf36efc75e) ---- - xen/arch/x86/spec_ctrl.c | 6 +++--- - xen/include/asm-x86/spec_ctrl.h | 4 ++-- - xen/include/asm-x86/spec_ctrl_asm.h | 8 ++++---- - 3 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 7e646680f1c7..89f95c083e1b 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1115,7 +1115,7 @@ void __init init_speculation_mitigations(void) - { - if ( opt_msr_sc_pv ) - { -- default_spec_ctrl_flags |= SCF_ist_wrmsr; -+ default_spec_ctrl_flags |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); - } - -@@ -1126,7 +1126,7 @@ void __init init_speculation_mitigations(void) - * Xen's value is not restored atomically. An early NMI hitting - * the VMExit path needs to restore Xen's value for safety. - */ -- default_spec_ctrl_flags |= SCF_ist_wrmsr; -+ default_spec_ctrl_flags |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } - } -@@ -1139,7 +1139,7 @@ void __init init_speculation_mitigations(void) - * on real hardware matches the availability of MSR_SPEC_CTRL in the - * first place. - * -- * No need for SCF_ist_wrmsr because Xen's value is restored -+ * No need for SCF_ist_sc_msr because Xen's value is restored - * atomically WRT NMIs in the VMExit path. - * - * TODO: Adjust cpu_has_svm_spec_ctrl to be usable earlier on boot. -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 3cd72e40305f..f8f0ac47e759 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -31,7 +31,7 @@ - * context switched per domain, and some inhibited in the S3 path. - */ - #define SCF_use_shadow (1 << 0) --#define SCF_ist_wrmsr (1 << 1) -+#define SCF_ist_sc_msr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) - -@@ -46,7 +46,7 @@ - * These are the controls to inhibit on the S3 resume path until microcode has - * been reloaded. - */ --#define SCF_IST_MASK (SCF_ist_wrmsr) -+#define SCF_IST_MASK (SCF_ist_sc_msr) - - /* - * Some speculative protections are per-domain. These settings are merged -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 66b00d511fc6..0ff1b118f882 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -266,8 +266,8 @@ - - .L\@_skip_rsb: - -- test $SCF_ist_wrmsr, %al -- jz .L\@_skip_wrmsr -+ test $SCF_ist_sc_msr, %al -+ jz .L\@_skip_msr_spec_ctrl - - xor %edx, %edx - testb $3, UREGS_cs(%rsp) -@@ -290,7 +290,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * to speculate around the WRMSR. As a result, we need a dispatch - * serialising instruction in the else clause. 
- */ --.L\@_skip_wrmsr: -+.L\@_skip_msr_spec_ctrl: - lfence - UNLIKELY_END(\@_serialise) - .endm -@@ -301,7 +301,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * Requires %rbx=stack_end - * Clobbers %rax, %rcx, %rdx - */ -- testb $SCF_ist_wrmsr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) - jz .L\@_skip - - DO_SPEC_CTRL_EXIT_TO_XEN --- -2.35.1 - diff --git a/0046-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch b/0046-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch deleted file mode 100644 index a950639..0000000 --- a/0046-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch +++ /dev/null @@ -1,97 +0,0 @@ -From e7671561c84322860875745e57b228a7a310f2bf Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 4 Jul 2022 21:32:17 +0100 -Subject: [PATCH 46/51] x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch - -We are about to introduce the use of IBPB at different points in Xen, making -opt_ibpb ambiguous. Rename it to opt_ibpb_ctxt_switch. - -No functional change. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit a8e5ef079d6f5c88c472e3e620db5a8d1402a50d) ---- - xen/arch/x86/domain.c | 2 +- - xen/arch/x86/spec_ctrl.c | 10 +++++----- - xen/include/asm-x86/spec_ctrl.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 82a0b73cf6ef..0d39981550ca 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2064,7 +2064,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - - ctxt_switch_levelling(next); - -- if ( opt_ibpb && !is_idle_domain(nextd) ) -+ if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) - { - static DEFINE_PER_CPU(unsigned int, last); - unsigned int *last_id = &this_cpu(last); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 89f95c083e1b..f4ae36eae2d0 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -54,7 +54,7 @@ int8_t __initdata opt_stibp = -1; - bool __read_mostly opt_ssbd; - int8_t __initdata opt_psfd = -1; - --bool __read_mostly opt_ibpb = true; -+bool __read_mostly opt_ibpb_ctxt_switch = true; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = true; -@@ -117,7 +117,7 @@ static int __init parse_spec_ctrl(const char *s) - - opt_thunk = THUNK_JMP; - opt_ibrs = 0; -- opt_ibpb = false; -+ opt_ibpb_ctxt_switch = false; - opt_ssbd = false; - opt_l1d_flush = 0; - opt_branch_harden = false; -@@ -238,7 +238,7 @@ static int __init parse_spec_ctrl(const char *s) - - /* Misc settings. */ - else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -- opt_ibpb = val; -+ opt_ibpb_ctxt_switch = val; - else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 ) - opt_eager_fpu = val; - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) -@@ -458,7 +458,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (opt_tsx & 1) ? " TSX+" : " TSX-", - !cpu_has_srbds_ctrl ? "" : - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", -- opt_ibpb ? " IBPB" : "", -+ opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", - opt_md_clear_pv || opt_md_clear_hvm || - opt_fb_clear_mmio ? " VERW" : "", -@@ -1240,7 +1240,7 @@ void __init init_speculation_mitigations(void) - - /* Check we have hardware IBPB support before using it... 
*/ - if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -- opt_ibpb = false; -+ opt_ibpb_ctxt_switch = false; - - /* Check whether Eager FPU should be enabled by default. */ - if ( opt_eager_fpu == -1 ) -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index f8f0ac47e759..fb4365575620 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -63,7 +63,7 @@ - void init_speculation_mitigations(void); - void spec_ctrl_init_domain(struct domain *d); - --extern bool opt_ibpb; -+extern bool opt_ibpb_ctxt_switch; - extern bool opt_ssbd; - extern int8_t opt_eager_fpu; - extern int8_t opt_l1d_flush; --- -2.35.1 - diff --git a/0047-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch b/0047-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch deleted file mode 100644 index 3ce9fd9..0000000 --- a/0047-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 2a9e690a0ad5d54dca4166e089089a07bbe7fc85 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 1 Jul 2022 15:59:40 +0100 -Subject: [PATCH 47/51] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST - -We are shortly going to add a conditional IBPB in this path. - -Therefore, we cannot hold spec_ctrl_flags in %eax, and rely on only clobbering -it after we're done with its contents. %rbx is available for use, and the -more normal register to hold preserved information in. - -With %rax freed up, use it instead of %rdx for the RSB tmp register, and for -the adjustment to spec_ctrl_flags. - -This leaves no use of %rdx, except as 0 for the upper half of WRMSR. In -practice, %rdx is 0 from SAVE_ALL on all paths and isn't likely to change in -the foreseeable future, so update the macro entry requirements to state this -dependency. This marginal optimisation can be revisited if circumstances -change. - -No practical change. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit e9b8d31981f184c6539f91ec54bd9cae29cdae36) ---- - xen/arch/x86/x86_64/entry.S | 4 ++-- - xen/include/asm-x86/spec_ctrl_asm.h | 21 ++++++++++----------- - 2 files changed, 12 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 2a86938f1f32..a1810bf4d311 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -932,7 +932,7 @@ ENTRY(double_fault) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx -@@ -968,7 +968,7 @@ handle_ist_exception: - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 0ff1b118f882..15e24cde00d1 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -251,34 +251,33 @@ - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* -- * Requires %rsp=regs, %r14=stack_end -- * Clobbers %rax, %rcx, %rdx -+ * Requires %rsp=regs, %r14=stack_end, %rdx=0 -+ * Clobbers %rax, %rbx, %rcx, %rdx - * - * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY - * maybexen=1, but with conditionals rather than alternatives. - */ -- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %eax -+ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx - -- test $SCF_ist_rsb, %al -+ test $SCF_ist_rsb, %bl - jz .L\@_skip_rsb - -- DO_OVERWRITE_RSB tmp=rdx /* Clobbers %rcx/%rdx */ -+ DO_OVERWRITE_RSB /* Clobbers %rax/%rcx */ - - .L\@_skip_rsb: - -- test $SCF_ist_sc_msr, %al -+ test $SCF_ist_sc_msr, %bl - jz .L\@_skip_msr_spec_ctrl - -- xor %edx, %edx -+ xor %eax, %eax - testb $3, UREGS_cs(%rsp) -- setnz %dl -- not %edx -- and %dl, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ setnz %al -+ not %eax -+ and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) - - /* Load Xen's intended value. */ - mov $MSR_SPEC_CTRL, %ecx - movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax -- xor %edx, %edx - wrmsr - - /* Opencoded UNLIKELY_START() with no condition. */ --- -2.35.1 - diff --git a/0048-x86-spec-ctrl-Support-IBPB-on-entry.patch b/0048-x86-spec-ctrl-Support-IBPB-on-entry.patch deleted file mode 100644 index d5ad043..0000000 --- a/0048-x86-spec-ctrl-Support-IBPB-on-entry.patch +++ /dev/null @@ -1,300 +0,0 @@ -From 76c5fcee9027fb8823dd501086f0ff3ee3c4231c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 24 Feb 2022 13:44:33 +0000 -Subject: [PATCH 48/51] x86/spec-ctrl: Support IBPB-on-entry - -We are going to need this to mitigate Branch Type Confusion on AMD/Hygon CPUs, -but as we've talked about using it in other cases too, arrange to support it -generally. However, this is also very expensive in some cases, so we're going -to want per-domain controls. - -Introduce SCF_ist_ibpb and SCF_entry_ibpb controls, adding them to the IST and -DOM masks as appropriate. Also introduce X86_FEATURE_IBPB_ENTRY_{PV,HVM} to -to patch the code blocks. - -For SVM, the STGI is serialising enough to protect against Spectre-v1 attacks, -so no "else lfence" is necessary. VT-x will use use the MSR host load list, -so doesn't need any code in the VMExit path. - -For the IST path, we can't safely check CPL==0 to skip a flush, as we might -have hit an entry path before it's IBPB. As IST hitting Xen is rare, flush -irrespective of CPL. A later path, SCF_ist_sc_msr, provides Spectre-v1 -safety. - -For the PV paths, we know we're interrupting CPL>0, while for the INTR paths, -we can safely check CPL==0. Only flush when interrupting guest context. - -An "else lfence" is needed for safety, but we want to be able to skip it on -unaffected CPUs, so the block wants to be an alternative, which means the -lfence has to be inline rather than UNLIKELY() (the replacement block doesn't -have displacements fixed up for anything other than the first instruction). - -As with SPEC_CTRL_ENTRY_FROM_INTR_IST, %rdx is 0 on entry so rely on this to -shrink the logic marginally. Update the comments to specify this new -dependency. - -This is part of XSA-407. 
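
The decision logic described above is implemented in assembly (the DO_SPEC_CTRL_COND_IBPB macro in the spec_ctrl_asm.h hunk below). As a reading aid, here is a hedged C rendering of that logic; wrmsr() is a stub standing in for the WRMSR instruction, and the MSR/flag constants match the patched headers.

    /* C sketch of DO_SPEC_CTRL_COND_IBPB's logic; not the real entry code. */
    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_PRED_CMD    0x00000049
    #define PRED_CMD_IBPB   (1u << 0)
    #define SCF_entry_ibpb  (1u << 5)

    static void wrmsr(uint32_t msr, uint64_t val) { (void)msr; (void)val; }

    static void cond_ibpb(uint8_t spec_ctrl_flags, bool maybexen, uint16_t saved_cs)
    {
        if ( !(spec_ctrl_flags & SCF_entry_ibpb) )
            return; /* the real skip path ends in lfence, for Spectre-v1 safety */

        /*
         * Entries which may interrupt Xen check the saved CPL and skip the
         * expensive barrier when not interrupting guest context.  IST entries
         * cannot do this safely, so they flush irrespective of CPL.
         */
        if ( maybexen && (saved_cs & 3) == 0 )
            return;

        wrmsr(MSR_PRED_CMD, PRED_CMD_IBPB);
    }
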
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 53a570b285694947776d5190f591a0d5b9b18de7) ---- - xen/arch/x86/hvm/svm/entry.S | 18 ++++++++++- - xen/arch/x86/hvm/vmx/vmcs.c | 4 +++ - xen/arch/x86/x86_64/compat/entry.S | 2 +- - xen/arch/x86/x86_64/entry.S | 12 +++---- - xen/include/asm-x86/cpufeatures.h | 2 ++ - xen/include/asm-x86/spec_ctrl.h | 6 ++-- - xen/include/asm-x86/spec_ctrl_asm.h | 49 +++++++++++++++++++++++++++-- - 7 files changed, 81 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 4ae55a2ef605..0ff4008060fa 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -97,7 +97,19 @@ __UNLIKELY_END(nsvm_hap) - - GET_CURRENT(bx) - -- /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo Clob: acd */ -+ /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ -+ -+ .macro svm_vmexit_cond_ibpb -+ testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ jz .L_skip_ibpb -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+.L_skip_ibpb: -+ .endm -+ ALTERNATIVE "", svm_vmexit_cond_ibpb, X86_FEATURE_IBPB_ENTRY_HVM -+ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM - - .macro svm_vmexit_spec_ctrl -@@ -114,6 +126,10 @@ __UNLIKELY_END(nsvm_hap) - ALTERNATIVE "", svm_vmexit_spec_ctrl, X86_FEATURE_SC_MSR_HVM - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - -+ /* -+ * STGI is executed unconditionally, and is sufficiently serialising -+ * to safely resolve any Spectre-v1 concerns in the above logic. -+ */ - stgi - GLOBAL(svm_stgi_label) - mov %rsp,%rdi -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index f9f9bc18cdbc..dd817cee4e69 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -1345,6 +1345,10 @@ static int construct_vmcs(struct vcpu *v) - rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D, - VMX_MSR_GUEST_LOADONLY); - -+ if ( !rc && (d->arch.spec_ctrl_flags & SCF_entry_ibpb) ) -+ rc = vmx_add_msr(v, MSR_PRED_CMD, PRED_CMD_IBPB, -+ VMX_MSR_HOST); -+ - out: - vmx_vmcs_exit(v); - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 5fd6dbbd4513..b86d38d1c50d 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -18,7 +18,7 @@ ENTRY(entry_int82) - movl $HYPERCALL_VECTOR, 4(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - CR4_PV32_RESTORE -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index a1810bf4d311..fba8ae498f74 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -260,7 +260,7 @@ ENTRY(lstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -298,7 +298,7 @@ ENTRY(cstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! 
`ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -338,7 +338,7 @@ GLOBAL(sysenter_eflags_saved) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -392,7 +392,7 @@ ENTRY(int80_direct_trap) - movl $0x80, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -674,7 +674,7 @@ ENTRY(common_interrupt) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -@@ -708,7 +708,7 @@ GLOBAL(handle_exception) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 493d338a085e..672c9ee22ba2 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -39,6 +39,8 @@ XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ - XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ -+XEN_CPUFEATURE(IBPB_ENTRY_PV, X86_SYNTH(28)) /* MSR_PRED_CMD used by Xen for PV */ -+XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */ - - /* Bug words follow the synthetic words. */ - #define X86_NR_BUG 1 -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index fb4365575620..3fc599a817c4 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -34,6 +34,8 @@ - #define SCF_ist_sc_msr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) -+#define SCF_ist_ibpb (1 << 4) -+#define SCF_entry_ibpb (1 << 5) - - /* - * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some -@@ -46,13 +48,13 @@ - * These are the controls to inhibit on the S3 resume path until microcode has - * been reloaded. - */ --#define SCF_IST_MASK (SCF_ist_sc_msr) -+#define SCF_IST_MASK (SCF_ist_sc_msr | SCF_ist_ibpb) - - /* - * Some speculative protections are per-domain. These settings are merged - * into the top-of-stack block in the context switch path. 
- */ --#define SCF_DOM_MASK (SCF_verw) -+#define SCF_DOM_MASK (SCF_verw | SCF_entry_ibpb) - - #ifndef __ASSEMBLY__ - -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 15e24cde00d1..9eb4ad9ab71d 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -88,6 +88,35 @@ - * - SPEC_CTRL_EXIT_TO_{SVM,VMX} - */ - -+.macro DO_SPEC_CTRL_COND_IBPB maybexen:req -+/* -+ * Requires %rsp=regs (also cpuinfo if !maybexen) -+ * Requires %r14=stack_end (if maybexen), %rdx=0 -+ * Clobbers %rax, %rcx, %rdx -+ * -+ * Conditionally issue IBPB if SCF_entry_ibpb is active. In the maybexen -+ * case, we can safely look at UREGS_cs to skip taking the hit when -+ * interrupting Xen. -+ */ -+ .if \maybexen -+ testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ jz .L\@_skip -+ testb $3, UREGS_cs(%rsp) -+ .else -+ testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ .endif -+ jz .L\@_skip -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+ jmp .L\@_done -+ -+.L\@_skip: -+ lfence -+.L\@_done: -+.endm -+ - .macro DO_OVERWRITE_RSB tmp=rax - /* - * Requires nothing -@@ -225,12 +254,16 @@ - - /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ - #define SPEC_CTRL_ENTRY_FROM_PV \ -+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ -+ X86_FEATURE_IBPB_ENTRY_PV; \ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV - - /* Use in interrupt/exception context. May interrupt Xen or PV context. */ - #define SPEC_CTRL_ENTRY_FROM_INTR \ -+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ -+ X86_FEATURE_IBPB_ENTRY_PV; \ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV -@@ -254,11 +287,23 @@ - * Requires %rsp=regs, %r14=stack_end, %rdx=0 - * Clobbers %rax, %rbx, %rcx, %rdx - * -- * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY -- * maybexen=1, but with conditionals rather than alternatives. -+ * This is logical merge of: -+ * DO_SPEC_CTRL_COND_IBPB maybexen=0 -+ * DO_OVERWRITE_RSB -+ * DO_SPEC_CTRL_ENTRY maybexen=1 -+ * but with conditionals rather than alternatives. - */ - movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx - -+ test $SCF_ist_ibpb, %bl -+ jz .L\@_skip_ibpb -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+ -+.L\@_skip_ibpb: -+ - test $SCF_ist_rsb, %bl - jz .L\@_skip_rsb - --- -2.35.1 - diff --git a/0049-x86-cpuid-Enumeration-for-BTC_NO.patch b/0049-x86-cpuid-Enumeration-for-BTC_NO.patch deleted file mode 100644 index 0e5d119..0000000 --- a/0049-x86-cpuid-Enumeration-for-BTC_NO.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 0826c7596d35c887b3b7858137c7ac374d9ef17a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 16 May 2022 15:48:24 +0100 -Subject: [PATCH 49/51] x86/cpuid: Enumeration for BTC_NO - -BTC_NO indicates that hardware is not succeptable to Branch Type Confusion. - -Zen3 CPUs don't suffer BTC. - -This is part of XSA-407. 
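
The bit being enumerated above is CPUID leaf 0x80000008, EBX bit 29, as the libxl/xen-cpuid hunks below record. A small userspace sketch of probing it directly, using GCC/Clang's cpuid.h; this is purely illustrative and not part of the patch:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* BTC_NO lives in CPUID.0x80000008:EBX[29]. */
        if ( __get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx) &&
             (ebx & (1u << 29)) )
            puts("BTC_NO: not susceptible to Branch Type Confusion");
        else
            puts("BTC_NO not enumerated (hardware may still be unaffected,"
                 " e.g. bare-metal Zen3 predating the bit's allocation)");
        return 0;
    }
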
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 76cb04ad64f3ab9ae785988c40655a71dde9c319) ---- - tools/libs/light/libxl_cpuid.c | 1 + - tools/misc/xen-cpuid.c | 2 +- - xen/arch/x86/cpu/amd.c | 10 ++++++++++ - xen/arch/x86/spec_ctrl.c | 5 +++-- - xen/include/public/arch-x86/cpufeatureset.h | 1 + - 5 files changed, 16 insertions(+), 3 deletions(-) - -diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c -index d462f9e421ed..bf6fdee360a9 100644 ---- a/tools/libs/light/libxl_cpuid.c -+++ b/tools/libs/light/libxl_cpuid.c -@@ -288,6 +288,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) - {"virt-ssbd", 0x80000008, NA, CPUID_REG_EBX, 25, 1}, - {"ssb-no", 0x80000008, NA, CPUID_REG_EBX, 26, 1}, - {"psfd", 0x80000008, NA, CPUID_REG_EBX, 28, 1}, -+ {"btc-no", 0x80000008, NA, CPUID_REG_EBX, 29, 1}, - - {"nc", 0x80000008, NA, CPUID_REG_ECX, 0, 8}, - {"apicidsize", 0x80000008, NA, CPUID_REG_ECX, 12, 4}, -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index bc7dcf55757a..fe22f5f5b68b 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -158,7 +158,7 @@ static const char *const str_e8b[32] = - /* [22] */ [23] = "ppin", - [24] = "amd-ssbd", [25] = "virt-ssbd", - [26] = "ssb-no", -- [28] = "psfd", -+ [28] = "psfd", [29] = "btc-no", - }; - - static const char *const str_7d0[32] = -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index b3b9a0df5fed..b158e3acb5c7 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -847,6 +847,16 @@ static void init_amd(struct cpuinfo_x86 *c) - warning_add(text); - } - break; -+ -+ case 0x19: -+ /* -+ * Zen3 (Fam19h model < 0x10) parts are not susceptible to -+ * Branch Type Confusion, but predate the allocation of the -+ * BTC_NO bit. Fill it back in if we're not virtualised. -+ */ -+ if (!cpu_has_hypervisor && !cpu_has(c, X86_FEATURE_BTC_NO)) -+ __set_bit(X86_FEATURE_BTC_NO, c->x86_capability); -+ break; - } - - display_cacheinfo(c); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index f4ae36eae2d0..0f101c057f3e 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -388,7 +388,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -403,7 +403,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", -- (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : ""); -+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : "", -+ (e8b & cpufeat_mask(X86_FEATURE_BTC_NO)) ? " BTC_NO" : ""); - - /* Hardware features which need driving to mitigate issues. 
*/ - printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 743b857dcd5c..e7b8167800a2 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -266,6 +266,7 @@ XEN_CPUFEATURE(AMD_SSBD, 8*32+24) /*S MSR_SPEC_CTRL.SSBD available */ - XEN_CPUFEATURE(VIRT_SSBD, 8*32+25) /* MSR_VIRT_SPEC_CTRL.SSBD */ - XEN_CPUFEATURE(SSB_NO, 8*32+26) /*A Hardware not vulnerable to SSB */ - XEN_CPUFEATURE(PSFD, 8*32+28) /*S MSR_SPEC_CTRL.PSFD */ -+XEN_CPUFEATURE(BTC_NO, 8*32+29) /*A Hardware not vulnerable to Branch Type Confusion */ - - /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ - XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ --- -2.35.1 - diff --git a/0050-x86-spec-ctrl-Enable-Zen2-chickenbit.patch b/0050-x86-spec-ctrl-Enable-Zen2-chickenbit.patch deleted file mode 100644 index c83844d..0000000 --- a/0050-x86-spec-ctrl-Enable-Zen2-chickenbit.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 5457a6870eb1369b868f7b8e833966ed43a773ad Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 15 Mar 2022 18:30:25 +0000 -Subject: [PATCH 50/51] x86/spec-ctrl: Enable Zen2 chickenbit - -... as instructed in the Branch Type Confusion whitepaper. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -(cherry picked from commit 9deaf2d932f08c16c6b96a1c426e4b1142c0cdbe) ---- - xen/arch/x86/cpu/amd.c | 28 ++++++++++++++++++++++++++++ - xen/arch/x86/cpu/cpu.h | 1 + - xen/arch/x86/cpu/hygon.c | 6 ++++++ - xen/include/asm-x86/msr-index.h | 1 + - 4 files changed, 36 insertions(+) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index b158e3acb5c7..37ac84ddd74d 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -731,6 +731,31 @@ void amd_init_ssbd(const struct cpuinfo_x86 *c) - printk_once(XENLOG_ERR "No SSBD controls available\n"); - } - -+/* -+ * On Zen2 we offer this chicken (bit) on the altar of Speculation. -+ * -+ * Refer to the AMD Branch Type Confusion whitepaper: -+ * https://XXX -+ * -+ * Setting this unnamed bit supposedly causes prediction information on -+ * non-branch instructions to be ignored. It is to be set unilaterally in -+ * newer microcode. -+ * -+ * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a -+ * simple model number comparison, so use STIBP as a heuristic to separate the -+ * two uarches in Fam17h(AMD)/18h(Hygon). 
-+ */ -+void amd_init_spectral_chicken(void) -+{ -+ uint64_t val, chickenbit = 1 << 1; -+ -+ if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP)) -+ return; -+ -+ if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit)) -+ wrmsr_safe(MSR_AMD64_DE_CFG2, val | chickenbit); -+} -+ - void __init detect_zen2_null_seg_behaviour(void) - { - uint64_t base; -@@ -796,6 +821,9 @@ static void init_amd(struct cpuinfo_x86 *c) - - amd_init_ssbd(c); - -+ if (c->x86 == 0x17) -+ amd_init_spectral_chicken(); -+ - /* Probe for NSCB on Zen2 CPUs when not virtualised */ - if (!cpu_has_hypervisor && !cpu_has_nscb && c == &boot_cpu_data && - c->x86 == 0x17) -diff --git a/xen/arch/x86/cpu/cpu.h b/xen/arch/x86/cpu/cpu.h -index b593bd85f04f..145bc5156a86 100644 ---- a/xen/arch/x86/cpu/cpu.h -+++ b/xen/arch/x86/cpu/cpu.h -@@ -22,4 +22,5 @@ void early_init_amd(struct cpuinfo_x86 *c); - void amd_log_freq(const struct cpuinfo_x86 *c); - void amd_init_lfence(struct cpuinfo_x86 *c); - void amd_init_ssbd(const struct cpuinfo_x86 *c); -+void amd_init_spectral_chicken(void); - void detect_zen2_null_seg_behaviour(void); -diff --git a/xen/arch/x86/cpu/hygon.c b/xen/arch/x86/cpu/hygon.c -index cdc94130dd2e..6f8d491297e8 100644 ---- a/xen/arch/x86/cpu/hygon.c -+++ b/xen/arch/x86/cpu/hygon.c -@@ -40,6 +40,12 @@ static void init_hygon(struct cpuinfo_x86 *c) - c->x86 == 0x18) - detect_zen2_null_seg_behaviour(); - -+ /* -+ * TODO: Check heuristic safety with Hygon first -+ if (c->x86 == 0x18) -+ amd_init_spectral_chicken(); -+ */ -+ - /* - * Hygon CPUs before Zen2 don't clear segment bases/limits when - * loading a NULL selector. -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 72bc32ba04ff..d3735e499e0f 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -361,6 +361,7 @@ - #define MSR_AMD64_DE_CFG 0xc0011029 - #define AMD64_DE_CFG_LFENCE_SERIALISE (_AC(1, ULL) << 1) - #define MSR_AMD64_EX_CFG 0xc001102c -+#define MSR_AMD64_DE_CFG2 0xc00110e3 - - #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027 - #define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019 --- -2.35.1 - diff --git a/0051-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch b/0051-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch deleted file mode 100644 index e313ede..0000000 --- a/0051-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch +++ /dev/null @@ -1,305 +0,0 @@ -From 0a5387a01165b46c8c85e7f7e2ddbe60a7f5db44 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 27 Jun 2022 19:29:40 +0100 -Subject: [PATCH 51/51] x86/spec-ctrl: Mitigate Branch Type Confusion when - possible - -Branch Type Confusion affects AMD/Hygon CPUs on Zen2 and earlier. To -mitigate, we require SMT safety (STIBP on Zen2, no-SMT on Zen1), and to issue -an IBPB on each entry to Xen, to flush the BTB. - -Due to performance concerns, dom0 (which is trusted in most configurations) is -excluded from protections by default. - -Therefore: - * Use STIBP by default on Zen2 too, which now means we want it on by default - on all hardware supporting STIBP. - * Break the current IBPB logic out into a new function, extending it with - IBPB-at-entry logic. - * Change the existing IBPB-at-ctxt-switch boolean to be tristate, and disable - it by default when IBPB-at-entry is providing sufficient safety. - -If all PV guests on the system are trusted, then it is recommended to boot -with `spec-ctrl=ibpb-entry=no-pv`, as this will provide an additional marginal -perf improvement. 
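
The amd_init_spectral_chicken() hunk above uses a read-test-write idiom so the chicken bit is only written when it is actually clear, and tolerates the MSR being absent. A self-contained sketch of that idiom, assuming stub rdmsr_safe()/wrmsr_safe() helpers modelled on Xen's fault-tolerant MSR accessors; the MSR index matches the patched msr-index.h:

    #include <stdint.h>

    #define MSR_AMD64_DE_CFG2 0xc00110e3

    /* Stubs: return 0 on success, non-zero if the MSR access faults. */
    static int rdmsr_safe(uint32_t msr, uint64_t *val) { (void)msr; *val = 0; return 0; }
    static int wrmsr_safe(uint32_t msr, uint64_t val) { (void)msr; (void)val; return 0; }

    static void set_spectral_chicken(void)
    {
        uint64_t val;
        const uint64_t chickenbit = 1ull << 1;

        /* Tolerate a missing MSR, and avoid redundant writes. */
        if ( rdmsr_safe(MSR_AMD64_DE_CFG2, &val) == 0 && !(val & chickenbit) )
            wrmsr_safe(MSR_AMD64_DE_CFG2, val | chickenbit);
    }
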
- -This is part of XSA-407 / CVE-2022-23825. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit d8cb7e0f069e0f106d24941355b59b45a731eabe) ---- - docs/misc/xen-command-line.pandoc | 14 ++-- - xen/arch/x86/spec_ctrl.c | 113 ++++++++++++++++++++++++++---- - xen/include/asm-x86/spec_ctrl.h | 2 +- - 3 files changed, 112 insertions(+), 17 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 1bbdb55129cc..bd6826d0ae05 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2234,7 +2234,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>, --> {msr-sc,rsb,md-clear}=<bool>|{pv,hvm}=<bool>, -+> {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio}=<bool> ]` -@@ -2259,9 +2259,10 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. - --The `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` options offer fine --grained control over the primitives by Xen. These impact Xen's ability to --protect itself, and/or Xen's ability to virtualise support for guests to use. -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options -+offer fine grained control over the primitives by Xen. These impact Xen's -+ability to protect itself, and/or Xen's ability to virtualise support for -+guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. -@@ -2280,6 +2281,11 @@ protect itself, and/or Xen's ability to virtualise support for guests to use. - compatibility with development versions of this fix, `mds=` is also accepted - on Xen 4.12 and earlier as an alias. Consult vendor documentation in - preference to here.* -+* `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction -+ Barrier) is used on entry to Xen. This is used by default on hardware -+ vulnerable to Branch Type Confusion, but for performance reasons, dom0 is -+ unprotected by default. If it necessary to protect dom0 too, boot with -+ `spec-ctrl=ibpb-entry`. - - If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to - select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 0f101c057f3e..1d9796c34d71 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -39,6 +39,10 @@ static bool __initdata opt_rsb_hvm = true; - static int8_t __read_mostly opt_md_clear_pv = -1; - static int8_t __read_mostly opt_md_clear_hvm = -1; - -+static int8_t __read_mostly opt_ibpb_entry_pv = -1; -+static int8_t __read_mostly opt_ibpb_entry_hvm = -1; -+static bool __read_mostly opt_ibpb_entry_dom0; -+ - /* Cmdline controls for Xen's speculative settings. */ - static enum ind_thunk { - THUNK_DEFAULT, /* Decide which thunk to use at boot time. 
*/ -@@ -54,7 +58,7 @@ int8_t __initdata opt_stibp = -1; - bool __read_mostly opt_ssbd; - int8_t __initdata opt_psfd = -1; - --bool __read_mostly opt_ibpb_ctxt_switch = true; -+int8_t __read_mostly opt_ibpb_ctxt_switch = -1; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = true; -@@ -114,6 +118,9 @@ static int __init parse_spec_ctrl(const char *s) - opt_rsb_hvm = false; - opt_md_clear_pv = 0; - opt_md_clear_hvm = 0; -+ opt_ibpb_entry_pv = 0; -+ opt_ibpb_entry_hvm = 0; -+ opt_ibpb_entry_dom0 = false; - - opt_thunk = THUNK_JMP; - opt_ibrs = 0; -@@ -140,12 +147,14 @@ static int __init parse_spec_ctrl(const char *s) - opt_msr_sc_pv = val; - opt_rsb_pv = val; - opt_md_clear_pv = val; -+ opt_ibpb_entry_pv = val; - } - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) - { - opt_msr_sc_hvm = val; - opt_rsb_hvm = val; - opt_md_clear_hvm = val; -+ opt_ibpb_entry_hvm = val; - } - else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) - { -@@ -210,6 +219,28 @@ static int __init parse_spec_ctrl(const char *s) - break; - } - } -+ else if ( (val = parse_boolean("ibpb-entry", s, ss)) != -1 ) -+ { -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_ibpb_entry_pv = opt_ibpb_entry_hvm = -+ opt_ibpb_entry_dom0 = val; -+ break; -+ -+ case -2: -+ s += strlen("ibpb-entry="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_ibpb_entry_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_ibpb_entry_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } -+ } - - /* Xen's speculative sidechannel mitigation settings. */ - else if ( !strncmp(s, "bti-thunk=", 10) ) -@@ -477,27 +508,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - * mitigation support for guests. - */ - #ifdef CONFIG_HVM -- printk(" Support for HVM VMs:%s%s%s%s%s\n", -+ printk(" Support for HVM VMs:%s%s%s%s%s%s\n", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || - boot_cpu_has(X86_FEATURE_MD_CLEAR) || -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || - opt_eager_fpu) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : ""); -+ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "", -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); - - #endif - #ifdef CONFIG_PV -- printk(" Support for PV VMs:%s%s%s%s%s\n", -+ printk(" Support for PV VMs:%s%s%s%s%s%s\n", - (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || - boot_cpu_has(X86_FEATURE_SC_RSB_PV) || - boot_cpu_has(X86_FEATURE_MD_CLEAR) || -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || - opt_eager_fpu) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : ""); -+ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "", -+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : ""); - - printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", - opt_xpti_hwdom ? "enabled" : "disabled", -@@ -759,6 +794,55 @@ static bool __init should_use_eager_fpu(void) - } - } - -+static void __init ibpb_calculations(void) -+{ -+ /* Check we have hardware IBPB support before using it... 
*/ -+ if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -+ { -+ opt_ibpb_entry_hvm = opt_ibpb_entry_pv = opt_ibpb_ctxt_switch = 0; -+ opt_ibpb_entry_dom0 = false; -+ return; -+ } -+ -+ /* -+ * IBPB-on-entry mitigations for Branch Type Confusion. -+ * -+ * IBPB && !BTC_NO selects all AMD/Hygon hardware, not known to be safe, -+ * that we can provide some form of mitigation on. -+ */ -+ if ( opt_ibpb_entry_pv == -1 ) -+ opt_ibpb_entry_pv = (IS_ENABLED(CONFIG_PV) && -+ boot_cpu_has(X86_FEATURE_IBPB) && -+ !boot_cpu_has(X86_FEATURE_BTC_NO)); -+ if ( opt_ibpb_entry_hvm == -1 ) -+ opt_ibpb_entry_hvm = (IS_ENABLED(CONFIG_HVM) && -+ boot_cpu_has(X86_FEATURE_IBPB) && -+ !boot_cpu_has(X86_FEATURE_BTC_NO)); -+ -+ if ( opt_ibpb_entry_pv ) -+ { -+ setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_PV); -+ -+ /* -+ * We only need to flush in IST context if we're protecting against PV -+ * guests. HVM IBPB-on-entry protections are both atomic with -+ * NMI/#MC, so can't interrupt Xen ahead of having already flushed the -+ * BTB. -+ */ -+ default_spec_ctrl_flags |= SCF_ist_ibpb; -+ } -+ if ( opt_ibpb_entry_hvm ) -+ setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_HVM); -+ -+ /* -+ * If we're using IBPB-on-entry to protect against PV and HVM guests -+ * (ignoring dom0 if trusted), then there's no need to also issue IBPB on -+ * context switch too. -+ */ -+ if ( opt_ibpb_ctxt_switch == -1 ) -+ opt_ibpb_ctxt_switch = !(opt_ibpb_entry_hvm && opt_ibpb_entry_pv); -+} -+ - /* Calculate whether this CPU is vulnerable to L1TF. */ - static __init void l1tf_calculations(uint64_t caps) - { -@@ -1014,8 +1098,12 @@ void spec_ctrl_init_domain(struct domain *d) - bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || - (opt_fb_clear_mmio && is_iommu_enabled(d))); - -+ bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && -+ (d->domain_id != 0 || opt_ibpb_entry_dom0)); -+ - d->arch.spec_ctrl_flags = - (verw ? SCF_verw : 0) | -+ (ibpb ? SCF_entry_ibpb : 0) | - 0; - } - -@@ -1162,12 +1250,15 @@ void __init init_speculation_mitigations(void) - } - - /* -- * Use STIBP by default if the hardware hint is set. Otherwise, leave it -- * off as it a severe performance pentalty on pre-eIBRS Intel hardware -- * where it was retrofitted in microcode. -+ * Use STIBP by default on all AMD systems. Zen3 and later enumerate -+ * STIBP_ALWAYS, but STIBP is needed on Zen2 as part of the mitigations -+ * for Branch Type Confusion. -+ * -+ * Leave STIBP off by default on Intel. Pre-eIBRS systems suffer a -+ * substantial perf hit when it was implemented in microcode. - */ - if ( opt_stibp == -1 ) -- opt_stibp = !!boot_cpu_has(X86_FEATURE_STIBP_ALWAYS); -+ opt_stibp = !!boot_cpu_has(X86_FEATURE_AMD_STIBP); - - if ( opt_stibp && (boot_cpu_has(X86_FEATURE_STIBP) || - boot_cpu_has(X86_FEATURE_AMD_STIBP)) ) -@@ -1239,9 +1330,7 @@ void __init init_speculation_mitigations(void) - if ( opt_rsb_hvm ) - setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM); - -- /* Check we have hardware IBPB support before using it... */ -- if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -- opt_ibpb_ctxt_switch = false; -+ ibpb_calculations(); - - /* Check whether Eager FPU should be enabled by default. 
 */
-     if ( opt_eager_fpu == -1 )
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 3fc599a817c4..9403b81dc7af 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -65,7 +65,7 @@
- void init_speculation_mitigations(void);
- void spec_ctrl_init_domain(struct domain *d);
- 
--extern bool opt_ibpb_ctxt_switch;
-+extern int8_t opt_ibpb_ctxt_switch;
- extern bool opt_ssbd;
- extern int8_t opt_eager_fpu;
- extern int8_t opt_l1d_flush;
--- 
-2.35.1
-
@@ -1,6 +1,6 @@
-Xen upstream patchset #1 for 4.16.2-pre
+Xen upstream patchset #0 for 4.16.3-pre
 Containing patches from
-RELEASE-4.16.1 (13fee86475f3831d7a1ecf6d7e0acbc2ac779f7e)
 to
-staging-4.16 (0a5387a01165b46c8c85e7f7e2ddbe60a7f5db44)
+staging-4.16 (1bce7fb1f702da4f7a749c6f1457ecb20bf74fca)
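
For reference, the xen-command-line.pandoc hunk in the XSA-407 patch above adds two boot-time forms worth calling out. A hedged usage sketch: the option strings come from the patched documentation, while the xen_cmdline= wrapper is only illustrative of wherever your bootloader passes Xen's command line.

    # Also protect dom0, which is excluded from IBPB-on-entry by default:
    xen_cmdline="spec-ctrl=ibpb-entry"

    # All PV guests on the system trusted: skip the PV-entry protections
    # for a marginal performance improvement:
    xen_cmdline="spec-ctrl=ibpb-entry=no-pv"
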