diff --git a/patch-Revert-xen-credit2-limit-the-max-number-of-CPUs-in-a.patch b/patch-Revert-xen-credit2-limit-the-max-number-of-CPUs-in-a.patch
new file mode 100644
index 00000000..d1c84031
--- /dev/null
+++ b/patch-Revert-xen-credit2-limit-the-max-number-of-CPUs-in-a.patch
@@ -0,0 +1,296 @@
+From cca3d7e05bc31c03bf573a1572aa0e913fc6cd94 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+ <marmarek@invisiblethingslab.com>
+Date: Sat, 3 Oct 2020 14:48:45 +0200
+Subject: [PATCH] Revert "xen: credit2: limit the max number of CPUs in a
+ runqueue"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Organization: Invisible Things Lab
+Cc: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+
+This triggers assert failure in sched2.c:2273. The bug itself is
+somewhere else (under investigation), for now revert the commit that
+makes it much more likely to happen on S3 resume.
+
+This reverts commit 8e2aa76dc1670e82eaa15683353853bc66bf54fc.
+
+Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+---
+ docs/misc/xen-command-line.pandoc |  14 ---
+ xen/common/sched/credit2.c        | 145 ++----------------------------
+ xen/include/asm-arm/cpufeature.h  |   5 --
+ xen/include/asm-x86/processor.h   |   5 --
+ 4 files changed, 6 insertions(+), 163 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 4ae9391fcd3d..1f754d3ec1d7 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -1880,20 +1880,6 @@ with read and write permissions.
+
+ Choose the default scheduler.
+
+-### sched_credit2_max_cpus_runqueue
+-> `= `
+-
+-> Default: `16`
+-
+-Defines how many CPUs will be put, at most, in each Credit2 runqueue.
+-
+-Runqueues are still arranged according to the host topology (and following
+-what indicated by the 'credit2_runqueue' parameter). But we also have a cap
+-to the number of CPUs that share each runqueues.
+-
+-A value that is a submultiple of the number of online CPUs is recommended,
+-as that would likely produce a perfectly balanced runqueue configuration.
+-
+ ### sched_credit2_migrate_resist
+ > `= `
+
+diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
+index eb5e5a78c5e7..8a4f28b9f546 100644
+--- a/xen/common/sched/credit2.c
++++ b/xen/common/sched/credit2.c
+@@ -25,8 +25,6 @@
+ #include
+ #include
+ #include
+-#include
+-#include
+
+ #include "private.h"
+
+@@ -473,22 +471,6 @@ static int __init parse_credit2_runqueue(const char *s)
+ }
+ custom_param("credit2_runqueue", parse_credit2_runqueue);
+
+-/*
+- * How many CPUs will be put, at most, in each runqueue.
+- *
+- * Runqueues are still arranged according to the host topology (and according
+- * to the value of the 'credit2_runqueue' parameter). But we also have a cap
+- * to the number of CPUs that share runqueues.
+- *
+- * This should be considered an upper limit. In fact, we also try to balance
+- * the number of CPUs in each runqueue. And, when doing that, it is possible
+- * that fewer CPUs than what this parameters mandates will actually be put
+- * in each runqueue.
+- */
+-#define MAX_CPUS_RUNQ 16
+-static unsigned int __read_mostly opt_max_cpus_runqueue = MAX_CPUS_RUNQ;
+-integer_param("sched_credit2_max_cpus_runqueue", opt_max_cpus_runqueue);
+-
+ /*
+  * Per-runqueue data
+  */
+@@ -870,83 +852,18 @@ cpu_runqueue_match(const struct csched2_runqueue_data *rqd, unsigned int cpu)
+            (opt_runqueue == OPT_RUNQUEUE_NODE && same_node(peer_cpu, cpu));
+ }
+
+-/*
+- * Additional checks, to avoid separating siblings in different runqueues.
+- * This deals with both Intel's HTs and AMD's CUs. An arch that does not have
+- * any similar concept will just have cpu_nr_siblings() always return 1, and
+- * setup the cpu_sibling_mask-s acordingly (as currently does ARM), and things
+- * will just work as well.
+- */
+-static bool
+-cpu_runqueue_siblings_match(const struct csched2_runqueue_data *rqd,
+-                            unsigned int cpu, unsigned int max_cpus_runq)
+-{
+-    unsigned int nr_sibls = cpu_nr_siblings(cpu);
+-    unsigned int rcpu, tot_sibls = 0;
+-
+-    /*
+-     * If we put the CPU in this runqueue, we must be sure that there will
+-     * be enough room for accepting its sibling(s) as well.
+-     */
+-    cpumask_clear(cpumask_scratch_cpu(cpu));
+-    for_each_cpu ( rcpu, &rqd->active )
+-    {
+-        ASSERT(rcpu != cpu);
+-        if ( !cpumask_intersects(per_cpu(cpu_sibling_mask, rcpu), cpumask_scratch_cpu(cpu)) )
+-        {
+-            /*
+-             * For each CPU already in the runqueue, account for it and for
+-             * its sibling(s), independently from whether they are in the
+-             * runqueue or not. Of course, we do this only once, for each CPU
+-             * that is already inside the runqueue and all its siblings!
+-             *
+-             * This way, even if there are CPUs in the runqueue with siblings
+-             * in a different cpupools, we still count all of them here.
+-             * The reason for this is that, if at some future point we will
+-             * move those sibling CPUs to this cpupool, we want them to land
+-             * in this runqueue. Hence we must be sure to leave space for them.
+-             */
+-            cpumask_or(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+-                       per_cpu(cpu_sibling_mask, rcpu));
+-            tot_sibls += cpu_nr_siblings(rcpu);
+-        }
+-    }
+-    /*
+-     * We know that neither the CPU, nor any of its sibling are here,
+-     * or we wouldn't even have entered the function.
+-     */
+-    ASSERT(!cpumask_intersects(cpumask_scratch_cpu(cpu),
+-                               per_cpu(cpu_sibling_mask, cpu)));
+-
+-    /* Try adding CPU and its sibling(s) to the count and check... */
+-    return tot_sibls + nr_sibls <= max_cpus_runq;
+-}
+-
+ static struct csched2_runqueue_data *
+-cpu_add_to_runqueue(const struct scheduler *ops, unsigned int cpu)
++cpu_add_to_runqueue(struct csched2_private *prv, unsigned int cpu)
+ {
+-    struct csched2_private *prv = csched2_priv(ops);
+     struct csched2_runqueue_data *rqd, *rqd_new;
+-    struct csched2_runqueue_data *rqd_valid = NULL;
+     struct list_head *rqd_ins;
+     unsigned long flags;
+     int rqi = 0;
+-    unsigned int min_rqs, max_cpus_runq;
+-    bool rqi_unused = false;
++    bool rqi_unused = false, rqd_valid = false;
+
+     /* Prealloc in case we need it - not allowed with interrupts off. */
+     rqd_new = xzalloc(struct csched2_runqueue_data);
+
+-    /*
+-     * While respecting the limit of not having more than the max number of
+-     * CPUs per runqueue, let's also try to "spread" the CPU, as evenly as
+-     * possible, among the runqueues. For doing that, we need to know upfront
+-     * how many CPUs we have, so let's use the number of CPUs that are online
+-     * for that.
+-     */
+-    min_rqs = ((num_online_cpus() - 1) / opt_max_cpus_runqueue) + 1;
+-    max_cpus_runq = num_online_cpus() / min_rqs;
+-
+     write_lock_irqsave(&prv->lock, flags);
+
+     rqd_ins = &prv->rql;
+@@ -956,59 +873,10 @@ cpu_add_to_runqueue(const struct scheduler *ops, unsigned int cpu)
+         if ( !rqi_unused && rqd->id > rqi )
+             rqi_unused = true;
+
+-        /*
+-         * First of all, let's check whether, according to the system
+-         * topology, this CPU belongs in this runqueue.
+-         */
+         if ( cpu_runqueue_match(rqd, cpu) )
+         {
+-            /*
+-             * If the CPU has any siblings, they are online and they are
+-             * being added to this cpupool, always keep them together. Even
+-             * if that means violating what the opt_max_cpus_runqueue param
+-             * indicates. However, if this happens, chances are high that a
+-             * too small value was used for the parameter, so warn the user
+-             * about that.
+-             *
+-             * Note that we cannot check this once and for all, say, during
+-             * scheduler initialization. In fact, at least in theory, the
+-             * number of siblings a CPU has may not be the same for all the
+-             * CPUs.
+-             */
+-            if ( cpumask_intersects(&rqd->active, per_cpu(cpu_sibling_mask, cpu)) )
+-            {
+-                if ( cpumask_weight(&rqd->active) >= opt_max_cpus_runqueue )
+-                {
+-                    printk("WARNING: %s: more than opt_max_cpus_runqueue "
+-                           "in a runqueue (%u vs %u), due to topology constraints.\n"
+-                           "Consider raising it!\n",
+-                           __func__, opt_max_cpus_runqueue,
+-                           cpumask_weight(&rqd->active));
+-                }
+-                rqd_valid = rqd;
+-                break;
+-            }
+-
+-            /*
+-             * If we're using core (or socket) scheduling, no need to do any
+-             * further checking beyond the number of CPUs already in this
+-             * runqueue respecting our upper bound.
+-             *
+-             * Otherwise, let's try to make sure that siblings stay in the
+-             * same runqueue, pretty much under any cinrcumnstances.
+-             */
+-            if ( rqd->refcnt < max_cpus_runq && (ops->cpupool->gran != SCHED_GRAN_cpu ||
+-                  cpu_runqueue_siblings_match(rqd, cpu, max_cpus_runq)) )
+-            {
+-                /*
+-                 * This runqueue is ok, but as we said, we also want an even
+-                 * distribution of the CPUs. So, unless this is the very first
+-                 * match, we go on, check all runqueues and actually add the
+-                 * CPU into the one that is less full.
+-                 */
+-                if ( !rqd_valid || rqd->refcnt < rqd_valid->refcnt )
+-                    rqd_valid = rqd;
+-            }
++            rqd_valid = true;
++            break;
+         }
+
+         if ( !rqi_unused )
+@@ -1032,8 +900,6 @@ cpu_add_to_runqueue(const struct scheduler *ops, unsigned int cpu)
+         rqd->pick_bias = cpu;
+         rqd->id = rqi;
+     }
+-    else
+-        rqd = rqd_valid;
+
+     rqd->refcnt++;
+
+@@ -3878,6 +3744,7 @@ csched2_dump(const struct scheduler *ops)
+ static void *
+ csched2_alloc_pdata(const struct scheduler *ops, int cpu)
+ {
++    struct csched2_private *prv = csched2_priv(ops);
+     struct csched2_pcpu *spc;
+     struct csched2_runqueue_data *rqd;
+
+@@ -3887,7 +3754,7 @@ csched2_alloc_pdata(const struct scheduler *ops, int cpu)
+     if ( spc == NULL )
+         return ERR_PTR(-ENOMEM);
+
+-    rqd = cpu_add_to_runqueue(ops, cpu);
++    rqd = cpu_add_to_runqueue(prv, cpu);
+     if ( IS_ERR(rqd) )
+     {
+         xfree(spc);
+diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h
+index 10878ead8a27..6bff5ce13110 100644
+--- a/xen/include/asm-arm/cpufeature.h
++++ b/xen/include/asm-arm/cpufeature.h
+@@ -64,11 +64,6 @@ static inline bool cpus_have_cap(unsigned int num)
+     return test_bit(num, cpu_hwcaps);
+ }
+
+-static inline int cpu_nr_siblings(unsigned int cpu)
+-{
+-    return 1;
+-}
+-
+ /* System capability check for constant cap */
+ #define cpus_have_const_cap(num) ({                 \
+         register_t __ret;                           \
+diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
+index 9acb80fdcd37..badd7e60e506 100644
+--- a/xen/include/asm-x86/processor.h
++++ b/xen/include/asm-x86/processor.h
+@@ -171,11 +171,6 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
+
+ unsigned int apicid_to_socket(unsigned int);
+
+-static inline int cpu_nr_siblings(unsigned int cpu)
+-{
+-    return cpu_data[cpu].x86_num_siblings;
+-}
+-
+ /*
+  * Generic CPUID function
+  * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+--
+2.25.4
+
diff --git a/patch-x86-S3-Fix-Shadow-Stack-resume-path.patch b/patch-x86-S3-Fix-Shadow-Stack-resume-path.patch
new file mode 100644
index 00000000..bc685c37
--- /dev/null
+++ b/patch-x86-S3-Fix-Shadow-Stack-resume-path.patch
@@ -0,0 +1,37 @@
+From bd032cbd664a229603592d595ce562dc1a22d158 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
+ <marmarek@invisiblethingslab.com>
+Date: Sun, 27 Sep 2020 17:29:35 +0200
+Subject: [PATCH] x86/S3: Fix Shadow Stack resume path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Organization: Invisible Things Lab
+Cc: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+
+Fix the resume path to load the shadow stack pointer from saved_ssp (not
+saved_rsp), to match what the suspend path does.
+
+Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Fixes: 633ecc4a7cb2 ("x86/S3: Save and restore Shadow Stack configuration")
+Backport: 4.14
+---
+ xen/arch/x86/acpi/wakeup_prot.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/acpi/wakeup_prot.S b/xen/arch/x86/acpi/wakeup_prot.S
+index a2c41c4f3f26..c6b3fcc93d93 100644
+--- a/xen/arch/x86/acpi/wakeup_prot.S
++++ b/xen/arch/x86/acpi/wakeup_prot.S
+@@ -69,7 +69,7 @@ ENTRY(s3_resume)
+          * so SETSSBSY will successfully load a value useful for us, then
+          * reset MSR_PL0_SSP to its usual value and pop the temporary token.
+          */
+-        mov     saved_rsp(%rip), %rdi
++        mov     saved_ssp(%rip), %rdi
+         cmpq    $1, %rdi
+         je      .L_shstk_done
+
+--
+2.25.4
+
diff --git a/patch-x86-S3-Restore-CR4-earlier-during-resume.patch b/patch-x86-S3-Restore-CR4-earlier-during-resume.patch
new file mode 100644
index 00000000..5619fb3a
--- /dev/null
+++ b/patch-x86-S3-Restore-CR4-earlier-during-resume.patch
@@ -0,0 +1,66 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 2 Oct 2020 17:49:32 +0000 (+0100)
+Subject: x86/S3: Restore CR4 earlier during resume
+X-Git-Url: https://xenbits.xen.org/gitweb/?p=people%2Fandrewcoop%2Fxen.git;a=commitdiff_plain;h=eb2dd26a5060012377368cfe7edb38b76ba04058
+
+x86/S3: Restore CR4 earlier during resume
+
+c/s 4304ff420e5 "x86/S3: Drop {save,restore}_rest_processor_state()
+completely" moved CR4 restoration up into C, to account for the fact that MCE
+was explicitly handled later.
+
+However, time_resume() ends up making an EFI Runtime Service call, and EFI
+explodes without OSFXSR, presumably when trying to spill %xmm registers onto
+the stack.
+
+Given this codepath, and the potential for other issues of a similar kind (TLB
+flushing vs INVPCID, HVM logic vs VMXE, etc), restore CR4 in asm before
+entering C.
+
+Ignore the previous MCE special case, because its not actually necessary. The
+handler is already suitably configured from before suspend.
+
+Fixes: 4304ff420e5 ("x86/S3: Drop {save,restore}_rest_processor_state() completely")
+Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+
+diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
+index 4fb1e7a148..7f162a4df9 100644
+--- a/xen/arch/x86/acpi/power.c
++++ b/xen/arch/x86/acpi/power.c
+@@ -276,9 +276,6 @@ static int enter_state(u32 state)
+
+     mcheck_init(&boot_cpu_data, false);
+
+-    /* Restore CR4 from cached value, now MCE is set up. */
+-    write_cr4(read_cr4());
+-
+     printk(XENLOG_INFO "Finishing wakeup from ACPI S%d state.\n", state);
+
+     if ( (state == ACPI_STATE_S3) && error )
+diff --git a/xen/arch/x86/acpi/wakeup_prot.S b/xen/arch/x86/acpi/wakeup_prot.S
+index c6b3fcc93d..15052c300f 100644
+--- a/xen/arch/x86/acpi/wakeup_prot.S
++++ b/xen/arch/x86/acpi/wakeup_prot.S
+@@ -1,3 +1,4 @@
++#include
+ #include
+ #include
+ #include
+@@ -110,6 +111,11 @@ ENTRY(s3_resume)
+
+         call    load_system_tables
+
++        /* Restore CR4 from the cpuinfo block. */
++        GET_STACK_END(bx)
++        mov     STACK_CPUINFO_FIELD(cr4)(%rbx), %rax
++        mov     %rax, %cr4
++
+ .Lsuspend_err:
+         pop     %r15
+         pop     %r14
+--
+2.11.0
+
diff --git a/patch-x86-smpboot-Unconditionally-call-memguard_unguard_stack.patch b/patch-x86-smpboot-Unconditionally-call-memguard_unguard_stack.patch
new file mode 100644
index 00000000..5d225e78
--- /dev/null
+++ b/patch-x86-smpboot-Unconditionally-call-memguard_unguard_stack.patch
@@ -0,0 +1,57 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: [PATCH] x86/smpboot: Unconditionally call memguard_unguard_stack() in cpu_smpboot_free()
+Date: Mon, 5 Oct 2020 13:23:25 +0100
+
+For simplicity between various configuration, Xen always uses shadow stack
+mappings (Read-only + Dirty) for the guard page, irrespective of whether
+CET-SS is enabled.
+
+memguard_guard_stack() writes shadow stack tokens with plain writes. This is
+necessary to configure the BSP shadow stack correctly, and cannot be
+implemented with WRSS.
+
+Therefore, unconditionally call memguard_unguard_stack() to return the
+mappings to fully writeable, so a subsequent call to memguard_guard_stack()
+will succeed.
+
+Fixes: 91d26ed304f ("x86/shstk: Create shadow stacks")
+Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+---
+CC: Jan Beulich <jbeulich@suse.com>
+CC: Roger Pau Monné <roger.pau@citrix.com>
+CC: Wei Liu <wl@xen.org>
+
+This can more easily be demonstrated with CPU hotplug than S3, and the absence
+of bug reports goes to show how rarely hotplug is used.
+---
+ xen/arch/x86/smpboot.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 5708573c41..c193cc0fb8 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -971,16 +971,16 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove)
+     if ( IS_ENABLED(CONFIG_PV32) )
+         FREE_XENHEAP_PAGE(per_cpu(compat_gdt, cpu));
+
++    if ( stack_base[cpu] )
++        memguard_unguard_stack(stack_base[cpu]);
++
+     if ( remove )
+     {
+         FREE_XENHEAP_PAGE(per_cpu(gdt, cpu));
+         FREE_XENHEAP_PAGE(idt_tables[cpu]);
+
+         if ( stack_base[cpu] )
+-        {
+-            memguard_unguard_stack(stack_base[cpu]);
+             FREE_XENHEAP_PAGES(stack_base[cpu], STACK_ORDER);
+-        }
+     }
+ }
+
+--
+2.11.0
+
diff --git a/xen.spec.in b/xen.spec.in
index 210ca0f3..a8dab5b8 100644
--- a/xen.spec.in
+++ b/xen.spec.in
@@ -126,6 +126,10 @@ Patch622: patch-xen-disable-efi-gettime.patch
 Patch627: patch-libxl-automatically-enable-gfx_passthru-if-IGD-is-as.patch
 Patch628: patch-libxl-workaround-gcc-10.2-maybe-uninitialized-warnin.patch
 Patch629: patch-libxl-fix-Werror-stringop-truncation-in-libxl__prepa.patch
+Patch630: patch-x86-S3-Fix-Shadow-Stack-resume-path.patch
+Patch631: patch-Revert-xen-credit2-limit-the-max-number-of-CPUs-in-a.patch
+Patch632: patch-x86-S3-Restore-CR4-earlier-during-resume.patch
+Patch633: patch-x86-smpboot-Unconditionally-call-memguard_unguard_stack.patch
 # GCC8 fixes
 Patch714: patch-tools-kdd-mute-spurious-gcc-warning.patch