From 7485af89a6fd48f7e6fab2505d2364d1817723e6 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Thu, 25 May 2017 13:51:20 -0600 Subject: [PATCH 01/13] arch/sparc: increase CONFIG_NODES_SHIFT on SPARC64 to 5 SPARC M6-32 platform has (2^5) NUMA nodes, so need to bump up the CONFIG_NODES_SHIFT to 5. Orabug: 25577754 Signed-off-by: Jane Chu Reviewed-by: Bob Picco Reviewed-by: Atish Patra Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 58243b0d21c006..e544ac12737e7b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -295,9 +295,13 @@ config NUMA depends on SPARC64 && SMP config NODES_SHIFT - int - default "4" + int "Maximum NUMA Nodes (as a power of 2)" + range 4 5 if SPARC64 + default "5" depends on NEED_MULTIPLE_NODES + help + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accommodate various tables. # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and From 0fde7ad71ee371ede73b3f326e58f9e8d102feb6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 1 Jun 2017 09:42:46 -0700 Subject: [PATCH 02/13] sparc64: Fix build warnings with gcc 7. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/sparc/kernel/ds.c: In function ‘register_services’: arch/sparc/kernel/ds.c:912:3: error: ‘strcpy’: writing at least 1 byte into a region of size 0 overflows the destination Reported-by: Anatoly Pugachev Signed-off-by: David S. Miller --- arch/sparc/kernel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index b542cc7c8d94d8..f87265afb1759e 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -909,7 +909,7 @@ static int register_services(struct ds_info *dp) pbuf.req.handle = cp->handle; pbuf.req.major = 1; pbuf.req.minor = 0; - strcpy(pbuf.req.svc_id, cp->service_id); + strcpy(pbuf.id_buf, cp->service_id); err = __ds_send(lp, &pbuf, msg_len); if (err > 0) From 1b4af13ff2cc6897557bb0b8d9e2fad4fa4d67aa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Jun 2017 11:28:57 -0700 Subject: [PATCH 03/13] sparc64: Add __multi3 for gcc 7.x and later. Reported-by: Waldemar Brodkorb Signed-off-by: David S. Miller --- arch/sparc/lib/Makefile | 1 + arch/sparc/lib/multi3.S | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 arch/sparc/lib/multi3.S diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 69912d2f8b54e9..07c03e72d81248 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -15,6 +15,7 @@ lib-$(CONFIG_SPARC32) += copy_user.o locks.o lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o +lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S new file mode 100644 index 00000000000000..d6b6c97fe3c730 --- /dev/null +++ b/arch/sparc/lib/multi3.S @@ -0,0 +1,35 @@ +#include +#include + + .text + .align 4 +ENTRY(__multi3) /* %o0 = u, %o1 = v */ + mov %o1, %g1 + srl %o3, 0, %g4 + mulx %g4, %g1, %o1 + srlx %g1, 0x20, %g3 + mulx %g3, %g4, %g5 + sllx %g5, 0x20, %o5 + srl %g1, 0, %g4 + sub %o1, %o5, %o5 + srlx %o5, 0x20, %o5 + addcc %g5, %o5, %g5 + srlx %o3, 0x20, %o5 + mulx %g4, %o5, %g4 + mulx %g3, %o5, %o5 + sethi %hi(0x80000000), %g3 + addcc %g5, %g4, %g5 + srlx %g5, 0x20, %g5 + add %g3, %g3, %g3 + movcc %xcc, %g0, %g3 + addcc %o5, %g5, %o5 + sllx %g4, 0x20, %g4 + add %o1, %g4, %o1 + add %o5, %g3, %g2 + mulx %g1, %o2, %g1 + add %g1, %g2, %g1 + mulx %o0, %o3, %o0 + retl + add %g1, %o0, %o0 +ENDPROC(__multi3) +EXPORT_SYMBOL(__multi3) From c79a13734d104b5b147d7cb0870276ccdd660dae Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Tue, 6 Jun 2017 14:32:29 -0600 Subject: [PATCH 04/13] arch/sparc: support NR_CPUS = 4096 Linux SPARC64 limits NR_CPUS to 4064 because init_cpu_send_mondo_info() only allocates a single page for NR_CPUS mondo entries. Thus we cannot use all 4096 CPUs on some SPARC platforms. To fix, allocate (2^order) pages where order is set according to the size of cpu_list for possible cpus. Since cpu_list_pa and cpu_mondo_block_pa are not used in asm code, there are no imm13 offsets from the base PA that will break because they can only reach one page. Orabug: 25505750 Signed-off-by: Jane Chu Reviewed-by: Bob Picco Reviewed-by: Atish Patra Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 4 ++-- arch/sparc/kernel/irq_64.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e544ac12737e7b..b558c9e29de37f 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -192,9 +192,9 @@ config NR_CPUS int "Maximum number of CPUs" depends on SMP range 2 32 if SPARC32 - range 2 1024 if SPARC64 + range 2 4096 if SPARC64 default 32 if SPARC32 - default 64 if SPARC64 + default 4096 if SPARC64 source kernel/Kconfig.hz diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 4d0248aa092869..99dd133a029f06 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) { #ifdef CONFIG_SMP unsigned long page; + void *mondo, *p; - BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE); + + /* Make sure mondo block is 64byte aligned */ + p = kzalloc(127, GFP_KERNEL); + if (!p) { + prom_printf("SUN4V: Error, cannot allocate mondo block.\n"); + prom_halt(); + } + mondo = (void *)(((unsigned long)p + 63) & ~0x3f); + tb->cpu_mondo_block_pa = __pa(mondo); page = get_zeroed_page(GFP_KERNEL); if (!page) { - prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_printf("SUN4V: Error, cannot allocate cpu list page.\n"); prom_halt(); } - tb->cpu_mondo_block_pa = __pa(page); - tb->cpu_list_pa = __pa(page + 64); + tb->cpu_list_pa = __pa(page); #endif } From 654f4807624a657f364417c2a7454f0df9961734 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 2 Jun 2017 14:51:12 -0700 Subject: [PATCH 05/13] sparc64: mm: fix copy_tsb to correctly copy huge page TSBs When a TSB grows beyond its current capacity, a new TSB is allocated and copy_tsb is called to copy entries from the old TSB to the new. A hash shift based on page size is used to calculate the index of an entry in the TSB. copy_tsb has hard coded PAGE_SHIFT in these calculations. However, for huge page TSBs the value REAL_HPAGE_SHIFT should be used. As a result, when copy_tsb is called for a huge page TSB the entries are placed at the incorrect index in the newly allocated TSB. When doing hardware table walk, the MMU does not match these entries and we end up in the TSB miss handling code. This code will then create and write an entry to the correct index in the TSB. We take a performance hit for the table walk miss and recreation of these entries. Pass a new parameter to copy_tsb that is the page size shift to be used when copying the TSB. Suggested-by: Anthony Yznaga Signed-off-by: Mike Kravetz Signed-off-by: David S. Miller --- arch/sparc/kernel/tsb.S | 11 +++++++---- arch/sparc/mm/tsb.c | 7 +++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index 10689cfd0ad40e..07c0df92496034 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -455,13 +455,16 @@ __tsb_context_switch: .type copy_tsb,#function copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size * %o2=new_tsb_base, %o3=new_tsb_size + * %o4=page_size_shift */ sethi %uhi(TSB_PASS_BITS), %g7 srlx %o3, 4, %o3 - add %o0, %o1, %g1 /* end of old tsb */ + add %o0, %o1, %o1 /* end of old tsb */ sllx %g7, 32, %g7 sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + mov %o4, %g1 /* page_size_shift */ + 661: prefetcha [%o0] ASI_N, #one_read .section .tsb_phys_patch, "ax" .word 661b @@ -486,9 +489,9 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size /* This can definitely be computed faster... */ srlx %o0, 4, %o5 /* Build index */ and %o5, 511, %o5 /* Mask index */ - sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ + sllx %o5, %g1, %o5 /* Put into vaddr position */ or %o4, %o5, %o4 /* Full VADDR. */ - srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ + srlx %o4, %g1, %o4 /* Shift down to create index */ and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ @@ -496,7 +499,7 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ 80: add %o0, 16, %o0 - cmp %o0, %g1 + cmp %o0, %o1 bne,pt %xcc, 90b nop diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index bedf08b22a4773..0d4b998c7d7b74 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -496,7 +496,8 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) extern void copy_tsb(unsigned long old_tsb_base, unsigned long old_tsb_size, unsigned long new_tsb_base, - unsigned long new_tsb_size); + unsigned long new_tsb_size, + unsigned long page_size_shift); unsigned long old_tsb_base = (unsigned long) old_tsb; unsigned long new_tsb_base = (unsigned long) new_tsb; @@ -504,7 +505,9 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) old_tsb_base = __pa(old_tsb_base); new_tsb_base = __pa(new_tsb_base); } - copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); + copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size, + tsb_index == MM_TSB_BASE ? + PAGE_SHIFT : REAL_HPAGE_SHIFT); } mm->context.tsb_block[tsb_index].tsb = new_tsb; From c982aa9c304bf0b9a7522fd118fed4afa5a0263c Mon Sep 17 00:00:00 2001 From: James Clarke Date: Mon, 29 May 2017 20:17:56 +0100 Subject: [PATCH 06/13] sparc: Machine description indices can vary VIO devices were being looked up by their index in the machine description node block, but this often varies over time as devices are added and removed. Instead, store the ID and look up using the type, config handle and ID. Signed-off-by: James Clarke Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112541 Signed-off-by: David S. Miller --- arch/sparc/include/asm/vio.h | 1 + arch/sparc/kernel/vio.c | 68 +++++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 8174f6cdbbbbd8..9dca7a892978a4 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -327,6 +327,7 @@ struct vio_dev { int compat_len; u64 dev_no; + u64 id; unsigned long channel_id; diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index f6bb857254fcfa..075d38980dee39 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, if (!id) { dev_set_name(&vdev->dev, "%s", bus_id_name); vdev->dev_no = ~(u64)0; + vdev->id = ~(u64)0; } else if (!cfg_handle) { dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id); vdev->dev_no = *id; + vdev->id = ~(u64)0; } else { dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name, *cfg_handle, *id); vdev->dev_no = *cfg_handle; + vdev->id = *id; } vdev->dev.parent = parent; @@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle *hp, u64 node) (void) vio_create_one(hp, node, &root_vdev->dev); } +struct vio_md_node_query { + const char *type; + u64 dev_no; + u64 id; +}; + static int vio_md_node_match(struct device *dev, void *arg) { + struct vio_md_node_query *query = (struct vio_md_node_query *) arg; struct vio_dev *vdev = to_vio_dev(dev); - if (vdev->mp == (u64) arg) - return 1; + if (vdev->dev_no != query->dev_no) + return 0; + if (vdev->id != query->id) + return 0; + if (strcmp(vdev->type, query->type)) + return 0; - return 0; + return 1; } static void vio_remove(struct mdesc_handle *hp, u64 node) { + const char *type; + const u64 *id, *cfg_handle; + u64 a; + struct vio_md_node_query query; struct device *dev; - dev = device_find_child(&root_vdev->dev, (void *) node, + type = mdesc_get_property(hp, node, "device-type", NULL); + if (!type) { + type = mdesc_get_property(hp, node, "name", NULL); + if (!type) + type = mdesc_node_name(hp, node); + } + + query.type = type; + + id = mdesc_get_property(hp, node, "id", NULL); + cfg_handle = NULL; + mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + cfg_handle = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (cfg_handle) + break; + } + + if (!id) { + query.dev_no = ~(u64)0; + query.id = ~(u64)0; + } else if (!cfg_handle) { + query.dev_no = *id; + query.id = ~(u64)0; + } else { + query.dev_no = *cfg_handle; + query.id = *id; + } + + dev = device_find_child(&root_vdev->dev, &query, vio_md_node_match); if (dev) { printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev)); device_unregister(dev); put_device(dev); + } else { + if (!id) + printk(KERN_ERR "VIO: Removed unknown %s node.\n", + type); + else if (!cfg_handle) + printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n", + type, *id); + else + printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n", + type, *cfg_handle, *id); } } From f322980b74a15e08f8c70a34a5864ecdbf957251 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 30 May 2017 15:45:00 -0400 Subject: [PATCH 07/13] sparc/mm/hugepages: Fix setup_hugepagesz for invalid values. hugetlb_bad_size needs to be called on invalid values. Also change the pr_warn to a pr_err to better align with other platforms. Signed-off-by: Liam R. Howlett Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 0cda653ae00764..7ca1b9dc7d6456 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char *string) } if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) { - pr_warn("hugepagesz=%llu not supported by MMU.\n", + hugetlb_bad_size(); + pr_err("hugepagesz=%llu not supported by MMU.\n", hugepage_size); goto out; } From 588974857359861891f478a070b1dc7ae04a3880 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:20 -0400 Subject: [PATCH 08/13] sparc64: reset mm cpumask after wrap After a wrap (getting a new context version) a process must get a new context id, which means that we would need to flush the context id from the TLB before running for the first time with this ID on every CPU. But, we use mm_cpumask to determine if this process has been running on this CPU before, and this mask is not reset after a wrap. So, there are two possible fixes for this issue: 1. Clear mm cpumask whenever mm gets a new context id 2. Unconditionally flush context every time process is running on a CPU This patch implements the first solution Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 7ca1b9dc7d6456..f8e30a3906bea1 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -759,6 +759,8 @@ void get_new_mmu_context(struct mm_struct *mm) goto out; } } + if (mm->context.sparc64_ctx_val) + cpumask_clear(mm_cpumask(mm)); mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); out: From 14d0334c6748ff2aedb3f2f7fdc51ee90a9b54e7 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:21 -0400 Subject: [PATCH 09/13] sparc64: combine activate_mm and switch_mm The only difference between these two functions is that in activate_mm we unconditionally flush context. However, there is no need to keep this difference after fixing a bug where cpumask was not reset on a wrap. So, in this patch we combine these. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 22fede6eba1160..734a1343d77d93 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -133,26 +133,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str } #define deactivate_mm(tsk,mm) do { } while (0) - -/* Activate a new MM instance for the current task. */ -static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) -{ - unsigned long flags; - int cpu; - - spin_lock_irqsave(&mm->context.lock, flags); - if (!CTX_VALID(mm->context)) - get_new_mmu_context(mm); - cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, mm_cpumask(mm))) - cpumask_set_cpu(cpu, mm_cpumask(mm)); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - tsb_context_switch(mm); - spin_unlock_irqrestore(&mm->context.lock, flags); -} - +#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL) #endif /* !(__ASSEMBLY__) */ #endif /* !(__SPARC64_MMU_CONTEXT_H) */ From c4415235b2be0cc791572e8e7f7466ab8f73a2bf Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:22 -0400 Subject: [PATCH 10/13] sparc64: redefine first version CTX_FIRST_VERSION defines the first context version, but also it defines first context. This patch redefines it to only include the first context version. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_64.h | 2 +- arch/sparc/mm/init_64.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index f7de0dbc38af2d..83b36a5371ffc6 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -52,7 +52,7 @@ #define CTX_NR_MASK TAG_CONTEXT_BITS #define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK) -#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL)) +#define CTX_FIRST_VERSION BIT(CTX_VERSION_SHIFT) #define CTX_VALID(__ctx) \ (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK)) #define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f8e30a3906bea1..63b50447bb62eb 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -707,7 +707,7 @@ EXPORT_SYMBOL(__flush_dcache_range); /* get_new_mmu_context() uses "cache + 1". */ DEFINE_SPINLOCK(ctx_alloc_lock); -unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; +unsigned long tlb_context_cache = CTX_FIRST_VERSION; #define MAX_CTX_NR (1UL << CTX_NR_BITS) #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); @@ -738,9 +738,9 @@ void get_new_mmu_context(struct mm_struct *mm) if (new_ctx >= ctx) { int i; new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + - CTX_FIRST_VERSION; + CTX_FIRST_VERSION + 1; if (new_ctx == 1) - new_ctx = CTX_FIRST_VERSION; + new_ctx = CTX_FIRST_VERSION + 1; /* Don't call memset, for 16 entries that's just * plain silly... From 7a5b4bbf49fe86ce77488a70c5dccfe2d50d7a2d Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:23 -0400 Subject: [PATCH 11/13] sparc64: add per-cpu mm of secondary contexts The new wrap is going to use information from this array to figure out mm's that currently have valid secondary contexts setup. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 5 +++-- arch/sparc/mm/init_64.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 734a1343d77d93..edb45247bfa9f2 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -19,6 +19,7 @@ extern spinlock_t ctx_alloc_lock; extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; +DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); void get_new_mmu_context(struct mm_struct *mm); #ifdef CONFIG_SMP void smp_new_mmu_context_version(void); @@ -76,8 +77,9 @@ void __flush_tlb_mm(unsigned long, unsigned long); static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { unsigned long ctx_valid, flags; - int cpu; + int cpu = smp_processor_id(); + per_cpu(per_cpu_secondary_mm, cpu) = mm; if (unlikely(mm == &init_mm)) return; @@ -123,7 +125,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * for the first time, we must flush that context out of the * local TLB. */ - cpu = smp_processor_id(); if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) { cpumask_set_cpu(cpu, mm_cpumask(mm)); __flush_tlb_mm(CTX_HWBITS(mm->context), diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 63b50447bb62eb..a4c0bc8af82029 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -711,6 +711,7 @@ unsigned long tlb_context_cache = CTX_FIRST_VERSION; #define MAX_CTX_NR (1UL << CTX_NR_BITS) #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); +DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0}; /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. From a0582f26ec9dfd5360ea2f35dd9a1b026f8adda0 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:24 -0400 Subject: [PATCH 12/13] sparc64: new context wrap The current wrap implementation has a race issue: it is called outside of the ctx_alloc_lock, and also does not wait for all CPUs to complete the wrap. This means that a thread can get a new context with a new version and another thread might still be running with the same context. The problem is especially severe on CPUs with shared TLBs, like sun4v. I used the following test to very quickly reproduce the problem: - start over 8K processes (must be more than context IDs) - write and read values at a memory location in every process. Very quickly memory corruptions start happening, and what we read back does not equal what we wrote. Several approaches were explored before settling on this one: Approach 1: Move smp_new_mmu_context_version() inside ctx_alloc_lock, and wait for every process to complete the wrap. (Note: every CPU must WAIT before leaving smp_new_mmu_context_version_client() until every one arrives). This approach ends up with deadlocks, as some threads own locks which other threads are waiting for, and they never receive softint until these threads exit smp_new_mmu_context_version_client(). Since we do not allow the exit, deadlock happens. Approach 2: Handle wrap right during mondo interrupt. Use etrap/rtrap to enter into into C code, and issue new versions to every CPU. This approach adds some overhead to runtime: in switch_mm() we must add some checks to make sure that versions have not changed due to wrap while we were loading the new secondary context. (could be protected by PSTATE_IE but that degrades performance as on M7 and older CPUs as it takes 50 cycles for each access). Also, we still need a global per-cpu array of MMs to know where we need to load new contexts, otherwise we can change context to a thread that is going way (if we received mondo between switch_mm() and switch_to() time). Finally, there are some issues with window registers in rtrap() when context IDs are changed during CPU mondo time. The approach in this patch is the simplest and has almost no impact on runtime. We use the array with mm's where last secondary contexts were loaded onto CPUs and bump their versions to the new generation without changing context IDs. If a new process comes in to get a context ID, it will go through get_new_mmu_context() because of version mismatch. But the running processes do not need to be interrupted. And wrap is quicker as we do not need to xcall and wait for everyone to receive and complete wrap. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 81 +++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a4c0bc8af82029..3c40ebd50f928c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -713,6 +713,53 @@ unsigned long tlb_context_cache = CTX_FIRST_VERSION; DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0}; +static void mmu_context_wrap(void) +{ + unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK; + unsigned long new_ver, new_ctx, old_ctx; + struct mm_struct *mm; + int cpu; + + bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS); + + /* Reserve kernel context */ + set_bit(0, mmu_context_bmap); + + new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION; + if (unlikely(new_ver == 0)) + new_ver = CTX_FIRST_VERSION; + tlb_context_cache = new_ver; + + /* + * Make sure that any new mm that are added into per_cpu_secondary_mm, + * are going to go through get_new_mmu_context() path. + */ + mb(); + + /* + * Updated versions to current on those CPUs that had valid secondary + * contexts + */ + for_each_online_cpu(cpu) { + /* + * If a new mm is stored after we took this mm from the array, + * it will go into get_new_mmu_context() path, because we + * already bumped the version in tlb_context_cache. + */ + mm = per_cpu(per_cpu_secondary_mm, cpu); + + if (unlikely(!mm || mm == &init_mm)) + continue; + + old_ctx = mm->context.sparc64_ctx_val; + if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) { + new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver; + set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap); + mm->context.sparc64_ctx_val = new_ctx; + } + } +} + /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. * @@ -727,50 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - int new_version; spin_lock(&ctx_alloc_lock); +retry: + /* wrap might have happened, test again if our context became valid */ + if (unlikely(CTX_VALID(mm->context))) + goto out; orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); - new_version = 0; if (new_ctx >= (1 << CTX_NR_BITS)) { new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); if (new_ctx >= ctx) { - int i; - new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + - CTX_FIRST_VERSION + 1; - if (new_ctx == 1) - new_ctx = CTX_FIRST_VERSION + 1; - - /* Don't call memset, for 16 entries that's just - * plain silly... - */ - mmu_context_bmap[0] = 3; - mmu_context_bmap[1] = 0; - mmu_context_bmap[2] = 0; - mmu_context_bmap[3] = 0; - for (i = 4; i < CTX_BMAP_SLOTS; i += 4) { - mmu_context_bmap[i + 0] = 0; - mmu_context_bmap[i + 1] = 0; - mmu_context_bmap[i + 2] = 0; - mmu_context_bmap[i + 3] = 0; - } - new_version = 1; - goto out; + mmu_context_wrap(); + goto retry; } } if (mm->context.sparc64_ctx_val) cpumask_clear(mm_cpumask(mm)); mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); -out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; +out: spin_unlock(&ctx_alloc_lock); - - if (unlikely(new_version)) - smp_new_mmu_context_version(); } static int numa_enabled = 1; From 0197e41ce70511dc3b71f7fefa1a676e2b5cd60b Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:25 -0400 Subject: [PATCH 13/13] sparc64: delete old wrap code The old method that is using xcall and softint to get new context id is deleted, as it is replaced by a method of using per_cpu_secondary_mm without xcall to perform the context wrap. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 6 ----- arch/sparc/include/asm/pil.h | 1 - arch/sparc/kernel/kernel.h | 1 - arch/sparc/kernel/smp_64.c | 31 ------------------------- arch/sparc/kernel/ttable_64.S | 2 +- arch/sparc/mm/ultra.S | 5 ---- 6 files changed, 1 insertion(+), 45 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index edb45247bfa9f2..2cddcda4f85f75 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -21,12 +21,6 @@ extern unsigned long mmu_context_bmap[]; DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); void get_new_mmu_context(struct mm_struct *mm); -#ifdef CONFIG_SMP -void smp_new_mmu_context_version(void); -#else -#define smp_new_mmu_context_version() do { } while (0) -#endif - int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h index 2669370305465d..522b43db2ed336 100644 --- a/arch/sparc/include/asm/pil.h +++ b/arch/sparc/include/asm/pil.h @@ -20,7 +20,6 @@ #define PIL_SMP_CALL_FUNC 1 #define PIL_SMP_RECEIVE_SIGNAL 2 #define PIL_SMP_CAPTURE 3 -#define PIL_SMP_CTX_NEW_VERSION 4 #define PIL_DEVICE_IRQ 5 #define PIL_SMP_CALL_FUNC_SNGL 6 #define PIL_DEFERRED_PCR_WORK 7 diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index c9804551262cc2..6ae1e77be0bfde 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr /* smp_64.c */ void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs); void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs); -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs); void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs); void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index b3bc0ac757cc11..fdf31040a7dc5c 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) preempt_enable(); } -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) -{ - struct mm_struct *mm; - unsigned long flags; - - clear_softint(1 << irq); - - /* See if we need to allocate a new TLB context because - * the version of the one we are using is now out of date. - */ - mm = current->active_mm; - if (unlikely(!mm || (mm == &init_mm))) - return; - - spin_lock_irqsave(&mm->context.lock, flags); - - if (unlikely(!CTX_VALID(mm->context))) - get_new_mmu_context(mm); - - spin_unlock_irqrestore(&mm->context.lock, flags); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), - SECONDARY_CONTEXT); -} - -void smp_new_mmu_context_version(void) -{ - smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); -} - #ifdef CONFIG_KGDB void kgdb_roundup_cpus(unsigned long flags) { diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S index 7bd8f6556352d9..efe93ab4a9c065 100644 --- a/arch/sparc/kernel/ttable_64.S +++ b/arch/sparc/kernel/ttable_64.S @@ -50,7 +50,7 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40) tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) -tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) +tl0_irq4: BTRAP(0x44) #else tl0_irq1: BTRAP(0x41) tl0_irq2: BTRAP(0x42) diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 5d2fd6cd31896b..fcf4d27a38fb47 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -971,11 +971,6 @@ xcall_capture: wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint retry - .globl xcall_new_mmu_context_version -xcall_new_mmu_context_version: - wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint - retry - #ifdef CONFIG_KGDB .globl xcall_kgdb_capture xcall_kgdb_capture: