From 62559494ea20a3e2ac50ea0f9e4b059292b5a6ed Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 9 Feb 2016 12:25:42 -0700 Subject: [PATCH 1/4] Add the hash table to the end of the thread group struct This commit removes an extra kernel allocation by moving the thread group and partition hash tables to flexible array members at the end their structures. This change required moving the definition of the hash table and xpmem_id structures above the thread group and partition structure definitions. Signed-off-by: Nathan Hjelm --- kernel/xpmem_main.c | 38 ++++------- kernel/xpmem_misc.c | 2 - kernel/xpmem_private.h | 141 +++++++++++++++++++++-------------------- 3 files changed, 83 insertions(+), 98 deletions(-) diff --git a/kernel/xpmem_main.c b/kernel/xpmem_main.c index a32592f316a..e01bbad8176 100644 --- a/kernel/xpmem_main.c +++ b/kernel/xpmem_main.c @@ -5,7 +5,7 @@ * * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved. * Copyright 2010, 2014 Cray Inc. All Rights Reserved - * Copyright 2015 Los Alamos National Security, LLC. All rights reserved. + * Copyright 2015-2016 Los Alamos National Security, LLC. All rights reserved. */ /* @@ -67,7 +67,9 @@ xpmem_open(struct inode *inode, struct file *file) } /* create tg */ - tg = kzalloc(sizeof(struct xpmem_thread_group), GFP_KERNEL); + tg = kzalloc(sizeof(struct xpmem_thread_group) + + sizeof(struct xpmem_hashlist) * + XPMEM_AP_HASHTABLE_SIZE, GFP_KERNEL); if (tg == NULL) { spin_unlock(&xpmem_open_lock); return -ENOMEM; @@ -92,6 +94,11 @@ xpmem_open(struct inode *inode, struct file *file) tg->mmu_unregister_called = 0; tg->mm = current->mm; + for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) { + rwlock_init(&tg->ap_hashtable[index].lock); + INIT_LIST_HEAD(&tg->ap_hashtable[index].list); + } + /* Register MMU notifier callbacks */ if (xpmem_mmu_notifier_init(tg) != 0) { spin_unlock(&xpmem_open_lock); @@ -99,20 +106,6 @@ xpmem_open(struct inode *inode, struct file *file) return -EFAULT; } - /* create and initialize struct xpmem_access_permit hashtable */ - tg->ap_hashtable = kzalloc(sizeof(struct xpmem_hashlist) * - XPMEM_AP_HASHTABLE_SIZE, GFP_KERNEL); - if (tg->ap_hashtable == NULL) { - spin_unlock(&xpmem_open_lock); - xpmem_mmu_notifier_unlink(tg); - kfree(tg); - return -ENOMEM; - } - for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) { - rwlock_init(&tg->ap_hashtable[index].lock); - INIT_LIST_HEAD(&tg->ap_hashtable[index].list); - } - snprintf(tgid_string, XPMEM_TGID_STRING_LEN, "%d", current->tgid); spin_lock(&xpmem_unpin_procfs_lock); unpin_entry = proc_create_data(tgid_string, 0644, @@ -409,17 +402,12 @@ xpmem_init(void) struct proc_dir_entry *debug_printk_entry; /* create and initialize struct xpmem_partition array */ - xpmem_my_part = kzalloc(sizeof(struct xpmem_partition), GFP_KERNEL); + xpmem_my_part = kzalloc(sizeof(struct xpmem_partition) + + sizeof(struct xpmem_hashlist) * + XPMEM_TG_HASHTABLE_SIZE, GFP_KERNEL); if (xpmem_my_part == NULL) return -ENOMEM; - xpmem_my_part->tg_hashtable = kzalloc(sizeof(struct xpmem_hashlist) * - XPMEM_TG_HASHTABLE_SIZE, GFP_KERNEL); - if (xpmem_my_part->tg_hashtable == NULL) { - kfree(xpmem_my_part); - return -ENOMEM; - } - for (i = 0; i < XPMEM_TG_HASHTABLE_SIZE; i++) { rwlock_init(&xpmem_my_part->tg_hashtable[i].lock); INIT_LIST_HEAD(&xpmem_my_part->tg_hashtable[i].list); @@ -472,7 +460,6 @@ xpmem_init(void) out_2: remove_proc_entry(XPMEM_MODULE_NAME, NULL); out_1: - kfree(xpmem_my_part->tg_hashtable); kfree(xpmem_my_part); return ret; } @@ -483,7 +470,6 @@ 
xpmem_init(void) void __exit xpmem_exit(void) { - kfree(xpmem_my_part->tg_hashtable); kfree(xpmem_my_part); misc_deregister(&xpmem_dev_handle); diff --git a/kernel/xpmem_misc.c b/kernel/xpmem_misc.c index 3e32d5768ed..c33240ca01a 100644 --- a/kernel/xpmem_misc.c +++ b/kernel/xpmem_misc.c @@ -111,8 +111,6 @@ xpmem_tg_deref(struct xpmem_thread_group *tg) remove_proc_entry(tgid_string, xpmem_unpin_procfs_dir); spin_unlock(&xpmem_unpin_procfs_lock); - kfree(tg->ap_hashtable); - kfree(tg); } diff --git a/kernel/xpmem_private.h b/kernel/xpmem_private.h index 5f448454744..fbafab3d062 100644 --- a/kernel/xpmem_private.h +++ b/kernel/xpmem_private.h @@ -5,7 +5,7 @@ * * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved. * Copyright 2009, 2010, 2014 Cray Inc. All Rights Reserved - * Copyright (c) 2014-2015 Los Alamos National Security, LCC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LCC. All rights * reserved. */ @@ -85,6 +85,73 @@ extern uint32_t xpmem_debug_on; #define delayed_work work_struct +/* + * Both the xpmem_segid_t and xpmem_apid_t are of type __s64 and designed + * to be opaque to the user. Both consist of the same underlying fields. + * + * The 'uniq' field is designed to give each segid or apid a unique value. + * Each type is only unique with respect to itself. + * + * An ID is never less than or equal to zero. + */ +struct xpmem_id { + pid_t tgid; /* thread group that owns ID */ + unsigned int uniq; /* this value makes the ID unique */ +}; + +/* Shift INT_MAX by one so we can tell when we overflow. */ +#define XPMEM_MAX_UNIQ_ID (INT_MAX >> 1) + +static inline pid_t +xpmem_segid_to_tgid(xpmem_segid_t segid) +{ + DBUG_ON(segid <= 0); + return ((struct xpmem_id *)&segid)->tgid; +} + +static inline pid_t +xpmem_apid_to_tgid(xpmem_apid_t apid) +{ + DBUG_ON(apid <= 0); + return ((struct xpmem_id *)&apid)->tgid; +} + +/* + * Hash Tables + * + * XPMEM utilizes hash tables to enable faster lookups of list entries. + * These hash tables are implemented as arrays. A simple modulus of the hash + * key yields the appropriate array index. A hash table's array element (i.e., + * hash table bucket) consists of a hash list and the lock that protects it. + * + * XPMEM has the following two hash tables: + * + * table bucket key + * part->tg_hashtable list of struct xpmem_thread_group tgid + * tg->ap_hashtable list of struct xpmem_access_permit apid.uniq + */ + +struct xpmem_hashlist { + rwlock_t lock; /* lock for hash list */ + struct list_head list; /* hash list */ +} ____cacheline_aligned; + +#define XPMEM_TG_HASHTABLE_SIZE 8 +#define XPMEM_AP_HASHTABLE_SIZE 8 + +static inline int +xpmem_tg_hashtable_index(pid_t tgid) +{ + return ((unsigned int)tgid % XPMEM_TG_HASHTABLE_SIZE); +} + +static inline int +xpmem_ap_hashtable_index(xpmem_apid_t apid) +{ + DBUG_ON(apid <= 0); + return (((struct xpmem_id *)&apid)->uniq % XPMEM_AP_HASHTABLE_SIZE); +} + /* * general internal driver structures */ @@ -104,7 +171,6 @@ struct xpmem_thread_group { atomic_t uniq_apid; rwlock_t seg_list_lock; struct list_head seg_list; /* tg's list of segs */ - struct xpmem_hashlist *ap_hashtable; /* locks + ap hash lists */ atomic_t refcnt; /* references to tg */ atomic_t n_pinned; /* #of pages pinned by this tg */ u64 addr_limit; /* highest possible user addr */ @@ -118,6 +184,8 @@ struct xpmem_thread_group { struct mmu_notifier mmu_not; /* tg's mmu notifier struct */ int mmu_initialized; /* registered for mmu callbacks? 
*/ int mmu_unregister_called; + + struct xpmem_hashlist ap_hashtable[]; /* locks + ap hash lists */ }; struct xpmem_segment { @@ -164,44 +232,13 @@ struct xpmem_attachment { }; struct xpmem_partition { - struct xpmem_hashlist *tg_hashtable; /* locks + tg hash lists */ - /* procfs debugging */ atomic_t n_pinned; /* # of pages pinned xpmem */ atomic_t n_unpinned; /* # of pages unpinned by xpmem */ -}; -/* - * Both the xpmem_segid_t and xpmem_apid_t are of type __s64 and designed - * to be opaque to the user. Both consist of the same underlying fields. - * - * The 'uniq' field is designed to give each segid or apid a unique value. - * Each type is only unique with respect to itself. - * - * An ID is never less than or equal to zero. - */ -struct xpmem_id { - pid_t tgid; /* thread group that owns ID */ - unsigned int uniq; /* this value makes the ID unique */ + struct xpmem_hashlist tg_hashtable[]; /* locks + tg hash lists */ }; -/* Shift INT_MAX by one so we can tell when we overflow. */ -#define XPMEM_MAX_UNIQ_ID (INT_MAX >> 1) - -static inline pid_t -xpmem_segid_to_tgid(xpmem_segid_t segid) -{ - DBUG_ON(segid <= 0); - return ((struct xpmem_id *)&segid)->tgid; -} - -static inline pid_t -xpmem_apid_to_tgid(xpmem_apid_t apid) -{ - DBUG_ON(apid <= 0); - return ((struct xpmem_id *)&apid)->tgid; -} - /* * Attribute and state flags for various xpmem structures. Some values * are defined in xpmem.h, so we reserved space here via XPMEM_DONT_USE_X @@ -412,40 +449,4 @@ xpmem_wait_for_seg_destroyed(struct xpmem_segment *seg) XPMEM_FLAG_RECALLINGPFNS)))); } -/* - * Hash Tables - * - * XPMEM utilizes hash tables to enable faster lookups of list entries. - * These hash tables are implemented as arrays. A simple modulus of the hash - * key yields the appropriate array index. A hash table's array element (i.e., - * hash table bucket) consists of a hash list and the lock that protects it. - * - * XPMEM has the following two hash tables: - * - * table bucket key - * part->tg_hashtable list of struct xpmem_thread_group tgid - * tg->ap_hashtable list of struct xpmem_access_permit apid.uniq - */ - -struct xpmem_hashlist { - rwlock_t lock; /* lock for hash list */ - struct list_head list; /* hash list */ -} ____cacheline_aligned; - -#define XPMEM_TG_HASHTABLE_SIZE 8 -#define XPMEM_AP_HASHTABLE_SIZE 8 - -static inline int -xpmem_tg_hashtable_index(pid_t tgid) -{ - return ((unsigned int)tgid % XPMEM_TG_HASHTABLE_SIZE); -} - -static inline int -xpmem_ap_hashtable_index(xpmem_apid_t apid) -{ - DBUG_ON(apid <= 0); - return (((struct xpmem_id *)&apid)->uniq % XPMEM_AP_HASHTABLE_SIZE); -} - #endif /* _XPMEM_PRIVATE_H */ From b797a2485e6382f9e376bdc8ca5f74ee29aa2fda Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 9 Feb 2016 12:29:54 -0700 Subject: [PATCH 2/4] Fix kernel deadlock on open/close In 648eabb5bda05f7bc3d15eb5cfce92f60b6fd718 a fix was added to remove a race condition between the creation and destruction of a proc entry for a thread group. The issue was caused by user code opening and then closing the xpmem device file before using xpmem. Unfortunately, the fix causes a deadlock in 4.x kernels (not observed in 3.x). This commit removes the open spinlock and moves the destruction of the proc entry into xpmem_flush. This will guarantee that the proc entry is always destroyed before it is created again. An additional change introduced in this commit has to do with when the thread group is removed from the hash table.
Before this commit the hash table read lock is obtained, the returned tg entry referenced, and the read lock dropped. The write lock is later obtained and the entry is removed. This allows for the possibility of a race condition. To avoid this a non-locking version of xpmem_tg_ref_by_tgid_all_nolock has been implemented to allow a search then remove functionality. Fixes hjelmn/xpmem#4 Signed-off-by: Nathan Hjelm --- kernel/xpmem_main.c | 57 +++++++++++++++++++++--------------------- kernel/xpmem_misc.c | 15 +---------- kernel/xpmem_private.h | 24 +++++++++++++++--- 3 files changed, 50 insertions(+), 46 deletions(-) diff --git a/kernel/xpmem_main.c b/kernel/xpmem_main.c index e01bbad8176..370f488466e 100644 --- a/kernel/xpmem_main.c +++ b/kernel/xpmem_main.c @@ -42,7 +42,7 @@ #endif struct xpmem_partition *xpmem_my_part = NULL; /* pointer to this partition */ -static spinlock_t xpmem_open_lock; +static void xpmem_destroy_tg(struct xpmem_thread_group *tg); /* * User open of the XPMEM driver. Called whenever /dev/xpmem is opened. @@ -57,11 +57,9 @@ xpmem_open(struct inode *inode, struct file *file) struct proc_dir_entry *unpin_entry; char tgid_string[XPMEM_TGID_STRING_LEN]; - spin_lock(&xpmem_open_lock); /* if this has already been done, just return silently */ tg = xpmem_tg_ref_by_tgid(current->tgid); if (!IS_ERR(tg)) { - spin_unlock(&xpmem_open_lock); xpmem_tg_deref(tg); return 0; } @@ -71,7 +69,6 @@ xpmem_open(struct inode *inode, struct file *file) sizeof(struct xpmem_hashlist) * XPMEM_AP_HASHTABLE_SIZE, GFP_KERNEL); if (tg == NULL) { - spin_unlock(&xpmem_open_lock); return -ENOMEM; } @@ -101,7 +98,6 @@ xpmem_open(struct inode *inode, struct file *file) /* Register MMU notifier callbacks */ if (xpmem_mmu_notifier_init(tg) != 0) { - spin_unlock(&xpmem_open_lock); kfree(tg); return -EFAULT; } @@ -138,7 +134,6 @@ xpmem_open(struct inode *inode, struct file *file) get_task_struct(current->group_leader); tg->group_leader = current->group_leader; BUG_ON(current->mm != current->group_leader->mm); - spin_unlock(&xpmem_open_lock); return 0; } @@ -151,9 +146,6 @@ xpmem_open(struct inode *inode, struct file *file) static void xpmem_destroy_tg(struct xpmem_thread_group *tg) { - int index; - - spin_lock(&xpmem_open_lock); XPMEM_DEBUG("tg->mm=%p", tg->mm); /* @@ -161,27 +153,8 @@ xpmem_destroy_tg(struct xpmem_thread_group *tg) * Decrements mm_count. */ xpmem_mmu_notifier_unlink(tg); - - /* Remove tg structure from its hash list */ - index = xpmem_tg_hashtable_index(tg->tgid); - write_lock(&xpmem_my_part->tg_hashtable[index].lock); - /* - * Two threads could have called xpmem_flush at about the same time, - * and thus xpmem_tg_ref_by_tgid_all could return the same tg in - * both threads. Guard against this race. 
- */ - if (list_empty(&tg->tg_hashlist)) { - write_unlock(&xpmem_my_part->tg_hashtable[index].lock); - xpmem_tg_deref(tg); - spin_unlock(&xpmem_open_lock); - return; - } - list_del_init(&tg->tg_hashlist); - write_unlock(&xpmem_my_part->tg_hashtable[index].lock); - xpmem_tg_destroyable(tg); xpmem_tg_deref(tg); - spin_unlock(&xpmem_open_lock); } /* @@ -223,7 +196,9 @@ xpmem_teardown(struct xpmem_thread_group *tg) static int xpmem_flush(struct file *file, fl_owner_t owner) { + char tgid_string[XPMEM_TGID_STRING_LEN]; struct xpmem_thread_group *tg; + int index; /* * During a call to fork() there is a check for whether the parent @@ -242,8 +217,18 @@ xpmem_flush(struct file *file, fl_owner_t owner) if (current->files && current->files != owner) return 0; - tg = xpmem_tg_ref_by_tgid_all(current->tgid); + /* + * Two threads could have called xpmem_flush at about the same time, + * and thus xpmem_tg_ref_by_tgid_all could return the same tg in + * both threads. Guard against this race. + */ + index = xpmem_tg_hashtable_index(current->tgid); + write_lock(&xpmem_my_part->tg_hashtable[index].lock); + + /* Remove tg structure from its hash list */ + tg = xpmem_tg_ref_by_tgid_all_nolock(current->tgid); if (IS_ERR(tg)) { + write_unlock(&xpmem_my_part->tg_hashtable[index].lock); /* * xpmem_flush() can get called twice for thread groups * which inherited /dev/xpmem: once for the inherited fd, @@ -254,8 +239,22 @@ xpmem_flush(struct file *file, fl_owner_t owner) return 0; } + list_del_init(&tg->tg_hashlist); + + write_unlock(&xpmem_my_part->tg_hashtable[index].lock); + XPMEM_DEBUG("tg->mm=%p", tg->mm); + /* + * NTH: the thread group may not be released until later so remove the + * proc entry now to avoid a race between another call to xpmem_open() + * and the distruction of the thread group object. + */ + snprintf(tgid_string, XPMEM_TGID_STRING_LEN, "%d", tg->tgid); + spin_lock(&xpmem_unpin_procfs_lock); + remove_proc_entry(tgid_string, xpmem_unpin_procfs_dir); + spin_unlock(&xpmem_unpin_procfs_lock); + xpmem_destroy_tg(tg); return 0; diff --git a/kernel/xpmem_misc.c b/kernel/xpmem_misc.c index c33240ca01a..c1e161c94f5 100644 --- a/kernel/xpmem_misc.c +++ b/kernel/xpmem_misc.c @@ -32,14 +32,10 @@ uint32_t xpmem_debug_on = 0; * XPMEM_FLAG_DESTROYING. 
*/ struct xpmem_thread_group * -__xpmem_tg_ref_by_tgid(pid_t tgid, int return_destroying) +__xpmem_tg_ref_by_tgid_nolock_internal(pid_t tgid, int index, int return_destroying) { - int index; struct xpmem_thread_group *tg; - index = xpmem_tg_hashtable_index(tgid); - read_lock(&xpmem_my_part->tg_hashtable[index].lock); - list_for_each_entry(tg, &xpmem_my_part->tg_hashtable[index].list, tg_hashlist) { if (tg->tgid == tgid) { @@ -49,12 +45,10 @@ __xpmem_tg_ref_by_tgid(pid_t tgid, int return_destroying) } xpmem_tg_ref(tg); - read_unlock(&xpmem_my_part->tg_hashtable[index].lock); return tg; } } - read_unlock(&xpmem_my_part->tg_hashtable[index].lock); return ERR_PTR(-ENOENT); } @@ -86,8 +80,6 @@ xpmem_tg_ref_by_apid(xpmem_apid_t apid) void xpmem_tg_deref(struct xpmem_thread_group *tg) { - char tgid_string[XPMEM_TGID_STRING_LEN]; - DBUG_ON(atomic_read(&tg->refcnt) <= 0); if (atomic_dec_return(&tg->refcnt) != 0) return; @@ -106,11 +98,6 @@ xpmem_tg_deref(struct xpmem_thread_group *tg) */ put_task_struct(tg->group_leader); - snprintf(tgid_string, XPMEM_TGID_STRING_LEN, "%d", tg->tgid); - spin_lock(&xpmem_unpin_procfs_lock); - remove_proc_entry(tgid_string, xpmem_unpin_procfs_dir); - spin_unlock(&xpmem_unpin_procfs_lock); - kfree(tg); } diff --git a/kernel/xpmem_private.h b/kernel/xpmem_private.h index fbafab3d062..3b4f8120bc4 100644 --- a/kernel/xpmem_private.h +++ b/kernel/xpmem_private.h @@ -300,9 +300,27 @@ extern struct xpmem_partition *xpmem_my_part; void xpmem_teardown(struct xpmem_thread_group *tg); /* found in xpmem_misc.c */ -extern struct xpmem_thread_group *__xpmem_tg_ref_by_tgid(pid_t, int); -#define xpmem_tg_ref_by_tgid(t) __xpmem_tg_ref_by_tgid(t, 0) -#define xpmem_tg_ref_by_tgid_all(t) __xpmem_tg_ref_by_tgid(t, 1) +extern struct xpmem_thread_group * +__xpmem_tg_ref_by_tgid_nolock_internal(pid_t tgid, int index, int return_destroying); +static inline struct xpmem_thread_group *__xpmem_tg_ref_by_tgid(pid_t tgid, int return_destroying) { + struct xpmem_thread_group *tg; + int index; + + index = xpmem_tg_hashtable_index(tgid); + read_lock(&xpmem_my_part->tg_hashtable[index].lock); + tg = __xpmem_tg_ref_by_tgid_nolock_internal (tgid, index, return_destroying); + read_unlock(&xpmem_my_part->tg_hashtable[index].lock); + return tg; +} + +static inline struct xpmem_thread_group *__xpmem_tg_ref_by_tgid_nolock(pid_t tgid, int return_destroying) { + return __xpmem_tg_ref_by_tgid_nolock_internal (tgid, xpmem_tg_hashtable_index(tgid), + return_destroying); +} +#define xpmem_tg_ref_by_tgid(t) __xpmem_tg_ref_by_tgid(t, 0) +#define xpmem_tg_ref_by_tgid_all(t) __xpmem_tg_ref_by_tgid(t, 1) +#define xpmem_tg_ref_by_tgid_nolock(t) __xpmem_tg_ref_by_tgid_nolock(t, 0) +#define xpmem_tg_ref_by_tgid_all_nolock(t) __xpmem_tg_ref_by_tgid_nolock(t, 1) extern struct xpmem_thread_group *xpmem_tg_ref_by_segid(xpmem_segid_t); extern struct xpmem_thread_group *xpmem_tg_ref_by_apid(xpmem_apid_t); extern void xpmem_tg_deref(struct xpmem_thread_group *); From f8a6158054850925d99c5709e227cd9d97b52c7e Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 9 Feb 2016 12:37:04 -0700 Subject: [PATCH 3/4] Fix bug in munmap workaround The original Cray code used the unexported do_munmap function to specify the memory descriptor where the address range should be unmapped. To avoid the use of a unexported symbol the code was updated to use the vm_munmap function instead. In order for this to work it was necessary to set the current task's mm value before calling vm_munmap. 
This appears to cause hangs, crashes, etc in 4.x kernels that were not observed with 3.x. To fix the issue remove the set of current->mm and just avoid calling vm_munmap when the current task has no memory descriptor. The memory mapping should go away automatically when the last mmput is called on the memory descriptor. Tested with 4.4.1 and confirmed there are no apparent leaks. Signed-off-by: Nathan Hjelm --- kernel/xpmem_attach.c | 51 +++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/kernel/xpmem_attach.c b/kernel/xpmem_attach.c index a450dd51e1f..793b77133eb 100644 --- a/kernel/xpmem_attach.c +++ b/kernel/xpmem_attach.c @@ -5,7 +5,7 @@ * * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved. * Copyright 2010,2012 Cray Inc. All Rights Reserved - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. */ @@ -55,14 +55,14 @@ xpmem_close_handler(struct vm_area_struct *vma) struct xpmem_access_permit *ap; struct xpmem_attachment *att; - XPMEM_DEBUG("cleaning up"); - att = (struct xpmem_attachment *)vma->vm_private_data; if (att == NULL) { /* can happen if a user tries to mmap /dev/xpmem directly */ return; } + XPMEM_DEBUG("cleaning up vma with range: 0x%lx - 0x%lx", vma->vm_start, vma->vm_end); + xpmem_att_ref(att); mutex_lock(&att->mutex); @@ -619,32 +619,26 @@ void xpmem_detach_att(struct xpmem_access_permit *ap, struct xpmem_attachment *att) { struct vm_area_struct *vma; - struct mm_struct *current_mm; + struct mm_struct *mm; int ret; XPMEM_DEBUG("detaching attr %p. current->mm = %p, att->mm = %p", att, (void *) current->mm, (void *) att->mm); + mm = current->mm ? current->mm : att->mm; + /* must lock mmap_sem before att's sema to prevent deadlock */ - down_write(&att->mm->mmap_sem); + down_write(&mm->mmap_sem); mutex_lock(&att->mutex); - /* store a copy of the current mm */ - current_mm = current->mm; - if (NULL == current_mm) { - current->mm = att->mm; - } - /* ensure we aren't racing with MMU notifier PTE cleanup */ mutex_lock(&att->invalidate_mutex); if (att->flags & XPMEM_FLAG_DESTROYING) { mutex_unlock(&att->invalidate_mutex); mutex_unlock(&att->mutex); - up_write(¤t->mm->mmap_sem); - /* restore the current mm */ - current->mm = current_mm; + up_write(&mm->mmap_sem); return; } att->flags |= XPMEM_FLAG_DESTROYING; @@ -652,20 +646,18 @@ xpmem_detach_att(struct xpmem_access_permit *ap, struct xpmem_attachment *att) mutex_unlock(&att->invalidate_mutex); /* find the corresponding vma */ - vma = find_vma(current->mm, att->at_vaddr); + vma = find_vma(mm, att->at_vaddr); if (!vma || vma->vm_start > att->at_vaddr) { DBUG_ON(1); mutex_unlock(&att->mutex); - up_write(¤t->mm->mmap_sem); - /* restore the current mm */ - current->mm = current_mm; + up_write(&mm->mmap_sem); return; } DBUG_ON(!xpmem_is_vm_ops_set(vma)); DBUG_ON((vma->vm_end - vma->vm_start) != att->at_size); DBUG_ON(vma->vm_private_data != att); - xpmem_unpin_pages(ap->seg, current->mm, att->at_vaddr, att->at_size); + xpmem_unpin_pages(ap->seg, mm, att->at_vaddr, att->at_size); vma->vm_private_data = NULL; @@ -675,14 +667,17 @@ xpmem_detach_att(struct xpmem_access_permit *ap, struct xpmem_attachment *att) list_del_init(&att->att_list); spin_unlock(&ap->lock); - /* NTH: drop the semaphoe before calling vm_munmap */ - up_write(¤t->mm->mmap_sem); + /* NTH: drop the semaphore and attachment lock before calling vm_munmap */ mutex_unlock(&att->mutex); - - ret = 
vm_munmap(vma->vm_start, att->at_size); - /* restore the current mm */ - current->mm = current_mm; - DBUG_ON(ret != 0); + up_write(&mm->mmap_sem); + + /* NTH: if the current task does not have a memory descriptor + * then there is nothing more to do. the memory mapping should + * go away automatically when the memory descriptor does. */ + if (NULL != current->mm) { + ret = vm_munmap(vma->vm_start, att->at_size); + DBUG_ON(ret != 0); + } xpmem_att_destroyable(att); } @@ -756,10 +751,8 @@ xpmem_clear_PTEs_of_att(struct xpmem_attachment *att, u64 start, u64 end, * space and find the intersection with (start, end). */ invalidate_start = max(start, att->vaddr); - if (invalidate_start >= att_vaddr_end) - goto out; invalidate_end = min(end, att_vaddr_end); - if (invalidate_end <= att->vaddr) + if (invalidate_start >= att_vaddr_end || invalidate_end <= att->vaddr) goto out; /* Convert the intersection of vaddr into offsets. */ From fe25b3e40cd4d7d086c211164fcd79ba1a48cd5b Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 9 Feb 2016 12:42:25 -0700 Subject: [PATCH 4/4] Bump the version to v2.6.3 Signed-off-by: Nathan Hjelm --- kernel/xpmem_main.c | 6 ++---- kernel/xpmem_private.h | 6 ++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/xpmem_main.c b/kernel/xpmem_main.c index 370f488466e..4026e8037a2 100644 --- a/kernel/xpmem_main.c +++ b/kernel/xpmem_main.c @@ -446,9 +446,7 @@ xpmem_init(void) goto out_4; } - spin_lock_init (&xpmem_open_lock); - - printk("SGI XPMEM kernel module v%s loaded\n", + printk("XPMEM kernel module v%s loaded\n", XPMEM_CURRENT_VERSION_STRING); return 0; @@ -476,7 +474,7 @@ xpmem_exit(void) remove_proc_entry("debug_printk", xpmem_unpin_procfs_dir); remove_proc_entry(XPMEM_MODULE_NAME, NULL); - printk("SGI XPMEM kernel module v%s unloaded\n", + printk("XPMEM kernel module v%s unloaded\n", XPMEM_CURRENT_VERSION_STRING); } diff --git a/kernel/xpmem_private.h b/kernel/xpmem_private.h index 3b4f8120bc4..26abc460774 100644 --- a/kernel/xpmem_private.h +++ b/kernel/xpmem_private.h @@ -56,6 +56,8 @@ * 2.6 CRAY: rearrange/clean-up code for easier debugging * 2.6.1 Merge with latest Cray version (2.4->2.6) * 2.6.2 Fix race in xpmem_open + * 2.6.3 Fix bugs introduced in 2.6.2 that worked with 3.x but + * not 4.x kernels. * * This int constant has the following format: * @@ -66,8 +68,8 @@ * major - major revision number (12-bits) * minor - minor revision number (16-bits) */ -#define XPMEM_CURRENT_VERSION 0x00026002 -#define XPMEM_CURRENT_VERSION_STRING "2.6.2" +#define XPMEM_CURRENT_VERSION 0x00026003 +#define XPMEM_CURRENT_VERSION_STRING "2.6.3" #define XPMEM_MODULE_NAME "xpmem"
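
Background for PATCH 1/4: the commit folds each hash table into its parent structure by using a C99 flexible array member, so a single kzalloc() (and later a single kfree()) covers both the structure and its bucket array. The following is a minimal, self-contained sketch of that pattern with invented names (struct bucket, struct table, table_create); it is not the driver's actual code:

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>

#define NBUCKETS 8

struct bucket {
	rwlock_t lock;			/* protects the hash list below */
	struct list_head list;		/* hash list */
};

struct table {
	int nbuckets;
	struct bucket buckets[];	/* flexible array member; must be last */
};

/* Allocate the structure and its trailing bucket array in one shot. */
static struct table *table_create(void)
{
	struct table *t;
	int i;

	t = kzalloc(sizeof(*t) + NBUCKETS * sizeof(struct bucket), GFP_KERNEL);
	if (t == NULL)
		return NULL;

	t->nbuckets = NBUCKETS;
	for (i = 0; i < NBUCKETS; i++) {
		rwlock_init(&t->buckets[i].lock);
		INIT_LIST_HEAD(&t->buckets[i].list);
	}
	return t;
}

Because everything lives in one allocation, a plain kfree(t) releases the bucket array as well, which is why the patch can drop the separate kfree() calls for tg->ap_hashtable and xpmem_my_part->tg_hashtable.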
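
Background for PATCH 2/4: the race described in the commit message is the window between taking a reference on a thread group under the read lock and later unlinking it under the write lock. The fix performs the lookup and the unlink under one write lock (via __xpmem_tg_ref_by_tgid_nolock_internal). A rough sketch of the fixed shape, reusing the hypothetical struct bucket from the previous note and an invented struct tg, looks like this:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct tg {
	pid_t tgid;
	struct list_head hashlist;
};

/*
 * Racy shape (roughly what the old code did):
 *   read_lock();  t = lookup(tgid); ref(t);  read_unlock();
 *   ...window: a second xpmem_flush() can look up the same tg here...
 *   write_lock(); list_del_init(&t->hashlist); write_unlock();
 *
 * Fixed shape: search and unlink under a single write lock, so a
 * second caller that loses the race simply fails the lookup.
 */
static struct tg *tg_lookup_and_remove(struct bucket *b, pid_t tgid)
{
	struct tg *t;

	write_lock(&b->lock);
	list_for_each_entry(t, &b->list, hashlist) {
		if (t->tgid == tgid) {
			list_del_init(&t->hashlist);
			write_unlock(&b->lock);
			return t;
		}
	}
	write_unlock(&b->lock);
	return NULL;
}

The real xpmem_flush() additionally keeps a reference on the tg and removes its /proc entry before calling xpmem_destroy_tg(), so a later xpmem_open() cannot collide with a proc entry that is still being torn down.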
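
Background for PATCH 3/4: the essence of the fix is to stop forging current->mm. A condensed, hypothetical rendering of the new detach logic is shown below; detach_mapping and its parameters are invented for illustration (the real code is xpmem_detach_att), and the field names follow the 3.x/4.x kernels this series targets (mmap_sem, later renamed mmap_lock):

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/rwsem.h>

static void detach_mapping(struct mm_struct *att_mm,
			   unsigned long start, size_t size)
{
	/* Operate on the caller's mm if it still has one (normal detach),
	 * otherwise on the attachment's mm (process tear-down path). */
	struct mm_struct *mm = current->mm ? current->mm : att_mm;

	down_write(&mm->mmap_sem);
	/* ... find the vma, unpin pages, clear vm_private_data against 'mm' ... */
	up_write(&mm->mmap_sem);

	/*
	 * vm_munmap() always acts on current->mm, so only call it when the
	 * current task still has a memory descriptor. During exit the
	 * mapping disappears when the final mmput() drops att_mm anyway.
	 */
	if (current->mm != NULL)
		vm_munmap(start, size);
}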