
Merge pull request openucx#5 from hjelmn/issue1
Fix bugs when running with 4.x kernels
hjelmn committed Feb 10, 2016
2 parents d6af590 + fe25b3e commit 9557836
Showing 4 changed files with 161 additions and 179 deletions.
51 changes: 22 additions & 29 deletions kernel/xpmem_attach.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved.
  * Copyright 2010,2012 Cray Inc. All Rights Reserved
- * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
+ * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
  * reserved.
  */

@@ -55,14 +55,14 @@ xpmem_close_handler(struct vm_area_struct *vma)
         struct xpmem_access_permit *ap;
         struct xpmem_attachment *att;

-        XPMEM_DEBUG("cleaning up");
-
         att = (struct xpmem_attachment *)vma->vm_private_data;
         if (att == NULL) {
                 /* can happen if a user tries to mmap /dev/xpmem directly */
                 return;
         }

+        XPMEM_DEBUG("cleaning up vma with range: 0x%lx - 0x%lx", vma->vm_start, vma->vm_end);
+
         xpmem_att_ref(att);
         mutex_lock(&att->mutex);

@@ -619,53 +619,45 @@ void
 xpmem_detach_att(struct xpmem_access_permit *ap, struct xpmem_attachment *att)
 {
         struct vm_area_struct *vma;
-        struct mm_struct *current_mm;
+        struct mm_struct *mm;
         int ret;


         XPMEM_DEBUG("detaching attr %p. current->mm = %p, att->mm = %p", att,
                     (void *) current->mm, (void *) att->mm);

+        mm = current->mm ? current->mm : att->mm;
+
         /* must lock mmap_sem before att's sema to prevent deadlock */
-        down_write(&att->mm->mmap_sem);
+        down_write(&mm->mmap_sem);
         mutex_lock(&att->mutex);

-        /* store a copy of the current mm */
-        current_mm = current->mm;
-        if (NULL == current_mm) {
-                current->mm = att->mm;
-        }
-
         /* ensure we aren't racing with MMU notifier PTE cleanup */
         mutex_lock(&att->invalidate_mutex);

         if (att->flags & XPMEM_FLAG_DESTROYING) {
                 mutex_unlock(&att->invalidate_mutex);
                 mutex_unlock(&att->mutex);
-                up_write(&current->mm->mmap_sem);
-                /* restore the current mm */
-                current->mm = current_mm;
+                up_write(&mm->mmap_sem);
                 return;
         }
         att->flags |= XPMEM_FLAG_DESTROYING;

         mutex_unlock(&att->invalidate_mutex);

         /* find the corresponding vma */
-        vma = find_vma(current->mm, att->at_vaddr);
+        vma = find_vma(mm, att->at_vaddr);
         if (!vma || vma->vm_start > att->at_vaddr) {
                 DBUG_ON(1);
                 mutex_unlock(&att->mutex);
-                up_write(&current->mm->mmap_sem);
-                /* restore the current mm */
-                current->mm = current_mm;
+                up_write(&mm->mmap_sem);
                 return;
         }
         DBUG_ON(!xpmem_is_vm_ops_set(vma));
         DBUG_ON((vma->vm_end - vma->vm_start) != att->at_size);
         DBUG_ON(vma->vm_private_data != att);

-        xpmem_unpin_pages(ap->seg, current->mm, att->at_vaddr, att->at_size);
+        xpmem_unpin_pages(ap->seg, mm, att->at_vaddr, att->at_size);

         vma->vm_private_data = NULL;

@@ -675,14 +667,17 @@ xpmem_detach_att(struct xpmem_access_permit *ap, struct xpmem_attachment *att)
         list_del_init(&att->att_list);
         spin_unlock(&ap->lock);

-        /* NTH: drop the semaphoe before calling vm_munmap */
-        up_write(&current->mm->mmap_sem);
+        /* NTH: drop the semaphore and attachment lock before calling vm_munmap */
         mutex_unlock(&att->mutex);
-
-        ret = vm_munmap(vma->vm_start, att->at_size);
-        /* restore the current mm */
-        current->mm = current_mm;
-        DBUG_ON(ret != 0);
+        up_write(&mm->mmap_sem);
+
+        /* NTH: if the current task does not have a memory descriptor
+         * then there is nothing more to do. the memory mapping should
+         * go away automatically when the memory descriptor does. */
+        if (NULL != current->mm) {
+                ret = vm_munmap(vma->vm_start, att->at_size);
+                DBUG_ON(ret != 0);
+        }

         xpmem_att_destroyable(att);
 }
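
Note: the detach path above stops temporarily overwriting current->mm (the removed current_mm dance) and instead picks the mm_struct to operate on once, up front, then only calls vm_munmap() when the calling task still has a live memory descriptor. Below is a minimal user-space sketch of the same fallback pattern; struct mm, pick_mm() and main() are illustrative stand-ins, not XPMEM code:

#include <stddef.h>
#include <stdio.h>

/* Stand-in for mm_struct; "ctx" plays current->mm, "saved" plays att->mm. */
struct mm { const char *name; };

/* Choose the mm once instead of swapping the task's own pointer in and
 * out and restoring it on every exit path. */
static struct mm *pick_mm(struct mm *ctx, struct mm *saved)
{
        return ctx ? ctx : saved;  /* mm = current->mm ? current->mm : att->mm; */
}

int main(void)
{
        struct mm att_mm = { "att->mm" };
        struct mm *current_mm = NULL;  /* e.g. detaching on behalf of an exiting task */
        struct mm *mm = pick_mm(current_mm, &att_mm);

        printf("tearing down mappings in %s\n", mm->name);

        /* vm_munmap() only makes sense for a live current->mm; when it is
         * NULL, the mapping dies with the memory descriptor anyway. */
        if (current_mm != NULL)
                printf("would vm_munmap() here\n");
        return 0;
}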
@@ -756,10 +751,8 @@ xpmem_clear_PTEs_of_att(struct xpmem_attachment *att, u64 start, u64 end,
          * space and find the intersection with (start, end).
          */
         invalidate_start = max(start, att->vaddr);
-        if (invalidate_start >= att_vaddr_end)
-                goto out;
         invalidate_end = min(end, att_vaddr_end);
-        if (invalidate_end <= att->vaddr)
+        if (invalidate_start >= att_vaddr_end || invalidate_end <= att->vaddr)
                 goto out;

         /* Convert the intersection of vaddr into offsets. */
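
Note: the hunk above folds two separate early-outs into one emptiness test on the clamped interval. The combined guard is equivalent because, after clamping, either invalidate_start >= att_vaddr_end or invalidate_end <= att->vaddr implies the intersection is empty. A self-contained C sketch of the same range-intersection logic; att_range and intersect() are hypothetical names, not XPMEM API:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the attachment fields used above. */
struct att_range { uint64_t vaddr; uint64_t size; };

/* Returns 1 and writes the clamped range if [start, end) overlaps the
 * attachment, 0 for an empty intersection. One test covers both "starts
 * past the end" and "ends before the start", as in the patch. */
static int intersect(const struct att_range *att, uint64_t start, uint64_t end,
                     uint64_t *out_start, uint64_t *out_end)
{
        uint64_t att_end = att->vaddr + att->size;
        uint64_t lo = start > att->vaddr ? start : att->vaddr;  /* max() */
        uint64_t hi = end < att_end ? end : att_end;            /* min() */

        if (lo >= att_end || hi <= att->vaddr)
                return 0;
        *out_start = lo;
        *out_end = hi;
        return 1;
}

int main(void)
{
        struct att_range att = { .vaddr = 0x1000, .size = 0x2000 };
        uint64_t lo, hi;

        if (intersect(&att, 0x0, 0x1800, &lo, &hi))
                printf("invalidate 0x%llx - 0x%llx\n",
                       (unsigned long long)lo, (unsigned long long)hi);
        return 0;
}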
101 changes: 42 additions & 59 deletions kernel/xpmem_main.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved.
  * Copyright 2010, 2014 Cray Inc. All Rights Reserved
- * Copyright 2015 Los Alamos National Security, LLC. All rights reserved.
+ * Copyright 2015-2016 Los Alamos National Security, LLC. All rights reserved.
  */

 /*
@@ -42,7 +42,7 @@
 #endif

 struct xpmem_partition *xpmem_my_part = NULL;  /* pointer to this partition */
-static spinlock_t xpmem_open_lock;
+static void xpmem_destroy_tg(struct xpmem_thread_group *tg);

 /*
  * User open of the XPMEM driver. Called whenever /dev/xpmem is opened.
@@ -57,19 +57,18 @@ xpmem_open(struct inode *inode, struct file *file)
         struct proc_dir_entry *unpin_entry;
         char tgid_string[XPMEM_TGID_STRING_LEN];

-        spin_lock(&xpmem_open_lock);
         /* if this has already been done, just return silently */
         tg = xpmem_tg_ref_by_tgid(current->tgid);
         if (!IS_ERR(tg)) {
-                spin_unlock(&xpmem_open_lock);
                 xpmem_tg_deref(tg);
                 return 0;
         }

         /* create tg */
-        tg = kzalloc(sizeof(struct xpmem_thread_group), GFP_KERNEL);
+        tg = kzalloc(sizeof(struct xpmem_thread_group) +
+                     sizeof(struct xpmem_hashlist) *
+                     XPMEM_AP_HASHTABLE_SIZE, GFP_KERNEL);
         if (tg == NULL) {
-                spin_unlock(&xpmem_open_lock);
                 return -ENOMEM;
         }

@@ -92,27 +91,17 @@ xpmem_open(struct inode *inode, struct file *file)
         tg->mmu_unregister_called = 0;
         tg->mm = current->mm;

+        for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) {
+                rwlock_init(&tg->ap_hashtable[index].lock);
+                INIT_LIST_HEAD(&tg->ap_hashtable[index].list);
+        }
+
         /* Register MMU notifier callbacks */
         if (xpmem_mmu_notifier_init(tg) != 0) {
-                spin_unlock(&xpmem_open_lock);
                 kfree(tg);
                 return -EFAULT;
         }

-        /* create and initialize struct xpmem_access_permit hashtable */
-        tg->ap_hashtable = kzalloc(sizeof(struct xpmem_hashlist) *
-                                   XPMEM_AP_HASHTABLE_SIZE, GFP_KERNEL);
-        if (tg->ap_hashtable == NULL) {
-                spin_unlock(&xpmem_open_lock);
-                xpmem_mmu_notifier_unlink(tg);
-                kfree(tg);
-                return -ENOMEM;
-        }
-        for (index = 0; index < XPMEM_AP_HASHTABLE_SIZE; index++) {
-                rwlock_init(&tg->ap_hashtable[index].lock);
-                INIT_LIST_HEAD(&tg->ap_hashtable[index].list);
-        }
-
         snprintf(tgid_string, XPMEM_TGID_STRING_LEN, "%d", current->tgid);
         spin_lock(&xpmem_unpin_procfs_lock);
         unpin_entry = proc_create_data(tgid_string, 0644,
@@ -145,7 +134,6 @@ xpmem_open(struct inode *inode, struct file *file)
         get_task_struct(current->group_leader);
         tg->group_leader = current->group_leader;
         BUG_ON(current->mm != current->group_leader->mm);
-        spin_unlock(&xpmem_open_lock);

         return 0;
 }
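
Note: the xpmem_open() hunks above replace the separate ap_hashtable allocation with a single kzalloc() sized for the thread group plus its buckets, which removes one ENOMEM path and lets a single kfree() release everything; they also drop the global xpmem_open_lock. A simplified user-space sketch of the combined-allocation layout, assuming the hashtable sits at the tail of the struct as a flexible array member, as the single allocation implies (all names below are stand-ins):

#include <stdio.h>
#include <stdlib.h>

#define AP_HASHTABLE_SIZE 8  /* stand-in for XPMEM_AP_HASHTABLE_SIZE */

struct hashlist { int lock; /* placeholder for rwlock_t + list head */ };

/* Simplified analogue of struct xpmem_thread_group: one allocation holds
 * the struct and its hash buckets, so setup and every error path manage
 * exactly one pointer. */
struct thread_group {
        int tgid;
        struct hashlist ap_hashtable[];  /* flexible array member at the tail */
};

int main(void)
{
        struct thread_group *tg =
                calloc(1, sizeof(*tg) + sizeof(struct hashlist) * AP_HASHTABLE_SIZE);
        if (tg == NULL)
                return 1;

        for (int i = 0; i < AP_HASHTABLE_SIZE; i++)
                tg->ap_hashtable[i].lock = 0;  /* rwlock_init()/INIT_LIST_HEAD() here */

        free(tg);  /* one kfree() instead of two on every teardown path */
        return 0;
}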
@@ -158,37 +146,15 @@ xpmem_open(struct inode *inode, struct file *file)
 static void
 xpmem_destroy_tg(struct xpmem_thread_group *tg)
 {
-        int index;
-
-        spin_lock(&xpmem_open_lock);
         XPMEM_DEBUG("tg->mm=%p", tg->mm);

         /*
          * Calls MMU release function if exit_mmap() has not executed yet.
          * Decrements mm_count.
          */
         xpmem_mmu_notifier_unlink(tg);

-        /* Remove tg structure from its hash list */
-        index = xpmem_tg_hashtable_index(tg->tgid);
-        write_lock(&xpmem_my_part->tg_hashtable[index].lock);
-        /*
-         * Two threads could have called xpmem_flush at about the same time,
-         * and thus xpmem_tg_ref_by_tgid_all could return the same tg in
-         * both threads. Guard against this race.
-         */
-        if (list_empty(&tg->tg_hashlist)) {
-                write_unlock(&xpmem_my_part->tg_hashtable[index].lock);
-                xpmem_tg_deref(tg);
-                spin_unlock(&xpmem_open_lock);
-                return;
-        }
-        list_del_init(&tg->tg_hashlist);
-        write_unlock(&xpmem_my_part->tg_hashtable[index].lock);
-
         xpmem_tg_destroyable(tg);
         xpmem_tg_deref(tg);
-        spin_unlock(&xpmem_open_lock);
 }

 /*
@@ -230,7 +196,9 @@ xpmem_teardown(struct xpmem_thread_group *tg)
 static int
 xpmem_flush(struct file *file, fl_owner_t owner)
 {
+        char tgid_string[XPMEM_TGID_STRING_LEN];
         struct xpmem_thread_group *tg;
+        int index;

         /*
          * During a call to fork() there is a check for whether the parent
@@ -249,8 +217,18 @@ xpmem_flush(struct file *file, fl_owner_t owner)
         if (current->files && current->files != owner)
                 return 0;

-        tg = xpmem_tg_ref_by_tgid_all(current->tgid);
+        /*
+         * Two threads could have called xpmem_flush at about the same time,
+         * and thus xpmem_tg_ref_by_tgid_all could return the same tg in
+         * both threads. Guard against this race.
+         */
+        index = xpmem_tg_hashtable_index(current->tgid);
+        write_lock(&xpmem_my_part->tg_hashtable[index].lock);
+
+        /* Remove tg structure from its hash list */
+        tg = xpmem_tg_ref_by_tgid_all_nolock(current->tgid);
         if (IS_ERR(tg)) {
+                write_unlock(&xpmem_my_part->tg_hashtable[index].lock);
                 /*
                  * xpmem_flush() can get called twice for thread groups
                  * which inherited /dev/xpmem: once for the inherited fd,
@@ -261,8 +239,22 @@ xpmem_flush(struct file *file, fl_owner_t owner)
                 return 0;
         }

+        list_del_init(&tg->tg_hashlist);
+
+        write_unlock(&xpmem_my_part->tg_hashtable[index].lock);
+
         XPMEM_DEBUG("tg->mm=%p", tg->mm);

+        /*
+         * NTH: the thread group may not be released until later so remove the
+         * proc entry now to avoid a race between another call to xpmem_open()
+         * and the destruction of the thread group object.
+         */
+        snprintf(tgid_string, XPMEM_TGID_STRING_LEN, "%d", tg->tgid);
+        spin_lock(&xpmem_unpin_procfs_lock);
+        remove_proc_entry(tgid_string, xpmem_unpin_procfs_dir);
+        spin_unlock(&xpmem_unpin_procfs_lock);
+
         xpmem_destroy_tg(tg);

         return 0;
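
Note: with the hunks above, the hash-bucket write lock now covers both the lookup and the unlink, so when two threads race through xpmem_flush() only one can claim the tg; the loser sees IS_ERR(tg) and simply returns. A toy pthread sketch of the claim-under-one-write-lock pattern; bucket and claim_entry() are illustrative, not XPMEM code:

#include <pthread.h>
#include <stdio.h>

/* Toy analogue of a tg_hashtable bucket: "present" stands in for the tg
 * still being on the hash list. */
struct bucket {
        pthread_rwlock_t lock;
        int present;
};

/* Returns 1 if this caller claimed (unlinked) the entry, 0 if it lost the
 * race. Lookup and unlink happen under one write lock, so only one of two
 * racing closers can win. */
static int claim_entry(struct bucket *b)
{
        int won;

        pthread_rwlock_wrlock(&b->lock);
        won = b->present;  /* xpmem_tg_ref_by_tgid_all_nolock() */
        b->present = 0;    /* list_del_init(&tg->tg_hashlist) */
        pthread_rwlock_unlock(&b->lock);
        return won;
}

int main(void)
{
        struct bucket b = { PTHREAD_RWLOCK_INITIALIZER, 1 };

        printf("first close claims entry: %d\n", claim_entry(&b));
        printf("second close loses race:  %d\n", claim_entry(&b));
        return 0;
}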
@@ -409,17 +401,12 @@ xpmem_init(void)
         struct proc_dir_entry *debug_printk_entry;

         /* create and initialize struct xpmem_partition array */
-        xpmem_my_part = kzalloc(sizeof(struct xpmem_partition), GFP_KERNEL);
+        xpmem_my_part = kzalloc(sizeof(struct xpmem_partition) +
+                                sizeof(struct xpmem_hashlist) *
+                                XPMEM_TG_HASHTABLE_SIZE, GFP_KERNEL);
         if (xpmem_my_part == NULL)
                 return -ENOMEM;

-        xpmem_my_part->tg_hashtable = kzalloc(sizeof(struct xpmem_hashlist) *
-                                              XPMEM_TG_HASHTABLE_SIZE, GFP_KERNEL);
-        if (xpmem_my_part->tg_hashtable == NULL) {
-                kfree(xpmem_my_part);
-                return -ENOMEM;
-        }
-
         for (i = 0; i < XPMEM_TG_HASHTABLE_SIZE; i++) {
                 rwlock_init(&xpmem_my_part->tg_hashtable[i].lock);
                 INIT_LIST_HEAD(&xpmem_my_part->tg_hashtable[i].list);
@@ -459,9 +446,7 @@ xpmem_init(void)
                 goto out_4;
         }

-        spin_lock_init (&xpmem_open_lock);
-
-        printk("SGI XPMEM kernel module v%s loaded\n",
+        printk("XPMEM kernel module v%s loaded\n",
                XPMEM_CURRENT_VERSION_STRING);
         return 0;

@@ -472,7 +457,6 @@ xpmem_init(void)
 out_2:
         remove_proc_entry(XPMEM_MODULE_NAME, NULL);
 out_1:
-        kfree(xpmem_my_part->tg_hashtable);
         kfree(xpmem_my_part);
         return ret;
 }
@@ -483,15 +467,14 @@ xpmem_init(void)
 void __exit
 xpmem_exit(void)
 {
-        kfree(xpmem_my_part->tg_hashtable);
         kfree(xpmem_my_part);

         misc_deregister(&xpmem_dev_handle);
         remove_proc_entry("global_pages", xpmem_unpin_procfs_dir);
         remove_proc_entry("debug_printk", xpmem_unpin_procfs_dir);
         remove_proc_entry(XPMEM_MODULE_NAME, NULL);

-        printk("SGI XPMEM kernel module v%s unloaded\n",
+        printk("XPMEM kernel module v%s unloaded\n",
                XPMEM_CURRENT_VERSION_STRING);
 }


