Skip to content

Commit

Permalink
Fix nasa#470, bin sem unlock after task delete
Browse files Browse the repository at this point in the history
Corrects issue when a task waiting on a binary semaphore is
deleted, it left the mutex in a locked state preventing other
tasks from using the mutex.
  • Loading branch information
jphickey committed May 19, 2020
1 parent c2bcebb commit 084dd75
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 4 deletions.
54 changes: 50 additions & 4 deletions src/os/posix/src/os-impl-binsem.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,38 @@
#include "os-shared-binsem.h"
#include "os-impl-binsem.h"

/*
* This controls the maximum time the that the calling thread will wait to
* acquire the condition mutex before returning an error.
*
* Under normal conditions, this lock is held by giving/taking threads very
* briefly, so the lock should be available with minimal delay. However,
* if the "taking" thread is canceled or exits abnormally without releasing the
* lock, it means any other task accessing the sem can get blocked indefinitely.
*
* There should be no reason for a user to configure this, as it should
* not be relevant in a normally operating system. This only prevents a
* deadlock condition in off-nominal circumstances.
*/
static const struct timespec OS_POSIX_BINSEM_MAX_WAIT =
{
.tv_sec = 2,
.tv_nsec = 0
};


/* Tables where the OS object information is stored */
OS_impl_binsem_internal_record_t OS_impl_bin_sem_table [OS_MAX_BIN_SEMAPHORES];

/*---------------------------------------------------------------------------------------
* Helper function for releasing the mutex in case the thread
* executing pthread_condwait() is canceled.
----------------------------------------------------------------------------------------*/
void OS_Posix_BinSemReleaseMutex(void *mut)
{
pthread_mutex_unlock(mut);
}

/****************************************************************************************
BINARY SEMAPHORE API
***************************************************************************************/
Expand Down Expand Up @@ -244,10 +273,13 @@ int32 OS_BinSemGive_Impl ( uint32 sem_id )
* alternative of having a BinSemGive not wake up the other thread is a bigger issue.
*
* Counting sems do not suffer from this, as there is a native POSIX mechanism for those.
*
* Note: This lock should be readily available, with only minimal delay if any.
* If a long delay occurs here, it means something is fundamentally wrong.
*/

/* Lock the mutex ( not the table! ) */
if ( pthread_mutex_lock(&(sem->id)) != 0 )
if ( pthread_mutex_timedlock(&sem->id, &OS_POSIX_BINSEM_MAX_WAIT) != 0 )
{
return(OS_SEM_FAILURE);
}
Expand Down Expand Up @@ -279,7 +311,7 @@ int32 OS_BinSemFlush_Impl (uint32 sem_id)
sem = &OS_impl_bin_sem_table[sem_id];

/* Lock the mutex ( not the table! ) */
if ( pthread_mutex_lock(&(sem->id)) != 0 )
if ( pthread_mutex_timedlock(&sem->id, &OS_POSIX_BINSEM_MAX_WAIT) != 0 )
{
return(OS_SEM_FAILURE);
}
Expand Down Expand Up @@ -311,12 +343,21 @@ static int32 OS_GenericBinSemTake_Impl (OS_impl_binsem_internal_record_t *sem, c
sig_atomic_t flush_count;
int32 return_code;

/*
* Note - this lock should be quickly available - should not delay here.
* The main delay is in the pthread_cond_wait() below.
*/
/* Lock the mutex ( not the table! ) */
if ( pthread_mutex_lock(&(sem->id)) != 0 )
if ( pthread_mutex_timedlock(&sem->id, &OS_POSIX_BINSEM_MAX_WAIT) != 0 )
{
return(OS_SEM_FAILURE);
}

/* because pthread_cond_wait() is also a cancellation point,
* this uses a cleanup handler to ensure that if canceled during this call,
* the mutex is also released */
pthread_cleanup_push(OS_Posix_BinSemReleaseMutex, &sem->id);

return_code = OS_SUCCESS;

/*
Expand Down Expand Up @@ -362,7 +403,12 @@ static int32 OS_GenericBinSemTake_Impl (OS_impl_binsem_internal_record_t *sem, c
sem->current_value = 0;
}

pthread_mutex_unlock(&(sem->id));
/*
* Pop the cleanup handler.
* Passing "true" means it will be executed, which
* handles releasing the mutex.
*/
pthread_cleanup_pop(true);

return return_code;
} /* end OS_GenericBinSemTake_Impl */
Expand Down
17 changes: 17 additions & 0 deletions src/tests/bin-sem-test/bin-sem-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,27 @@ void task_1(void)
void BinSemCheck(void)
{
uint32 status;
OS_bin_sem_prop_t bin_sem_prop;

/* Delete the task, which should be pending in OS_BinSemTake() */
status = OS_TaskDelete(task_1_id);
UtAssert_True(status == OS_SUCCESS, "OS_TaskDelete Rc=%d", (int)status);

status = OS_TimerDelete(timer_id);
UtAssert_True(status == OS_SUCCESS, "OS_TimerDelete Rc=%d", (int)status);

OS_TaskDelay(100);

/* Confirm that the semaphore itself is still operational after task deletion */
status = OS_BinSemGive(bin_sem_id);
UtAssert_True(status == OS_SUCCESS, "BinSem give Rc=%d", (int)status);
status = OS_BinSemGetInfo (bin_sem_id, &bin_sem_prop);
UtAssert_True(status == OS_SUCCESS, "BinSem value=%d Rc=%d", (int)bin_sem_prop.value, (int)status);
status = OS_BinSemTake(bin_sem_id);
UtAssert_True(status == OS_SUCCESS, "BinSem take Rc=%d", (int)status);
status = OS_BinSemDelete(bin_sem_id);
UtAssert_True(status == OS_SUCCESS, "BinSem delete Rc=%d", (int)status);

UtAssert_True(counter < timer_counter, "Task counter (%d) < timer counter (%d)", (int)counter, (int)timer_counter);
UtAssert_True(task_1_failures == 0, "Task 1 failures = %u", (unsigned int)task_1_failures);
UtAssert_True(timer_failures == 0, "Timer failures = %u", (unsigned int)timer_failures);
Expand Down

0 comments on commit 084dd75

Please sign in to comment.