Skip to content

Commit

Permalink
Refactor alternative matrix parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
nicodr97 committed Oct 25, 2023
1 parent 3bd6e3e commit 734ffa9
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 95 deletions.
3 changes: 1 addition & 2 deletions Documentation/00_mainPage.dox
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,8 @@ For a complete list please see the User Guide or visit http://trimal.cgenomics.o
Split input coding sequences up to first stop codon appearance

<b>Trimming Parameters</b>
--alternative_matrix degenerated_nt_identity
--degenerated_nt_identity
Specify the degenerated nt identity matrix as the similarity matrix to use.
If a matrix is not specified, the best suited among a set will be selected.
-matrix <inputfile>
Input file for user-defined similarity matrix (default is Blosum62).
-block <n>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ If the counter is bigger than 1, we are trying to use multiple automatic methods
*/

/**
\var int trimAlManager::alternative_matrix
\var int trimAlManager::degenerated_nt_identity
\brief Int flag that indicates whether the degenerated nt identity matrix is used as the similarity matrix, where '-1' means it was not requested, and '1' means it was requested with --degenerated_nt_identity
*/

Expand Down
3 changes: 1 addition & 2 deletions docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,9 @@ Backtranslation Options

Trimming Parameters
=======================
.. option:: --alternative_matrix <degenerated_nt_identity>
.. option:: --degenerated_nt_identity

Specify the degenerated nt identity matrix as the similarity matrix to use.
If a matrix is not specified, the best suited among a set will be selected.

.. option:: -matrix <inputfile>

Expand Down
3 changes: 1 addition & 2 deletions include/RawText/legacyMenu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,8 @@ Common options (for a complete list please see the User Guide or visit http://tr
-ignoreFilter Ignore vcf-filtered variants in VCF. Only valid in combination with -vcf.
Still applies min-quality and min-coverage when provided

--alternative_matrix degenerated_nt_identity
--degenerated_nt_identity
Specify the degenerated nt identity matrix as the similarity matrix to use.
If a matrix is not specified, the best suited among a set will be selected.

Legacy Options
These options are included for back-compatibility with older versions of trimAl.
Expand Down
3 changes: 1 addition & 2 deletions include/RawText/menu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,8 @@ For a complete list please see the User Guide or visit http://trimal.cgenomics.o
Split input coding sequences up to first stop codon appearance

[b][u]Trimming Parameters[r]
[b]--alternative_matrix degenerated_nt_identity[r]
[b]--degenerated_nt_identity[r]
Specify the degenerated nt identity matrix as the similarity matrix to use.
If a matrix is not specified, the best suited among a set will be selected.
[b]-matrix <inputfile>[r]
Input file for user-defined similarity matrix (default is Blosum62).
[b]-block <n>[r]
Expand Down
9 changes: 2 additions & 7 deletions include/Statistics/similarityMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,9 @@ namespace statistics {
void defaultNTDegeneratedSimMatrix();

/**
* \brief Method to load alternative similarity matrices also included on the suite.
* Currently, only one type of alternative matrix is available: \n
* \b matrix_code: 1 \b datatype SequenceTypes::AA
* \param matrix_code ID of the matrix
* \param datatype Numerical representation of the data type.
* See #SequenceTypes
* \brief Method to load the alternative degenerate nucleotide (DEG NT) similarity matrix.
*/
void alternativeSimilarityMatrices(int matrix_code, int datatype);
void alternativeNTDegeneratedSimMatrix();

/**
* \brief Method to get the similarity distance between two residues, A and B\n
Expand Down
2 changes: 1 addition & 1 deletion include/trimalManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ class trimAlManager
clusters = -1
,
automatedMethodCount = -1,
alternative_matrix = -1,
degenerated_nt_identity = -1,
gapAbsoluteThreshold = -1,

*delColumns = nullptr,
Expand Down
55 changes: 12 additions & 43 deletions source/Statistics/similarityMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,62 +350,31 @@ namespace statistics {
}
}

void similarityMatrix::alternativeSimilarityMatrices(int matrix_code, \
int datatype) {
int i, j, k;
void similarityMatrix::alternativeNTDegeneratedSimMatrix() {
float sum;

// Allocate memory depending on the input datatype
switch (datatype) {
case SequenceTypes::AA:
memoryAllocation(20);
break;
case SequenceTypes::DNA:
case SequenceTypes::RNA:
memoryAllocation(5);
break;
case SequenceTypes::DNA | SequenceTypes::DEG:
case SequenceTypes::RNA | SequenceTypes::DEG:
memoryAllocation(15);
break;
}

for (i = 0; i < TAMABC; i++)
memoryAllocation(15);
for (int i = 0; i < TAMABC; i++) {
vhash[i] = -1;
}

// We create the hashing vector taking into account the input datatype
for (i = 0; i < numPositions; i++) {
switch (datatype) {
case SequenceTypes::AA:
vhash[listAASym[i] - 'A'] = i;
break;
case SequenceTypes::DNA:
case SequenceTypes::RNA:
vhash[listNTSym[i] - 'A'] = i;
break;
case SequenceTypes::DNA | SequenceTypes::DEG:
case SequenceTypes::RNA | SequenceTypes::DEG:
vhash[listNTDegenerateSym[i] - 'A'] = i;
break;
}
for (int i = 0; i < numPositions; i++) {
vhash[listNTDegenerateSym[i] - 'A'] = i;
}

// Working similarity matrix is set depending on the pre loaded matrices
for (i = 0; i < numPositions; i++) {
for (j = 0; j < numPositions; j++) {
switch (matrix_code) {
case 1:
simMat[i][j] = alternative_1_NTDegeneratedMatrix[i][j];
break;
}
for (int i = 0; i < numPositions; i++) {
for (int j = 0; j < numPositions; j++) {
simMat[i][j] = alternative_1_NTDegeneratedMatrix[i][j];
}
}

// Calculate the distances between residues based on Euclidean distance
for (j = 0; j < numPositions; j++) {
for (i = 0; i < numPositions; i++) {
for (int j = 0; j < numPositions; j++) {
for (int i = 0; i < numPositions; i++) {
if ((i != j) && (distMat[i][j] == 0.0)) {
for (k = 0, sum = 0; k < numPositions; k++)
for (int k = 0, sum = 0; k < numPositions; k++)
sum += ((simMat[k][j] - simMat[k][i]) * (simMat[k][j] - simMat[k][i]));
sum = (float) sqrt(sum);
distMat[i][j] = sum;
Expand Down
50 changes: 23 additions & 27 deletions source/trimalManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,17 +444,9 @@ int trimAlManager::parseArguments(int argc, char **argv) {
matrixFile = new char[argumentLength + 1];
strcpy(matrixFile, argv[*i]);
return Recognized;
} else if (!strcmp(argv[*i], "--alternative_matrix") && ((*i) + 1 != *argc) && (alternative_matrix == -1)) {
(*i)++;
if (!strcmp(argv[*i], "degenerated_nt_identity"))
{
alternative_matrix = 1;
return Recognized;
}
else {
debug.report(ErrorCode::AlternativeMatrixNotRecognized, argv[*i]);
appearErrors = true;
}
} else if (!strcmp(argv[*i], "--degenerated_nt_identity") && ((*i) + 1 != *argc) && (degenerated_nt_identity == -1)) {
degenerated_nt_identity = 1;
return Recognized;
}
return NotRecognized;
}
Expand Down Expand Up @@ -1376,7 +1368,7 @@ inline bool trimAlManager::check_absolute_gap_theshold() {
}

/**inline**/ bool trimAlManager::check_similarity_matrix() {
if ((matrixFile != nullptr || alternative_matrix != -1) && (!appearErrors)) {
if ((matrixFile != nullptr || degenerated_nt_identity != -1) && (!appearErrors)) {
if ((!strict) && (!strictplus) && (!automated1) && (similarityThreshold == -1) && (!ssc) && (!sst)) {
debug.report(ErrorCode::MatrixGivenWithNoMethodToUseIt);
appearErrors = true;
Expand Down Expand Up @@ -1967,33 +1959,37 @@ int trimAlManager::perform() {
if ((strict) || (strictplus) || (automated1) || (similarityThreshold != -1.0) || (ssc == 1) || (sst == 1)) {
similMatrix = new statistics::similarityMatrix();

// Load Matrix
if (matrixFile != nullptr)
if (matrixFile != nullptr) {
similMatrix->loadSimMatrix(matrixFile);
} else if (degenerated_nt_identity != -1) {
int alignDataType = origAlig->getAlignmentType();
if (alignDataType == (SequenceTypes::DNA | SequenceTypes::DEG) ||
alignDataType == (SequenceTypes::RNA | SequenceTypes::DEG)) {
debug.report(ErrorCode::ImpossibleToProcessMatrix);
return false;
}

// Alternative Default Matrix
else if (alternative_matrix != -1) {
similMatrix->alternativeSimilarityMatrices(alternative_matrix, origAlig->getAlignmentType());
}

// Default Matrices
else {
similMatrix->alternativeNTDegeneratedSimMatrix();
} else {
int alignDataType = origAlig->getAlignmentType();
if (alignDataType & SequenceTypes::AA)
if (alignDataType == SequenceTypes::AA) {
similMatrix->defaultAASimMatrix();
else if ((alignDataType == SequenceTypes::DNA) || (alignDataType == SequenceTypes::RNA))
} else if (alignDataType == (SequenceTypes::AA | SequenceTypes::DEG)) {
similMatrix->defaultAASimMatrix();
} else if ((alignDataType == SequenceTypes::DNA) || (alignDataType == SequenceTypes::RNA)) {
similMatrix->defaultNTSimMatrix();
else if ((alignDataType == (SequenceTypes::DNA | SequenceTypes::DEG)) ||
(alignDataType == (SequenceTypes::RNA | SequenceTypes::DEG)))
similMatrix->defaultNTDegeneratedSimMatrix();
} else if ((alignDataType == (SequenceTypes::DNA | SequenceTypes::DEG)) ||
(alignDataType == (SequenceTypes::RNA | SequenceTypes::DEG))) {
similMatrix->defaultNTDegeneratedSimMatrix();
}
}

// Check if Matrix has been loaded
if (!origAlig->Statistics->setSimilarityMatrix(similMatrix)) {
debug.report(ErrorCode::ImpossibleToProcessMatrix);
return false;
}
}

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ TEST_CASE("Matrix", "[manager][arguments][matrix]") {

GIVEN("Alternative matrix argument")
{
args.push_back("--alternative_matrix");
args.push_back("--degenerated_nt_identity");
test_arguments(
args,
manager,
Expand All @@ -140,13 +140,6 @@ TEST_CASE("Matrix", "[manager][arguments][matrix]") {

AND_GIVEN("Correct value input")
{
args.push_back("degenerated_nt_identity");
test_arguments(
args,
manager,
true, trimAlManager::argumentReport::Errored,
true, true);

GIVEN("Input file")
{
args.push_back("-in");
Expand Down

0 comments on commit 734ffa9

Please sign in to comment.