Skip to content

Commit

Permalink
Filter covariates by minimum characterization mean on the database side
Browse files Browse the repository at this point in the history
  • Loading branch information
ginberg committed Aug 22, 2024
1 parent 5e7e003 commit dee92db
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 22 deletions.
4 changes: 2 additions & 2 deletions R/GetDefaultCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ getDbDefaultCovariateData <- function(connection,

settings <- .toJson(covariateSettings)
rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction"))
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortIds)), cdmDatabaseSchema)
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(
settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortIds)), cdmDatabaseSchema, as.character(minCharacterizationMean))
todo <- .fromJson(json)
if (length(todo$tempTables) != 0) {
ParallelLogger::logInfo("Sending temp tables to server")
Expand Down Expand Up @@ -138,7 +139,6 @@ getDbDefaultCovariateData <- function(connection,
andromedaTableName = "covariates",
snakeCaseToCamelCase = TRUE
)
filterCovariateDataCovariates(covariateData, "covariates", minCharacterizationMean)
}

# Continuous aggregated features
Expand Down
2 changes: 1 addition & 1 deletion inst/csv/jarChecksum.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
89d2f730bbcdad9c98603f061edc51d5cdbde9b15ee84aae571c9c6799457b53
3aa4b6bdae3b098051bf7025fcf5d78d4e8b159c4ed2c20bb281db5664635e94
Binary file modified inst/java/FeatureExtraction.jar
Binary file not shown.
35 changes: 23 additions & 12 deletions java/org/ohdsi/featureExtraction/FeatureExtraction.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public class FeatureExtraction {
private static String ADD_DESCENDANTS_SQL = "SELECT descendant_concept_id AS id\nINTO @target_temp\nFROM @cdm_database_schema.concept_ancestor\nINNER JOIN @source_temp\n\tON ancestor_concept_id = id;\n\n";

public static void main(String[] args) {
init("C:/Users/mschuemi/git/FeatureExtraction/inst");
init("/Users/ginberg/Code/FeatureExtraction/inst");
// init("C:/R/R-3.3.1/library/FeatureExtraction");
// init("D:/git/OHDSI/FeatureExtraction/inst");
// System.out.println(convertSettingsPrespecToDetails("{\"temporal\":false,\"DemographicsGender\":true,\"DemographicsAge\":true,\"longTermStartDays\":-365,\"mediumTermStartDays\":-180,\"shortTermStartDays\":-30,\"endDays\":0,\"includedCovariateConceptIds\":[],\"addDescendantsToInclude\":false,\"excludedCovariateConceptIds\":[1,2,3],\"addDescendantsToExclude\":false,\"includedCovariateIds\":[]}"));
Expand All @@ -86,13 +86,13 @@ public static void main(String[] args) {
//String settings = getDefaultPrespecTemporalAnalyses();
// String settings = getDefaultPrespecTemporalSequenceAnalyses();
// String settings = convertSettingsPrespecToDetails(getDefaultPrespecTemporalAnalyses());
// System.out.println(convertSettingsPrespecToDetails(getDefaultPrespecAnalyses()));
// System.out.println(convertSettingsPrespecToDetails(getDefaultPrespecAnalyses()));
String settings =
"{\"temporal\":true,\"temporalSequence\":false,\"ConditionEraGroupOverlap\":true,\"temporalStartDays\":0,\"temporalEndDays\":0,\"includedCovariateConceptIds\":[],\"addDescendantsToInclude\":false,\"excludedCovariateConceptIds\":[],\"addDescendantsToExclude\":false,\"includedCovariateIds\":[]}";
//String settings =
//"{\"temporal\":false,\"temporalSequence\":false,\"analyses\":[{\"analysisId\":999,\"sqlFileName\":\"CohortBasedBinaryCovariates.sql\",\"parameters\":{\"covariateCohortTable\":\"cohort\",\"analysisId\":999,\"analysisName\":\"Cohort\",\"startDay\":-365,\"endDay\":0},\"includedCovariateConceptIds\":[],\"addDescendantsToInclude\":false,\"excludedCovariateConceptIds\":[],\"addDescendantsToExclude\":false,\"includedCovariateIds\":[]}]}";
// String settings = convertSettingsPrespecToDetails(getDefaultPrespecAnalyses());
System.out.println(createSql(settings, true, "#temp_cohort", "row_id", -1, "cdm_synpuf"));
System.out.println(createSql(settings, true, "#temp_cohort", "row_id", -1, "cdm_synpuf", 0.0));
// System.out.println(createSql(getDefaultPrespecAnalyses(), true, "#temp_cohort", "row_id", -1, "cdm_synpuf"));
// System.out.println(createSql(getDefaultPrespecTemporalAnalyses(), false, "#temp_cohort", "row_id", -1, "cdm_synpuf"));
}
Expand Down Expand Up @@ -480,15 +480,17 @@ else if (temporalSequence)
* @param cdmDatabaseSchema
* The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should
* specify both the database and the schema, so for example 'cdm_instance.dbo'.
* @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This will help reduce
* the file size of the characterization output, but will remove information on covariates that have very low values.
* @return A JSON object.
*/
public static String createSql(String settings, boolean aggregated, String cohortTable, String rowIdField, String[] cohortDefinitionIds,
String cdmDatabaseSchema) {
String cdmDatabaseSchema, String minCharacterizationMean) {

long[] idsAsLongs = new long[cohortDefinitionIds.length];
for (int i = 0; i < cohortDefinitionIds.length; i++)
idsAsLongs[i] = Long.valueOf(cohortDefinitionIds[i]);
return createSql(settings, aggregated, cohortTable, rowIdField, idsAsLongs, cdmDatabaseSchema);
return createSql(settings, aggregated, cohortTable, rowIdField, idsAsLongs, cdmDatabaseSchema, Double.valueOf(minCharacterizationMean));
}


Expand Down Expand Up @@ -523,11 +525,13 @@ public static String createSql(String settings, boolean aggregated, String cohor
* @param cdmDatabaseSchema
* The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should
* specify both the database and the schema, so for example 'cdm_instance.dbo'.
* @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This will help reduce
* the file size of the characterization output, but will remove information on covariates that have very low values.
* @return A JSON object.
*/
public static String createSql(String settings, boolean aggregated, String cohortTable, String rowIdField, int cohortDefinitionId,
String cdmDatabaseSchema) {
return createSql(settings, aggregated, cohortTable, rowIdField, new long[]{cohortDefinitionId}, cdmDatabaseSchema);
String cdmDatabaseSchema, double minCharacterizationMean) {
return createSql(settings, aggregated, cohortTable, rowIdField, new long[]{cohortDefinitionId}, cdmDatabaseSchema, minCharacterizationMean);
}

/**
Expand Down Expand Up @@ -559,10 +563,12 @@ public static String createSql(String settings, boolean aggregated, String cohor
* @param cdmDatabaseSchema
* The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should
* specify both the database and the schema, so for example 'cdm_instance.dbo'.
* @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This will help reduce
* the file size of the characterization output, but will remove information on covariates that have very low values.
* @return A JSON object.
*/
public static String createSql(String settings, boolean aggregated, String cohortTable, String rowIdField, long[] cohortDefinitionIds,
String cdmDatabaseSchema) {
String cdmDatabaseSchema, double minCharacterizationMean) {

JSONObject jsonObject = new JSONObject(settings);

Expand Down Expand Up @@ -629,7 +635,7 @@ public static String createSql(String settings, boolean aggregated, String cohor
jsonWriter.key("sqlConstruction");
jsonWriter.value(createConstructionSql(jsonObject, idSetToName, temporal, temporalSequence, aggregated, cohortTable, rowIdField, cohortDefinitionIds, cdmDatabaseSchema));

String sqlQueryFeatures = createQuerySql(jsonObject, cohortTable, cohortDefinitionIds, aggregated, temporal, temporalSequence);
String sqlQueryFeatures = createQuerySql(jsonObject, cohortTable, cohortDefinitionIds, aggregated, temporal, temporalSequence, minCharacterizationMean);
if (sqlQueryFeatures != null) {
jsonWriter.key("sqlQueryFeatures");
jsonWriter.value(sqlQueryFeatures);
Expand Down Expand Up @@ -700,7 +706,8 @@ private static Object createCleanupSql(JSONObject jsonObject, boolean temporal2)
return sql.toString();
}

private static String createQuerySql(JSONObject jsonObject, String cohortTable, long[] cohortDefinitionIds, boolean aggregated, boolean temporal, boolean temporalSequence) {
private static String createQuerySql(JSONObject jsonObject, String cohortTable, long[] cohortDefinitionIds, boolean aggregated, boolean temporal, boolean temporalSequence,
double minCharacterizationMean) {
StringBuilder fields = new StringBuilder();
if (aggregated) {
fields.append("cohort_definition_id, covariate_id, sum_value");
Expand Down Expand Up @@ -736,8 +743,12 @@ private static String createQuerySql(JSONObject jsonObject, String cohortTable,
if (!hasFeature)
return null;
if (aggregated) {
sql.append(
"\n) all_covariates\nINNER JOIN (\nSELECT cohort_definition_id, COUNT(*) AS total_count\nFROM @cohort_table {@cohort_definition_id != -1} ? {\nWHERE cohort_definition_id IN (@cohort_definition_id)} GROUP BY cohort_definition_id\n) total\n ON all_covariates.cohort_definition_id = total.cohort_definition_id;");
sql.append("\n) all_covariates\nINNER JOIN (\nSELECT cohort_definition_id, COUNT(*) AS total_count\nFROM @cohort_table {@cohort_definition_id != -1} ? {\nWHERE cohort_definition_id IN (@cohort_definition_id)}");
sql.append(" GROUP BY cohort_definition_id\n) total\n ON all_covariates.cohort_definition_id = total.cohort_definition_id");
if (minCharacterizationMean != 0) {
sql.append(" WHERE average_value >= " + minCharacterizationMean);
}
sql.append(";");
} else {
sql.append("\n) all_covariates;");
}
Expand Down
4 changes: 2 additions & 2 deletions man/createCovariateSettings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/getDbCovariateData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<groupId>org.ohdsi</groupId>
<artifactId>featureExtraction</artifactId>
<packaging>jar</packaging>
<version>3.5.1-SNAPSHOT</version>
<version>3.6.0</version>
<name>featureExtraction</name>
<scm>
<connection>scm:git:https://github.com/OHDSI/featureExtraction</connection>
Expand Down

0 comments on commit dee92db

Please sign in to comment.