Skip to content

Commit

Permalink
Filter covariates by minimum characterization mean on the database side
Browse files Browse the repository at this point in the history
  • Loading branch information
ginberg committed Aug 22, 2024
1 parent 5e7e003 commit dee92db
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 22 deletions.
4 changes: 2 additions & 2 deletions R/GetDefaultCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ getDbDefaultCovariateData <- function(connection,

settings <- .toJson(covariateSettings)
rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction"))
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortIds)), cdmDatabaseSchema)
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(
settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortIds)), cdmDatabaseSchema, as.character(minCharacterizationMean))
todo <- .fromJson(json)
if (length(todo$tempTables) != 0) {
ParallelLogger::logInfo("Sending temp tables to server")
Expand Down Expand Up @@ -138,7 +139,6 @@ getDbDefaultCovariateData <- function(connection,
andromedaTableName = "covariates",
snakeCaseToCamelCase = TRUE
)
filterCovariateDataCovariates(covariateData, "covariates", minCharacterizationMean)
}

# Continuous aggregated features
Expand Down
2 changes: 1 addition & 1 deletion inst/csv/jarChecksum.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
89d2f730bbcdad9c98603f061edc51d5cdbde9b15ee84aae571c9c6799457b53
3aa4b6bdae3b098051bf7025fcf5d78d4e8b159c4ed2c20bb281db5664635e94
Binary file modified inst/java/FeatureExtraction.jar
Binary file not shown.
35 changes: 23 additions & 12 deletions java/org/ohdsi/featureExtraction/FeatureExtraction.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public class FeatureExtraction {
private static String ADD_DESCENDANTS_SQL = "SELECT descendant_concept_id AS id\nINTO @target_temp\nFROM @cdm_database_schema.concept_ancestor\nINNER JOIN @source_temp\n\tON ancestor_concept_id = id;\n\n";

public static void main(String[] args) {
init("C:/Users/mschuemi/git/FeatureExtraction/inst");
init("/Users/ginberg/Code/FeatureExtraction/inst");
// init("C:/R/R-3.3.1/library/FeatureExtraction");
// init("D:/git/OHDSI/FeatureExtraction/inst");
// System.out.println(convertSettingsPrespecToDetails("{\"temporal\":false,\"DemographicsGender\":true,\"DemographicsAge\":true,\"longTermStartDays\":-365,\"mediumTermStartDays\":-180,\"shortTermStartDays\":-30,\"endDays\":0,\"includedCovariateConceptIds\":[],\"addDescendantsToInclude\":false,\"excludedCovariateConceptIds\":[1,2,3],\"addDescendantsToExclude\":false,\"includedCovariateIds\":[]}"));
Expand All @@ -86,13 +86,13 @@ public static void main(String[] args) {
//String settings = getDefaultPrespecTemporalAnalyses();
// String settings = getDefaultPrespecTemporalSequenceAnalyses();
// String settings = convertSettingsPrespecToDetails(getDefaultPrespecTemporalAnalyses());
// System.out.println(convertSettingsPrespecToDetails(getDefaultPrespecAnalyses()));
// System.out.println(convertSettingsPrespecToDetails(getDefaultPrespecAnalyses()));
String settings =
"{\"temporal\":true,\"temporalSequence\":false,\"ConditionEraGroupOverlap\":true,\"temporalStartDays\":0,\"temporalEndDays\":0,\"includedCovariateConceptIds\":[],\"addDescendantsToInclude\":false,\"excludedCovariateConceptIds\":[],\"addDescendantsToExclude\":false,\"includedCovariateIds\":[]}";
//String settings =
//"{\"temporal\":false,\"temporalSequence\":false,\"analyses\":[{\"analysisId\":999,\"sqlFileName\":\"CohortBasedBinaryCovariates.sql\",\"parameters\":{\"covariateCohortTable\":\"cohort\",\"analysisId\":999,\"analysisName\":\"Cohort\",\"startDay\":-365,\"endDay\":0},\"includedCovariateConceptIds\":[],\"addDescendantsToInclude\":false,\"excludedCovariateConceptIds\":[],\"addDescendantsToExclude\":false,\"includedCovariateIds\":[]}]}";
// String settings = convertSettingsPrespecToDetails(getDefaultPrespecAnalyses());
System.out.println(createSql(settings, true, "#temp_cohort", "row_id", -1, "cdm_synpuf"));
System.out.println(createSql(settings, true, "#temp_cohort", "row_id", -1, "cdm_synpuf", 0.0));
// System.out.println(createSql(getDefaultPrespecAnalyses(), true, "#temp_cohort", "row_id", -1, "cdm_synpuf"));
// System.out.println(createSql(getDefaultPrespecTemporalAnalyses(), false, "#temp_cohort", "row_id", -1, "cdm_synpuf"));
}
Expand Down Expand Up @@ -480,15 +480,17 @@ else if (temporalSequence)
* @param cdmDatabaseSchema
* The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should
* specify both the database and the schema, so for example 'cdm_instance.dbo'.
* @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This will help reduce
* the file size of the characterization output, but will remove information on covariates that have very low values.
* @return A JSON object.
*/
public static String createSql(String settings, boolean aggregated, String cohortTable, String rowIdField, String[] cohortDefinitionIds,
String cdmDatabaseSchema) {
String cdmDatabaseSchema, String minCharacterizationMean) {

long[] idsAsLongs = new long[cohortDefinitionIds.length];
for (int i = 0; i < cohortDefinitionIds.length; i++)
idsAsLongs[i] = Long.valueOf(cohortDefinitionIds[i]);
return createSql(settings, aggregated, cohortTable, rowIdField, idsAsLongs, cdmDatabaseSchema);
return createSql(settings, aggregated, cohortTable, rowIdField, idsAsLongs, cdmDatabaseSchema, Double.valueOf(minCharacterizationMean));
}


Expand Down Expand Up @@ -523,11 +525,13 @@ public static String createSql(String settings, boolean aggregated, String cohor
* @param cdmDatabaseSchema
* The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should
* specify both the database and the schema, so for example 'cdm_instance.dbo'.
* @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This will help reduce
* the file size of the characterization output, but will remove information on covariates that have very low values.
* @return A JSON object.
*/
public static String createSql(String settings, boolean aggregated, String cohortTable, String rowIdField, int cohortDefinitionId,
String cdmDatabaseSchema) {
return createSql(settings, aggregated, cohortTable, rowIdField, new long[]{cohortDefinitionId}, cdmDatabaseSchema);
String cdmDatabaseSchema, double minCharacterizationMean) {
return createSql(settings, aggregated, cohortTable, rowIdField, new long[]{cohortDefinitionId}, cdmDatabaseSchema, minCharacterizationMean);
}

/**
Expand Down Expand Up @@ -559,10 +563,12 @@ public static String createSql(String settings, boolean aggregated, String cohor
* @param cdmDatabaseSchema
* The name of the database schema that contains the OMOP CDM instance. Requires read permissions to this database. On SQL Server, this should
* specify both the database and the schema, so for example 'cdm_instance.dbo'.
* @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This will help reduce
* the file size of the characterization output, but will remove information on covariates that have very low values.
* @return A JSON object.
*/
public static String createSql(String settings, boolean aggregated, String cohortTable, String rowIdField, long[] cohortDefinitionIds,
String cdmDatabaseSchema) {
String cdmDatabaseSchema, double minCharacterizationMean) {

JSONObject jsonObject = new JSONObject(settings);

Expand Down Expand Up @@ -629,7 +635,7 @@ public static String createSql(String settings, boolean aggregated, String cohor
jsonWriter.key("sqlConstruction");
jsonWriter.value(createConstructionSql(jsonObject, idSetToName, temporal, temporalSequence, aggregated, cohortTable, rowIdField, cohortDefinitionIds, cdmDatabaseSchema));

String sqlQueryFeatures = createQuerySql(jsonObject, cohortTable, cohortDefinitionIds, aggregated, temporal, temporalSequence);
String sqlQueryFeatures = createQuerySql(jsonObject, cohortTable, cohortDefinitionIds, aggregated, temporal, temporalSequence, minCharacterizationMean);
if (sqlQueryFeatures != null) {
jsonWriter.key("sqlQueryFeatures");
jsonWriter.value(sqlQueryFeatures);
Expand Down Expand Up @@ -700,7 +706,8 @@ private static Object createCleanupSql(JSONObject jsonObject, boolean temporal2)
return sql.toString();
}

private static String createQuerySql(JSONObject jsonObject, String cohortTable, long[] cohortDefinitionIds, boolean aggregated, boolean temporal, boolean temporalSequence) {
private static String createQuerySql(JSONObject jsonObject, String cohortTable, long[] cohortDefinitionIds, boolean aggregated, boolean temporal, boolean temporalSequence,
double minCharacterizationMean) {
StringBuilder fields = new StringBuilder();
if (aggregated) {
fields.append("cohort_definition_id, covariate_id, sum_value");
Expand Down Expand Up @@ -736,8 +743,12 @@ private static String createQuerySql(JSONObject jsonObject, String cohortTable,
if (!hasFeature)
return null;
if (aggregated) {
sql.append(
"\n) all_covariates\nINNER JOIN (\nSELECT cohort_definition_id, COUNT(*) AS total_count\nFROM @cohort_table {@cohort_definition_id != -1} ? {\nWHERE cohort_definition_id IN (@cohort_definition_id)} GROUP BY cohort_definition_id\n) total\n ON all_covariates.cohort_definition_id = total.cohort_definition_id;");
sql.append("\n) all_covariates\nINNER JOIN (\nSELECT cohort_definition_id, COUNT(*) AS total_count\nFROM @cohort_table {@cohort_definition_id != -1} ? {\nWHERE cohort_definition_id IN (@cohort_definition_id)}");
sql.append(" GROUP BY cohort_definition_id\n) total\n ON all_covariates.cohort_definition_id = total.cohort_definition_id");
if (minCharacterizationMean != 0) {
sql.append(" WHERE average_value >= " + minCharacterizationMean);
}
sql.append(";");
} else {
sql.append("\n) all_covariates;");
}
Expand Down
4 changes: 2 additions & 2 deletions man/createCovariateSettings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/getDbCovariateData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<groupId>org.ohdsi</groupId>
<artifactId>featureExtraction</artifactId>
<packaging>jar</packaging>
<version>3.5.1-SNAPSHOT</version>
<version>3.6.0</version>
<name>featureExtraction</name>
<scm>
<connection>scm:git:https://github.com/OHDSI/featureExtraction</connection>
Expand Down

0 comments on commit dee92db

Please sign in to comment.