Skip to content

Commit

Permalink
Merge pull request #41 from afsc-gap-products/development
Browse files Browse the repository at this point in the history
Development: addressing #38 and #39
  • Loading branch information
zoyafuso-NOAA authored Nov 17, 2023
2 parents d5a86ee + f828a57 commit 57bd38c
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 90 deletions.
97 changes: 61 additions & 36 deletions R/calc_biomass_subarea.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,43 +77,46 @@ calc_biomass_subarea <- function(racebase_tables = NULL,

if (nrow(x = subarea_biomass_by_stratrum) > 0) {

## Sum the biomass and abundance across strata in isubarea
## Sum the total biomass across the strata in isubarea
subarea_summed_biomass <-
stats::aggregate(BIOMASS_MT ~
SPECIES_CODE + YEAR,
data = subarea_biomass_by_stratrum,
FUN = sum)

## Sum the N_HAUL, N_WEIGHT, N_COUNT, N_LENGTH across the
## strata in isubarea.
subarea_summed_hauls <-
stats::aggregate(cbind(N_HAUL, N_WEIGHT, N_COUNT, N_LENGTH) ~
SPECIES_CODE + YEAR,
data = subarea_biomass_by_stratrum,
FUN = sum)

## Sum the biomass and abundance across strata in isubarea
subarea_summed_biomass <-
stats::aggregate(BIOMASS_MT ~
## Sum the total abundance across the strata in isubarea
subarea_summed_population <-
stats::aggregate(POPULATION_COUNT ~
SPECIES_CODE + YEAR,
data = subarea_biomass_by_stratrum,
FUN = sum)

subarea_summed_population <-
stats::aggregate(POPULATION_COUNT ~
SPECIES_CODE + YEAR,
data = subarea_biomass_by_stratrum,
FUN = sum)

## Merge Total Biomass and Population
subarea_summed_biomass <- merge(x = subarea_summed_biomass,
y = subarea_summed_population,
by = c("SPECIES_CODE", "YEAR"),
all = TRUE)

## Sum the many types of variances across strata in isubarea
subarea_summed_variance <-
stats::aggregate(cbind(CPUE_KGKM2_VAR, CPUE_NOKM2_VAR,
BIOMASS_VAR, POPULATION_VAR) ~
SPECIES_CODE + YEAR,

## Sum the variances associated with the total biomass across
## the strata in isubarea
subarea_summed_biomass_variance <-
stats::aggregate(BIOMASS_VAR ~ SPECIES_CODE + YEAR,
data = subarea_biomass_by_stratrum,
FUN = sum,
na.rm = TRUE)


## Sum the variances associated with the total abundance across
## the strata in isubarea
subarea_summed_population_variance <-
stats::aggregate(POPULATION_VAR ~ SPECIES_CODE + YEAR,
data = subarea_biomass_by_stratrum,
FUN = sum,
na.rm = TRUE)

## Calculate a weighted mean weight and numerical CPUE across the
## strata in isubarea using stratum area as the weightings.
subarea_mean_cpue <-
do.call(what = rbind,
args = lapply(
Expand All @@ -122,31 +125,53 @@ calc_biomass_subarea <- function(racebase_tables = NULL,
subarea_biomass_by_stratrum$YEAR)),
FUN = function(x) {
data.frame(
SPECIES_CODE = unique(x$SPECIES_CODE),
YEAR = unique(x$YEAR),
SPECIES_CODE = unique(x = x$SPECIES_CODE),
YEAR = unique(x = x$YEAR),
TOT_AREA = sum(x$AREA_KM2),
CPUE_KGKM2_MEAN = weighted.mean(x = x$CPUE_KGKM2_MEAN,
w = x$AREA_KM2),
w = x$AREA_KM2,
na.rm = TRUE),
CPUE_NOKM2_MEAN = weighted.mean(x = x$CPUE_NOKM2_MEAN,
w = x$AREA_KM2),
w = x$AREA_KM2,
na.rm = TRUE),
stringsAsFactors = FALSE)} ))

## Merge haul summary
subarea_summed_biomass <- merge(x = subarea_summed_biomass,
y = subarea_summed_hauls,
by = c("SPECIES_CODE", "YEAR"))

## Merge Total Abundance
subarea_summed_biomass <- merge(x = subarea_summed_biomass,
y = subarea_summed_population,
by = c("SPECIES_CODE", "YEAR"),
all = TRUE)

## Merge mean numerical/weight CPUE
subarea_summed_biomass <- merge(x = subarea_summed_biomass,
y = subarea_mean_cpue,
by = c("SPECIES_CODE", "YEAR"))

subarea_summed_biomass <- merge(y = subarea_summed_variance,
x = subarea_summed_biomass,
## Merge biomass/mean weight CPUE variance
subarea_summed_biomass <- merge(x = subarea_summed_biomass,
y = subarea_summed_biomass_variance,
by = c("SPECIES_CODE", "YEAR"))

subarea_summed_biomass <- merge(y = subarea_summed_hauls,
x = subarea_summed_biomass,
## Merge Total abundance/mean numerical CPUE variance
subarea_summed_biomass <- merge(x = subarea_summed_biomass,
y = subarea_summed_population_variance,

by = c("SPECIES_CODE", "YEAR"))


## Derive the variance associated with the mean weight CPUE from
## the variance of the total biomass.
subarea_summed_biomass$CPUE_KGKM2_VAR <-
subarea_summed_biomass$BIOMASS_VAR /
subarea_summed_biomass$TOT_AREA^2 * 1e6

## Derive the variance associated with the mean numerical CPUE from
## the variance of the total abundance.
subarea_summed_biomass$CPUE_NOKM2_VAR <-
subarea_summed_biomass$POPULATION_VAR /
subarea_summed_biomass$TOT_AREA^2
Expand Down Expand Up @@ -187,11 +212,11 @@ calc_biomass_subarea <- function(racebase_tables = NULL,
these early years were removed.")

subarea_biomass <- subset(x = subarea_biomass,
subset = !(SURVEY_DEFINITION_ID == 98 &
YEAR < 1987 &
AREA_ID %in% c(99900,
300, 200, 100,
8, 9)) )
subset = !(SURVEY_DEFINITION_ID == 98 &
YEAR < 1987 &
AREA_ID %in% c(99900,
300, 200, 100,
8, 9)) )
}

return(subarea_biomass)
Expand Down
110 changes: 56 additions & 54 deletions R/get_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ get_data <- function(year_set = c(1996, 1999),
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if (is.null(x = sql_channel)) sql_channel <- gapindex::get_connected()


##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## 2) Get survey designs for the survey regions and years queried
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -69,17 +68,17 @@ get_data <- function(year_set = c(1996, 1999),
survey_design <-
RODBC::sqlQuery(channel = sql_channel,
query = paste0(
"SELECT SURVEY_DEFINITION_ID, ",
"CASE
"SELECT SURVEY_DEFINITION_ID,
CASE
WHEN SURVEY_DEFINITION_ID = 143 THEN 'NBS'
WHEN SURVEY_DEFINITION_ID = 98 THEN 'EBS'
WHEN SURVEY_DEFINITION_ID = 47 THEN 'GOA'
WHEN SURVEY_DEFINITION_ID = 52 THEN 'AI'
WHEN SURVEY_DEFINITION_ID = 78 THEN 'BSS'
ELSE NULL
END AS SURVEY, ",
"YEAR, DESIGN_YEAR FROM GAP_PRODUCTS.SURVEY_DESIGN ",
" WHERE SURVEY_DEFINITION_ID IN ",
END AS SURVEY,
YEAR, DESIGN_YEAR FROM GAP_PRODUCTS.SURVEY_DESIGN
WHERE SURVEY_DEFINITION_ID IN ",
survey_def_ids_vec,
" AND YEAR IN ", year_vec) )

Expand All @@ -99,22 +98,22 @@ get_data <- function(year_set = c(1996, 1999),
RODBC::sqlQuery(
channel = sql_channel,
query = paste0(
"select distinct a.cruisejoin, b.cruise, floor(b.cruise/100) year, ",
"d.survey_definition_id, b.vessel_id, e.name vessel_name, ",
"CASE
WHEN d.SURVEY_DEFINITION_ID = 143 THEN 'NBS'
WHEN d.SURVEY_DEFINITION_ID = 98 THEN 'EBS'
WHEN d.SURVEY_DEFINITION_ID = 47 THEN 'GOA'
WHEN d.SURVEY_DEFINITION_ID = 52 THEN 'AI'
WHEN d.SURVEY_DEFINITION_ID = 78 THEN 'BSS'
"select distinct a.cruisejoin, b.cruise, floor(b.cruise/100) year,
d.survey_definition_id, b.vessel_id, e.name vessel_name,
CASE
WHEN d.SURVEY_DEFINITION_ID = 143 THEN 'NBS'
WHEN d.SURVEY_DEFINITION_ID = 98 THEN 'EBS'
WHEN d.SURVEY_DEFINITION_ID = 47 THEN 'GOA'
WHEN d.SURVEY_DEFINITION_ID = 52 THEN 'AI'
WHEN d.SURVEY_DEFINITION_ID = 78 THEN 'BSS'
ELSE NULL
END AS SURVEY ",
"from racebase.haul a, race_data.cruises b, race_data.surveys c, ",
"race_data.survey_definitions d, race_data.vessels e ",
"where a.vessel = b.vessel_id and b.vessel_id = e.vessel_id ",
"and a.cruise = b.cruise and c.survey_id = b.survey_id ",
"and c.survey_definition_id = d.survey_definition_id ",
"and d.survey_definition_id in ", survey_def_ids_vec,
END AS SURVEY
from racebase.haul a, race_data.cruises b, race_data.surveys c,
race_data.survey_definitions d, race_data.vessels e
where a.vessel = b.vessel_id and b.vessel_id = e.vessel_id
and a.cruise = b.cruise and c.survey_id = b.survey_id
and c.survey_definition_id = d.survey_definition_id
and d.survey_definition_id in ", survey_def_ids_vec,
"and a.abundance_haul in ", gapindex::stitch_entries(abundance_haul),
"and year in ", year_vec))

Expand All @@ -139,18 +138,18 @@ get_data <- function(year_set = c(1996, 1999),
area_info <-
RODBC::sqlQuery(channel = sql_channel,
query = paste0(
"SELECT SURVEY_DEFINITION_ID, ",
"CASE
WHEN SURVEY_DEFINITION_ID = 143 THEN 'NBS'
WHEN SURVEY_DEFINITION_ID = 98 THEN 'EBS'
WHEN SURVEY_DEFINITION_ID = 47 THEN 'GOA'
WHEN SURVEY_DEFINITION_ID = 52 THEN 'AI'
WHEN SURVEY_DEFINITION_ID = 78 THEN 'BSS'
ELSE NULL
END AS SURVEY, ",
"DESIGN_YEAR, AREA_ID, AREA_TYPE, AREA_KM2, DESCRIPTION, ",
"AREA_NAME FROM GAP_PRODUCTS.AREA",
" WHERE SURVEY_DEFINITION_ID IN ", survey_def_ids_vec))
"SELECT SURVEY_DEFINITION_ID,
CASE
WHEN SURVEY_DEFINITION_ID = 143 THEN 'NBS'
WHEN SURVEY_DEFINITION_ID = 98 THEN 'EBS'
WHEN SURVEY_DEFINITION_ID = 47 THEN 'GOA'
WHEN SURVEY_DEFINITION_ID = 52 THEN 'AI'
WHEN SURVEY_DEFINITION_ID = 78 THEN 'BSS'
ELSE NULL
END AS SURVEY,
DESIGN_YEAR, AREA_ID, AREA_TYPE, AREA_KM2, DESCRIPTION,
AREA_NAME FROM GAP_PRODUCTS.AREA
WHERE SURVEY_DEFINITION_ID IN ", survey_def_ids_vec))

## Subset stratum info out of `area_info`
stratum_data <- subset(x = area_info,
Expand Down Expand Up @@ -181,18 +180,18 @@ get_data <- function(year_set = c(1996, 1999),
stratum_groups <-
RODBC::sqlQuery(channel = sql_channel,
query = paste0(
"SELECT SURVEY_DEFINITION_ID, ",
"CASE
WHEN SURVEY_DEFINITION_ID = 143 THEN 'NBS'
WHEN SURVEY_DEFINITION_ID = 98 THEN 'EBS'
WHEN SURVEY_DEFINITION_ID = 47 THEN 'GOA'
WHEN SURVEY_DEFINITION_ID = 52 THEN 'AI'
WHEN SURVEY_DEFINITION_ID = 78 THEN 'BSS'
"SELECT SURVEY_DEFINITION_ID,
CASE
WHEN SURVEY_DEFINITION_ID = 143 THEN 'NBS'
WHEN SURVEY_DEFINITION_ID = 98 THEN 'EBS'
WHEN SURVEY_DEFINITION_ID = 47 THEN 'GOA'
WHEN SURVEY_DEFINITION_ID = 52 THEN 'AI'
WHEN SURVEY_DEFINITION_ID = 78 THEN 'BSS'
ELSE NULL
END AS SURVEY, ",
"AREA_ID, DESIGN_YEAR, STRATUM ",
"FROM GAP_PRODUCTS.STRATUM_GROUPS ",
"WHERE SURVEY_DEFINITION_ID IN ", survey_def_ids_vec,
END AS SURVEY,
AREA_ID, DESIGN_YEAR, STRATUM
FROM GAP_PRODUCTS.STRATUM_GROUPS
WHERE SURVEY_DEFINITION_ID IN ", survey_def_ids_vec,
"ORDER BY SURVEY, AREA_ID, STRATUM"))

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -209,9 +208,11 @@ get_data <- function(year_set = c(1996, 1999),
haul_data <-
RODBC::sqlQuery(channel = sql_channel,
query = paste0(
"SELECT * FROM RACEBASE.HAUL WHERE CRUISEJOIN IN ",
cruisejoin_vec, " AND HAUL_TYPE IN ", haultype_vec,
" AND PERFORMANCE >= 0 AND ABUNDANCE_HAUL IN ",
"SELECT * FROM RACEBASE.HAUL
WHERE CRUISEJOIN IN ", cruisejoin_vec,
" AND HAUL_TYPE IN ", haultype_vec,
" AND PERFORMANCE >= 0
AND ABUNDANCE_HAUL IN ",
gapindex::stitch_entries(abundance_haul)))

if (na_rm_strata)
Expand Down Expand Up @@ -256,13 +257,14 @@ get_data <- function(year_set = c(1996, 1999),
cat("Pulling species data...\n")
species_info <- RODBC::sqlQuery(
channel = sql_channel,
query = paste0("SELECT * FROM GAP_PRODUCTS.TAXONOMIC_CLASSIFICATION ",
"WHERE SURVEY_SPECIES = 1 ",
"AND SPECIES_CODE IN ",
query = paste0("SELECT * FROM GAP_PRODUCTS.TAXONOMIC_CLASSIFICATION
WHERE SURVEY_SPECIES = 1
AND SPECIES_CODE IN ",
ifelse(test = is.null(x = spp_codes$SPECIES_CODE),
yes = paste0("(SELECT DISTINCT SPECIES_CODE ",
"FROM RACEBASE.CATCH where ",
"CRUISEJOIN in ", cruisejoin_vec, ")"),
yes = paste0("(SELECT DISTINCT SPECIES_CODE
FROM RACEBASE.CATCH ",
"WHERE CRUISEJOIN IN ",
cruisejoin_vec, ")"),
no = spp_codes_vec)))

##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -381,7 +383,7 @@ get_data <- function(year_set = c(1996, 1999),
## Rename "GROUP" column
names(x = catch_data)[names(x = catch_data) == "GROUP"] <- "SPECIES_CODE"

if (pull_lengths) {
if (pull_lengths & !is.null(size_data)) {
## Merge "GROUP" column from `spp_codes` into `size_data` for scenraios
## where you are defining species complexes.
size_data <- merge(x = size_data,
Expand Down

0 comments on commit 57bd38c

Please sign in to comment.