dt <- nla_load(2012)
tp <- select(dt$waterchem_wide, PTL_RESULT, UID) %>%
      left_join(select(dt$wide_siteinfo, AREA_HA, SITE_ID, UID)) %>%
      group_by(SITE_ID) %>%
      summarize(tp = median(PTL_RESULT), area = median(AREA_HA) / 100)
## Joining, by = "UID"
chl  <- select(dt$chla_wide, CHLX_RESULT, UID)  %>%
        left_join(select(dt$wide_siteinfo, SITE_ID, UID)) %>%
        group_by(SITE_ID) %>%
        summarize(chl = median(CHLX_RESULT))
## Joining, by = "UID"
secchi  <- select(dt$secchi, SECCHI, SITE_ID)  %>%
           group_by(SITE_ID) %>%
           summarize(secchi = median(SECCHI))

res <- reduce(list(tp, chl, secchi), left_join)
## Joining, by = "SITE_ID"
## Joining, by = "SITE_ID"
res <- left_join(res, select(dt$wide_phab, depth = DEPTH_AT_STATION, SITE_ID))
## Joining, by = "SITE_ID"
skimr::skim(res)
Data summary
Name res
Number of rows 20838
Number of columns 6
_______________________
Column type frequency:
character 1
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SITE_ID 0 1 12 13 0 1130 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
tp 0 1.00 120.94 286.04 4.00 21.00 41.00 100.00 3636.00 ▇▁▁▁▁
area 0 1.00 4.62 39.54 0.01 0.11 0.30 1.05 1674.90 ▇▁▁▁▁
chl 90 1.00 26.75 53.16 0.00 3.03 8.00 26.80 764.64 ▇▁▁▁▁
secchi 1238 0.94 2.08 2.25 0.02 0.64 1.39 2.83 28.00 ▇▁▁▁▁
depth 9051 0.57 1.60 1.71 0.00 0.70 1.10 1.90 34.10 ▇▁▁▁▁