This document provides examples of how to obtain data using the dams package and how to create summary graphics from the extracted data.

Data Attributes

If you have not already done so, load the package along with ggplot2 and maps (for graphics).

Load Data

Load the entire dataset. This might take a few moments.

# Report the size of the dataset: number of rows, then number of columns.
dim(nid_subset)
## [1] 91457    32
# Preview the first three records to see the available columns and their types.
head(nid_subset, 3)
## # A tibble: 3 x 32
##   recordid dam_name nidid longitude latitude county owner_type dam_type purposes
##      <dbl> <chr>    <chr>     <dbl>    <dbl> <chr>  <chr>      <chr>    <chr>   
## 1        1 COOPER … AK00…     -150.     60.4 KENAI… P          RE       H       
## 2        2 BLUE LA… AK00…     -135.     57.1 SITKA  L          VA       HRS     
## 3        3 SALMON … AK00…     -134.     58.3 JUNEAU P          VA       HORS    
## # … with 23 more variables: year_completed <dbl>, nid_height <dbl>,
## #   max_storage <dbl>, normal_storage <dbl>, nid_storage <dbl>, hazard <chr>,
## #   eap <chr>, inspection_frequency <dbl>, state_reg_dam <chr>,
## #   spillway_width <dbl>, volume <dbl>, number_of_locks <dbl>,
## #   length_of_locks <dbl>, width_of_locks <dbl>, source_agency <chr>,
## #   state <chr>, submit_date <chr>, party <chr>, numseparatestructures <dbl>,
## #   permittingauthority <chr>, inspectionauthority <chr>,
## #   enforcementauthority <chr>, jurisdictionaldam <chr>

Summary Graphics

Data for graphics

# Keep only the two columns the summary graphics need: completion year and
# state abbreviation.
gfx_data <- nid_subset[c("year_completed", "state")]

# Show the first six rows of the reduced table.
head(gfx_data, n = 6)
## # A tibble: 6 x 2
##   year_completed state
##            <dbl> <chr>
## 1           1959 AK   
## 2           1961 AK   
## 3           1914 AK   
## 4           1968 AK   
## 5           1955 AK   
## 6           1957 AK

Counts of number of dams built per decade or other time period of interest

# Bin completion years into reporting periods. cut() builds right-closed
# intervals by default, so each single-year label names the upper end of its
# bin (e.g. "1910" covers 1901-1910). Years outside (0, 2014] become NA and
# are not counted by table().
gfx_data$year <- cut(gfx_data$year_completed, 
                     breaks = c(0, 1850, seq(1900, 2000, 10), 2014), 
                     labels = c("<1850", "1850-1900", "1910", "1920", "1930",
                              "1940", "1950", "1960", "1970", "1980", "1990", 
                              "2000", "2014"))
table(gfx_data$year)
## 
##     <1850 1850-1900      1910      1920      1930      1940      1950      1960 
##       569      2351      1460      1992      2329      4144      5028     13318 
##      1970      1980      1990      2000      2014 
##     18935     12167      5148      4629      3770
# Convert the tabulation to a two-column data frame for plotting.
year_counts <- as.data.frame(table(gfx_data$year), stringsAsFactors = FALSE)
colnames(year_counts) <- c("Year", "Count")
# Restore the chronological level order. A plain character column would be
# sorted by the plotting scale using locale collation, which can move "<1850"
# to the end of the axis (e.g. in the C locale, where "<" sorts after digits).
year_counts$Year <- factor(year_counts$Year, levels = levels(gfx_data$year))

Bar chart of the number of dams by time period

# Bar chart of the precomputed counts: one bar per completion period, with
# bar heights taken directly from Count (stat = "identity").
gfx_bar <- ggplot(year_counts, aes(x = Year, y = Count)) +
  geom_bar(position = "dodge", stat = "identity") +
  ylab("Number of Dams") +
  xlab("Year of Completion") +
  ggtitle("Number of Dams in the NID Database")

Counts of dams per state in the US mainland

# Drop the records for AK, HI, PR and GU so that only the remaining
# (mainland) states are counted.
gfx_data <- gfx_data[!(gfx_data$state %in% c("AK", "HI", "PR", "GU")), ]
# Number of dams per state, in increasing order.
sort(table(gfx_data[["state"]]))
## 
##   DE   RI   VT   AZ   MD   NM   ID   NV   ME   WV   LA   NH   WA   NJ   CT   UT 
##   83  234  368  376  400  407  472  525  581  586  609  652  802  834  845  860 
##   OR   IN   ND   MI   WI   KY   FL   MN   TN   AR   MA   OH   PA   CA   WY   IL 
##  882  912  933 1059 1065 1090 1109 1187 1241 1263 1327 1407 1514 1576 1613 1662 
##   CO   NY   AL   SC   SD   VA   NE   NC   MT   IA   OK   GA   MO   MS   KS   TX 
## 1803 1934 2273 2343 2562 2790 3007 3191 3306 4018 4986 5306 5379 6081 6403 7324

Map of dams per state in the US mainland

# Tabulate dams per state. The abbreviations are kept as character so they
# can be merged with the state-name lookup below.
state_counts <- as.data.frame(table(gfx_data$state), stringsAsFactors = FALSE)
colnames(state_counts) <- c("state", "Count")

# add long names of states
state_names <- data.frame(state = state.abb, 
                          name = state.name, 
                          stringsAsFactors = FALSE)
gfx_data <- merge(state_counts, state_names, by = "state")
# change state name to lower case to match the region names in the map data
gfx_data$name <- tolower(gfx_data$name)

# geo reference data on states (polygon outlines from the maps package)
geo_state <- map_data("state")

# merge data with above for graphics
gfx_data <- merge(geo_state, gfx_data, by.x = "region", by.y = "name")
# merge() reorders rows; restore the original vertex order so the polygons
# are drawn correctly
gfx_data <- gfx_data[order(gfx_data$order), ]

# discretize state counts
color_breaks <- c(0, 100, 500, 1000, 2000, 3000, 4000, 5000, 7500)
color_labels <- c("<100", "100 - 500", "500 - 1000", "1000 - 2000",
                  "2000 - 3000", "3000 - 4000", "4000 - 5000", "5000 - 7500")
gfx_data$dams <- cut(gfx_data$Count, 
                     breaks = color_breaks, 
                     labels = color_labels)

# filled state polygons, coloured by the binned dam count
gfx_map <- ggplot(data = gfx_data)
gfx_map <- gfx_map + geom_polygon(aes(x = long, y = lat, group = group,
            fill = dams))
# state outlines; geom_path() has no fill aesthetic, so none is mapped
gfx_map <- gfx_map + geom_path(data = geo_state, aes(x = long, y = lat,
            group = group))
# labs() takes named arguments directly rather than a list
gfx_map <- gfx_map + labs(title = "Number of Dams in the NID Database",
            x = NULL, y = NULL)
gfx_map <- gfx_map + guides(fill = guide_legend(title = "Number of Dams"))
gfx_map <- gfx_map + scale_fill_brewer(palette = "Accent")
gfx_map <- gfx_map + coord_map()

Other Analyses: Flood Control Dams

A number of interesting analyses could be performed with the dataset. Of interest to water resources managers and hydrologists is the location of flood control dams. It is interesting to see that a few states, such as Texas, have a large number of flood control dams.

# Keep only the dams whose purposes string contains "C" (presumably the
# flood-control code; confirm against the NID purpose codes). grepl() returns
# one logical per row. The earlier form, length(grep(...)) > 0, collapsed to a
# single TRUE that subset() recycled, so every dam was kept.
flood_dams <- subset(nid_subset, grepl("C", purposes, fixed = TRUE))

# NOTE(review): the table printed below was produced by the earlier, unfiltered
# expression (its counts equal the all-dams totals per state); re-run this
# chunk to refresh it.
table(flood_dams$state)
## 
##   AK   AL   AR   AZ   CA   CO   CT   DE   FL   GA   GU   HI   IA   ID   IL   IN 
##  107 2273 1263  376 1576 1803  845   83 1109 5306    1  133 4018  472 1662  912 
##   KS   KY   LA   MA   MD   ME   MI   MN   MO   MS   MT   NC   ND   NE   NH   NJ 
## 6403 1090  609 1327  400  581 1059 1187 5379 6081 3306 3191  933 3007  652  834 
##   NM   NV   NY   OH   OK   OR   PA   PR   RI   SC   SD   TN   TX   UT   VA   VT 
##  407  525 1934 1407 4986  882 1514   36  234 2343 2562 1241 7324  860 2790  368 
##   WA   WI   WV   WY 
##  802 1065  586 1613