Download ACS 5-year data from the Census API at block group resolution (slow if requested for the entire US)
Source:R/acs_bybg.R
acs_bybg.Rd
Download ACS 5-year data from the Census API at block group resolution (slow if requested for the entire US)
Usage
acs_bybg(
variables = c(pop = "B01001_001"),
table = NULL,
year = NULL,
cache_table = FALSE,
output = "wide",
state = stateinfo$ST,
county = NULL,
zcta = NULL,
geometry = FALSE,
keep_geo_vars = FALSE,
summary_var = NULL,
key = NULL,
moe_level = 90,
survey = "acs5",
show_call = FALSE,
geography = "block group",
dropname = TRUE,
...
)
Arguments
- variables
Vector of variables - see get_acs from tidycensus package
- table
see get_acs from tidycensus package.
EJSCREEN-relevant key tables are listed in the details section here.
- year
e.g., 2022, 2023, or 2024
- cache_table
see get_acs from tidycensus package
- output
see get_acs from tidycensus package
- state
Default is a vector of the 2-character abbreviations of all US States, DC, and PR.
- county
see get_acs from tidycensus package
- zcta
see get_acs from tidycensus package
- geometry
see get_acs from tidycensus package
- keep_geo_vars
see get_acs from tidycensus package
- summary_var
see get_acs from tidycensus package
- key
see get_acs from tidycensus package
- moe_level
see get_acs from tidycensus package
- survey
see get_acs from tidycensus package
- show_call
see get_acs from tidycensus package
- geography
"block group" (note this needs the space between words)
- dropname
whether to drop the column called NAME
- ...
see get_acs from tidycensus package
Value
A data.table (not tibble, and not just a data.frame)
Details
Probably requires obtaining and specifying a Census Bureau API key (at least if the query is large). See the tidycensus package help regarding the environment variable CENSUS_API_KEY.
NOTES ON KEY TABLES IN ACS THAT ARE RELEVANT TO EJSCREEN:
# Load the ACS 5-year variable dictionary (uses the tidycensus package)
# and summarize the tables that are relevant to EJSCREEN.
x <- tidycensus::load_variables(2022, "acs5")
tables <- c(
  "B25034", # pre1960, for lead paint indicator (environmental not demographic per se)
  "B01001", # sex and age / basic population counts
  "B03002", # race with hispanic ethnicity
  "B02001", # race without hispanic ethnicity
  "B15002", # education
  "B23025", # unemployed
  "C17002", # low income, poor, etc.
  "B19301", # per capita income
  "B25032", # owned units vs rented units (occupied housing units, same universe as B25003)
  "B28003", # no broadband
  "B27010", # no health insurance
  "C16002", # (language category and) % of households limited English speaking (lingiso) "https://data.census.gov/table/ACSDT5Y2023.C16002"
  "B16004", # (language category and) % of residents (not hhlds) speak no English at all "https://data.census.gov/table/ACSDT5Y2023.B16004"
  ####### TRACT ONLY:
  # used by EJSCREEN but only available at tract resolution:
  "C16001", # languages detailed list: % of residents (not hhlds) IN TRACT speak Chinese, etc. "https://data.census.gov/table/ACSDT5Y2023.C16001"
  "B18101" # disability -- at tract resolution only ########### #
)
# Append "_" so each 7-character prefix matches variable names of that table
acstabs2 <- paste0(tables, "_")
# Keep just the first variable found for each table: one summary row per table
mytables <- data.table::rbindlist(lapply(acstabs2, function(z) {
  x[substr(x$name, 1, 7) %in% z, ][1, ]
}))
print(mytables)
# see details of ALL the variables in these tables
# for (i in seq_len(NROW(mytables))) {
#   x[substr(x$name, 1, 7) %in% substr(mytables[i, ]$name, 1, 7), ] |> print(n = 50)
# }
# disability is by tract only:
cbind(unique(grep("disab", x$concept, value = TRUE, ignore.case = TRUE)))
# x[substr(x$name,1,6) %in% "B18101" & x$geography %in% "block group", ] |> print(n=50) # none
x[substr(x$name,1,7) %in% "B18101_" , ] |> print(n=50)
Examples
if (FALSE) { # \dontrun{
  ## All states, full table
  # newvars <- acs_bybg(table = "B01001")

  ## One state, some variables
  newvars <- acs_bybg(c(pop = "B01001_001", y = "B01001_002"), state = "DC")

  ## Format new data to match rows of blockgroupstats
  data.table::setnames(newvars, "GEOID", "bgfips")
  dim(newvars)
  # join on bgfips, using the bgfips and ST columns of blockgroupstats
  newvars <- newvars[blockgroupstats[, .(bgfips, ST)], , on = "bgfips"]
  dim(blockgroupstats)
  dim(newvars)
  newvars
  newvars[ST == "DC", ]

  ## Calculate a new indicator for each blockgroup, using ACS data
  mystates <- c("DC", "RI")
  newvars <- acs_bybg(
    variables = c("B01001_001", paste0("B01001_0", 31:39)),
    state = mystates
  )
  data.table::setnames(newvars, "GEOID", "bgfips")
  newvars[, ST := fips2stateabbrev(bgfips)]
  # strip the trailing "E" from column names; setnames() renames by reference,
  # which is the data.table idiom (rather than names(x) <- ...)
  data.table::setnames(newvars, gsub("E$", "", names(newvars)))
  # provide formulas for calculating new indicators from ACS raw data:
  formula1 <- c(
    " pop = B01001_001",
    " age1849female = (B01001_031 + B01001_032 + B01001_033 + B01001_034 +
      B01001_035 + B01001_036 + B01001_037 + B01001_038 + B01001_039)",
    " pct1849female = ifelse(pop == 0, 0, age1849female / pop)"
  )
  newvars <- calc_ejam(
    newvars,
    formulas = formula1,
    keep.old = c("bgid", "ST", "pop", "bgfips")
  )
  # express the ratio as a percentage rounded to one decimal place
  newvars[, pct1849female := round(100 * pct1849female, 1)]
  # map only the first rows; head() never pads with NA rows if there are
  # fewer than 10
  mapfast(
    head(newvars, 10),
    column_names = colnames(newvars),
    labels = gsub(
      "pct1849female", "Women 18-49 as % of residents",
      gsub(
        "age1849female", "Count of women ages 18-49",
        fixcolnames(colnames(newvars), "r", "long")
      )
    )
  )

  ## ACS tables and variables most relevant to EJSCREEN
  acsinfo <- tidycensus::load_variables(2022, "acs5")
  ejscreentables <- c(
    "B01001", # sex and age / basic population counts
    "B03002", # race with hispanic ethnicity
    "B02001", # race without hispanic ethnicity
    "B15002", # education
    "C16002", # language/ lingiso
    "B16004", # language category and English not at all
    "C17002", # low income, poor, etc.
    "B25034", # pre1960, for lead paint indicator
    "B23025", # unemployed
    "B25032", # owned units vs rented units # ***
    "B25003", # owned vs rented # ***
    "B28003", # no broadband
    "B27010", # no health insurance
    "B18101" # disability -- at tract resolution only ########### #
  )
  # table code = the part of the variable name before the first underscore
  acsinfo$table <- gsub("_.*", "", acsinfo$name)
  myacsinfo <- acsinfo[acsinfo$table %in% ejscreentables, ]
  # first variable of each table, as a one-row-per-table summary
  mytables <- data.table::rbindlist(lapply(ejscreentables, function(z) {
    acsinfo[acsinfo$table %in% z, ][1, ]
  }))
  ejscreen_tables <- mytables$table # same as ejscreentables
  # all variable names in the listed tables
  # NOTE(review): an earlier note here said "184 variables among 8 tables",
  # but 14 tables are listed above -- TODO confirm the current count.
  # NOTE(review): B18101 is flagged above as tract-only, yet its variables are
  # included in this block group request -- confirm that is intended.
  myvars <- myacsinfo$name

  if ("want to run example that takes a few minutes" == "yes") {
    # VERY SLOWLY download data for all these tables
    # in ALL STATES and DC, but not Island Areas (PR is dropped just below)
    mystates <- stateinfo2[stateinfo2$is.usa.plus.pr, ]$ST
    ## PR must be handled separately. see e.g., B05001PR
    mystates <- mystates[mystates != "PR"]
    ### takes time to download each table for each state:
    system.time({
      newvars <- acs_bybg(variables = myvars, state = mystates)
    })
    data.table::setnames(newvars, "GEOID", "bgfips")
    newvars[, ST := fips2stateabbrev(bgfips)]
    # strip the trailing "E" from column names, by reference
    data.table::setnames(newvars, gsub("E$", "", names(newvars)))
    dim(newvars) # 239781 rows (bgs), 370 columns (variable estimates and margin of error values)
    t(head(newvars))
    ejscreen_acs <- newvars
    # writes ejscreen_acs.rda into the current working directory
    save(ejscreen_acs, file = "ejscreen_acs.rda")
  }
} # }