Skip to content

Commit

Permalink
geography + swap censusapi
Browse files Browse the repository at this point in the history
  • Loading branch information
CoryMcCartan committed Aug 25, 2022
1 parent 32090cb commit ce97c80
Show file tree
Hide file tree
Showing 21 changed files with 676 additions and 154 deletions.
44 changes: 26 additions & 18 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
Expand All @@ -10,32 +10,40 @@ name: R-CMD-check

jobs:
R-CMD-check:
runs-on: ubuntu-latest
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})

strategy:
fail-fast: false
matrix:
config:
- {os: macOS-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v1
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: rcmdcheck

- uses: r-lib/actions/check-r-package@v1

- name: Show testthat output
if: always()
run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
shell: bash
extra-packages: any::rcmdcheck
needs: check

- name: Upload check results
if: failure()
uses: actions/upload-artifact@main
- uses: r-lib/actions/check-r-package@v2
with:
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
path: check
upload-snapshots: true
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: easycensus
Title: Quickly Extract and Marginalize U.S. Census Tables
Title: Quickly Find, Extract, and Marginalize U.S. Census Tables
Version: 1.0.0
Authors@R:
person("Cory", "McCartan", email="[email protected]", role=c("aut", "cre"))
Expand All @@ -16,7 +16,6 @@ Imports:
dplyr (>= 1.0.0),
tidyr (>= 1.0.0),
stringr,
tidycensus,
censusapi,
cli
Suggests:
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ S3method(vec_ptype_abbr,estimate)
export(as_estimate)
export(cens_find_acs)
export(cens_find_dec)
export(cens_geo)
export(cens_get_acs)
export(cens_get_dec)
export(cens_margin_to)
Expand All @@ -44,6 +45,7 @@ export(to_rvar)
import(cli)
import(stringr)
import(vctrs)
importFrom(dplyr,"%>%")
importFrom(dplyr,as_tibble)
importFrom(dplyr,if_else)
importFrom(pillar,pillar_shaft)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
* Use `censusapi` instead of `tidycensus` for fewer dependencies
* New `estimate` vector type that tracks uncertainty through mathematical operations
* Expose parsed tables to users with a new `cens_table` type
* Improve tidiers, including income bin tidiers
* More flexible geography options when downloading data. See `cens_geo()` for details.
* Improved tidiers, including new income bin and detailed race tidiers

# easycensus 0.2.0

Expand Down
10 changes: 7 additions & 3 deletions R/cens_find.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' possible matches, at the cost of more output. Negative values will be
#' converted to positive but will suppress any printing.
#'
#' @returns The codes for the top `show` tables, invisibly.
#' @returns The codes for the top `show` tables, invisibly if `show` is positive.
#'
#' @examples
#' cens_find_dec("sex", "age")
Expand All @@ -32,8 +32,10 @@ cens_find_dec <- function(..., show=2) {
if (show > 0) {
cli_h1("Top {show} matching table{?s}")
lapply(tables_sf1[best], print, all=FALSE)
invisible(best)
} else {
best
}
invisible(best)
}

#' @rdname cens_find
Expand All @@ -44,8 +46,10 @@ cens_find_acs <- function(..., show=4) {
if (show > 0) {
cli_h1("Top {show} matching table{?s}")
lapply(tables_acs[best], print, all=FALSE)
invisible(best)
} else {
best
}
invisible(best)
}


Expand Down
197 changes: 197 additions & 0 deletions R/cens_geo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#' Construct a Geography Specification for Census Data
#'
#' Currently used mostly internally.
#' Builds a Census API-formatted specification of which geographies to download
#' data for. State and county names (or postal abbreviations) are partially
#' matched to existing tables, for ease of use. Other geographies should be
#' specified with Census GEOIDs. The `usgazeteer` package, available with
#' `remotes::install_github("bhaskarvk/usgazetteer")`, may be useful in finding
#' GEOIDs for other geographies. Consult the "geography" sections of each API
#' at <https://www.census.gov/data/developers/data-sets.html> for information on
#' which geographic specifiers may be provided in combination with others.
#'
#' Supported geography arguments:
#'
#' ```{r geos, echo=FALSE, results="asis"}
#' code_names = names(geo)
#' cens_names = unlist(geo)
#' paren_labels = dplyr::if_else(
#' cens_names == code_names, "",
#' stringr::str_c(" (", str_to_title(cens_names), ")")
#' )
#' cat(stringr::str_glue("* `{code_names}`{paren_labels}"), sep="\n")
#' ```
#'
#' @param geo The geographic level to return. One of the machine-readable or
#' human-readable names listed in the "Details" section. Will return all
#' matching geographies of this level, as filtered by the further arguments to
#' `...`. For example, setting `geo="tract"` is equivalent to setting
#' `tract="all"`.
#' @param ... Geographies to return, as supported by the Census API. Order
#' matters here---the first argument will be the geographic level to return
#' (i.e., it corresponds to the `geo` argument) and additional arguments will
#' filter the results. Use `"all"`, `"*"`, `NA`, or `TRUE` to return all units
#' of a particular geography. See the examples for details.
#' @param check If `TRUE`, validate the provided geographies against the
#' available geographies from the relevant Census API. Requires the `api` and
#' `year` arguments to be specified.
#' @param api A Census API programmatic name such as `"acs/acs5"`.
#' @param year The year for the data
#'
#' @returns A list with two elements, `region` and `regionin`, which together
#' specify a valid Census API geography argument.
#'
#' @examples
#' cens_geo(state="WA")
#' cens_geo("county", state="WA") # equivalent to `cens_geo(county="all", state="WA")`
#' cens_geo(county="King", state="Wash")
#' cens_geo(zcta="02138", check=FALSE)
#' cens_geo(zcta=NA, state="WA", check=FALSE)
#' cens_geo("zcta", state="WA", check=FALSE)
#' cens_geo(cd="09", state="WA", check=FALSE)
#' cens_geo("county_part", state="WA", cd="09", check=FALSE)
#'
#' @export
cens_geo <- function(geo=NULL, ..., check=TRUE, api="acs/acs5", year=2019) {
geo_in = list(...)
geo_in = lapply(geo_in, regularize_wild)
names(geo_in) = vapply(names(geo_in), match_geo, character(1))

# parse state and county
has_state = FALSE
if ("state" %in% names(geo_in)) {
has_state = TRUE
geo_in$state = fips_state(geo_in$state)
}
idx_cty = which(names(geo_in) == "county" | names(geo_in) == "county (or part)")
if (length(idx_cty) > 0 && !has_state)
cli_abort("{.arg state} must be provided if {.arg county} is specified.")
for (i in idx_cty) {
geo_in[[i]] = fips_county(geo_in[[i]], geo_in$state)
}

# find the 'for' variable
if (is.null(geo)) {
if (length(geo_in) == 0) cli_abort("Must provide at least one geographic level.")
geo_for = match_geo(names(geo_in)[[1]])
geo_for_val = geo_in[[1]]
geo_in = geo_in[-1] # remove first from list
} else {
if (!is.character(geo) && length(geo) == 1)
cli_abort("{.arg geo} should be a string.")
geo_for = match_geo(geo)
geo_for_val = "*"
}

if (isTRUE(check)) {
# match geographies to available
d_geo = as_tibble(censusapi::listCensusMetadata(name=api, vintage=year, type="geographies"))
idx_for = which(geo_for == d_geo$name)
if (length(idx_for) == 0) cli_abort("Geography level {.val {geo_for}} not found.")
matched = 0L

req_test = paste0(names(geo_in), collapse="|")
# look for perfect 'requires' match
req = vapply(d_geo$requires, paste0, character(1), collapse="|")
idx_in = which(req_test == req)
idx_both = intersect(idx_for, idx_in)
matched = length(idx_both)

# look for match without wildcards
if (matched == 0L) {
req_soft = character(nrow(d_geo))
for (i in seq_along(req_soft)) {
req_soft[i] = paste0(setdiff(d_geo$requires[[i]], d_geo$wildcard[[i]]), collapse="|")
}
idx_in = which(req_test == req_soft)
idx_both = intersect(idx_for, idx_in)
matched = length(idx_both)
if (matched == 1L) {
match_geo = d_geo[idx_both, ]
for (x in d_geo$wildcard[[idx_both]]) {
geo_in[[x]] = "*"
}
}
}

hyper = style_hyperlink("<https://www.census.gov/data/developers/data-sets.html>",
"https://www.census.gov/data/developers/data-sets.html")
if (matched == 0L) {
cli_abort(c("Geography combination {.val {c(geo_for, names(geo_in))}} not found.",
">"="Check {hyper} and make sure you are using a valid combination
for your survey and file."))
}
if (matched > 1L) {
cli_abort(c("Multiple geography combinations found for {.val {c(geo_for, names(geo_in))}}.",
">"="Check {hyper} and make sure you are using a valid combination
for your survey and file."))
}
}

if (length(geo_in) > 0) {
list(region = str_c(geo_for, ":", geo_for_val),
regionin = paste0(str_c(names(geo_in), ":", geo_in), collapse="+"))
} else {
list(region = str_c(geo_for, ":", geo_for_val),
regionin = NULL)
}
}

# Internal: match a geography level
match_geo <- function(g) {
geo_levels = unlist(geo)
match = dplyr::coalesce(pmatch(g, names(geo)),
pmatch(g, geo_levels))
if (is.na(match)) {
cli_abort("Geography level {.val {g}} not found.", .envir=parent.frame())
} else {
geo_levels[match]
}
}

regularize_wild <- function(x) {
if_else(x == "all" | x == "*" | isTRUE(x) | is.na(x), "*", as.character(x))
}

# Internal: match state to FIPS
fips_state <- function(state) {
if (is.numeric(state) && state > 0 && state < 1e2) { # already a fips code
state = str_pad(as.character(as.integer(state)), 2, pad="0")
} else {
state = str_to_lower(state)
}

match = dplyr::coalesce(pmatch(state, states$fips),
pmatch(state, str_to_lower(states$abbr)),
pmatch(state, str_to_lower(states$name)))

if (is.na(match)) {
cli_abort("No match found for {.val {state}}.", .envir=parent.frame())
} else {
states$fips[match]
}
}

# Internal: match county to FIPS
fips_county <- function(county, state_fips) {
if (is.numeric(county) && county > 0 && county < 1e5) { # already a fips code
county = str_pad(as.character(as.integer(county)), 3, pad="0")
} else {
county = str_to_lower(county)
}

idx_st = which(counties$state == state_fips)
names = str_to_lower(counties$name[idx_st])
fips = counties$county[idx_st]

match = dplyr::coalesce(pmatch(county, fips),
pmatch(county, names))

if (is.na(match)) {
# cli_abort("No match found for {.val {county}}.", .envir=parent.frame())
cli_abort("No match found for {.val {county}}.")
} else {
fips[match]
}
}

Loading

0 comments on commit ce97c80

Please sign in to comment.