-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcheck.R
137 lines (119 loc) · 4.92 KB
/
check.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#' Check for Problems with the Alt Text for Images on a Web Page
#'
#' @description Infer whether an image's alt tag is missing or could be improved.
#'
#' @param attributes_df A data.frame/tibble with image attributes, produced by
#' \code{\link{alt_get}}.
#' @param min_char A numeric value. Alt text shorter than this is flagged.
#' @param max_char A numeric value. Alt text longer than this is flagged.
#' @param file_ext A character string. A regular expression of image file
#' extensions.
#' @param redundant_pattern A character string. A regular expression of
#' 'redundant' phrases in alt text.
#'
#' @return A tibble object with classes \code{tbl_df}, \code{tbl} and
#' \code{data.frame}. In addition to columns provided by
#' \code{\link{alt_get}}, also returns:
#' \itemize{
#' \item \code{alt_exists} "Exists", "Missing" or intentionally "Empty".
#' \item \code{nchar_count} Integer. Number of characters.
#' \item \code{nchar_assess} "Short", "Long" or "OK", depending on inputs
#' to \code{min_char} and \code{max_char}.
#' \item \code{file_ext} Logical. Does it look like the alt text might
#' just be a filename (e.g. ends with ".jpg")?
#' \item \code{self_evident} Logical. Is a redundant phrase used in the
#' alt text (e.g. "a picture of")?
#' \item \code{terminal_punct} Logical. Does the alt text end with
#' terminal punctuation, like a period, to allow a screen reader to
#' parse it as a sentence?
#' \item \code{spellcheck} Character. Words to check for misspelling.
#' These could be misread by a screenreader.
#' \item \code{not_basic} Character list column. Words that match to
#' the 850 words of Charles Kay Ogden's Basic English.
#' }
#'
#' @importFrom rlang .data
#'
#' @export
#'
#' @examples \dontrun{
#' url <- "https://alphagov.github.io/accessibility-tool-audit/test-cases.html#images"
#' attr_df <- alt_get(url)
#' alt_check(attr_df)
#' }
alt_check <- function(
attributes_df, max_char = 125, min_char = 20,
file_ext = ".jpg$|.jpeg$|.png$|.svg$|.gif$",
redundant_pattern = "image|picture|photo|graph|plot|diagram") {
# Stop if not a data frame
if (!all(class(attributes_df) %in% c("tbl_df", "tbl", "data.frame"))) {
stop("Input to the attributes_df argument must be of class tibble or data.frame.")
} else if (!is.numeric(min_char)|!is.numeric(max_char)) {
stop("Input to the min_char and max_char arguments must be a numeric value.")
} else if (!is.character(redundant_pattern)) {
stop("Input to the redundant_pattern argument must be a character string.")
}
# Check for alt issues
alt_df <- dplyr::mutate(
attributes_df,
# Check for empty/missing alt text
alt_exists = dplyr::case_when(
is.na(.data$alt) ~ "Missing",
.data$alt == "" ~ "Empty",
stringr::str_detect(.data$alt, "^[[:space:]]+$") ~ "Empty",
TRUE ~ "Exists"
),
# Count characters in alt text
nchar_count = ifelse(
.data$alt_exists == "Exists",
nchar(.data$alt),
NA_integer_
),
# Assess length on scale
nchar_assess = dplyr::case_when(
.data$alt_exists %in% c("Missing", "Empty") ~ NA_character_,
nchar(.data$alt) < min_char ~ "Short",
nchar(.data$alt) > max_char ~ "Long",
TRUE ~ "OK"
),
# Check for filename in alt (by virtue of an image file extension)
file_ext = dplyr::case_when(
.data$alt_exists %in% c("Missing", "Empty") | is.na(.data$alt_exists) ~ NA,
stringr::str_detect(tolower(.data$alt), tolower(file_ext)) ~ TRUE,
TRUE ~ FALSE
),
# Check for self-evident phrases
self_evident = dplyr::case_when(
.data$alt_exists %in% c("Missing", "Empty") | is.na(.data$alt_exists) ~ NA,
stringr::str_detect(tolower(.data$alt), tolower(redundant_pattern)) ~ TRUE,
TRUE ~ FALSE
),
# Check for terminal punctuation
terminal_punct = dplyr::case_when(
.data$alt_exists %in% c("Missing", "Empty") | is.na(.data$alt_exists) ~ NA,
stringr::str_detect(.data$alt, "\\.$|\\!$|\\?$") ~ TRUE,
TRUE ~ FALSE
),
# Highlight possible incorrect spellings
spellcheck = hunspell::hunspell(.data$alt),
# tokens
alt_tokens = stringr::str_split(tolower(.data$alt), stringr::boundary("word")),
# detect
detect_basic = purrr::map(
.data$alt_tokens,
stringr::str_detect,
#stringr::str_c(cko_basic_words, collapse = "|")
paste0(paste0("^", paste(tolower(cko_basic_words), collapse = "$|^"), "$"))
),
# match
not_basic = purrr::map2(
.x = .data$alt_tokens,
.y = .data$detect_basic,
.f = ~.x[.y == FALSE]
)
)
alt_df <- tibble::as_tibble(alt_df) # ensure tibble output
alt_df <- dplyr::select(alt_df, -.data$alt_tokens, -.data$detect_basic)
# Return
return(alt_df)
}