From ac74f985a5522923c547377b96e0ffd1fdb6cd1f Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 10 Mar 2023 07:19:53 -0600 Subject: [PATCH] Adjust multi-column plots (#1354) Co-authored-by: mine-cetinkaya-rundel --- EDA.qmd | 3 +- base-R.qmd | 5 ++ communication.qmd | 116 ++++++++++++++++++++++--------------- data-visualize.qmd | 39 ++++++------- factors.qmd | 3 +- layers.qmd | 138 ++++++++++++++++++++++++--------------------- missing-values.qmd | 1 - 7 files changed, 172 insertions(+), 133 deletions(-) diff --git a/EDA.qmd b/EDA.qmd index 0945124cc..6635bbed8 100644 --- a/EDA.qmd +++ b/EDA.qmd @@ -382,7 +382,6 @@ But maybe that's because frequency polygons are a little hard to interpret - the A visually simpler plot for exploring this relationship is using side-by-side boxplots. ```{r} -#| fig-height: 3 #| fig-alt: > #| Side-by-side boxplots of prices of diamonds by cut. The distribution of #| prices is right skewed for each cut (Fair, Good, Very Good, Premium, and @@ -417,7 +416,6 @@ ggplot(mpg, aes(x = class, y = hwy)) + To make the trend easier to see, we can reorder `class` based on the median value of `hwy`: ```{r} -#| fig-height: 3 #| fig-alt: > #| Side-by-side boxplots of highway mileages of cars by class. Classes are #| on the x-axis and ordered by increasing median highway mileage (pickup, @@ -567,6 +565,7 @@ You will need to install the hexbin package to use `geom_hex()`. ```{r} #| layout-ncol: 2 +#| fig-width: 3 #| fig-alt: > #| Plot 1: A binned density plot of price vs. carat. Plot 2: A hexagonal bin #| plot of price vs. carat. 
Both plots show that the highest density of diff --git a/base-R.qmd b/base-R.qmd index cd625a6c3..8331b1f0a 100644 --- a/base-R.qmd +++ b/base-R.qmd @@ -518,9 +518,14 @@ Here's a quick example from the diamonds dataset: ```{r} #| dev: png +#| fig-width: 3 +#| fig-asp: 1 #| layout-ncol: 2 +# Left hist(diamonds$carat) + +# Right plot(diamonds$carat, diamonds$price) ``` diff --git a/communication.qmd b/communication.qmd index 5e301e865..17339296f 100644 --- a/communication.qmd +++ b/communication.qmd @@ -383,22 +383,23 @@ Note that `breaks` is in the original scale of the data. ```{r} #| layout-ncol: 2 +#| fig-width: 4 #| fig-alt: > #| Two side-by-side box plots of price versus cut of diamonds. The outliers -#| are transparent. On both plots the y-axis labels are formatted as dollars. -#| The y-axis labels on the plot start at $0 and go to $15,000, increasing -#| by $5,000. The y-axis labels on the right plot start at $1K and go to +#| are transparent. On both plots the x-axis labels are formatted as dollars. +#| The x-axis labels on the plot start at $0 and go to $15,000, increasing +#| by $5,000. The x-axis labels on the right plot start at $1K and go to #| $19K, increasing by $6K. # Left -ggplot(diamonds, aes(x = cut, y = price)) + +ggplot(diamonds, aes(x = price, y = cut)) + geom_boxplot(alpha = 0.05) + - scale_y_continuous(labels = scales::label_dollar()) + scale_x_continuous(labels = scales::label_dollar()) # Right -ggplot(diamonds, aes(x = cut, y = price)) + +ggplot(diamonds, aes(x = price, y = cut)) + geom_boxplot(alpha = 0.05) + - scale_y_continuous( + scale_x_continuous( labels = scales::label_dollar(scale = 1/1000, suffix = "K"), breaks = seq(1000, 19000, by = 6000) ) @@ -454,19 +455,22 @@ The theme setting `legend.position` controls where the legend is drawn: ```{r} #| layout-ncol: 2 #| fig-width: 4 -#| fig-height: 2 #| fig-alt: > #| Four scatterplots of highway fuel efficiency versus engine size of cars #| where points are colored based on class of car. 
Clockwise, the legend -#| is placed on the left, top, bottom, and right of the plot. +#| is placed on the right, left, top, and bottom of the plot. base <- ggplot(mpg, aes(x = displ, y = hwy)) + geom_point(aes(color = class)) -base + theme(legend.position = "left") -base + theme(legend.position = "top") -base + theme(legend.position = "bottom") base + theme(legend.position = "right") # the default +base + theme(legend.position = "left") +base + + theme(legend.position = "top") + + guides(col = guide_legend(nrow = 3)) +base + + theme(legend.position = "bottom") + + guides(col = guide_legend(nrow = 3)) ``` If your plot is short and wide, place the legend at the legend at the top or bottom, and if it's tall and narrow, place the legend at the left or right. @@ -505,8 +509,7 @@ For example, it's easier to see the precise relationship between `carat` and `pr ```{r} #| fig-align: default #| layout-ncol: 2 -#| fig-width: 4 -#| fig-height: 3 +#| fig-width: 3 #| fig-alt: > #| Two plots of price versus carat of diamonds. Data binned and the color of #| the rectangles representing each bin based on the number of points that @@ -548,8 +551,7 @@ The two plots below look similar, but there is enough difference in the shades o ```{r} #| fig-align: default #| layout-ncol: 2 -#| fig-width: 4 -#| fig-height: 3 +#| fig-width: 3 #| fig-alt: > #| Two scatterplots of highway mileage versus engine size where points are #| colored by drive type. The plot on the left uses the default @@ -630,8 +632,8 @@ These scales are available as continuous (`c`), discrete (`d`), and binned (`b`) ```{r} #| fig-align: default #| layout-ncol: 2 -#| fig-width: 4 -#| fig-asp: 1 +#| fig-width: 3 +#| fig-asp: 0.75 #| fig-alt: > #| Three hex plots where the color of the hexes show the number of observations #| that fall into that hex bin. 
The first plot uses the default, continuous @@ -646,19 +648,19 @@ df <- tibble( ggplot(df, aes(x, y)) + geom_hex() + coord_fixed() + - labs(title = "Default, continuous") + labs(title = "Default, continuous", x = NULL, y = NULL) ggplot(df, aes(x, y)) + geom_hex() + coord_fixed() + scale_fill_viridis_c() + - labs(title = "Viridis, continuous") + labs(title = "Viridis, continuous", x = NULL, y = NULL) ggplot(df, aes(x, y)) + geom_hex() + coord_fixed() + scale_fill_viridis_b() + - labs(title = "Viridis, binned") + labs(title = "Viridis, binned", x = NULL, y = NULL) ``` Note that all color scales come in two varieties: `scale_color_*()` and `scale_fill_*()` for the `color` and `fill` aesthetics respectively (the color scales are available in both UK and US spellings). @@ -671,38 +673,59 @@ There are three ways to control the plot limits: 2. Setting the limits in each scale. 3. Setting `xlim` and `ylim` in `coord_cartesian()`. -To zoom in on a region of the plot, it's generally best to use `coord_cartesian()`. -Compare the following two plots: +We'll demonstrate these options in a series of plots. +The plot on the left shows the relationship between engine size and fuel efficiency, colored by type of drive train. +The plot on the right shows the same variables, but subsets the data that are plotted. +Subsetting the data has affected the x and y scales as well as the smooth curve. ```{r} #| layout-ncol: 2 #| fig-width: 4 -#| fig-height: 3 #| message: false # Left ggplot(mpg, aes(x = displ, y = hwy)) + - geom_point(aes(color = class)) + - geom_smooth() + - coord_cartesian(xlim = c(5, 6), ylim = c(10, 30)) + geom_point(aes(color = drv)) + + geom_smooth() # Right mpg |> - filter(displ >= 5, displ <= 6, hwy >= 10, hwy <= 30) |> + filter(displ >= 5 & displ <= 6 & hwy >= 10 & hwy <= 25) |> ggplot(aes(x = displ, y = hwy)) + - geom_point(aes(color = class)) + + geom_point(aes(color = drv)) + geom_smooth() ``` -You can also set the `limits` on individual scales. 
-Reducing the limits is basically equivalent to subsetting the data. -It is generally more useful if you want to *expand* the limits, for example, to match scales across different plots. +Let's compare these to the two plots below where the plot on the left sets the `limits` on individual scales and the plot on the right sets them in `coord_cartesian()`. +We can see that reducing the limits is equivalent to subsetting the data. +Therefore, to zoom in on a region of the plot, it's generally best to use `coord_cartesian()`. + +```{r} +#| layout-ncol: 2 +#| fig-width: 4 +#| message: false +#| warning: false + +# Left +ggplot(mpg, aes(x = displ, y = hwy)) + + geom_point(aes(color = drv)) + + geom_smooth() + + scale_x_continuous(limits = c(5, 6)) + + scale_y_continuous(limits = c(10, 25)) + +# Right +ggplot(mpg, aes(x = displ, y = hwy)) + + geom_point(aes(color = drv)) + + geom_smooth() + + coord_cartesian(xlim = c(5, 6), ylim = c(10, 25)) +``` + +On the other hand, setting the `limits` on individual scales is generally more useful if you want to *expand* the limits, e.g., to match scales across different plots. For example, if we extract two classes of cars and plot them separately, it's difficult to compare the plots because all three scales (the x-axis, the y-axis, and the color aesthetic) have different ranges. ```{r} #| layout-ncol: 2 #| fig-width: 4 -#| fig-height: 3 suv <- mpg |> filter(class == "suv") compact <- mpg |> filter(class == "compact") @@ -721,7 +744,6 @@ One way to overcome this problem is to share scales across multiple plots, train ```{r} #| layout-ncol: 2 #| fig-width: 4 -#| fig-height: 3 x_scale <- scale_x_continuous(limits = range(mpg$displ)) y_scale <- scale_y_continuous(limits = range(mpg$hwy)) @@ -773,14 +795,11 @@ In this particular case, you could have simply used faceting, but this technique d. Adding informative plot labels. e. Placing breaks every 4 years (this is trickier than it seems!). -4. 
Use `override.aes` to make the legend on the following plot easier to see. +4. First, create the following plot. + Then, modify the code using `override.aes` to make the legend easier to see. ```{r} - #| fig-format: "png" - #| out-width: "50%" - #| fig-alt: > - #| Scatterplot of price versus carat of diamonds. The points are colored - #| by cut of the diamonds and they're very transparent. + #| fig-show: hide ggplot(diamonds, aes(x = carat, y = price)) + geom_point(aes(color = cut), alpha = 1/20) @@ -845,13 +864,13 @@ A few other helpful `theme()` components are used to change the placement for fo #| economy' with the caption pointing to the source of the data, fueleconomy.gov. #| The caption and title are left justified, the legend is inside of the plot #| with a black border. -#| + ggplot(mpg, aes(x = displ, y = hwy, color = drv)) + geom_point() + labs( title = "Larger engine sizes tend to have lower fuel economy", caption = "Source: https://fueleconomy.gov." - ) + + ) + theme( legend.position = c(0.6, 0.7), legend.direction = "horizontal", @@ -860,7 +879,7 @@ ggplot(mpg, aes(x = displ, y = hwy, color = drv)) + plot.title.position = "plot", plot.caption.position = "plot", plot.caption = element_text(hjust = 0) - ) + ) ``` For an overview of all `theme()` components, see help with `?theme`. @@ -883,6 +902,8 @@ Note that you first need to create the plots and save them as objects (in the fo Then, you place them next to each other with `+`. ```{r} +#| fig-width: 6 +#| fig-asp: 0.5 #| fig-alt: > #| Two plots (a scatterplot of highway mileage versus engine size and a #| side-by-side boxplots of highway mileage versus drive train) placed next @@ -904,6 +925,8 @@ You can also create complex plot layouts with patchwork. In the following, `|` places the `p1` and `p3` next to each other and `/` moves `p2` to the next line. 
```{r} +#| fig-width: 6 +#| fig-asp: 0.8 #| fig-alt: > #| Three plots laid out such that first and third plot are next to each other #| and the second plot stretched beneath them. The first plot is a @@ -928,7 +951,8 @@ Finally, we have also customized the heights of the various components of our pa Patchwork divides up the area you have allotted for your plot using this scale and places the components accordingly. ```{r} -#| fig-width: 10 +#| fig-width: 8 +#| fig-asp: 1 #| fig-alt: > #| Five plots laid out such that first two plots are next to each other. Plots #| three and four are underneath them. And the fifth plot stretches under them. @@ -980,7 +1004,7 @@ If you'd like to learn more about combining and layout out multiple plots with p Can you explain why this happens? ```{r} - #| results: hide + #| fig-show: hide p1 <- ggplot(mpg, aes(x = displ, y = hwy)) + geom_point() + @@ -998,6 +1022,8 @@ If you'd like to learn more about combining and layout out multiple plots with p 2. Using the three plots from the previous exercise, recreate the following patchwork. ```{r} + #| fig-width: 7 + #| fig-asp: 0.8 #| echo: false #| fig-alt: > #| Three plots: Plot 1 is a scatterplot of highway mileage versus engine size. diff --git a/data-visualize.qmd b/data-visualize.qmd index f556364cb..f1f2a2fa5 100644 --- a/data-visualize.qmd +++ b/data-visualize.qmd @@ -537,7 +537,6 @@ One commonly used visualization for distributions of continuous variables is a h ```{r} #| warning: false -#| layout-ncol: 2 #| fig-alt: > #| A histogram of body masses of penguins. The distribution is unimodal #| and right skewed, ranging between approximately 2500 to 6500 grams. @@ -557,18 +556,16 @@ A binwidth of 200 provides a sensible balance. ```{r} #| warning: false -#| layout-ncol: 3 +#| layout-ncol: 2 +#| fig-width: 3 #| fig-alt: > -#| Three histograms of body masses of penguins, one with binwidth of 20 -#| (right), one with binwidth of 200 (center), and one with binwidth of -#| 2000 (left). 
The histogram with binwidth of 20 shows lots of ups and -#| downs in the heights of the bins, creating a jagged outline. The histogram -#| with binwidth of 2000 shows only three bins. +#| Two histograms of body masses of penguins, one with binwidth of 20 +#| (left) and one with binwidth of 2000 (right). The histogram with binwidth +#| of 20 shows lots of ups and downs in the heights of the bins, creating a +#| jagged outline. The histogram with binwidth of 2000 shows only three bins. ggplot(penguins, aes(x = body_mass_g)) + geom_histogram(binwidth = 20) -ggplot(penguins, aes(x = body_mass_g)) + - geom_histogram(binwidth = 200) ggplot(penguins, aes(x = body_mass_g)) + geom_histogram(binwidth = 2000) ``` @@ -702,25 +699,29 @@ Note the terminology we have used here: ### Two categorical variables We can use stacked bar plots to visualize the relationship between two categorical variables. +For example, the following two stacked bar plots both display the relationship between `island` and `species`, or specifically, visualizing the distribution of `species` within each island. -The two stacked bar plots below both display the relationship between `island` and `species`, or specifically, visualizing the distribution of `species` within each island. -The plot on the left shows the frequencies of each species of penguins on each island and the plot on the right shows the relative frequencies (proportions) of each species within each island (despite the incorrectly labeled y-axis that says "count"). +The first plot shows the frequencies of each species of penguins on each island and the second plot shows the relative frequencies (proportions) of each species within each island (despite the incorrectly labeled y-axis that says "count"). The plot of frequencies show that there are equal numbers of Adelies on each island. But we don't have a good sense of the percentage balance within each island. 
In the proportions plot, we've lost our notion of total penguins, but we've gained the advantage of "breakdown by island". -The relative frequency plot, created by setting `position = "fill"` in the geom, is more useful for comparing species distributions across islands since it's not affected by the unequal numbers of penguins across the islands. -Based on the plot on the left, we can see that Gentoo penguins all live on Biscoe island and make up roughly 75% of the penguins on that island, Chinstrap all live on Dream island and make up roughly 50% of the penguins on that island, and Adelie live on all three islands and make up all of the penguins on Torgersen. - ```{r} -#| layout-ncol: 2 #| fig-alt: > -#| Bar plots of penguin species by island (Biscoe, Dream, and Torgersen). -#| On the right, frequencies of species are shown. On the left, relative -#| frequencies of species are shown. - +#| Bar plots of penguin species by island (Biscoe, Dream, and Torgersen) ggplot(penguins, aes(x = island, fill = species)) + geom_bar() +``` + +The second plot is a relative frequency plot, created by setting `position = "fill"` in the geom, and is more useful for comparing species distributions across islands since it's not affected by the unequal numbers of penguins across the islands. +Using this plot we can see that Gentoo penguins all live on Biscoe island and make up roughly 75% of the penguins on that island, Chinstrap all live on Dream island and make up roughly 50% of the penguins on that island, and Adelie live on all three islands and make up all of the penguins on Torgersen. 
+ +```{r} +#| fig-alt: > +#| Bar plots of penguin species by island (Biscoe, Dream, and Torgersen) +#| the bars are scaled to the same height, making it a relative frequencies +#| plot + ggplot(penguins, aes(x = island, fill = species)) + geom_bar(position = "fill") ``` diff --git a/factors.qmd b/factors.qmd index 3ed4620a3..b54db76e1 100644 --- a/factors.qmd +++ b/factors.qmd @@ -253,8 +253,7 @@ This makes the plot easier to read because the colors of the line at the far rig ```{r} #| layout-ncol: 2 -#| fig-width: 4 -#| fig-height: 2 +#| fig-width: 3 #| fig-alt: > #| A line plot with age on the x-axis and proportion on the y-axis. #| There is one line for each category of marital status: no answer, diff --git a/layers.qmd b/layers.qmd index 4dae5adbb..5b86cd590 100644 --- a/layers.qmd +++ b/layers.qmd @@ -62,7 +62,6 @@ We can do this with a scatterplot where the numerical variables are mapped to th ```{r} #| layout-ncol: 2 #| fig-width: 4 -#| fig-height: 2 #| fig-alt: > #| Two scatterplots next to each other, both visualizing highway fuel #| efficiency versus engine size of cars and showing a negative @@ -97,7 +96,6 @@ Similarly, we can map `class` to `size` or `alpha` aesthetics as well, which con ```{r} #| layout-ncol: 2 #| fig-width: 4 -#| fig-height: 2 #| fig-alt: > #| Two scatterplots next to each other, both visualizing highway fuel #| efficiency versus engine size of cars and showing a negative @@ -226,8 +224,7 @@ How are these two plots similar? #| echo: false #| message: false #| layout-ncol: 2 -#| fig-width: 4 -#| fig-height: 2 +#| fig-width: 3 #| fig-alt: > #| There are two plots. The plot on the left is a scatterplot of highway #| fuel efficiency versus engine size of cars and the plot on the right @@ -248,7 +245,7 @@ Each plot uses a different geometric object, geom, to represent the data. The plot on the left uses the point geom, and the plot on the right uses the smooth geom, a smooth line fitted to the data. 
To change the geom in your plot, change the geom function that you add to `ggplot()`. -For instance, to make the plots above, you can use this code: +For instance, to make the plots above, you can use the following code: ```{r} #| fig-show: hide @@ -271,6 +268,8 @@ On the other hand, you *could* set the linetype of a line. ```{r} #| message: false +#| layout-ncol: 2 +#| fig-width: 3 #| fig-alt: > #| Two plots of highway fuel efficiency versus engine size of cars. #| The data are represented with smooth curves. On the left, three @@ -279,8 +278,11 @@ On the other hand, you *could* set the linetype of a line. #| dashed) for each type of drive train. In both plots, confidence #| intervals around the smooth curves are also displayed. +# Left ggplot(mpg, aes(x = displ, y = hwy, shape = drv)) + geom_smooth() + +# Right ggplot(mpg, aes(x = displ, y = hwy, linetype = drv)) + geom_smooth() ``` @@ -315,7 +317,7 @@ It is convenient to rely on this feature because the `group` aesthetic by itself ```{r} #| layout-ncol: 3 #| fig-width: 3 -#| fig-height: 3 +#| fig-asp: 1 #| message: false #| fig-alt: > #| Three plots, each with highway fuel efficiency on the y-axis and engine @@ -327,12 +329,15 @@ It is convenient to rely on this feature because the `group` aesthetic by itself #| color maps to which level. Confidence intervals around the smooth curves #| are also displayed. +# Left ggplot(mpg, aes(x = displ, y = hwy)) + geom_smooth() - + +# Middle ggplot(mpg, aes(x = displ, y = hwy)) + geom_smooth(aes(group = drv)) - + +# Right ggplot(mpg, aes(x = displ, y = hwy)) + geom_smooth(aes(color = drv), show.legend = FALSE) ``` @@ -384,12 +389,13 @@ You can completely transform the look of your plot by changing its geom, and dif For example, the histogram and density plot below reveal that the distribution of highway mileage is bimodal and right skewed while the boxplot reveals two potential outliers. 
```{r} -#| fig-asp: 0.33 +#| layout-ncol: 3 +#| fig-width: 3 #| fig-alt: > #| Three plots: histogram, density plot, and box plot of highway #| mileage. -# Top +# Left ggplot(mpg, aes(x = hwy)) + geom_histogram(binwidth = 2) @@ -397,7 +403,7 @@ ggplot(mpg, aes(x = hwy)) + ggplot(mpg, aes(x = hwy)) + geom_density() -# Bottom +# Right ggplot(mpg, aes(x = hwy)) + geom_boxplot() ``` @@ -436,6 +442,7 @@ To learn more about any single geom, use the help (e.g. `?geom_smooth`). ```{r} #| fig-show: hide + #| message: false ggplot(mpg, aes(x = displ, y = hwy)) + geom_smooth(aes(color = drv), show.legend = FALSE) @@ -454,8 +461,7 @@ To learn more about any single geom, use the help (e.g. `?geom_smooth`). #| echo: false #| message: false #| layout-ncol: 2 - #| fig-width: 4 - #| fig-height: 2 + #| fig-width: 3 #| fig-alt: > #| There are six scatterplots in this figure, arranged in a 3x2 grid. #| In all plots highway fuel efficiency of cars are on the y-axis and @@ -590,11 +596,12 @@ ggplot(mpg, aes(x = displ, y = hwy)) + What other options control the layout of the individual panels? Why doesn't `facet_grid()` have `nrow` and `ncol` arguments? -6. Which of the following two plots makes it easier to compare engine size (`displ`) across cars with different drive trains? +6. Which of the following plots makes it easier to compare engine size (`displ`) across cars with different drive trains? What does this say about when to place a faceting variable across rows or columns? ```{r} #| fig-show: hide + #| message: false ggplot(mpg, aes(x = displ)) + geom_histogram() + @@ -762,35 +769,37 @@ Each stat is a function, so you can get help in the usual way, e.g. `?stat_bin`. ## Position adjustments There's one more piece of magic associated with bar charts. 
-You can color a bar chart using either the `color` aesthetic, or, more usefully, `fill`: +You can color a bar chart using either the `color` aesthetic, or, more usefully, the `fill` aesthetic: ```{r} #| layout-ncol: 2 -#| fig-width: 5.5 -#| fig-height: 2 +#| fig-width: 4 #| fig-alt: > -#| Two bar charts of cut of diamonds. In the first plot, the bars have colored -#| borders. In the second plot, they're filled with colors. Heights of the -#| bars correspond to the number of diamonds in each cut category. +#| Two bar charts of drive types of cars. In the first plot, the bars have +#| colored borders. In the second plot, they're filled with colors. Heights +#| of the bars correspond to the number of cars in each drive type category. -ggplot(diamonds, aes(x = cut, color = cut)) + +# Left +ggplot(mpg, aes(x = drv, color = drv)) + geom_bar() -ggplot(diamonds, aes(x = cut, fill = cut)) + + +# Right +ggplot(mpg, aes(x = drv, fill = drv)) + geom_bar() ``` -Note what happens if you map the fill aesthetic to another variable, like `clarity`: the bars are automatically stacked. -Each colored rectangle represents a combination of `cut` and `clarity`. +Note what happens if you map the fill aesthetic to another variable, like `class`: the bars are automatically stacked. +Each colored rectangle represents a combination of `drv` and `class`. ```{r} #| fig-alt: > -#| Segmented bar chart of cut of diamonds, where each bar is filled with -#| colors for the levels of clarity. Heights of the bars correspond to the -#| number of diamonds in each cut category, and heights of the colored -#| segments are proportional to the number of diamonds with a given clarity -#| level within a given cut level. +#| Segmented bar chart of drive types of cars, where each bar is filled with +#| colors for the classes of cars. 
Heights of the bars correspond to the +#| number of cars in each drive category, and heights of the colored +#| segments are proportional to the number of cars with a given class +#| level within a given drive type level. -ggplot(diamonds, aes(x = cut, fill = clarity)) + +ggplot(mpg, aes(x = drv, fill = class)) + geom_bar() ``` @@ -802,24 +811,23 @@ If you don't want a stacked bar chart, you can use one of three other options: ` To see that overlapping we either need to make the bars slightly transparent by setting `alpha` to a small value, or completely transparent by setting `fill = NA`. ```{r} - #| layout-ncol: 3 + #| layout-ncol: 2 #| fig-width: 4 - #| fig-height: 2 #| fig-alt: > - #| Three segmented bar charts of cut of diamonds, where each bar is filled - #| with colors for the levels of clarity. Heights of the bars correspond - #| to the number of diamonds in each cut category, and heights of the - #| colored segments are proportional to the number of diamonds with a - #| given clarity level within a given cut level. However the segments - #| overlap. In the first plot the segments are filled with opaque colors, - #| in the second with transparent colors, and in the third only - #| outlined with colors. - - ggplot(diamonds, aes(x = cut, fill = clarity)) + - geom_bar(position = "identity") - ggplot(diamonds, aes(x = cut, fill = clarity)) + + #| Segmented bar chart of drive types of cars, where each bar is filled with + #| colors for the classes of cars. Heights of the bars correspond to the + #| number of cars in each drive category, and heights of the colored + #| segments are proportional to the number of cars with a given class + #| level within a given drive type level. However the segments overlap. In + #| the first plot the bars are filled with transparent colors + #| and in the second plot they are only outlined with color. 
+ + # Left + ggplot(mpg, aes(x = drv, fill = class)) + geom_bar(alpha = 1/5, position = "identity") - ggplot(diamonds, aes(x = cut, color = clarity)) + + + # Right + ggplot(mpg, aes(x = drv, color = class)) + geom_bar(fill = NA, position = "identity") ``` @@ -833,22 +841,25 @@ If you don't want a stacked bar chart, you can use one of three other options: ` ```{r} #| layout-ncol: 2 - #| fig-width: 5.5 - #| fig-height: 2 + #| fig-width: 4 #| fig-alt: > - #| On the left, segmented bar chart of cut of diamonds, where each bar is filled with - #| colors for the levels of clarity. Height of each bar is 1 and heights - #| of the colored segments are proportional to the proportion of diamonds - #| with a given clarity level within a given cut level. - #| On the right, dodged bar chart of cut of diamonds. Dodged bars are grouped by levels - #| of cut (fair, good, very good, premium, and ideal). In each group there - #| are eight bars, one for each level of clarity, and filled with a - #| different color for each level. Heights of these bars represent the - #| number of diamonds with a given level of cut and clarity. - - ggplot(diamonds, aes(x = cut, fill = clarity)) + + #| On the left, segmented bar chart of drive types of cars, where each bar is + #| filled with colors for the levels of class. Height of each bar is 1 and + #| heights of the colored segments represent the proportions of cars + #| with a given class level within a given drive type. + #| On the right, dodged bar chart of drive types of cars. Dodged bars are + #| grouped by levels of drive type. Within each group bars represent each + #| level of class. Some classes are represented within some drive types and + #| not represented in others, resulting in unequal number of bars within each + #| group. Heights of these bars represent the number of cars with a given + #| level of drive type and class. 
+ + # Left + ggplot(mpg, aes(x = drv, fill = class)) + geom_bar(position = "fill") - ggplot(diamonds, aes(x = cut, fill = clarity)) + + + # Right + ggplot(mpg, aes(x = drv, fill = class)) + geom_bar(position = "dodge") ``` @@ -932,8 +943,7 @@ There are two other coordinate systems that are occasionally helpful. ```{r} #| layout-ncol: 2 - #| fig-width: 4 - #| fig-height: 2 + #| fig-width: 3 #| message: false #| fig-alt: > #| Two maps of the boundaries of New Zealand. In the first plot the aspect @@ -954,15 +964,15 @@ There are two other coordinate systems that are occasionally helpful. ```{r} #| layout-ncol: 2 - #| fig-width: 4 + #| fig-width: 3 #| fig-asp: 1 #| fig-alt: > - #| There are two plots. On the left is a bar chart of cut of diamonds, + #| There are two plots. On the left is a bar chart of clarity of diamonds, #| on the right is a Coxcomb chart of the same data. bar <- ggplot(data = diamonds) + geom_bar( - mapping = aes(x = cut, fill = cut), + mapping = aes(x = clarity, fill = clarity), show.legend = FALSE, width = 1 ) + diff --git a/missing-values.qmd b/missing-values.qmd index 55612604e..35142da2b 100644 --- a/missing-values.qmd +++ b/missing-values.qmd @@ -229,7 +229,6 @@ You can force them to display by supplying `drop = FALSE` to the appropriate dis ```{r} #| layout-ncol: 2 #| fig-width: 3 -#| fig-height: 2 #| fig-alt: > #| A bar chart with a single value on the x-axis, "no". #|