9  Data Visualisation with Grammar of Graphics

9.1 Plotting with R

  • with base R

  • ggplot2 package: ggplot() function

9.2 Grammar of graphics

The Grammar of Graphics is a structured way of thinking about and building data visualizations.

Key Components of the Grammar of Graphics

  1. Data: the dataset you want to visualize.

  2. Aesthetics (aes): how variables are mapped to visual properties (x, y, color, size, shape).

  3. Geometries (geoms): the type of plot you draw (points, lines, bars, boxplots).

  4. Scales: control how data values map to aesthetics (e.g., continuous vs. categorical colors, log scales).

  5. Coordinate system: the space in which the data is drawn (Cartesian, polar, map projections).

  6. Facets: splitting data into subplots for comparison.

  7. Statistical transformations (stats): summaries or transformations (e.g., binning in histograms, smoothing in regression lines).

  8. Themes: non-data elements like fonts, backgrounds, grid lines.

9.3 Ingredients for plotting

  1. Data
# Load the built-in mtcars data set and preview its first six rows.
data(list = "mtcars")
head(mtcars, n = 6L)
                   mpg cyl disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
  2. Canvas to draw the plot
# Attach the tidyverse, then draw an empty canvas: no data, no mapping,
# no layers.
library(tidyverse)
ggplot(data = NULL)

9.4 Data + Aesthetics (aes)

# Bind the data and map wt to the x axis and mpg to the y axis.
# No geom has been added yet, so nothing is drawn inside the panel.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))

# Same mapping, with colour additionally mapped to cyl (as a factor).
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
)

9.5 Data + Aesthetics (aes) + Geometries (geoms)

# Scatter plot of mpg against wt.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()

# The same scatter plot once more.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()

# Swap the geom: a smoothed trend instead of the raw points.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_smooth()

# Swap the geom again: 2D density contours.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_density_2d()

# Geoms stack: contours first, then the points drawn on top of them.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_density_2d() +
  geom_point()

9.6 Add scale layer

# Colour the points by cyl using the ColorBrewer "Set1" palette.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
) +
  geom_point() +
  scale_colour_brewer(palette = "Set1")

# Colour the points by cyl using three hand-picked colours.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
) +
  geom_point() +
  scale_colour_manual(values = c("red", "blue", "green"))

9.7 Add coord layer

# Points plus smoothed lines (no confidence band), viewed through a
# Cartesian window limited to the given x and y ranges.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
) +
  geom_point(size = 3) +
  geom_smooth(se = FALSE) +
  scale_colour_brewer(palette = "Set1") +
  coord_cartesian(xlim = c(2, 5), ylim = c(10, 35))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same layers, drawn with a fixed aspect ratio of 3/10 between the axes.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
) +
  geom_point(size = 3) +
  geom_smooth(se = FALSE) +
  scale_colour_brewer(palette = "Set1") +
  coord_fixed(ratio = 3 / 10)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same layers, with the x and y axes swapped.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
) +
  geom_point(size = 3) +
  geom_smooth(se = FALSE) +
  scale_colour_brewer(palette = "Set1") +
  coord_flip()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Points only, drawn in polar coordinates.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
) +
  geom_point(size = 3) +
  scale_colour_brewer(palette = "Set1") +
  coord_polar()

9.8 Add facet layer

# One panel per value of cyl, each with its own points and smoothed line.
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = as_factor(cyl))
) +
  geom_point(size = 3) +
  geom_smooth(se = FALSE) +
  facet_wrap(facets = vars(cyl))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

9.9 Add stat layer

# Raw points, then a large semi-transparent red diamond at the mean of mpg
# for each distinct x value, drawn on top.
# NOTE(review): nothing is mapped to colour in these plots, so the brewer
# scale appears to have no visible effect; confirm whether it is wanted.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  stat_summary(
    fun = "mean",
    geom = "point",
    size = 5,
    shape = 18,
    colour = "red",
    alpha = 0.5
  ) +
  scale_colour_brewer(palette = "Set1")

# Layer order matters: here the summary diamonds are drawn first and the
# raw points are drawn over them.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  stat_summary(
    fun = "mean",
    geom = "point",
    size = 5,
    shape = 18,
    colour = "red",
    alpha = 0.5
  ) +
  geom_point() +
  scale_colour_brewer(palette = "Set1")

# Same idea with hp on the x axis and green summary diamonds.
ggplot(data = mtcars, mapping = aes(x = hp, y = mpg)) +
  geom_point() +
  stat_summary(
    fun = "mean",
    geom = "point",
    size = 5,
    shape = 18,
    colour = "green",
    alpha = 0.5
  ) +
  scale_colour_brewer(palette = "Set1")

9.10 theme layer

# Summary diamonds under the raw points, styled with the black-and-white
# theme.
# NOTE(review): nothing is mapped to colour in these plots, so the brewer
# scale appears to have no visible effect; confirm whether it is wanted.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  stat_summary(
    fun = "mean",
    geom = "point",
    size = 5,
    shape = 18,
    colour = "red",
    alpha = 0.5
  ) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  theme_bw()

# The same plot with the dark theme.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  stat_summary(
    fun = "mean",
    geom = "point",
    size = 5,
    shape = 18,
    colour = "red",
    alpha = 0.5
  ) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  theme_dark()

9.11 Some examples with gapminder dataset

library(gapminder)
data(gapminder)

Visualize the relationship between life expectancy, GDP per capita and continent in 2007.

# Keep only the 2007 observations.
gapminder2007 <- gapminder %>%
  filter(year == 2007)

# GDP per capita against life expectancy, coloured by continent, with the
# legend placed below the panel. The title previously read "GPD"; it is
# corrected to "GDP" here.
ggplot(
  gapminder2007,
  aes(x = lifeExp, y = gdpPercap, col = continent)
) +
  geom_point() +
  theme(legend.position = "bottom") +
  labs(
    title = "Relationship between life expectancy and GDP per capita by continent - 2007",
    x = "life expectancy at birth, in years",
    y = "GDP per capita (US$, inflation-adjusted)"
  )

]

9.11.1 Add a vertical line

# Restrict the data to 2007.
gapminder2007 <- filter(gapminder, year == 2007)

# Scatter plot with a vertical reference line at lifeExp = 70.
ggplot(
  data = gapminder2007,
  mapping = aes(x = lifeExp, y = gdpPercap, colour = continent)
) +
  geom_point() +
  geom_vline(xintercept = 70)

9.11.2 Add a horizontal line

# Restrict the data to 2007.
gapminder2007 <- filter(gapminder, year == 2007)

# Scatter plot with a horizontal reference line at gdpPercap = 20000.
ggplot(
  data = gapminder2007,
  mapping = aes(x = lifeExp, y = gdpPercap, colour = continent)
) +
  geom_point() +
  geom_hline(yintercept = 20000)

9.11.3 Add a diagonal line

# Restrict the data to 2007.
gapminder2007 <- filter(gapminder, year == 2007)

# Scatter plot with a straight line of intercept 20 and slope 200.
ggplot(
  data = gapminder2007,
  mapping = aes(x = lifeExp, y = gdpPercap, colour = continent)
) +
  geom_point() +
  geom_abline(intercept = 20, slope = 200)

9.12 All geoms in ggplot2

 [1] "geom_abline"            "geom_area"              "geom_bar"              
 [4] "geom_bin_2d"            "geom_bin2d"             "geom_blank"            
 [7] "geom_boxplot"           "geom_col"               "geom_contour"          
[10] "geom_contour_filled"    "geom_count"             "geom_crossbar"         
[13] "geom_curve"             "geom_density"           "geom_density_2d"       
[16] "geom_density_2d_filled" "geom_density2d"         "geom_density2d_filled" 
[19] "geom_dotplot"           "geom_errorbar"          "geom_errorbarh"        
[22] "geom_freqpoly"          "geom_function"          "geom_hex"              
[25] "geom_histogram"         "geom_hline"             "geom_jitter"           
[28] "geom_label"             "geom_line"              "geom_linerange"        
[31] "geom_map"               "geom_path"              "geom_point"            
[34] "geom_pointrange"        "geom_polygon"           "geom_qq"               
[37] "geom_qq_line"           "geom_quantile"          "geom_raster"           
[40] "geom_rect"              "geom_ribbon"            "geom_rug"              
[43] "geom_segment"           "geom_sf"                "geom_sf_label"         
[46] "geom_sf_text"           "geom_smooth"            "geom_spoke"            
[49] "geom_step"              "geom_text"              "geom_tile"             
[52] "geom_violin"            "geom_vline"            

9.13 geom_boxplot

# Horizontal boxplots of life expectancy, one per continent.
ggplot(data = gapminder2007, mapping = aes(x = lifeExp, y = continent)) +
  geom_boxplot()

# Outline colour mapped to continent.
ggplot(
  data = gapminder2007,
  mapping = aes(x = lifeExp, y = continent, colour = continent)
) +
  geom_boxplot()

# Fill colour mapped to continent.
ggplot(
  data = gapminder2007,
  mapping = aes(x = lifeExp, y = continent, fill = continent)
) +
  geom_boxplot()

]

# A constant fill is set on the geom itself (outside aes()), so every box
# gets the same colour and no legend is needed.
ggplot(data = gapminder2007, mapping = aes(x = lifeExp, y = continent)) +
  geom_boxplot(fill = "forestgreen")

]

# Same constant fill, made half-transparent with alpha.
ggplot(data = gapminder2007, mapping = aes(x = lifeExp, y = continent)) +
  geom_boxplot(fill = "forestgreen", alpha = 0.5)

]

9.14 geom_point

9.15 geom_jitter

# Jittered points of life expectancy for each continent.
ggplot(data = gapminder2007, mapping = aes(x = lifeExp, y = continent)) +
  geom_jitter()

9.16 geom_jitter + geom_boxplot

# Jittered points first, boxplots drawn over them.
ggplot(data = gapminder2007, mapping = aes(x = lifeExp, y = continent)) +
  geom_jitter() +
  geom_boxplot()

# Same order, with half-transparent boxes.
ggplot(data = gapminder2007, mapping = aes(x = lifeExp, y = continent)) +
  geom_jitter() +
  geom_boxplot(alpha = 0.5)

# Reverse the order: boxplots first, jittered points on top.
ggplot(data = gapminder2007, mapping = aes(x = lifeExp, y = continent)) +
  geom_boxplot() +
  geom_jitter()

# Boxes filled by continent; points coloured by continent through a
# mapping that applies to the jitter layer only.
ggplot(
  data = gapminder2007,
  mapping = aes(x = lifeExp, y = continent, fill = continent)
) +
  geom_boxplot() +
  geom_jitter(mapping = aes(colour = continent))

geom_jitter + geom_boxplot (outlier.shape = NA)

# outlier.shape = NA stops the boxplot layer from drawing its own outlier
# markers; the jitter layer still plots every observation.
ggplot(
  data = gapminder2007,
  mapping = aes(x = lifeExp, y = continent, fill = continent)
) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(mapping = aes(colour = continent))

Write the code to obtain the following plot.

9.17 geom_histogram

# Histogram of life expectancy in 2007.
# The bin count is stated explicitly: 30 is the value ggplot2 falls back to
# when none is given, so the plot is unchanged, but the "pick better value"
# message is no longer emitted and the choice is visible to the reader.
ggplot(gapminder2007, aes(x = lifeExp)) +
  geom_histogram(bins = 30)

9.18 geom_bar (stat = "identity")

# Pre-computed percentage for each cut category, one row per category.
cut.percent <- data.frame(
  cut = c("Fair", "Good", "Very Good", "Premium", "Ideal"),
  percent = c(3, 9, 22.4, 25.6, 40)
)
cut.percent
        cut percent
1      Fair     3.0
2      Good     9.0
3 Very Good    22.4
4   Premium    25.6
5     Ideal    40.0
# stat = "identity" makes the bar heights come straight from the percent
# column instead of from row counts.
ggplot(data = cut.percent, mapping = aes(x = cut, y = percent)) +
  geom_bar(stat = "identity")

9.19 geom_col

# Pre-computed percentage for each cut category, one row per category.
cut.percent <- data.frame(
  cut = c("Fair", "Good", "Very Good", "Premium", "Ideal"),
  percent = c(3, 9, 22.4, 25.6, 40)
)
cut.percent
        cut percent
1      Fair     3.0
2      Good     9.0
3 Very Good    22.4
4   Premium    25.6
5     Ideal    40.0
# geom_col() uses the y values in the data as the bar heights.
ggplot(data = cut.percent, mapping = aes(x = cut, y = percent)) +
  geom_col()

10 geom_line

# GDP per capita over time for India, drawn as a line.
ggplot(
  data = filter(gapminder, country == "India"),
  mapping = aes(x = year, y = gdpPercap)
) +
  geom_line()

Your turn

Write the code to obtain the following plot.

10.1 Data Wrangling + Data Visualization

# Mean life expectancy for each continent in each year.
# `.groups = "drop_last"` spells out what summarise() does by default for
# this call (the result stays grouped by continent), which makes the
# grouping of the result explicit and silences the informational message.
avglifeExp <- gapminder %>%
  group_by(continent, year) %>%
  summarise(meanlifeExp = mean(lifeExp), .groups = "drop_last")
avglifeExp
# A tibble: 60 × 3
# Groups:   continent [5]
   continent  year meanlifeExp
   <fct>     <int>       <dbl>
 1 Africa     1952        39.1
 2 Africa     1957        41.3
 3 Africa     1962        43.3
 4 Africa     1967        45.3
 5 Africa     1972        47.5
 6 Africa     1977        49.6
 7 Africa     1982        51.6
 8 Africa     1987        53.3
 9 Africa     1992        53.6
10 Africa     1997        53.6
# ℹ 50 more rows
# One line per continent, with a point marking each observed year.
ggplot(
  data = avglifeExp,
  mapping = aes(x = year, y = meanlifeExp, colour = continent)
) +
  geom_line() +
  geom_point()

10.2 Exercise

Write R code to reproduce the plot below.

Write R code to reproduce the plot below.

Write R code to reproduce the plot below.

Write R code to reproduce the plot below.