06 May, 2022

Graphics in R

Options

  • Traditional (base) graphics
    • isolated instructions to the device

Options

  • Traditional (base) graphics
    • isolated instructions to the device
  • Grid graphics
    • instruction sets
    • lattice
    • ggplot2

Packages

library(ggplot2) #OR better still library(tidyverse)
library(tidyverse)
library(grid)
library(gridExtra)
library(patchwork)
library(scales)

Graphics infrastructure

  • layers of data driven geometric objects
  • coordinate system
  • scales
  • faceting
  • themes

Typical ggplot template

ggplot() +                          # required
    geom_*(                         # required
        data = <DATA>,              # required - <DATA> is a data frame
        mapping = aes(<MAPPING>),   # required - map variables to scales
        stat = <STAT>,              # optional - map variables to geoms
        position = <POSITION>) +    # optional - adjustments to overlapping geoms
    coord_*() +                     # optional - specify coordinate system
    scale_*() +                     # optional - visual appearence of scales
    facet_*() +                     # optional - subplots
    theme_*()                       # optional - overal appearence

Motivating data

head(BOD)
##   Time demand
## 1    1    8.3
## 2    2   10.3
## 3    3   19.0
## 4    4   16.0
## 5    5   15.6
## 6    7   19.8
summary(BOD)
##       Time           demand     
##  Min.   :1.000   Min.   : 8.30  
##  1st Qu.:2.250   1st Qu.:11.62  
##  Median :3.500   Median :15.80  
##  Mean   :3.667   Mean   :14.83  
##  3rd Qu.:4.750   3rd Qu.:18.25  
##  Max.   :7.000   Max.   :19.80

ggplot - complete template

 p <- ggplot() + #single layer - points
  layer(data = BOD, #data.frame
    mapping = aes(y = demand, x = Time),
    stat = "identity", #use original data
    geom = "point", #plot data as points
    position = "identity",
    params = list(na.rm = TRUE),
    show.legend = FALSE
  )+ #layer of lines
  layer(data = BOD, #data.frame
    mapping = aes(y = demand, x = Time),
    stat = "identity", #use original data
    geom = "line", #plot data as a line
    position = "identity",
    params = list(na.rm = TRUE),
    show.legend = FALSE
  ) +
  coord_cartesian() + #cartesian coordinates
  scale_x_continuous() + #continuous x axis
  scale_y_continuous()  #continuous y axis
p #print the plot

ggplot

ggplot

ggplot(data = BOD,
       map = aes(y = demand, x = Time)) +
    geom_point() +
    geom_line()

Overview

  • data
p<-ggplot(data = BOD)
  • layers (geoms)
p<-p + geom_point(aes(y = demand, x = Time))
p

Overview

  • data
p<-ggplot(data = BOD)
  • layers (geoms)
p <- p + geom_point(aes(y = demand, x = Time))
  • scales
p <- p + scale_x_sqrt(name = "Time (days)")
p

Layers

Layers

  • layers of data driven objects
    • geometric objects to represent data
    • statistical methods to summarize the data
    • mapping of aesthetics
    • position control

geom_ and stat_

  • coupled together
  • engage either
  • stat_identity

geom_

Arguments

  • data - obvious
  • mapping - aesthetics

If omitted, inherited from ggplot()

  • stat - the stat_ function
  • position - overlapping geoms

geom_

BOD %>%
    ggplot(aes(y = demand, x = Time)) +
    geom_point()
#OR
ggplot(data = BOD, aes(y = demand, x = Time)) +
    geom_point()
#OR
ggplot(data = BOD) +
    geom_point(aes(y = demand, x = Time))
#OR
ggplot() +
    geom_point(data = BOD, aes(y = demand, x = Time))

Optional aesthetics mapping

  • alpha - transparency
  • colour - colour of the geometric features
  • fill - colour of the geometric features
  • linetype - fill colour of geometric features
  • size - size of geometric features such as points or text
  • shape - shape of geometric features such as points
  • weight - weightings of values

primitives

df <- data.frame(x = c(3, 1, 5), y = c(2, 4, 6), z = c('a', 'b', 'c'))
p <- ggplot(data=df, aes(y = y, x = x))
geom Example syntax Example plot
geom_blank
p + geom_blank()
geom_point
p + geom_point()
geom_text
p + geom_text(aes(label = z))
geom_path
p + geom_path()
geom_polygon
p + geom_polygon()
geom_area
p + geom_area()
geom_ribbon
p + geom_ribbon(aes(ymin = y-1, ymax= y+1))

geom_point

head(CO2)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
## 4   Qn1 Quebec nonchilled  350   37.2
## 5   Qn1 Quebec nonchilled  500   35.3
## 6   Qn1 Quebec nonchilled  675   39.2
summary(CO2)
##      Plant             Type         Treatment       conc          uptake     
##  Qn1    : 7   Quebec     :42   nonchilled:42   Min.   :  95   Min.   : 7.70  
##  Qn2    : 7   Mississippi:42   chilled   :42   1st Qu.: 175   1st Qu.:17.90  
##  Qn3    : 7                                    Median : 350   Median :28.30  
##  Qc1    : 7                                    Mean   : 435   Mean   :27.21  
##  Qc3    : 7                                    3rd Qu.: 675   3rd Qu.:37.12  
##  Qc2    : 7                                    Max.   :1000   Max.   :45.50  
##  (Other):42

geom_point

g <- ggplot(CO2, aes(x = conc, y = uptake))
g + geom_point(colour = "red")

g + geom_point(aes(colour = Type))

geom_point

g + geom_point(stat = "summary",
               fun = mean)

g + stat_summary(geom = "point",
                 fun = mean)

Example data sets

head(diamonds)
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
summary(diamonds)
##      carat               cut        color        clarity          depth      
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065   Min.   :43.00  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258   1st Qu.:61.00  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194   Median :61.80  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171   Mean   :61.75  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066   3rd Qu.:62.50  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655   Max.   :79.00  
##                                     J: 2808   (Other): 2531                  
##      table           price             x                y         
##  Min.   :43.00   Min.   :  326   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710   1st Qu.: 4.720  
##  Median :57.00   Median : 2401   Median : 5.700   Median : 5.710  
##  Mean   :57.46   Mean   : 3933   Mean   : 5.731   Mean   : 5.735  
##  3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540   3rd Qu.: 6.540  
##  Max.   :95.00   Max.   :18823   Max.   :10.740   Max.   :58.900  
##                                                                   
##        z         
##  Min.   : 0.000  
##  1st Qu.: 2.910  
##  Median : 3.530  
##  Mean   : 3.539  
##  3rd Qu.: 4.040  
##  Max.   :31.800  
## 

Various geometric objects

Exploring distributions

diamonds %>% data.frame() %>% head(2)
##   carat     cut color clarity depth table price    x    y    z
## 1  0.23   Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21 Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
g <- ggplot(diamonds, aes(x=carat))
Feature stat Example syntax Example plot
Histogram _bin
g + geom_histogram()
Density _density
g + geom_density()
Boxplot _boxplot
g + geom_boxplot()
Violin plot _ydensity
g + geom_violin(aes(y = carat),
                orientation='y')
QQ plot _qq
g + geom_qq(aes(sample = carat),
            inherit.aes = FALSE)
Bar plot _count
g + geom_bar(aes(x = cut))

Exploring distributions

diamonds %>% data.frame() %>% head(2)
##   carat     cut color clarity depth table price    x    y    z
## 1  0.23   Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21 Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
g <- ggplot(diamonds, aes(x=carat))
Feature stat Example syntax Example plot
Histogram _bin
g + geom_histogram(aes(fill=cut))
Density _density
g + geom_density(aes(fill=cut),
                 alpha=0.3)
Boxplot _boxplot
g + geom_boxplot(aes(y = cut))
Violin plot _ydensity
g + geom_violin(aes(y = cut))
QQ plot _qq
g + geom_qq(aes(sample = carat,
                color = cut),
            inherit.aes = FALSE)

Exploring distributions

diamonds %>% data.frame() %>% head(2)
##   carat     cut color clarity depth table price    x    y    z
## 1  0.23   Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21 Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
g <- ggplot(diamonds, aes(x=carat))
Feature stat Example syntax Example plot
Bar plot _count
g + geom_bar(aes(x = cut,
                 fill = clarity))
Bar plot _count
g + geom_bar(aes(x = cut,
                 fill = clarity),
                 position='dodge')

Visualising trends

Visualising trends

Visualising trends

Visualising trends

Visualising trends

Visualising trends

Visualising trends

Visualising trends

Visualising trends

Visualising trends

Visualising three dimensions

head(as.data.frame(faithfuld), 3)
##   eruptions waiting     density
## 1  1.600000      43 0.003216159
## 2  1.647297      43 0.003835375
## 3  1.694595      43 0.004435548
g <- ggplot(faithfuld, aes(y = eruptions, x = waiting))
Feature stat Example syntax Example plot
Tiles _tile
g + geom_tile(aes(fill = density))
Raster _identity
g + geom_raster(aes(fill = density))
Contour _contour
g + geom_contour(aes(z = density))
Filled contour _contour_filled
g + geom_contour_filled(aes(z = density))

Visualising maps

library(maps)
library(mapdata)
aus <- map_data("worldHires", region="Australia")
head(aus,3)
##       long       lat group order    region              subregion
## 1 142.1461 -10.74943     1     1 Australia Prince of Wales Island
## 2 142.1430 -10.74525     1     2 Australia Prince of Wales Island
## 3 142.1406 -10.74113     1     3 Australia Prince of Wales Island
ggplot(aus, aes(x=long, y=lat, group=group)) +
    geom_polygon()

Visualising uncertainty

head(warpbreaks)
##   breaks wool tension
## 1     26    A       L
## 2     30    A       L
## 3     54    A       L
## 4     25    A       L
## 5     70    A       L
## 6     52    A       L
summary(warpbreaks)
##      breaks      wool   tension
##  Min.   :10.00   A:27   L:18   
##  1st Qu.:18.25   B:27   M:18   
##  Median :26.00          H:18   
##  Mean   :28.15                 
##  3rd Qu.:34.00                 
##  Max.   :70.00

Visualising uncertainty

warpbreaks.sum <- warpbreaks %>%
    group_by(wool) %>%
    summarise(mean_se(breaks))
g <- ggplot(warpbreaks.sum, aes(y = y, x = wool, ymin = ymin, ymax = ymax))
Feature stat Example syntax Example plot
Error bars _identity
g + geom_errorbar()
Line range _identity
g + geom_linerange()
Point range _identity
g + geom_pointrange()

Visualising uncertainty

head(as.data.frame(warpbreaks),2)
##   breaks wool tension
## 1     26    A       L
## 2     30    A       L
g <- ggplot(warpbreaks, aes(y = breaks, x = wool))
g + geom_pointrange(stat = "summary",
      fun.data = "mean_se")

g + geom_pointrange(stat = "summary",
      fun.data = "mean_cl_boot")

Coordinate systems

Coordinate systems

head(CO2,3)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
g <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake))
Coordinates Example syntax Example plot
Cartesian
g + coord_cartesian()
Flip
g + coord_flip()
Polar
g + coord_polar()

Coordinate systems

#Orthographic coordinates
library(maps)
library(mapdata)
aus <- map_data("worldHires", region = "Australia")
ggplot(aus, aes(x = long, y = lat, group = group)) +
  coord_map("ortho", orientation=c(-20, 125, 23.5))+
  geom_polygon()

Scales

Scales

  • x, y axes scales
  • size of points (thickness of lines)
  • shape of points
  • linetype of lines
  • color of lines or points
  • fill of shapes
g + scale_<SCALE>_<TYPE>(
    name,     # scale title
    breaks,   # axis/legend ticks/breaks
    labels,   # alternative axis/legend text labels
    values,   # scale dependent properties (sizes/colours etc)
    limits,   # range of values accommodated
    ...,      # additional scale dependent options
    )

<TYPE> - prepackaged scale modifier

X and Y axes

head(CO2,2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
g <- ggplot(CO2, aes(y = uptake, x = conc)) + geom_point()
Scale Example syntax Example plot
Continuous
g + scale_x_continuous()
log
g + scale_x_log10()
sqrt
g + scale_x_sqrt()
reverse
g + scale_x_reverse()

X and Y axes

Axis titles with math

head(CO2,2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
g + scale_x_continuous(name = expression(
         Ambient~CO[2]~concentration~(mg/l)))

X and Y axes

Axis more padding

g + scale_x_continuous(name = "CO2 conc",
         expand = c(0,200))

X and Y axes

Axis on a log scale

g + scale_x_log10(name = "CO2 conc",
    breaks = as.vector(c(1, 2, 5, 10) %o% 10^(-1:2)))

X and Y axes

Axis representing categorical data

ggplot(CO2, aes(y = uptake, x = Treatment)) + geom_point()+
  scale_x_discrete(name = "Treatment")

Size and shape scales

head(CO2, 2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
g <- ggplot(CO2, aes(y = uptake, x = conc))
Scale Example syntax Example plot
Size
g + geom_point(aes(size = uptake)) +
    scale_size_continuous()
Size
g + geom_point(aes(size = Type)) +
    scale_size_discrete(range = c(2,5))
Shape
g + geom_point(aes(shape = Type)) +
    scale_shape_manual(values = c(2,3))

Color and fill scales

Continuous scale

head(CO2, 2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake              Comb
## 1   Qn1 Quebec nonchilled   95   16.0 Quebec.nonchilled
## 2   Qn1 Quebec nonchilled  175   30.4 Quebec.nonchilled
g <- ggplot(CO2, aes(y = uptake, x = conc)) +
    geom_point(aes(colour = uptake))

Color and fill scales

Continuous scale

Scale Example syntax Example plot
Gradient
g + scale_color_gradient(low = 'red', high = 'blue')
Gradient2
g + scale_color_gradient2(low = 'red', mid = "white",
                          high = 'blue')
Gradientn
g + scale_color_gradientn(colors = terrain.colors(12))
Distiller
g + scale_color_distiller(palette = "Reds")
Viridis
g + scale_color_viridis_c(option = "D")

Colours

  • 657 named colours
colors()

Colour palettes

Colour palettes

library(colorspace)
pal <- choose_pal()

Colour palettes

RColorBrewer::display.brewer.all()

Colour palettes

Facets

Facets

Panels - matrices of plots

  • facet_wrap
  • facet_grid

Facets

g <- ggplot(CO2) +
    geom_point(aes(x = conc, y = uptake, colour = Type))
g + facet_wrap(~Plant)

Facets

g <- ggplot(CO2) +
    geom_point(aes(x = conc, y = uptake, colour = Type))
g + facet_wrap(~Plant, scales = 'free_y')

Facets

g <- ggplot(CO2) +
    geom_point(aes(x = conc, y = uptake, colour = Type))
g + facet_grid(Type~Treatment)

Arranging multiple plots

Multiple plots

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
library(gridExtra)
grid.arrange(g1,  g2)

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
library(patchwork)
g1/g2

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
library(patchwork)
g1+g2

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
library(patchwork)
(g1+g2)/g1

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
library(patchwork)
wrap_plots(g1,g2, g1, g2, widths = c(2,1,1,1), guides = 'collect',
           design = "AAB\nCDE") +
    guide_area() +
    plot_annotation(tag_levels = 'a',
                    tag_suffix = ')')

wrap_plots(g1,g2, g1, g2, guides = 'collect',
           design = "AAAA\nBCDE") +
    guide_area() +
    plot_annotation(tag_levels = 'a',
                    tag_suffix = ')')

Themes

theme_classic

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_classic()

theme_bw

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_bw()

theme_grey

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_grey()

theme_minimal

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_minimal()

theme_linedraw

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_linedraw()

theme_light

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_light()

others

png('resources/xkcd.png', width=500, height=500, res=200)
library(xkcd)  
## remotes::install_version("Rttf2pt1", version = "1.3.8")
library(Rttf2pt1)
library(sysfonts)
library(extrafont)
## download.file("http://simonsoftware.se/other/xkcd.ttf", dest="xkcd.ttf")
#download.file("https://github.com/ekmett/arcade/blob/master/static/fonts/xkcd.ttf", dest="xkcd.ttf")
#font_import(".", prompt = FALSE)
#loadfonts()
xrange <- range(CO2$conc)
yrange <- range(CO2$uptake)
ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth(position='jitter', size=1.5) +
    #geom_point() +
        theme_minimal()+theme(text=element_text(size=16, family='xkcd'))+
        xkcdaxis(xrange, yrange)

dev.off()        

others

Saving ggplot figures

Saving ggplot figures

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
ggsave(filename='figure1.pdf', g1,  width=7,  height=5)

Practice

state=data.frame(state.x77, state.region, state.division, state.center) %>%
    select(Illiteracy, state.region, x, y)
head(state)
##            Illiteracy state.region         x       y
## Alabama           2.1        South  -86.7509 32.5901
## Alaska            1.5         West -127.2500 49.2500
## Arizona           1.8         West -111.6250 34.2192
## Arkansas          1.9        South  -92.2992 34.7336
## California        1.1         West -119.7730 36.5341
## Colorado          0.7         West -105.5130 38.6777

Calculate the mean and 95% confidence interval of Illiteracy per state.region and plot them.

Practice

library(gmodels)
state.sum = state %>% group_by(state.region) %>%
    summarise(Mean = mean(Illiteracy),
              Lower = ci(Illiteracy)[2],
              Upper = ci(Illiteracy)[3])
state.sum
## # A tibble: 4 × 4
##   state.region   Mean Lower Upper
##   <fct>         <dbl> <dbl> <dbl>
## 1 Northeast      1    0.786 1.21 
## 2 South          1.74 1.44  2.03 
## 3 North Central  0.7  0.610 0.790
## 4 West           1.02 0.655 1.39
ggplot(state.sum, aes(y = Mean, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = Lower, ymax = Upper), width=0.1)

Practice

ggplot(state.sum, aes(y = Mean, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.1) +
    scale_x_discrete("Region") +
    scale_y_continuous("Illiteracy rate (%)")+
    theme_classic() +
    theme(axis.line.y = element_line(),
          axis.line.x = element_line())

Practice

library(gmodels)
state.sum = state %>% group_by(state.region) %>%
    mutate(mean_sdl(Illiteracy))
state.sum
## # A tibble: 50 × 6
## # Groups:   state.region [4]
##    Illiteracy state.region       x     y   ymin  ymax
##         <dbl> <fct>          <dbl> <dbl>  <dbl> <dbl>
##  1        2.1 South          -86.8  1.74  0.633 2.84 
##  2        1.5 West          -127.   1.02 -0.194 2.24 
##  3        1.8 West          -112.   1.02 -0.194 2.24 
##  4        1.9 South          -92.3  1.74  0.633 2.84 
##  5        1.1 West          -120.   1.02 -0.194 2.24 
##  6        0.7 West          -106.   1.02 -0.194 2.24 
##  7        1.1 Northeast      -72.4  1     0.443 1.56 
##  8        0.9 South          -75.0  1.74  0.633 2.84 
##  9        1.3 South          -81.7  1.74  0.633 2.84 
## 10        2   South          -83.4  1.74  0.633 2.84 
## 11        1.9 West          -126.   1.02 -0.194 2.24 
## 12        0.6 West          -114.   1.02 -0.194 2.24 
## 13        0.9 North Central  -89.4  0.7   0.417 0.983
## 14        0.7 North Central  -86.1  0.7   0.417 0.983
## 15        0.5 North Central  -93.4  0.7   0.417 0.983
## 16        0.6 North Central  -98.1  0.7   0.417 0.983
## 17        1.6 South          -84.8  1.74  0.633 2.84 
## 18        2.8 South          -92.3  1.74  0.633 2.84 
## 19        0.7 Northeast      -69.0  1     0.443 1.56 
## 20        0.9 South          -76.6  1.74  0.633 2.84 
## 21        1.1 Northeast      -71.6  1     0.443 1.56 
## 22        0.9 North Central  -84.7  0.7   0.417 0.983
## 23        0.6 North Central  -94.6  0.7   0.417 0.983
## 24        2.4 South          -89.8  1.74  0.633 2.84 
## 25        0.8 North Central  -92.5  0.7   0.417 0.983
## 26        0.6 West          -109.   1.02 -0.194 2.24 
## 27        0.6 North Central  -99.6  0.7   0.417 0.983
## 28        0.5 West          -117.   1.02 -0.194 2.24 
## 29        0.7 Northeast      -71.4  1     0.443 1.56 
## 30        1.1 Northeast      -74.2  1     0.443 1.56 
## 31        2.2 West          -106.   1.02 -0.194 2.24 
## 32        1.4 Northeast      -75.1  1     0.443 1.56 
## 33        1.8 South          -78.5  1.74  0.633 2.84 
## 34        0.8 North Central -100.   0.7   0.417 0.983
## 35        0.8 North Central  -82.6  0.7   0.417 0.983
## 36        1.1 South          -97.1  1.74  0.633 2.84 
## 37        0.6 West          -120.   1.02 -0.194 2.24 
## 38        1   Northeast      -77.4  1     0.443 1.56 
## 39        1.3 Northeast      -71.1  1     0.443 1.56 
## 40        2.3 South          -80.5  1.74  0.633 2.84 
## 41        0.5 North Central  -99.7  0.7   0.417 0.983
## 42        1.7 South          -86.5  1.74  0.633 2.84 
## 43        2.2 South          -98.8  1.74  0.633 2.84 
## 44        0.6 West          -111.   1.02 -0.194 2.24 
## 45        0.6 Northeast      -72.5  1     0.443 1.56 
## 46        1.4 South          -78.2  1.74  0.633 2.84 
## 47        0.6 West          -120.   1.02 -0.194 2.24 
## 48        1.4 South          -80.7  1.74  0.633 2.84 
## 49        0.7 North Central  -90.0  0.7   0.417 0.983
## 50        0.6 West          -107.   1.02 -0.194 2.24
ggplot(state.sum, aes(y = y, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.1)

Practice

library(gmodels)
state.sum = state %>% group_by(state.region) %>%
    mutate(mean_cl_boot(Illiteracy))
state.sum
## # A tibble: 50 × 6
## # Groups:   state.region [4]
##    Illiteracy state.region       x     y  ymin  ymax
##         <dbl> <fct>          <dbl> <dbl> <dbl> <dbl>
##  1        2.1 South          -86.8  1.74 1.48  2.01 
##  2        1.5 West          -127.   1.02 0.708 1.36 
##  3        1.8 West          -112.   1.02 0.708 1.36 
##  4        1.9 South          -92.3  1.74 1.48  2.01 
##  5        1.1 West          -120.   1.02 0.708 1.36 
##  6        0.7 West          -106.   1.02 0.708 1.36 
##  7        1.1 Northeast      -72.4  1    0.844 1.17 
##  8        0.9 South          -75.0  1.74 1.48  2.01 
##  9        1.3 South          -81.7  1.74 1.48  2.01 
## 10        2   South          -83.4  1.74 1.48  2.01 
## 11        1.9 West          -126.   1.02 0.708 1.36 
## 12        0.6 West          -114.   1.02 0.708 1.36 
## 13        0.9 North Central  -89.4  0.7  0.625 0.775
## 14        0.7 North Central  -86.1  0.7  0.625 0.775
## 15        0.5 North Central  -93.4  0.7  0.625 0.775
## 16        0.6 North Central  -98.1  0.7  0.625 0.775
## 17        1.6 South          -84.8  1.74 1.48  2.01 
## 18        2.8 South          -92.3  1.74 1.48  2.01 
## 19        0.7 Northeast      -69.0  1    0.844 1.17 
## 20        0.9 South          -76.6  1.74 1.48  2.01 
## 21        1.1 Northeast      -71.6  1    0.844 1.17 
## 22        0.9 North Central  -84.7  0.7  0.625 0.775
## 23        0.6 North Central  -94.6  0.7  0.625 0.775
## 24        2.4 South          -89.8  1.74 1.48  2.01 
## 25        0.8 North Central  -92.5  0.7  0.625 0.775
## 26        0.6 West          -109.   1.02 0.708 1.36 
## 27        0.6 North Central  -99.6  0.7  0.625 0.775
## 28        0.5 West          -117.   1.02 0.708 1.36 
## 29        0.7 Northeast      -71.4  1    0.844 1.17 
## 30        1.1 Northeast      -74.2  1    0.844 1.17 
## 31        2.2 West          -106.   1.02 0.708 1.36 
## 32        1.4 Northeast      -75.1  1    0.844 1.17 
## 33        1.8 South          -78.5  1.74 1.48  2.01 
## 34        0.8 North Central -100.   0.7  0.625 0.775
## 35        0.8 North Central  -82.6  0.7  0.625 0.775
## 36        1.1 South          -97.1  1.74 1.48  2.01 
## 37        0.6 West          -120.   1.02 0.708 1.36 
## 38        1   Northeast      -77.4  1    0.844 1.17 
## 39        1.3 Northeast      -71.1  1    0.844 1.17 
## 40        2.3 South          -80.5  1.74 1.48  2.01 
## 41        0.5 North Central  -99.7  0.7  0.625 0.775
## 42        1.7 South          -86.5  1.74 1.48  2.01 
## 43        2.2 South          -98.8  1.74 1.48  2.01 
## 44        0.6 West          -111.   1.02 0.708 1.36 
## 45        0.6 Northeast      -72.5  1    0.844 1.17 
## 46        1.4 South          -78.2  1.74 1.48  2.01 
## 47        0.6 West          -120.   1.02 0.708 1.36 
## 48        1.4 South          -80.7  1.74 1.48  2.01 
## 49        0.7 North Central  -90.0  0.7  0.625 0.775
## 50        0.6 West          -107.   1.02 0.708 1.36
ggplot(state.sum, aes(y = y, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.1)

Practice

Overlay illiteracy data onto map of US

library(mapdata)
US <- map_data("worldHires", region = "USA")
ggplot(US) +
    geom_polygon(aes(x = long, y = lat, group = group)) +
    geom_point(data = state, aes(y = y, x = x, size = Illiteracy),
               color = "red")

Practice

Overlay illiteracy data onto map of US

library(mapdata) 
US <- map_data("worldHires", region="USA")
ggplot(US) +
    geom_polygon(aes(x = long, y = lat, group = group)) +
    geom_point(data = state,aes(y = y, x = x, size = Illiteracy),
               color = "red")+
    coord_map(xlim = c(-150, -50), ylim = c(20, 60)) +
    theme_minimal()

Practice

MACNALLY <- read.csv('../data/macnally.csv',
  header = TRUE, row.names = 1, strip.white = TRUE)
head(MACNALLY)
##                 HABITAT GST EYR
## Reedy Lake        Mixed 3.4 0.0
## Pearcedale  Gipps.Manna 3.4 9.2
## Warneet     Gipps.Manna 8.4 3.8
## Cranbourne  Gipps.Manna 3.0 5.0
## Lysterfield       Mixed 5.6 5.6
## Red Hill          Mixed 8.1 4.1

Calculate the mean and standard error of GST and plot them

Practice

Calculate the mean and standard error of GST and plot mean and confidence bars

library(gmodels)
ci(MACNALLY$GST)
##   Estimate   CI lower   CI upper Std. Error 
##   4.878378   4.035292   5.721465   0.415704
MACNALLY.agg = MACNALLY %>% group_by(HABITAT) %>%
  summarize(Mean = mean(GST), Lower = ci(GST)[2], Upper = ci(GST)[3])
ggplot(MACNALLY.agg, aes(y = Mean, x = HABITAT)) +
    geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.1)+
    geom_point() + theme_classic()

Practice

You can also use ggplot’s summary

library(tidyverse)
MACNALLY.melt = MACNALLY %>%
  pivot_longer(-HABITAT, names_to = "variable",  values_to = "value")
ggplot(MACNALLY.melt, aes(y = value, x = HABITAT)) +
    stat_summary(fun.y = "mean", geom = "point")+
    stat_summary(fun.data = "mean_cl_normal", geom = "errorbar",
                 width = 0.1)+
    facet_grid(~variable)

Practice

You can also use ggplot’s summary

#and bootstrapped means..
ggplot(MACNALLY.melt, aes(y = value, x = HABITAT)) +
    stat_summary(fun.y = "mean", geom = "point")+
    stat_summary(fun.data = "mean_cl_boot", geom = "errorbar",
                 width = 0.1)+
    facet_grid(~variable)