1 Building up Plots, Again

library(ggplot2)
library(scales)
library(MASS)
library(stringr)
library(splines)

theme_set(theme_gray())

1.1 ASA Membership & Revenue data

asa.url <- "https://raw.githubusercontent.com/kjhealy/asa-sections/master/data/asa-section-membership.csv"
asa.data <- read.csv((url(asa.url)), header = TRUE)

## If you cloned the github repository, launch R in it and then
## asa.data <-  read.csv("data/asa-section-membership.csv", header=TRUE) 

dim(asa.data)
## [1] 52 18
head(asa.data)
##                                Section         Sname X2005 X2006 X2007
## 1      Aging and the Life Course (018)         Aging   598   603   614
## 2     Alcohol, Drugs and Tobacco (030) Alcohol/Drugs   301   304   303
## 3 Altruism and Social Solidarity (047)      Altruism    NA    NA    NA
## 4            Animals and Society (042)       Animals   209   208   218
## 5             Asia/Asian America (024)          Asia   365   379   398
## 6            Body and Embodiment (048)          Body    NA    NA    NA
##   X2008 X2009 X2010 X2011 X2012 X2013 X2014 X2015 Beginning Revenues
## 1   606   624   605   612   620   610   580   612     12752    12104
## 2   288   255   213   226   200   195   173   171     11933     1144
## 3    NA   139   216   320   305   306   318   307      1139     1862
## 4   176   180   167   172   149   160   154   141       473      820
## 5   368   405   351   377   337   349   336   313      9056     2116
## 6    NA   302   295   307   306   309   312   321      3408     1618
##   Expenses Ending Journal
## 1    12007  12849      No
## 2      400  12677      No
## 3     1875   1126      No
## 4     1116    177      No
## 5     1710   9462      No
## 6     1920   3106      No

1.2 Quick & Dirty Function for custom colors

my.colors <- function (palette = "cb") {
    cb.palette <- c("#999999", "#E69F00", "#56B4E9", "#009E73",
                    "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
    rcb.palette <- rev(cb.palette)
    bly.palette <- c("#E69F00", "#0072B2", "#999999", "#56B4E9",
                     "#009E73", "#F0E442", "#D55E00", "#CC79A7")
    if (palette == "cb")
        return(cb.palette)
    else if (palette == "rcb")
        return(rcb.palette)
    else if (palette == "bly")
        return(bly.palette)
    else stop("Choose cb, rcb, or bly ony.")
}

1.3 Make sure the figures/ directory is available

ifelse(!dir.exists(file.path("figures")),
       dir.create(file.path("figures")),
       FALSE)
## [1] FALSE

1.4 Starting with the basics again

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_point()
p0

1.5 Add a smoother

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth() +
    geom_point()
p0

1.6 Pick out some outliers

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth() +
    geom_point() +
    geom_text(data=subset(asa.data, Revenues > 7000)) 
p0

1.7 Introduce a third variable

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth() +
    geom_point(aes(color = Journal)) +
    geom_text(data=subset(asa.data, Revenues > 7000)) 
p0

1.8 Change the fitted line

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth(method = "lm",
                  se = FALSE,
                  color = "gray80") +
    geom_point(aes(color = Journal)) +
    geom_text(data=subset(asa.data, Revenues > 7000)) 
p0

1.9 Tidy up the labeled text

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth(method = "lm",
                  se = FALSE,
                  color = "gray80") +
    geom_point(aes(color = Journal)) +
    geom_text(data=subset(asa.data, Revenues > 7000),
              size = 2,
              aes(x=X2014+10,
                  hjust = 0,
                  lineheight = 0.7)) 
p0

1.10 Label the Axes and Scales

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth(method = "lm",
                  se = FALSE,
                  color = "gray80") +
    geom_point(aes(color = Journal)) +
    geom_text(data=subset(asa.data, Revenues > 7000),
              size = 2,
              aes(x=X2014+10,
                  hjust = 0,
                  lineheight = 0.7)) +

   labs(x="Membership",
        y="Revenues",
        color = "Section has own Journal") 
p0

1.11 Fix Tick Marks and Colors

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth(method = "lm",
                  se = FALSE,
                  color = "gray80") +
    geom_point(aes(color = Journal)) +
    geom_text(data=subset(asa.data, Revenues > 7000),
              size = 2,
              aes(x=X2014+10,
                  hjust = 0,
                  lineheight = 0.7)) +
    scale_y_continuous(labels = dollar) +
    scale_color_manual(values = my.colors("bly")) +
    labs(x="Membership",
        y="Revenues",
        color = "Section has own Journal") 
p0

1.12 Add a title and move the legend

p <- ggplot(asa.data, aes(x=X2014, y=Revenues, label=Sname))

p0 <- p + geom_smooth(method = "lm",
                  se = FALSE,
                  color = "gray80") +
    geom_point(aes(color = Journal)) +
    geom_text(data=subset(asa.data, Revenues > 7000),
              size = 2,
              aes(x=X2014+10,
                  hjust = 0,
                  lineheight = 0.7)) +
    scale_y_continuous(labels = dollar) +
    scale_color_manual(values = my.colors("bly")) +
    labs(x="Membership",
        y="Revenues",
        color = "Section has own Journal") +
    theme(legend.position = "bottom") +
    ggtitle("ASA Sections, Membership vs Revenues")
p0

2 Change the theme

theme_set(theme_minimal())

print(p0)

2.1 Change the theme again

theme_set(theme_light())

print(p0)

2.2 Moar themes

library(ggthemes)
theme_set(theme_fivethirtyeight())

print(p0)

2.3 Still Moar themes

theme_set(theme_economist())

p0

4 Another Example: Apple Sales Data

  • git clone https://github.com/kjhealy/apple
apple.url <- "https://raw.githubusercontent.com/kjhealy/apple/master/data/apple-all-products-quarterly-sales.csv"
apple.data <- read.csv((url(apple.url)), header = TRUE)

## If you cloned the github repository, launch R in it and then
## asa.data <-  read.csv("data/asa-section-membership.csv", header=TRUE) 

dim(apple.data)
## [1] 69  6
head(apple.data)
##    Time Period iPhone iPad iPod   Mac
## 1 Q4/98      1     NA   NA   NA 0.944
## 2 Q1/99      2     NA   NA   NA 0.827
## 3 Q2/99      3     NA   NA   NA 0.905
## 4 Q3/99      4     NA   NA   NA 0.772
## 5 Q4/99      5     NA   NA   NA 1.377
## 6 Q1/00      6     NA   NA   NA 1.043
library(dplyr)
library(ggplot2)
library(tidyr)
library(splines)
library(scales)
library(grid)

## data <- read.csv("data/apple-all-products-quarterly-sales.csv",
## header=TRUE)

apple.data$Date <- seq(as.Date("1998/12/31"), as.Date("2015/12/31"), by = "quarter")

apple.data.m <- gather(apple.data, Product, Sales, iPhone:Mac)
head(apple.data.m)
##    Time Period       Date Product Sales
## 1 Q4/98      1 1998-12-31  iPhone    NA
## 2 Q1/99      2 1999-03-31  iPhone    NA
## 3 Q2/99      3 1999-07-01  iPhone    NA
## 4 Q3/99      4 1999-10-01  iPhone    NA
## 5 Q4/99      5 1999-12-31  iPhone    NA
## 6 Q1/00      6 2000-03-31  iPhone    NA
p <- ggplot(subset(apple.data.m, Product!="iPod" & Period>30),
            aes(x=Date, y=Sales, color=Product, fill=Product))
p0 <- p + geom_point(size=1.3) +
    geom_smooth(size=0.8, se=FALSE, method = "loess") +
    theme(legend.position="top") +
    scale_x_date(labels = date_format("%Y"),
                 breaks=date_breaks("year")) +
    scale_colour_manual(values=my.colors()) +
    scale_fill_manual(values=my.colors()) + 
    labs(x="", y="Sales (millions)") 
p0
Quarterly Product Sales
### Convert to time series objects
ipad <- apple.data.m %>%
    group_by(Product) %>%
    filter(Product=="iPad") %>%
    na.omit() %>%
    data.frame(.)

ipad.ts <- ts(ipad$Sales, start=c(2010, 2), frequency = 4)


iphone <- apple.data.m %>%
    group_by(Product) %>%
    filter(Product=="iPhone") %>%
    na.omit() %>%
    data.frame(.)

iphone.ts <- ts(iphone$Sales, start=c(2007, 2), frequency = 4)

mac <- apple.data.m %>%
    group_by(Product) %>%
    filter(Product=="Mac") %>%
    na.omit() %>%
    data.frame(.)

mac.ts <- ts(mac$Sales, start=c(1998, 4), frequency = 4)
## Loess decomposition 

iphone.stl <- stl(iphone.ts, s.window = "periodic", t.jump = 1)
plot(iphone.stl)
Default stl() decomposition

4.1 Redraw the STL plot with GGplot

iphone.stl2 <- stl(iphone.ts, s.window = 11, t.jump = 1)

ggiphone.stl <- data.frame(iphone.stl2$time.series)

ggiphone.stl$sales <- apple.data$iPhone %>% na.omit()

ind <- is.na(apple.data$iPhone)

ggiphone.stl$Date <- apple.data$Date[!ind]

ggiphone.stl$Product <- "iPhone"
p <- ggplot(ggiphone.stl, aes(x=Date, y=sales))
p1 <- p + geom_line() + ylab("Data")

p <- ggplot(ggiphone.stl, aes(x=Date, y=trend))
p2 <- p + geom_line() + ylab("Trend")

p <- ggplot(ggiphone.stl, aes(x=Date, y=seasonal))
p3 <- p + geom_line() + ylab("Seasonal")

p <- ggplot(ggiphone.stl, aes(x=Date, y=remainder))
p4 <- p + geom_bar(stat="identity", position=position_dodge()) + ylab("Remainder")

p <- ggplot(ggiphone.stl, aes(x=Date, y=(seasonal/trend)*100))
p5 <- p + geom_line(stat="identity", position="dodge") + ylab("Seasonal/\nTrend (pct)")
grid.newpage()
vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)
pushViewport(viewport(layout = grid.layout(5, 1)))
print(p1, vp = vplayout(1, 1))
print(p2, vp = vplayout(2, 1))
print(p3, vp = vplayout(3, 1))
print(p4, vp = vplayout(4, 1))
print(p5, vp = vplayout(5, 1))
STL decomposition in ggplot

5 Try it for the Mac and iPad data