Olympic Games winners come from G8 countries

The power and wealth of a country affect its results at the Olympic Games

# Packages used throughout the analysis.
library("dplyr")
library("ggplot2")
library("tidyr")
library("rvest")
library("methods")

# Use the black-and-white ggplot2 theme for every plot below.
theme_set(theme_bw())

# URL template for the Wikipedia medal-table pages; the "1960" part is
# replaced by the requested year when a page is fetched.
base_url <- "https://en.wikipedia.org/wiki/1960_Summer_Olympics_medal_table"

# Scrape the Summer Olympics medal table for a single year from Wikipedia.
#
# The page URL is built by substituting `year` for "1960" in the global
# `base_url`. One table is taken from the article body, its header names are
# harmonised, the last (totals) row is dropped and the country names are
# stripped of footnote markers and the trailing country code in parentheses.
#
# Args:
#   year: a single Olympic year, e.g. 2000.
#
# Returns:
#   A data frame with a leading `Year` column followed by the columns of the
#   scraped medal table.
scrape_medaltab <- function(year) {
  stopifnot(length(year) == 1L)

  # Table number depends on year: for the years listed here the second
  # matching table is used, for all other years the first one.
  years_using_second_table <- c(
    1960, 1984, 1992, 1996, 2000, 2004, 2008, 2012, 2016
  )
  table_number <- if (year %in% years_using_second_table) 2 else 1

  page_url <- gsub("1960", year, base_url)
  table_xpath <- paste0(
    '//*[@id="mw-content-text"]/div//table[', table_number, "]"
  )

  tables <- page_url %>%
    read_html() %>%
    html_nodes(xpath = table_xpath) %>%
    html_table(fill = TRUE)
  if (length(tables) == 0L) {
    stop(
      "No medal table found for year ", year, " at ", page_url,
      call. = FALSE
    )
  }
  medals <- tables[[1]]

  # Sometimes the nation column is called NOC, and "Rank" is also " Rank " once.
  names(medals) <- gsub("NOC", "Nation", names(medals))
  names(medals) <- gsub("[[:space:]]Rank[[:space:]]", "Rank", names(medals))

  # Remove the total row (the last row of the table).
  medals <- head(medals, -1)

  # Massage country names. POSIX classes are used instead of "\\s" because
  # "\\s" is not reliably interpreted inside a bracket expression.
  edge_space <- "^[[:space:]]+|[[:space:]]+$"
  medals <- medals %>%
    mutate(
      Nation = gsub("*", "", Nation, fixed = TRUE),      # host nation marker
      Nation = gsub("\u2021", "", Nation, fixed = TRUE), # double dagger: changes in medals
      Nation = gsub(edge_space, "", Nation),
      Nation = gsub("\\([A-Z[:space:]]+\\)$", "", Nation), # trailing "(XXX)" code
      Nation = gsub(edge_space, "", Nation)
    )

  cbind(Year = year, medals)
}

Years in which the Olympic Games were held

# Every fourth year from 1960 up to and including 2016.
olympic_years <- seq(from = 1960, to = 2016, by = 4)

Extract the Olympic medal tables for all Olympic years since 1960

# Scrape one medal table per Olympic year and stack them into one data frame.
medals <- olympic_years %>%
  lapply(scrape_medaltab) %>%
  bind_rows()

Show result

# Render the combined medal table as an interactive HTML widget.
DT::datatable(data = medals)

We now plot the total number of medals awarded at each Summer Games in the period 1960-2016.

# NOTE(review): `medals_gm` is not defined anywhere in the visible code;
# presumably it is `medals` joined with per-country data. Confirm where it is
# created before running this.

# Total number of medals awarded at each Summer Games.
nTotal <- medals_gm %>%
  group_by(Year) %>%
  summarise(TotalOfGames = sum(Total))

# Line plot of the per-Games medal total over time.
ggplot(nTotal, aes(x = Year, y = TotalOfGames)) +
  geom_line() +
  ylab("Total number of medals per Summer Games")

A distinct increasing trend is observed in the above figure. Hence, in order to make between-country comparisons over time based on the number of medals won, we normalize the medals by the total number of medals awarded during the corresponding games. The result is stored in the column Frac.

# Attach the per-Games medal total to every row and store each country's
# share of that total in the column `Frac`.
medals_gm <- medals_gm %>%
  left_join(nTotal, by = "Year") %>%
  mutate(Frac = Total / TotalOfGames)

After all these pre-processing steps, we can now compare country results for all summer games in the period 2000-2016.

Add city name for better visualization

# Host city of each Games, in the same order as `olympic_years`.
olympic_city <- data.frame(
  Year = olympic_years,
  City = c(
    "Rome", "Tokyo", "Mexico City", "Munich", "Montreal",
    "Moscow", "Los Angeles", "Seoul", "Barcelona", "Atlanta",
    "Sydney", "Athens", "Beijing", "London", "Rio"
  )
)

# Add the host city and a combined "<year> <en dash> <city>" label
# (the "\u2013" escape is the en dash) that is used for faceting.
medals_gm <- left_join(medals_gm, olympic_city, by = "Year") %>%
  mutate(YearCity = paste0(Year, " \u2013 ", City))

Restrict dataset to Year >= 2000

# Keep only the Games held in 2000 or later.
medals_gm2000 <- filter(medals_gm, Year >= 2000)

Plot

# Scatter plot of medal share against GDP per capita: point size encodes
# population and colour encodes continent. The text layer inherits the x, y,
# size and colour mappings from ggplot() and only adds the nation label.
p1 <- ggplot(
  medals_gm2000,
  aes(x = GDPpc, y = Frac, size = Population, colour = Continent)
) +
  geom_point() +
  geom_text(aes(label = Nation), vjust = -1, show.legend = FALSE) +
  scale_x_log10() +
  scale_y_sqrt(labels = scales::percent) +
  xlab("GDP per Capita") +
  ylab("Fraction of All Medals")

# One panel per Games, laid out side by side.
p1 + facet_grid(. ~ YearCity)

Time series with ggplot, ggflags

Leave a comment

Design a site like this with WordPress.com
Get started