How the power and wealth of a country affect its results at the Olympic Games
# Packages used throughout the analysis. String delimiters must be plain
# ASCII quotes; typographic quotes are not valid R syntax.
library("dplyr")
library("ggplot2")
library("tidyr")
library("rvest")
library("methods")

# Use the black-and-white ggplot2 theme for all plots.
theme_set(theme_bw())

# URL template of the Wikipedia medal table; the "1960" part is replaced by
# the requested year when scraping.
base_url <- "https://en.wikipedia.org/wiki/1960_Summer_Olympics_medal_table"
#' Scrape the Wikipedia medal table of one Summer Olympics.
#'
#' @param year A single year (e.g. 2016). It is substituted for "1960" in
#'   the global `base_url` to build the page address.
#' @return A data frame with the scraped medal table minus its last (totals)
#'   row, cleaned `Nation` values and a leading `Year` column.
scrape_medaltab <- function(year) {
  stopifnot(length(year) == 1)

  # Table number depends on year: index 2 for the listed years, otherwise 1.
  second_table_years <- c(1960, 1984, 1992, 1996, 2000, 2004, 2008, 2012, 2016)
  tableNumber <- if (year %in% second_table_years) 2 else 1

  table_xpath <- paste0(
    "//*[@id=\"mw-content-text\"]/div//table[", tableNumber, "]"
  )
  medals <- gsub("1960", year, base_url) %>%
    read_html() %>%
    html_nodes(xpath = table_xpath) %>%
    html_table(fill = TRUE)
  medals <- medals[[1]]

  # Sometimes the nation column is called "NOC", and "Rank" is also
  # " Rank " once.
  names(medals) <- gsub("NOC", "Nation", names(medals))
  names(medals) <- gsub("\\sRank\\s", "Rank", names(medals))

  # Remove total row (the last row of the table).
  medals <- medals %>% filter(row_number() < n())

  # Massage country names. The steps run in order, each on the result of the
  # previous one.
  medals <- medals %>%
    mutate(
      Nation = gsub("\\*", "", Nation), # host nation
      Nation = gsub("‡", "", Nation), # changes in medals
      Nation = gsub("^\\s", "", Nation),
      # Trailing upper-case code in parentheses, e.g. "(USA)". A POSIX class
      # is used because "\\s" is not a whitespace shorthand inside a bracket
      # expression with R's default regex engine.
      Nation = gsub("\\([A-Z[:space:]]+\\)$", "", Nation),
      Nation = gsub("\\s$", "", Nation)
    )

  cbind(Year = year, medals)
}
Years in which Summer Olympic Games were held
# One entry every four years, from 1960 up to and including 2016.
olympic_years <- seq(from = 1960, to = 2016, by = 4)
Extract the Olympic medal table for every Olympic year since 1960
# Scrape the medal table of each year, then stack the per-year tables.
medals <- olympic_years %>%
  lapply(scrape_medaltab) %>%
  bind_rows()
Show result
# Render the combined medal table as an interactive HTML widget.
DT::datatable(data = medals)
We now plot the total number of medals awarded at each Summer Games in the period 1960-2016.
# Total number of medals awarded at each Games.
# NOTE(review): `medals_gm` is not created in the visible part of this
# script; it must already exist at this point -- confirm where it is built.
nTotal <- medals_gm %>%
  group_by(Year) %>%
  summarise(TotalOfGames = sum(Total))

# Line plot of the per-Games medal total over time.
ggplot(nTotal, aes(x = Year, y = TotalOfGames)) +
  geom_line() +
  ylab("Total number of medals per Summer Games")
A distinct increasing trend is observed in the above figure. Hence, in order to make between-country comparisons over time based on the number of medals won, we normalize each country's medal count by the total number of medals awarded during the corresponding games. The result is stored in the column Frac.
# Attach each Games' medal total and compute every row's share of it (Frac).
medals_gm <- medals_gm %>%
  left_join(nTotal, by = "Year") %>%
  mutate(Frac = Total / TotalOfGames)
After all these pre-processing steps, we can now compare country results for all summer games in the period 2000-2016.
Add city name for better visualization
# Host city of each Games, in the same order as `olympic_years`.
olympic_city <- data.frame(
  Year = olympic_years,
  City = c(
    "Rome", "Tokyo", "Mexico City", "Munich", "Montreal", "Moscow",
    "Los Angeles", "Seoul", "Barcelona", "Atlanta", "Sydney", "Athens",
    "Beijing", "London", "Rio"
  )
)

# Add the city and a combined "<Year> – <City>" label used for faceting.
medals_gm <- medals_gm %>%
  left_join(olympic_city, by = "Year") %>%
  mutate(YearCity = paste0(Year, " – ", City))
Restrict dataset to Year >= 2000
# Keep only the rows of the Games held in 2000 or later.
medals_gm2000 <- filter(medals_gm, Year >= 2000)
Plot
# Scatter plot of medal share against GDP per capita: point size encodes
# population, colour encodes continent, and each point is labelled with the
# nation name.
p1 <- ggplot(
  medals_gm2000,
  aes(x = GDPpc, y = Frac, size = Population, colour = Continent)
) +
  geom_point() +
  # data, x and y are inherited from the ggplot() call; only the label is new.
  geom_text(aes(label = Nation), vjust = -1, show.legend = FALSE) +
  scale_x_log10() +
  scale_y_sqrt(labels = scales::percent) +
  xlab("GDP per Capita") +
  ylab("Fraction of All Medals")

# One panel per Games, laid out side by side.
p1 + facet_grid(. ~ YearCity)