# install.packages("metacoder")
# install.packages("readr")
library(readr)
library(metacoder)
library(dplyr)
Although metacoder
and taxa
have been designed for use with taxonomic data, any data that can be assigned to a hierarchy can be used. To demonstrate this, we have used metacoder to display the results of the 2016 Democratic primary election.
We will use the readr
package to read in the data. You can download the data here.
raw_data <- read_csv("primary_results.csv")
raw_data
## # A tibble: 24,611 x 11
## state state_abbreviat… county fips party candidate votes fraction_votes division region country
## <chr> <chr> <chr> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Alaba… AL Autau… 1001 Demo… Bernie S… 544 0.182 SE Cent… South USA
## 2 Alaba… AL Autau… 1001 Demo… Hillary … 2387 0.8 SE Cent… South USA
## 3 Alaba… AL Baldw… 1003 Demo… Bernie S… 2694 0.329 SE Cent… South USA
## 4 Alaba… AL Baldw… 1003 Demo… Hillary … 5290 0.647 SE Cent… South USA
## 5 Alaba… AL Barbo… 1005 Demo… Bernie S… 222 0.078 SE Cent… South USA
## 6 Alaba… AL Barbo… 1005 Demo… Hillary … 2567 0.906 SE Cent… South USA
## 7 Alaba… AL Bibb 1007 Demo… Bernie S… 246 0.197 SE Cent… South USA
## 8 Alaba… AL Bibb 1007 Demo… Hillary … 942 0.755 SE Cent… South USA
## 9 Alaba… AL Blount 1009 Demo… Bernie S… 395 0.386 SE Cent… South USA
## 10 Alaba… AL Blount 1009 Demo… Hillary … 564 0.551 SE Cent… South USA
## # … with 24,601 more rows
Instead of have taxonomic ranks in columns, this data has regions in columns, but its the same idea.
obj <- parse_tax_data(raw_data,
class_cols = c("country", "region", "division", "state", "county"),
named_by_rank = TRUE)
obj
## <Taxmap>
## 4279 taxa: aab. USA, aac. South ... gio. Teton-Sublette, gip. Uinta-Lincoln
## 4279 edges: NA->aab, aab->aac, aab->aad, aab->aae ... acl->gin, acl->gio, acl->gip
## 1 data sets:
## tax_data:
## # A tibble: 24,611 x 12
## taxon_id state state_abbreviat… county fips party candidate votes fraction_votes
## <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 acm Alab… AL Autau… 1001 Demo… Bernie S… 544 0.182
## 2 acm Alab… AL Autau… 1001 Demo… Hillary … 2387 0.8
## 3 acn Alab… AL Baldw… 1003 Demo… Bernie S… 2694 0.329
## # … with 2.461e+04 more rows, and 3 more variables: division <chr>, region <chr>,
## # country <chr>
## 0 functions:
We have now need to sum the data for geographic region.
obj$data$totals <- obj %>%
filter_obs("tax_data", candidate == "Bernie Sanders") %>%
calc_taxon_abund("tax_data", cols = "votes", out_names = "bernie")
obj$data$totals <- obj %>%
filter_obs("tax_data", candidate == "Hillary Clinton") %>%
calc_taxon_abund("tax_data", cols = "votes", out_names = "hillary") %>%
left_join(obj$data$totals)
obj <- mutate_obs(obj, "totals", total = bernie + hillary)
I will get a list of the “taxon” IDs for the county in each state with the most votes. These will the only counties labeled in the plot below.
top_counties <- unlist(subtaxa_apply(obj, subset = taxon_ranks == "state", value = "total",
function(x) names(x[which.max(x)])))
obj %>%
heat_tree(node_size = total,
node_size_range = c(0.0002, 0.06),
node_color = (hillary - bernie) / total * 100,
edge_label = ifelse(taxon_ids %in% top_counties | n_supertaxa <= 3, taxon_names, ""),
edge_label_size_trans = "area",
edge_label_size_range = c(0.008, 0.025),
node_color_range = c("#a6611a", "lightgray", "#018571"),
node_color_interval = c(-50, 50),
edge_color_range = c("#a6611a", "lightgray", "#018571"),
edge_color_interval = c(-50, 50),
node_color_axis_label = "Clinton Sanders",
node_size_axis_label = "Total votes",
repel_labels = FALSE,
output_file = "voting.png")
Places colored green cast more votes for Hillary Clinton and places colored brown cast more votes for Bernie Sanders.