R
environment.
# load tidyverse
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# download and import the data
# The `?raw=true` query asks GitHub for the raw CSV rather than the HTML page.
# The file's first column has no header, so readr names it `...1`.
Matt_ebird <- read_csv(
  "https://github.com/mbtoomey/Biol_7263/blob/main/Data/MBT_ebird.csv?raw=true"
)
## New names:
## Rows: 6595 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): list_ID, common_name, scientific_name, location dbl (8): ...1, count,
## duration, latitude, longitude, count_tot, month, year date (1): date time (1):
## time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# inspect column names, types and the first few values of each column
Matt_ebird %>%
  glimpse()
## Rows: 6,595
## Columns: 14
## $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ list_ID <chr> "S40748758", "S33616660", "S33809874", "S35533959", "S…
## $ common_name <chr> "Snow Goose", "Snow Goose", "Snow Goose", "Snow Goose"…
## $ scientific_name <chr> "Anser caerulescens", "Anser caerulescens", "Anser cae…
## $ date <date> 2017-11-26, 2017-01-12, 2017-01-20, 2017-03-30, 2017-…
## $ time <time> 10:28:00, 07:00:00, 16:26:00, 07:05:00, 07:00:00, 18:…
## $ count <dbl> 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 26, 30, 6, 31,…
## $ duration <dbl> 20, 90, 59, 100, 127, 68, 109, 98, 173, 45, 118, 85, 1…
## $ location <chr> "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", …
## $ latitude <dbl> 38.87193, 38.63891, 38.63891, 38.63891, 38.63891, 38.6…
## $ longitude <dbl> -90.18439, -90.28538, -90.28538, -90.28538, -90.28538,…
## $ count_tot <dbl> 369, 272, 188, 283, 369, 28, 247, 237, 137, 114, 108, …
## $ month <dbl> 11, 1, 1, 3, 4, 4, 4, 4, 4, 5, 6, 8, 1, 2, 12, 11, 11,…
## $ year <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, …
# create a group by year
# `Matt_ebird` is overwritten with its grouped version, so verbs applied to it
# from here on operate per year.
Matt_ebird <- group_by(Matt_ebird, year)
# summarize the 'count_tot' variable by year groups
# one row per year; `na.rm = TRUE` ignores missing `count_tot` values
summarize(Matt_ebird, max_bird = max(count_tot, na.rm = TRUE))
## # A tibble: 13 × 2
## year max_bird
## <dbl> <dbl>
## 1 2003 18
## 2 2004 228
## 3 2009 25
## 4 2013 106
## 5 2014 469
## 6 2015 253
## 7 2016 87
## 8 2017 515
## 9 2018 275
## 10 2019 88
## 11 2020 3154
## 12 2021 696
## 13 2022 582
# ungroup by year
# `ungroup()` returns a new tibble; assign it back, otherwise `Matt_ebird`
# itself stays grouped by year and only an ungrouped copy is printed.
Matt_ebird <- ungroup(Matt_ebird)
Matt_ebird
## # A tibble: 6,595 × 14
## ...1 list_ID commo…¹ scien…² date time count durat…³ locat…⁴ latit…⁵
## <dbl> <chr> <chr> <chr> <date> <tim> <dbl> <dbl> <chr> <dbl>
## 1 1 S407487… Snow G… Anser … 2017-11-26 10:28 16 20 US-MO 38.9
## 2 2 S336166… Snow G… Anser … 2017-01-12 07:00 1 90 US-MO 38.6
## 3 3 S338098… Snow G… Anser … 2017-01-20 16:26 1 59 US-MO 38.6
## 4 4 S355339… Snow G… Anser … 2017-03-30 07:05 1 100 US-MO 38.6
## 5 5 S356980… Snow G… Anser … 2017-04-04 07:00 1 127 US-MO 38.6
## 6 6 S358612… Snow G… Anser … 2017-04-10 18:06 1 68 US-MO 38.6
## 7 7 S359184… Snow G… Anser … 2017-04-13 06:59 1 109 US-MO 38.6
## 8 8 S361181… Snow G… Anser … 2017-04-20 07:00 1 98 US-MO 38.6
## 9 9 S361989… Snow G… Anser … 2017-04-23 08:13 1 173 US-MO 38.6
## 10 10 S368405… Snow G… Anser … 2017-05-13 18:00 1 45 US-MO 38.6
## # … with 6,585 more rows, 4 more variables: longitude <dbl>, count_tot <dbl>,
## # month <dbl>, year <dbl>, and abbreviated variable names ¹common_name,
## # ²scientific_name, ³duration, ⁴location, ⁵latitude
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# pull out all 2020 data
# `year` is a numeric (<dbl>) column, so compare against a number, not "2020"
Matt_ebird_2020 <- filter(Matt_ebird, year == 2020)
# group by species
# `group_by()` replaces any grouping carried over from `Matt_ebird`
Matt_ebird_2020 <- group_by(Matt_ebird_2020, scientific_name)
# print the dimensions of the table produced from summarizing by scientific name
# `summarize()` with no summary expressions returns one row per group, so the
# first dimension is the number of distinct species recorded in 2020
dim(summarize(Matt_ebird_2020))
## [1] 146 1
# pull out all Red-winged blackbird observations
Matt_ebird_RWBB <- filter(Matt_ebird, common_name == "Red-winged Blackbird")
# group by location
# `group_by()` replaces any grouping carried over from `Matt_ebird`
Matt_ebird_RWBB <- group_by(Matt_ebird_RWBB, location)
# summarize the data by location
# one row per location holding the sum of `count_tot` over its rows
summarise(Matt_ebird_RWBB, state_total = sum(count_tot))
## # A tibble: 5 × 2
## location state_total
## <chr> <dbl>
## 1 US-FL 168
## 2 US-IL 30
## 3 US-MO 8443
## 4 US-OK 6861
## 5 US-VT 391
# create a new dataset by filtering observations for duration between 5-200 minutes
# both bounds are strict: rows with duration exactly 5 or 200 are dropped
Matt_ebird_duration <- filter(Matt_ebird, duration > 5 & duration < 200)
# group by list_ID (which I think is "checklist")
Matt_ebird_duration <- group_by(Matt_ebird_duration, list_ID)
# create a new variable `rate` for each list_ID
# `count / duration` is computed row by row, so every observation gets its own
# rate regardless of the grouping
Matt_ebird_duration <- mutate(Matt_ebird_duration, rate = count / duration)
# ungroup and group by year
# `ungroup()` returns a new tibble; assign it back so the list_ID grouping is
# actually removed from `Matt_ebird_duration`, then print the result
Matt_ebird_duration <- ungroup(Matt_ebird_duration)
Matt_ebird_duration
## # A tibble: 5,824 × 15
## ...1 list_ID commo…¹ scien…² date time count durat…³ locat…⁴ latit…⁵
## <dbl> <chr> <chr> <chr> <date> <tim> <dbl> <dbl> <chr> <dbl>
## 1 1 S407487… Snow G… Anser … 2017-11-26 10:28 16 20 US-MO 38.9
## 2 2 S336166… Snow G… Anser … 2017-01-12 07:00 1 90 US-MO 38.6
## 3 3 S338098… Snow G… Anser … 2017-01-20 16:26 1 59 US-MO 38.6
## 4 4 S355339… Snow G… Anser … 2017-03-30 07:05 1 100 US-MO 38.6
## 5 5 S356980… Snow G… Anser … 2017-04-04 07:00 1 127 US-MO 38.6
## 6 6 S358612… Snow G… Anser … 2017-04-10 18:06 1 68 US-MO 38.6
## 7 7 S359184… Snow G… Anser … 2017-04-13 06:59 1 109 US-MO 38.6
## 8 8 S361181… Snow G… Anser … 2017-04-20 07:00 1 98 US-MO 38.6
## 9 9 S361989… Snow G… Anser … 2017-04-23 08:13 1 173 US-MO 38.6
## 10 10 S368405… Snow G… Anser … 2017-05-13 18:00 1 45 US-MO 38.6
## # … with 5,814 more rows, 5 more variables: longitude <dbl>, count_tot <dbl>,
## # month <dbl>, year <dbl>, rate <dbl>, and abbreviated variable names
## # ¹common_name, ²scientific_name, ³duration, ⁴location, ⁵latitude
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# regroup the duration-filtered observations by year
Matt_ebird_duration <- group_by(Matt_ebird_duration, year)
# calculate the mean rate of species encountered by year
# one row per year; `mean()` is called without `na.rm`, so a missing `rate`
# in a year would make that year's mean NA
summarise(Matt_ebird_duration, mean_rate = mean(rate))
## # A tibble: 13 × 2
## year mean_rate
## <dbl> <dbl>
## 1 2003 0.0185
## 2 2004 0.0771
## 3 2009 0.0521
## 4 2013 0.0390
## 5 2014 0.0978
## 6 2015 0.0842
## 7 2016 0.0483
## 8 2017 0.0847
## 9 2018 0.0595
## 10 2019 0.0311
## 11 2020 0.542
## 12 2021 0.0782
## 13 2022 0.218
.csv
file saved to a folder called “Results”
within your R project and add a link to the markdown document.
# group by species
# regroup the full dataset so the totals below are computed per species
Matt_ebird <- group_by(Matt_ebird, scientific_name)
# create a top 10 list
# total `count` per species, sorted largest first, keeping the first 10 rows.
# `arrange()` has no `by` argument; the sort key is passed positionally.
ebird_top_10 <- Matt_ebird %>%
  summarise(freq = sum(count)) %>%
  arrange(desc(freq)) %>%
  head(10)
# filter the original dataset using the top 10 list
# Take the species names as a plain character vector. Comparing a column with
# a 1 x 1 tibble such as `ebird_top_10[1, 1]` gives `filter()` a data-frame
# comparison rather than a logical vector.
top_species <- ebird_top_10$scientific_name
# A single `%in%` filter replaces the ten per-species filters (F1..F10).
# Sorting on each species' position in `top_species` puts the rows in top-10
# order (most frequent species first), matching the F1..F10 order of the
# original chained joins; `arrange()` keeps the existing order within a species.
ebird_top_10_obs <- Matt_ebird %>%
  ungroup() %>%
  filter(scientific_name %in% top_species) %>%
  arrange(match(scientific_name, top_species))
# merge all the top ten datasets into a 'csv' saved in the Results folder
# create the folder first so `write_csv()` does not fail if it is missing
dir.create("Results", showWarnings = FALSE)
write_csv(ebird_top_10_obs, "Results/ebird_top10.csv")
.csv
file.