Load the required packages, download the data, and import it into the R environment.

# load tidyverse
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# download and import the data
Matt_ebird <- read_csv("https://github.com/mbtoomey/Biol_7263/blob/main/Data/MBT_ebird.csv?raw=true")
## New names:
## Rows: 6595 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): list_ID, common_name, scientific_name, location dbl (8): ...1, count,
## duration, latitude, longitude, count_tot, month, year date (1): date time (1):
## time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# view the headers
glimpse(Matt_ebird)
## Rows: 6,595
## Columns: 14
## $ ...1            <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ list_ID         <chr> "S40748758", "S33616660", "S33809874", "S35533959", "S…
## $ common_name     <chr> "Snow Goose", "Snow Goose", "Snow Goose", "Snow Goose"…
## $ scientific_name <chr> "Anser caerulescens", "Anser caerulescens", "Anser cae…
## $ date            <date> 2017-11-26, 2017-01-12, 2017-01-20, 2017-03-30, 2017-…
## $ time            <time> 10:28:00, 07:00:00, 16:26:00, 07:05:00, 07:00:00, 18:…
## $ count           <dbl> 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 26, 30, 6, 31,…
## $ duration        <dbl> 20, 90, 59, 100, 127, 68, 109, 98, 173, 45, 118, 85, 1…
## $ location        <chr> "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", "US-MO", …
## $ latitude        <dbl> 38.87193, 38.63891, 38.63891, 38.63891, 38.63891, 38.6…
## $ longitude       <dbl> -90.18439, -90.28538, -90.28538, -90.28538, -90.28538,…
## $ count_tot       <dbl> 369, 272, 188, 283, 369, 28, 247, 237, 137, 114, 108, …
## $ month           <dbl> 11, 1, 1, 3, 4, 4, 4, 4, 4, 5, 6, 8, 1, 2, 12, 11, 11,…
## $ year            <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, …

1. In which year did I observe the most individual birds? How many?

# create a group by year
Matt_ebird <- group_by(Matt_ebird, year)

# summarize the 'count_tot' variable by year groups
summarize(Matt_ebird, max_bird=max(count_tot, na.rm = TRUE))
## # A tibble: 13 × 2
##     year max_bird
##    <dbl>    <dbl>
##  1  2003       18
##  2  2004      228
##  3  2009       25
##  4  2013      106
##  5  2014      469
##  6  2015      253
##  7  2016       87
##  8  2017      515
##  9  2018      275
## 10  2019       88
## 11  2020     3154
## 12  2021      696
## 13  2022      582

In 2020, 3154 birds were recorded.

2. In that year how many different species of birds did I observe?

# ungroup by year
ungroup(Matt_ebird)
## # A tibble: 6,595 × 14
##     ...1 list_ID  commo…¹ scien…² date       time  count durat…³ locat…⁴ latit…⁵
##    <dbl> <chr>    <chr>   <chr>   <date>     <tim> <dbl>   <dbl> <chr>     <dbl>
##  1     1 S407487… Snow G… Anser … 2017-11-26 10:28    16      20 US-MO      38.9
##  2     2 S336166… Snow G… Anser … 2017-01-12 07:00     1      90 US-MO      38.6
##  3     3 S338098… Snow G… Anser … 2017-01-20 16:26     1      59 US-MO      38.6
##  4     4 S355339… Snow G… Anser … 2017-03-30 07:05     1     100 US-MO      38.6
##  5     5 S356980… Snow G… Anser … 2017-04-04 07:00     1     127 US-MO      38.6
##  6     6 S358612… Snow G… Anser … 2017-04-10 18:06     1      68 US-MO      38.6
##  7     7 S359184… Snow G… Anser … 2017-04-13 06:59     1     109 US-MO      38.6
##  8     8 S361181… Snow G… Anser … 2017-04-20 07:00     1      98 US-MO      38.6
##  9     9 S361989… Snow G… Anser … 2017-04-23 08:13     1     173 US-MO      38.6
## 10    10 S368405… Snow G… Anser … 2017-05-13 18:00     1      45 US-MO      38.6
## # … with 6,585 more rows, 4 more variables: longitude <dbl>, count_tot <dbl>,
## #   month <dbl>, year <dbl>, and abbreviated variable names ¹​common_name,
## #   ²​scientific_name, ³​duration, ⁴​location, ⁵​latitude
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
# pull out all 2020 data
Matt_ebird_2020 <- filter(Matt_ebird, year == "2020")

# group by species
Matt_ebird_2020 <- group_by(Matt_ebird_2020, scientific_name)

# print the dimensions of the table produced from summarizing by scientific name
dim(summarize(Matt_ebird_2020))
## [1] 146   1

There are 146 species recorded for 2020.

3. In which state did I most frequently observe Red-winged Blackbirds?

# pull out all Red-winged blackbird observations
Matt_ebird_RWBB <- filter(Matt_ebird, common_name == "Red-winged Blackbird")

# group by location
Matt_ebird_RWBB <-group_by(Matt_ebird_RWBB, location)

# summarize the data by location
summarise(Matt_ebird_RWBB, state_total=sum(count_tot))
## # A tibble: 5 × 2
##   location state_total
##   <chr>          <dbl>
## 1 US-FL            168
## 2 US-IL             30
## 3 US-MO           8443
## 4 US-OK           6861
## 5 US-VT            391

The most Red-winged Blackbirds were observed in US-MO.

4. Filter observations for a duration between 5 and 200 minutes. Calculate the mean rate per checklist that I encounter species each year. Specifically, calculate the number of species in each checklist divided by duration and then take the mean for the year.

# create a new dataset by filtering observations for duration between 5-200 minutes
Matt_ebird_duration <- filter(Matt_ebird, duration > 5 & duration < 200)

# group by list_ID (which I think is "checklist")
Matt_ebird_duration <- group_by(Matt_ebird_duration, list_ID)

# create a new variable `rate` for each list_ID
Matt_ebird_duration <- mutate(Matt_ebird_duration, rate = (count/duration))

# ungroup and group by year
ungroup(Matt_ebird_duration)
## # A tibble: 5,824 × 15
##     ...1 list_ID  commo…¹ scien…² date       time  count durat…³ locat…⁴ latit…⁵
##    <dbl> <chr>    <chr>   <chr>   <date>     <tim> <dbl>   <dbl> <chr>     <dbl>
##  1     1 S407487… Snow G… Anser … 2017-11-26 10:28    16      20 US-MO      38.9
##  2     2 S336166… Snow G… Anser … 2017-01-12 07:00     1      90 US-MO      38.6
##  3     3 S338098… Snow G… Anser … 2017-01-20 16:26     1      59 US-MO      38.6
##  4     4 S355339… Snow G… Anser … 2017-03-30 07:05     1     100 US-MO      38.6
##  5     5 S356980… Snow G… Anser … 2017-04-04 07:00     1     127 US-MO      38.6
##  6     6 S358612… Snow G… Anser … 2017-04-10 18:06     1      68 US-MO      38.6
##  7     7 S359184… Snow G… Anser … 2017-04-13 06:59     1     109 US-MO      38.6
##  8     8 S361181… Snow G… Anser … 2017-04-20 07:00     1      98 US-MO      38.6
##  9     9 S361989… Snow G… Anser … 2017-04-23 08:13     1     173 US-MO      38.6
## 10    10 S368405… Snow G… Anser … 2017-05-13 18:00     1      45 US-MO      38.6
## # … with 5,814 more rows, 5 more variables: longitude <dbl>, count_tot <dbl>,
## #   month <dbl>, year <dbl>, rate <dbl>, and abbreviated variable names
## #   ¹​common_name, ²​scientific_name, ³​duration, ⁴​location, ⁵​latitude
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
Matt_ebird_duration <- group_by(Matt_ebird_duration, year)

# calculate the mean rate of species encountered by year
summarise(Matt_ebird_duration, mean_rate=mean(rate))
## # A tibble: 13 × 2
##     year mean_rate
##    <dbl>     <dbl>
##  1  2003    0.0185
##  2  2004    0.0771
##  3  2009    0.0521
##  4  2013    0.0390
##  5  2014    0.0978
##  6  2015    0.0842
##  7  2016    0.0483
##  8  2017    0.0847
##  9  2018    0.0595
## 10  2019    0.0311
## 11  2020    0.542 
## 12  2021    0.0782
## 13  2022    0.218

Click Top_10_Bird_Obs to download the output .csv file.