-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTidyRbasics.Rmd
115 lines (90 loc) · 2.5 KB
/
TidyRbasics.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
---
title: "Tidying Data in R"
author: "T8"
date: "August 15, 2018"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
```
Calling a masked function
packageName::functionName()
e.g.
stats::filter()
```{r, warning = F, message=F}
library(dplyr)
library(tidyr)
```
Read in data from [Mike Byerly. 2016. Alaska commercial salmon catches by management region (1886- 1997). Gulf of Alaska Data Portal.](https://knb.ecoinformatics.org/#view/df35b.304.2)
```{r}
# Read the catch table directly from its URL, keeping text columns as
# character vectors instead of factors.
catch_url <- "https://knb.ecoinformatics.org/knb/d1/mn/v2/object/df35b.302.1"
catch_df <- read.csv(
  url(catch_url, method = "libcurl"),
  stringsAsFactors = FALSE
)
# Preview the first rows.
head(catch_df)
```
```{r}
# Drop the All and notesRegCode columns and overwrite catch_df with the result.
catch_df <- select(catch_df, -All, -notesRegCode)
```
Transpose from wide to long format using gather, and rename the Catch column
```{r}
# Stack every column except Region and Year into Species/Catch pairs.
catch_long <- gather(catch_df, key = "Species", value = "Catch", -Region, -Year)
# Rename Catch to catchThousands.
catch_df <- rename(catch_long, catchThousands = Catch)
head(catch_df)
```
Let's Count Fish
1 fish 2 fish red fish lake
```{r}
# Try converting catchThousands to integer; entries that cannot be parsed
# come out as NA, which exposes the problematic values.
catchInt <- as.integer(catch_df$catchThousands)
# is.na() already returns a logical vector, so it is passed to which()
# directly rather than being compared against TRUE.
naIx <- which(is.na(catchInt))
# Show the original values at the positions that came out as NA.
catch_df$catchThousands[naIx]
```
Turn the "I" value into a 1,
coerce catchThousands into an integer type,
and convert catchThousands into units of individual fish.
```{r}
# Clean and convert in a single mutate(); later arguments see the columns
# produced by earlier ones.
catch_df <- mutate(
  catch_df,
  # Replace the "I" entry with 1, then convert the column to integer.
  catchThousands = as.integer(
    ifelse(catchThousands == "I", 1, catchThousands)
  ),
  # Scale the value in thousands by 1000.
  Catch = catchThousands * 1000
)
tail(catch_df)
```
# Split Apply Combine
```{r}
# Split-apply-combine: mean, standard deviation and number of observations
# of Catch for Chinook, per Region.
catchSummarized <- catch_df %>%
  # Keep only Chinook before grouping so other species are never summarised.
  filter(Species == "Chinook") %>%
  group_by(Region, Species) %>%
  summarise(
    meanCatch = mean(Catch),
    sdCatch = sd(Catch),
    numObs = n()
  ) %>%
  # summarise() only removes the last grouping variable; drop the remaining
  # grouping so later verbs are not silently applied per Region.
  ungroup()
head(catchSummarized)
```
Filter by year then group
```{r}
# Total Catch per Year and Species, restricted to years after 1975 or
# before 1925.
catchYear <- catch_df %>%
  filter(Year > 1975 | Year < 1925) %>%
  group_by(Year, Species) %>%
  summarise(catchSum = sum(Catch)) %>%
  # summarise() leaves the result grouped by Year; return a plain,
  # ungrouped table instead.
  ungroup()
head(catchYear)
```
# Joins
Read the region definitions file
```{r}
# Read the region definitions table directly from its URL, keeping text
# columns as character vectors instead of factors.
region_url <- "https://knb.ecoinformatics.org/knb/d1/mn/v2/object/df35b.303.1"
region_defs <- read.csv(
  url(region_url, method = "libcurl"),
  stringsAsFactors = FALSE
)
# Preview the first rows.
head(region_defs)
```
```{r}
# Keep only the code and mgmtArea columns.
region_defs <- select(region_defs, code, mgmtArea)
head(region_defs)
```
```{r}
# Attach the region definitions to every catch row (Region matches code),
# then reorder the columns so mgmtArea sits next to Region.
catch_joined <- catch_df %>%
  left_join(region_defs, by = c("Region" = "code")) %>%
  select(Region, mgmtArea, Year, Species, catchThousands, Catch)
head(catch_joined)
```