-
Notifications
You must be signed in to change notification settings - Fork 0
/
USGSdataRetrieval.R
2000 lines (1811 loc) · 96.9 KB
/
USGSdataRetrieval.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#Script to read USGS streamflow and water quality, and NOAA weather station data from gauges
#Author Credits:
# Portions of the function for the streamflow download were provided by:
# Caitline Barber (Caitline.Barber@tufts.edu) and Jonathan Lamontagne (Jonathan.Lamontagne@tufts.edu)
# Modified by Jared Smith (js4yd@virginia.edu) in June, 2019, and started git tracking.
# See git commit history for all later contributions
#Fixme: catch all print statements for each section and write to text file(s)
#Fixme: make each method a separate function or script
#Fixme: add methods for handling detection limits in time series
#Set directory names----
#Shapefile directory holding the region of interest (ROI)
dir_ROI <- "C:\\Users\\js4yd\\OneDrive - University of Virginia\\EnGauge\\EnGauge\\DataForExamples"
#Location of ColorFunctions.R (from the JDS Github repo: Geothermal_ESDA)
dir_ColFuns <- "C:\\Users\\js4yd\\OneDrive - University of Virginia\\BES_Data\\BES_Data\\Hydrology\\USGSGauges"
#Checkout of the EnGauge code repository
dir_EnGauge <- "C:\\Users\\js4yd\\OneDrive - University of Virginia\\EnGauge\\EnGauge"
#Download target for USGS streamflow gauge data (must already exist)
dir_sfgauges <- "C:\\Users\\js4yd\\OneDrive - University of Virginia\\EnGauge\\EnGauge\\DataForExamples"
#Download target for water quality gauge data (must already exist)
dir_wq <- "C:\\Users\\js4yd\\OneDrive - University of Virginia\\EnGauge\\EnGauge\\DataForExamples"
#Download target for weather station data (must already exist)
dir_weather <- "C:\\Users\\js4yd\\OneDrive - University of Virginia\\EnGauge\\EnGauge\\DataForExamples"
#DEM tile directories: one entry per tile to be mosaicked; order must match f_DEM below.
# Fixme: can DEMs be downloaded from a server instead of downloading manually before using this script?
dir_DEM <- c("C:\\Users\\js4yd\\OneDrive - University of Virginia\\EnGauge\\EnGauge\\DataForExamples\\grdn40w077_1",
             "C:\\Users\\js4yd\\OneDrive - University of Virginia\\EnGauge\\EnGauge\\DataForExamples\\grdn40w078_1")
#Output directory for the processed DEM (must already exist)
dir_DEM_out <- 'C:\\Users\\js4yd\\OneDrive - University of Virginia\\BES_Data\\BES_Data\\DEM\\'
#Set input filenames----
#ROI shapefile layer name (no extension)
f_ROI <- "Watershed_GF"
#Streamflow gauge stations and site coordinates (used by Method 2 for streamflow only)
f_StreamGaugeData <- "BES_USGS_GaugeStations.csv"
#Water quality gauge stations (used by Method 2 for water quality only)
f_WQgauges <- "BES_WaterQualityGaugeStations.csv"
#DEM tile file names: one entry per dir_DEM directory (e.g. c("w001001.adf", "w001002.adf") )
f_DEM <- c("w001001.adf", "w001001.adf")
#Set output filenames----
#NWIS streamflow gauges inside the ROI
f_NWIS_ROI_out <- 'NWIS_ROI'
#NWIS streamflow gauges inside the bounding box (Method 2 streamflow only)
f_NWIS_bb_out <- 'NWIS_bb'
#Suffix appended to processed streamflow gauge files ('_p' for processed), e.g. 0159384_p
f_sf_processKey <- '_p'
#List of all streamflow gauge timeseries (YAML list type is used below)
f_StreamStationList <- 'SF.yaml'
#Lists of all TN water quality site timeseries (YAML list type is used below)
f_TNSiteList <- 'TN.yaml'
f_TNSiteList_daily <- 'TN_d.yaml'
f_TNSiteList_monthly <- 'TN_m.yaml'
f_TNSiteList_annual <- 'TN_a.yaml'
#Lists of all TP water quality site timeseries (YAML list type is used below)
f_TPSiteList <- 'TP.yaml'
f_TPSiteList_daily <- 'TP_d.yaml'
f_TPSiteList_monthly <- 'TP_m.yaml'
f_TPSiteList_annual <- 'TP_a.yaml'
#NOAA weather station data locations
f_NOAAstationsROI <- 'NOAA_StationLocs'
#List of all NOAA weather station timeseries (YAML list type is used below)
f_NOAAstationsDataList <- 'NOAA_MetStations.yaml'
#Mosaicked DEM output; GeoTiff format is the default (changeable in the script).
#NOTE: the variable name keeps the historical "mosiac" spelling; later code references it.
f_DEM_mosiac <- "DEM_mosaic"
#Set project coordinate system----
#All data are plotted and written in this CRS; input data may be in a different CRS
# (although it could be the same).
# EPSG codes from: https://spatialreference.org/ref/?page=2
pCRS <- '+init=epsg:26918'
#Set plot limits - set to NULL to ignore use----
#Streamflow date range
xlim_dates <- c(as.Date("1950-01-01"), as.Date("2020-01-01"))
#Water quality date range
xlim_WQdates <- c(as.Date("1980-01-01"), as.Date("2010-01-01"))
#Streamflow y-axis limits
ylim_SF <- c(0, 7000)
#TN y-axis limits
ylim_TN <- c(0, 10)
#TP y-axis limits
ylim_TP <- c(0, 0.5)
#Temperature y-axis limits
ylim_Temp <- c(-20, 50)
#Define a small buffer to use for the ROI, in pCRS map units----
#Buffer for streamflow and water quality stations
ROIbuff <- 0
#Buffer for weather stations
ROIbuffWeather <- 20000
#Set high outlier quantile----
#Flows above this empirical quantile are treated as high outliers below
HighOutProb <- 0.99
#Load libraries and functions----
#USGS function library - note that a more recent version is available through Github
library(dataRetrieval)
#USGS EGRET library - currently not used
#library(EGRET)
#R libraries
library(stringi)
library(stringr)
library(foreach)
library(parallel)
library(doParallel)
#NOTE(review): sp, rgdal, maptools, and GISTools are legacy spatial packages
# (rgdal and maptools were retired from CRAN in 2023); consider migrating to
# sf/terra - TODO confirm package availability on the target machine.
library(sp)
library(rgdal)
library(maptools)
library(GISTools)
library(raster)
library(rlist)
library(lattice)
library(zoo)
library(lubridate)
library(hydroTSM)
library(rnoaa)
library(rappdirs)
#Color functions for plots (R script from Jared Smith's Geothermal_ESDA Github Repo)
setwd(dir_ColFuns)
source('ColorFunctions.R')
#Functions from repository - these provide the project helpers used throughout this
# script (addZeros, checkDuplicatesAndRemove, checkZerosNegs, FillMissingDates_par,
# formatMonthlyMatrix, etc.)
setwd(dir_EnGauge)
source('fdcDefaultModification.R')
source('missingDates.R')
source('addZerosToGaugeNames.R')
source('processDEM.R')
source('extractWQdata.R')
source('checkDuplicates.R')
source('checkZerosNegs.R')
source('formatMonthlyMatrix.R')
source('matplotDates.R')
source('aggregateTimeseries.R')
#Make Region of Interest (ROI) buffer----
#Read the ROI shapefile (layer f_ROI in dir_ROI) and project it to the project CRS
ROI = readOGR(dsn = dir_ROI, layer = f_ROI, stringsAsFactors = FALSE)
ROI = spTransform(ROI, CRS(pCRS))
#buffer the ROI: ROIbuff map units for streamflow/water quality stations,
# ROIbuffWeather map units for weather stations
ROI_buff = buffer(ROI, ROIbuff)
ROI_bufferW = buffer(ROI, width = ROIbuffWeather)
#Streamflow----
setwd(dir_sfgauges)
#Create a 'Streamflow' subdirectory for downloaded files (silently reused if it exists)
wd_sf <- file.path(getwd(), 'Streamflow')
dir.create(path = wd_sf, showWarnings = FALSE)
# Method 1: Get gauges using whatNWISsites()----
#Query NWIS for all Maryland sites reporting parameter 00060 (streamflow in cfs;
# same code as Par.cfsFlow defined below), then fetch full site metadata for them
AllStations_fn = whatNWISsites(statecode = "MD", parameterCd = '00060')
AllStations_fn = readNWISsite(AllStations_fn$site_no)
# Make a spatial dataframe: Method 1----
# NOTE: Your data may be all one coordinate system, and therefore not need to split into 2 datasets
# before joining into 1 dataset.
# NOTE: your coordinate system may be different (epsg code)
# Some of the data are NAD27 projection and others are NAD83 projection. Split the dataset to handle each
#Fixme: function for splitting coordinate systems and returning one same-coordinate system file
# Would require being able to look up epsg codes.
#vector of unique coordinate systems in station data
#NOTE(review): stations with a datum code other than NAD27/NAD83 are silently
# dropped below - inspect StreamUniqueCoords for unexpected datum codes.
StreamUniqueCoords = unique(AllStations_fn$coord_datum_cd)
#Process to the pCRS coordinate system
#epsg:4267 = NAD27 geographic coordinates
GaugesLocs_NAD27 = AllStations_fn[which(AllStations_fn$coord_datum_cd == 'NAD27'), ]
coordinates(GaugesLocs_NAD27) = c('dec_long_va', 'dec_lat_va')
proj4string(GaugesLocs_NAD27) = CRS('+init=epsg:4267')
#epsg:4269 = NAD83 geographic coordinates
GaugesLocs_NAD83 = AllStations_fn[which(AllStations_fn$coord_datum_cd == 'NAD83'), ]
coordinates(GaugesLocs_NAD83) = c('dec_long_va', 'dec_lat_va')
proj4string(GaugesLocs_NAD83) = CRS('+init=epsg:4269')
#Transform to pCRS
GaugeLocs_NAD27 = spTransform(GaugesLocs_NAD27, CRS(pCRS))
GaugeLocs_NAD83 = spTransform(GaugesLocs_NAD83, CRS(pCRS))
#Join to one dataset again
GaugeLocs_fn = rbind(GaugeLocs_NAD27, GaugeLocs_NAD83)
#Remove separate datasets
rm(GaugeLocs_NAD27, GaugeLocs_NAD83, GaugesLocs_NAD27, GaugesLocs_NAD83)
# Method 2: Read USGS station data from .csv file----
AllStations <- read.csv(f_StreamGaugeData, stringsAsFactors = FALSE)
#Add a leading 0 to the NWIS gauges to look up their values on the server
# (addZeros comes from addZerosToGaugeNames.R, sourced above)
# NOTE: This step may not be necessary for your dataset.
# NOTE: suppressing warnings for NAs introduced by coercion, which is intended.
# Users should check that other warnings are not also being suppressed.
AllStations = addZeros(AllStations)
# Select data for only NWIS gauges----
NWISstations = AllStations[which(AllStations$Source == 'NWIS'),]
# Add Gauge locations to that dataset----
# Also contains altitudes of the gauges, which should be cross-checked with DEM data
# NOTE: you may have to change the commands to match your file.
GaugesLocs = readNWISsite(NWISstations$GaugeNum)
# Make spatial dataframe: Method 2----
#Split by datum (epsg:4267 = NAD27, epsg:4269 = NAD83), as for Method 1 above
GaugesLocs_NAD27 = GaugesLocs[which(GaugesLocs$coord_datum_cd == 'NAD27'), ]
coordinates(GaugesLocs_NAD27) = c('dec_long_va', 'dec_lat_va')
proj4string(GaugesLocs_NAD27) = CRS('+init=epsg:4267')
GaugesLocs_NAD83 = GaugesLocs[which(GaugesLocs$coord_datum_cd == 'NAD83'), ]
coordinates(GaugesLocs_NAD83) = c('dec_long_va', 'dec_lat_va')
proj4string(GaugesLocs_NAD83) = CRS('+init=epsg:4269')
#Transform to NAD83 UTM Zone 18N (i.e. to pCRS, epsg:26918)
GaugeLocs_NAD27 = spTransform(GaugesLocs_NAD27, CRS(pCRS))
GaugeLocs_NAD83 = spTransform(GaugesLocs_NAD83, CRS(pCRS))
#Join to one dataset again
GaugeLocs = rbind(GaugeLocs_NAD27, GaugeLocs_NAD83)
#Remove separate datasets
rm(GaugeLocs_NAD27, GaugeLocs_NAD83, GaugesLocs, GaugesLocs_NAD27, GaugesLocs_NAD83)
# Add coordinates and other data to the NWISstations data: Method 2 only----
# Joins each NWISstations row with the matching GaugeLocs metadata row plus its
# projected x/y coordinates. Rows are collected in a pre-allocated list and bound
# once at the end (avoids O(n^2) rbind growth inside the loop).
if (exists(x = "GaugeLocs")){
  rows = vector('list', nrow(NWISstations))
  for (i in seq_len(nrow(NWISstations))){
    #NOTE: if your data are not both character, errors stating the following will appear:
    # Error in data.frame(..., check.names = FALSE) :
    # arguments imply differing number of rows: 1, 0
    Ind = which(GaugeLocs$site_no == NWISstations$GaugeNum[i])
    if (length(Ind) > 1){
      print(paste('More than one gauge number matches the uniqueNum gauge ', i, '. Using only first match.'))
      #Keep only the first match. Previously all matches were cbound, which both
      # duplicated rows and made @coords[Ind,][2] return the x coordinate of the
      # second match instead of the y coordinate of the first.
      Ind = Ind[1]
    }
    if (length(Ind) == 0){
      #No location metadata for this gauge; skip it rather than erroring in cbind
      warning('No GaugeLocs match for gauge ', NWISstations$GaugeNum[i], '; row skipped.', call. = FALSE)
      next
    }
    #One combined row: station attributes + NWIS metadata + projected x, y
    cmb = cbind(NWISstations[i,], GaugeLocs@data[Ind,], GaugeLocs@coords[Ind,][1], GaugeLocs@coords[Ind,][2])
    colnames(cmb) = c(colnames(NWISstations), colnames(GaugeLocs@data), colnames(GaugeLocs@coords))
    rows[[i]] = cmb
  }
  #NULL entries (skipped rows) are dropped by rbind
  NWISstations = do.call(rbind, rows)
  rm(i, Ind, rows, cmb)
}
# Clip to ROI----
#Spatial subsetting by the buffered ROI polygon keeps only stations inside it
if (exists(x = "GaugeLocs_fn")){
NWIS_ROI_fn = GaugeLocs_fn[ROI_buff,]
}
if (exists(x = "GaugeLocs")){
# Method 2 only: Make NWIS stations a spatial dataframe
#The dec_long_va/dec_lat_va columns here were appended from the already-projected
# GaugeLocs coordinates above, so the CRS is pCRS (not geographic lon/lat)
coordinates(NWISstations) = c('dec_long_va', 'dec_lat_va')
proj4string(NWISstations) = CRS(pCRS)
NWIS_ROI = NWISstations[ROI_buff,]
}
# Plot locations of gauges----
setwd(wd_sf)
if (exists(x = "GaugeLocs_fn")){
  # Method 1 only: map of the ROI with all downloaded gauges (black) and
  # in-ROI gauges overplotted in red.
  # Fixed typo in output file name (was 'StremflowGauges_fn.png').
  png('StreamflowGauges_fn.png', res = 300, units = 'in', width = 6, height = 6)
  # ROI
  plot(ROI)
  # All NWIS streamflow gauges in bounding box, in color
  plot(GaugeLocs_fn, pch = 16, add = TRUE)
  # NWIS streamflow gauges in ROI
  plot(NWIS_ROI_fn, pch = 16, col = 'red', add = TRUE)
  # Add coordinates
  axis(side = 1)
  axis(side = 2)
  box()
  north.arrow(xb = 370000, yb = 4347000, len = 700, col = 'black', lab = 'N')
  legend('bottomleft', title = 'Streamflow Stations', legend = c('In ROI', 'Not in ROI'), pch = 16, col = c('red', 'black'))
  dev.off()
}
if (exists(x = "GaugeLocs")){
  # Method 2 only: same map as Method 1, but sets up the plot extent with an
  # invisible (white) plot of all stations first so the ROI and points fit.
  # Fixed typo in output file name (was 'StremflowGauges.png').
  png('StreamflowGauges.png', res = 300, units = 'in', width = 6, height = 6)
  # All NWIS streamflow gauges in bounding box
  plot(NWISstations, pch = 16, col = 'white')
  # ROI
  plot(ROI, add = TRUE)
  # All NWIS streamflow gauges in bounding box, in color
  plot(NWISstations, pch = 16, add = TRUE)
  # NWIS streamflow gauges in ROI
  plot(NWIS_ROI, pch = 16, col = 'red', add = TRUE)
  # Add coordinates
  axis(side = 1)
  axis(side = 2)
  box()
  north.arrow(xb = 370000, yb = 4346000, len = 700, col = 'black', lab = 'N')
  legend('topleft', title = 'Streamflow Stations', legend = c('In ROI', 'Not in ROI'), pch = 16, col = c('red', 'black'))
  dev.off()
}
# Statistics to download for each streamflow gauge, if available----
# All codes defined here: https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html
#Fixme: lookup codes directly from the website
Stat.minFlow <- "00002"
Stat.avgFlow <- "00003"
Stat.maxFlow <- "00001"
Stat.varFlow <- "00010"
Stat.skewFlow <- "00013"
#Percentile statistic codes (Xth percentile)
Stat.P1 <- "01010"
Stat.P5 <- "01050"
Stat.P25 <- "01250"
Stat.P50 <- "01500"
Stat.P75 <- "01750"
Stat.P95 <- "01950"
Stat.P99 <- "01990"
#All statistic codes gathered into one vector for the download requests
#Fixme: is there a way to collect all variables that begin Stat. and collect them into a new vector?
Stats <- c(Stat.minFlow, Stat.P1, Stat.P5, Stat.P25, Stat.P50, Stat.P75, Stat.P95, Stat.P99,
           Stat.maxFlow, Stat.avgFlow, Stat.varFlow, Stat.skewFlow)
# Parameters to download for each gauge, if available----
# All codes defined here: https://help.waterdata.usgs.gov/code/parameter_cd_query?fmt=rdb&inline=true&group_cd=%
#Fixme: lookup codes directly from the website
Par.cfsFlow <- "00060"
Par.Nflow <- "00600"
Par.Lat <- "91110"
Par.Long <- "91111"
#All parameter codes gathered into one vector for the download requests
#Fixme: is there a way to collect all variables that begin Par. and collect them into a new vector?
Pars <- c(Par.cfsFlow, Par.Nflow, Par.Long, Par.Lat)
# Download the within-ROI stream gauge data in parallel----
# Use only the unique gauge numbers in the dataset
# (repeats numbers occur when multiple variables are available for a gauge)
setwd(wd_sf)
#Get unique numbers
if (exists(x = "NWIS_ROI_fn")){
# Method 1
uniqueNums = unique(NWIS_ROI_fn$site_no)
}else if (exists(x = "NWIS_ROI")){
# Method 2
uniqueNums = unique(NWIS_ROI$site_no)
}
#Download daily values for each gauge on a cluster (one worker per core, minus one)
cl = makeCluster(detectCores() - 1)
registerDoParallel(cl)
a = foreach(i = uniqueNums, .packages = 'dataRetrieval') %dopar% {
# Read all of the Pars data for the provided station number
#Fixme: unable to test the use of Stats instead of only specifying statistic codes one by one.
# Need a site that has more than one of these statistic codes reported to test.
stationData <- readNWISdv(siteNumbers = i, parameterCd = Pars, statCd = Stats)
#Write one tab-delimited 'StreamStat_<site>.txt' file per gauge in wd_sf;
# these files are re-read into StreamStationList below
write.table(stationData,
paste0(getwd(), '/StreamStat_', i, ".txt"),
sep = "\t", row.names = FALSE)
}
stopCluster(cl)
rm(cl)
#Check that the run was successful
#NOTE(review): write.table() returns NULL, so a fully successful run yields all-NULL
# loop results; this check only flags unexpected non-NULL returns. An error inside a
# worker would abort foreach before reaching here - consider .errorhandling = 'pass'.
# TODO confirm intended failure semantics.
if (any(!is.null(unlist(a)))){
print('DOWNLOAD UNSUCCESSFUL')
}else{
print('Stream gauge data download complete!')
rm(a)
}
# Gather the records for each gauge into a list of dataframes----
# Reads every 'StreamStat_*' file written by the parallel download above into a
# dataframe, names each list entry by its gauge number, and collects the set of
# qualification/error codes seen (ErrCodes is used to color the plots below).
#Find all of the streamflow station files in the working directory
StreamStatFiles = list.files()[grep(pattern = 'StreamStat', x = list.files(), ignore.case = FALSE, fixed = TRUE)]
#Read each file; columns are: agency, site number, date, daily mean flow, qualification code.
# lapply also handles the zero-file case safely (the old 1:length() loop iterated
# over c(1, 0) and crashed in read.table when no files were found).
StreamStationList = lapply(StreamStatFiles, function(fn){
  read.table(fn, header = TRUE, sep = '\t', stringsAsFactors = FALSE,
             colClasses = c('character', 'character', 'Date', 'numeric', 'character'))
})
#Name the list by the gauge number (first site_no value in each file)
names(StreamStationList) = vapply(StreamStationList, function(f) f$site_no[1], character(1))
#Gather the unique error codes across all gauges, in first-seen order,
# for use in plotting later (character(0) when there are no files)
ErrCodes = as.character(unique(unlist(lapply(StreamStationList, function(f) f$X_00060_00003_cd))))
rm(StreamStatFiles)
# Check for duplicate observations----
# This step is recommended in USGS publications.
# (checkDuplicatesAndRemove is sourced from checkDuplicates.R above)
StreamStationList = checkDuplicatesAndRemove(StreamStationList)
# Check for zeros and negative values in the records----
# (checkZerosNegs is sourced from checkZerosNegs.R above)
StreamStationList = checkZerosNegs(StreamStationList)
# See if any stations have negative or zero values and add to NWIS spatial dataset----
#These helpers add 'Neg' and 'Zero' flag columns to the spatial dataset
# (non-NA where the station's record contains negatives/zeros); the flags
# drive the colors in the map plotted below.
if (exists(x = "NWIS_ROI_fn")){
# Method 1
NWIS_ROI_fn = addNegsToSpatialDataset(StationList = StreamStationList, SpatialDataset = NWIS_ROI_fn, site_D = 'site_no', site_SL = 'site_no')
NWIS_ROI_fn = addZerosToSpatialDataset(StationList = StreamStationList, SpatialDataset = NWIS_ROI_fn, site_D = 'site_no', site_SL = 'site_no')
}else if (exists(x = "NWIS_ROI")){
# Method 2
NWIS_ROI = addNegsToSpatialDataset(StationList = StreamStationList, SpatialDataset = NWIS_ROI, site_D = 'site_no', site_SL = 'site_no')
NWIS_ROI = addZerosToSpatialDataset(StationList = StreamStationList, SpatialDataset = NWIS_ROI, site_D = 'site_no', site_SL = 'site_no')
}
# Plot a map of the stations with zero and negative records----
#Stations are overplotted in layers: all (black), zeros (red), negatives (blue),
# then both (purple) so the purple layer wins where both flags are set.
if (exists(x = "NWIS_ROI_fn")){
# Method 1
png(paste0('Streamflow_ZerosNegsMap_fn.png'), res = 300, units = 'in', width = 6, height = 6)
plot(ROI)
#All gauges
plot(NWIS_ROI_fn, pch = 16, add = TRUE)
#Gauges with zeros
plot(NWIS_ROI_fn[!is.na(NWIS_ROI_fn$Zero),], pch = 16, col = 'red', add = TRUE)
#Gauges with negative values
plot(NWIS_ROI_fn[!is.na(NWIS_ROI_fn$Neg),], pch = 16, col = 'blue', add = TRUE)
#Gauges with both
plot(NWIS_ROI_fn[which(!is.na(NWIS_ROI_fn$Neg) & !is.na(NWIS_ROI_fn$Zero)),], pch = 16, col = 'purple', add = TRUE)
# Add coordinates
axis(side = 1)
axis(side = 2)
box()
north.arrow(xb = 370000, yb = 4347000, len = 700, col = 'black', lab = 'N')
legend('topright', title = 'Streamflow Stations', legend = c('Zeros in Record', 'Negatives in Record', 'Both', 'Neither'), pch = 16, col = c('red', 'blue', 'purple', 'black'), bty = 'n')
dev.off()
}else if (exists(x = "NWIS_ROI")){
# Method 2 - identical layout, different dataset and output file name
png(paste0('Streamflow_ZerosNegsMap.png'), res = 300, units = 'in', width = 6, height = 6)
plot(ROI)
#All gauges
plot(NWIS_ROI, pch = 16, add = TRUE)
#Gauges with zeros
plot(NWIS_ROI[!is.na(NWIS_ROI$Zero),], pch = 16, col = 'red', add = TRUE)
#Gauges with negative values
plot(NWIS_ROI[!is.na(NWIS_ROI$Neg),], pch = 16, col = 'blue', add = TRUE)
#Gauges with both
plot(NWIS_ROI[which(!is.na(NWIS_ROI$Neg) & !is.na(NWIS_ROI$Zero)),], pch = 16, col = 'purple', add = TRUE)
# Add coordinates
axis(side = 1)
axis(side = 2)
box()
north.arrow(xb = 370000, yb = 4347000, len = 700, col = 'black', lab = 'N')
legend('topright', title = 'Streamflow Stations', legend = c('Zeros in Record', 'Negatives in Record', 'Both', 'Neither'), pch = 16, col = c('red', 'blue', 'purple', 'black'), bty = 'n')
dev.off()
}
# Identify missing dates and fill them into the timeseries. This also places the timeseries in chronological order----
#Fixme: Missing data fill in with numerical value estimates using prediction in ungauged basins methods for large gaps
#FillMissingDates_par (presumably from missingDates.R, sourced above - verify) runs in
# parallel and returns both the updated spatial Dataset and the gap-filled StationList.
if (exists(x = "NWIS_ROI_fn")){
# Method 1
Fills = FillMissingDates_par(Dataset = NWIS_ROI_fn, StationList = StreamStationList, Var = 'X_00060_00003',
Date = 'Date', gapType = 'd', site_no_D = 'site_no', site_no_SL = 'site_no', NoNAcols = 'agency_cd',
NumCores = detectCores()-1)
NWIS_ROI_fn = Fills$Dataset
StreamStationList = Fills$StationList
rm(Fills)
}else if (exists(x = "NWIS_ROI")){
# Method 2
Fills = FillMissingDates_par(Dataset = NWIS_ROI, StationList = StreamStationList, Var = 'X_00060_00003',
Date = 'Date', gapType = 'd', site_no_D = 'site_no', site_no_SL = 'site_no', NoNAcols = 'agency_cd',
NumCores = detectCores()-1)
NWIS_ROI = Fills$Dataset
StreamStationList = Fills$StationList
rm(Fills)
}
# Plot the time series for each gauge, and the eCDF, colored by error code----
#For each gauge this loop writes: a timeseries plot colored by qualification code,
# a flow-duration (exceedance) curve, a combined exceedance/map/timeseries figure,
# a monthly matrix plot, and hydroTSM EDA summary plots.
#Fixme: add confidence intervals for flow duration curves (fdcu package)
#Fixme: make axes have the same limits on the seasonal plots
for (i in 1:length(StreamStationList)){
#Timeseries:----
#Assign colors to the error codes
#NOTE(review): colCodes is loop-invariant and could be computed once before the loop
colCodes = rainbow(length(ErrCodes))
#Find the indices of the codes that are in this dataset
codes = which(ErrCodes %in% StreamStationList[[i]]$X_00060_00003_cd)
png(paste0('StreamflowTimeseries_', StreamStationList[[i]]$site_no[1],'.png'), res = 300, units = 'in', width = 6, height = 6)
plot(x = StreamStationList[[i]]$Date, y = StreamStationList[[i]]$X_00060_00003, type = 'o', pch = 16, cex = 0.3,
xlab = 'Year', ylab = 'Daily Mean Streamflow (cfs)', main = paste0('Station #', StreamStationList[[i]]$site_no[1]),
ylim = ylim_SF, xlim = xlim_dates)
#Add colors: overplot the points of each qualification code in that code's color
for (cc in 1:length(colCodes)){
par(new = TRUE)
plot(x = StreamStationList[[i]]$Date[which(StreamStationList[[i]]$X_00060_00003_cd == ErrCodes[cc])],
y = StreamStationList[[i]]$X_00060_00003[which(StreamStationList[[i]]$X_00060_00003_cd == ErrCodes[cc])],
pch = 16, cex = 0.3,
col = colCodes[cc],
ylim = ylim_SF, xlim = xlim_dates, axes = FALSE, xlab = '', ylab = '')
}
legend('topleft', legend = ErrCodes[codes], col = colCodes[codes], pch = 16)
dev.off()
#Flow-Duration Curve----
#Make y axis limits for flow duration curve: log-decade ticks 10^-3..10^5,
# labeled only at powers of ten
at.y <- outer(1:9, 10^(-3:5))
lab.y <- ifelse(log10(at.y) %% 1 == 0, sapply(log10(at.y),function(k)
as.expression(bquote(10^ .(k)))), NA)
png(paste0('StreamflowExceedance_', StreamStationList[[i]]$site_no[1],'.png'), res = 300, units = 'in', width = 6, height = 6)
fdc(x = StreamStationList[[i]]$X_00060_00003, pch = NA, ylab = 'Daily Mean Streamflow (cfs)',
main = paste0('Exceedance Probability for Daily Mean Streamflow \n Station #', StreamStationList[[i]]$site_no[1]),
xlim = c(0,1), lQ.thr = NA, hQ.thr = NA, thr.shw = FALSE, yat = -1000, verbose = FALSE)
axis(side=2, at=at.y, labels=lab.y, cex.axis=1.2, las=1, tck = -0.01, hadj = 0.7, padj = 0.3)
axis(side=2, at=10^seq(-3,5,1), labels=FALSE, tck = -0.025)
dev.off()
#Streamflow exceedance, timeseries, and map----
#Three-panel layout: exceedance (1, top-left), map (2, right column), timeseries (3, bottom-left)
png(paste0('StreamflowExceedanceTimeseries_Map_', StreamStationList[[i]]$site_no[1],'.png'),
res = 300, units = 'in', width = 10, height = 10)
layout(rbind(c(1,2), c(3, 2)))
#Exceedance
fdc(x = StreamStationList[[i]]$X_00060_00003, pch = NA, ylab = 'Daily Mean Streamflow (cfs)',
main = paste0('Exceedance Probability for Daily Mean Streamflow \n Station #', StreamStationList[[i]]$site_no[1]),
xlim = c(0,1), lQ.thr = NA, hQ.thr = NA, thr.shw = FALSE, yat = -1000, verbose = FALSE)
axis(side=2, at=at.y, labels=lab.y, cex.axis=1.2, las=1, tck = -0.01, hadj = 0.7, padj = 0.3)
axis(side=2, at=10^seq(-3,5,1), labels=FALSE, tck = -0.025)
#Map of sites with the plotted streamflow exceedance site highlighted in red
plot(ROI)
#All gauges
if (exists(x = "NWIS_ROI_fn")){
plot(NWIS_ROI_fn, pch = 16, add = TRUE)
#Gauge selected for exceedance plot
plot(NWIS_ROI_fn[which(NWIS_ROI_fn$site_no == StreamStationList[[i]]$site_no[1]),], pch = 16, col = 'red', add = TRUE)
title(main = NWIS_ROI_fn$station_nm[which(NWIS_ROI_fn$site_no == StreamStationList[[i]]$site_no[1])])
}else if (exists(x = "NWIS_ROI")){
plot(NWIS_ROI, pch = 16, add = TRUE)
#Gauge selected for exceedance plot
plot(NWIS_ROI[which(NWIS_ROI$site_no == StreamStationList[[i]]$site_no[1]),], pch = 16, col = 'red', add = TRUE)
title(main = NWIS_ROI$station_nm[which(NWIS_ROI$site_no == StreamStationList[[i]]$site_no[1])])
}
# Add coordinates
axis(side = 1)
axis(side = 2)
box()
north.arrow(xb = 370000, yb = 4347000, len = 700, col = 'black', lab = 'N')
legend('topright', title = 'Streamflow Stations', legend = c('Selected', 'Other'), pch = 16, col = c('red', 'black'), bty = 'n')
#Timeseries (y limits fit this station's record rather than the global ylim_SF)
plot(x = StreamStationList[[i]]$Date, y = StreamStationList[[i]]$X_00060_00003, type = 'o', pch = 16, cex = 0.3,
xlab = 'Year', ylab = 'Daily Mean Streamflow (cfs)', main = paste0('Station #', StreamStationList[[i]]$site_no[1]),
ylim = c(min(StreamStationList[[i]]$X_00060_00003, na.rm = TRUE), max(StreamStationList[[i]]$X_00060_00003, na.rm = TRUE)),
xlim = xlim_dates)
#Add colors
for (cc in 1:length(colCodes)){
par(new = TRUE)
plot(x = StreamStationList[[i]]$Date[which(StreamStationList[[i]]$X_00060_00003_cd == ErrCodes[cc])],
y = StreamStationList[[i]]$X_00060_00003[which(StreamStationList[[i]]$X_00060_00003_cd == ErrCodes[cc])],
pch = 16, cex = 0.3, col = colCodes[cc], axes = FALSE, xlab = '', ylab = '',
ylim = c(min(StreamStationList[[i]]$X_00060_00003, na.rm = TRUE), max(StreamStationList[[i]]$X_00060_00003, na.rm = TRUE)),
xlim = xlim_dates)
}
legend('topleft', legend = ErrCodes[codes], col = colCodes[codes], pch = 16)
dev.off()
#hydroTSM plots----
#daily timeseries as a zoo object, ordered by date
dts = zoo(StreamStationList[[i]]$X_00060_00003, order.by = StreamStationList[[i]]$Date)
#monthly timeseries for mean daily flows in a month
mts = daily2monthly(dts, FUN = mean, na.rm = TRUE)
#plot(mts)
#monthly total flows boxplot
#boxplot(coredata(mts) ~ factor(format(time(mts), "%b"), levels=unique(format(time(mts), "%b")), ordered=TRUE))
#Monthly matrix (formatMonthlyMatrix is sourced from formatMonthlyMatrix.R above)
M = formatMonthlyMatrix(mts)
# Plotting the monthly values as matrixplot
png(paste0('StreamflowMonthly_', StreamStationList[[i]]$site_no[1],'.png'), res = 300, units = 'in', width = 6, height = 3)
print(matrixplot(M, ColorRamp="Precipitation", main="Mean Monthly Streamflow"))
dev.off()
#annual timeseries
#ats = daily2annual(dts, FUN = mean, na.rm = TRUE)
#plot(ats)
#annual total flows boxplot
#boxplot(coredata(ats) ~ factor(format(time(ats), "%b"), levels=unique(format(time(ats), "%b")), ordered=TRUE))
#Summary EDA plots
png(paste0('StreamflowEDA_', StreamStationList[[i]]$site_no[1],'.png'), res = 300, units = 'in', width = 10, height = 10)
hydroplot(dts, FUN = mean, col = 'black', var.unit = 'mean cfs')
dev.off()
#Seasonal flows - only when the record spans at least a full year of dates and values
if ((as.numeric((as.Date(max(StreamStationList[[i]]$Date)) - as.Date(min(StreamStationList[[i]]$Date)))) >= 366)
& (length(StreamStationList[[i]]$X_00060_00003) >= 366)){
png(paste0('StreamflowEDA_Seasonal_', StreamStationList[[i]]$site_no[1],'.png'), res = 300, units = 'in', width = 10, height = 10)
hydroplot(dts, FUN = mean, col = 'black', var.unit = 'mean cfs', pfreq = 'seasonal', ylab = 'Mean Streamflow (cfs)')
dev.off()
}
#Fixme: annual timeseries trace plots (overlay of annual hydrographs / shading of 10-25-75-90, plot median sf for water year)
#Fixme: quantitative summary tables like: seasonalfunction(dts, FUN = mean)
# hydropairs for correlation plot
}
rm(i, cc, colCodes, codes, mts, dts, M, at.y, lab.y)
# Outlier detection----
#Fixme: check for high and low flow outliers in each record, and compare spatially to other gauges on those dates
#Fixme: add temporal outlier detection methods to account for correlation at many lag distances (spectral methods?)
#Fixme: add spatiotemporal outlier detection methods
#Fixme: flags for ST outliers at specified levels
for (i in 1:length(StreamStationList)){
#Hacky spatiotemporal outlier detection method
#Flag daily flows above the HighOutProb quantile for station i and collect the
#same dates plus the day before (b*) and day after (a*) for cross-station review.
#The threshold and empirical CDF are loop invariants; compute each once here
#(the original recomputed the quantile 3x and rebuilt ecdf() 5x).
flowThresh = quantile(StreamStationList[[i]]$X_00060_00003, probs = HighOutProb, na.rm = TRUE)
flowEcdf = ecdf(StreamStationList[[i]]$X_00060_00003)
#Row indices of the outlier (above-threshold) records
outInds = which(StreamStationList[[i]]$X_00060_00003 > flowThresh)
#select all values above the threshold (e.g. 99th percentile when HighOutProb = 0.99)
highs = StreamStationList[[i]][outInds, c('X_00060_00003', 'Date')]
#empirical quantile for all of the flows
qhighs = highs
qhighs$X_00060_00003 = flowEcdf(highs$X_00060_00003)
#Name the streamflow column the station number
colnames(highs) = c(StreamStationList[[i]]$site_no[1], 'Date')
colnames(qhighs) = c(StreamStationList[[i]]$site_no[1], 'Date')
#Flip to have Date be the first column. Makes more sense for reporting and plotting.
highs = highs[,c(2,1)]
qhighs = qhighs[,c(2,1)]
#Gather dataframes for the day before and day after the outlier record
bhighs = ahighs = highs
bqhighs = aqhighs = qhighs
#Assign the date
bhighs$Date = highs$Date - 1
ahighs$Date = highs$Date + 1
bqhighs$Date = qhighs$Date - 1
aqhighs$Date = qhighs$Date + 1
#Assign NA values to the timeseries
bhighs[,2] = NA
ahighs[,2] = NA
bqhighs[,2] = NA
aqhighs[,2] = NA
#Row indices for the day before (b) and day after (a) each outlier
bInds = outInds - 1
aInds = outInds + 1
if (min(bInds) != 0){
bhighs[,2] = StreamStationList[[i]][bInds,'X_00060_00003']
bqhighs[,2] = flowEcdf(bhighs[,2])
}else{
#An outlier falls on the first record, so its "day before" index is 0
zInd = which(bInds == 0)
#Set to 1 for now, but later will set back to NA
bInds[zInd] = 1
bhighs[,2] = StreamStationList[[i]][bInds,'X_00060_00003']
bqhighs[,2] = flowEcdf(bhighs[,2])
bhighs[zInd,2] = NA
bqhighs[zInd,2] = NA
rm(zInd)
}
if (max(aInds) != (nrow(StreamStationList[[i]])+1)){
ahighs[,2] = StreamStationList[[i]][aInds,'X_00060_00003']
aqhighs[,2] = flowEcdf(ahighs[,2])
}else{
#An outlier falls on the last record, so its "day after" index is out of range
zInd = which(aInds == (nrow(StreamStationList[[i]])+1))
#Set to 1 for now, but later will set back to NA
aInds[zInd] = 1
ahighs[,2] = StreamStationList[[i]][aInds,'X_00060_00003']
aqhighs[,2] = flowEcdf(ahighs[,2])
ahighs[zInd,2] = NA
aqhighs[zInd,2] = NA
rm(zInd)
}
rm(flowThresh, flowEcdf, outInds)
#Helper: append one column to refDF (raw flows) and qrefDF (empirical flow
#quantiles) holding station stn's record on each date already present in refDF.
#Dates with no overlap remain NA. Returns list(raw = refDF, quant = qrefDF).
fillStationFlows = function(refDF, qrefDF, stn){
  #Empirical CDF over the station's full record (computed once, not per column)
  stnEcdf = ecdf(stn$X_00060_00003)
  #Add the new columns up front so the no-overlap case reports NAs for this station
  refDF[,stn$site_no[1]] = NA
  qrefDF[,stn$site_no[1]] = NA
  #Search for the dates corresponding to the reference dates in this station
  Inds = which(stn$Date %in% refDF$Date)
  if(length(Inds) > 0){
    stnSub = stn[Inds, c('X_00060_00003', 'Date')]
    #Detect the first date that matches
    IndStart = which(refDF$Date == min(stnSub$Date))
    rowSpan = IndStart:(nrow(stnSub)+IndStart-1)
    #Ensure that the dates match in order from the start to the last date in the series.
    # These should match because timeseries were filled in for missing dates above.
    #Bug fix: the original used any(), which only failed when NO dates matched;
    #all() is required so a single misaligned date raises the error.
    # Note: an outlier on the first or last day of the reference timeseries could
    # still shift this span; that only affects the first/last values per station.
    if (all(stnSub$Date == refDF$Date[rowSpan]) == FALSE){
      stop('Error: Timeseries dates do not match. Make sure all dates are filled into all stations (no gaps in coverage). NAs are okay.')
    }
    #record the quantile values for that station
    qrefDF[rowSpan, stn$site_no[1]] = stnEcdf(stnSub$X_00060_00003)
    #record the raw values for that station
    refDF[rowSpan, stn$site_no[1]] = stnSub$X_00060_00003
  }
  return(list(raw = refDF, quant = qrefDF))
}
res = NULL
for (j in seq_along(StreamStationList)){
if (j != i){
#outlier day
res = fillStationFlows(highs, qhighs, StreamStationList[[j]])
highs = res$raw
qhighs = res$quant
#day before
res = fillStationFlows(bhighs, bqhighs, StreamStationList[[j]])
bhighs = res$raw
bqhighs = res$quant
#day after
res = fillStationFlows(ahighs, aqhighs, StreamStationList[[j]])
ahighs = res$raw
aqhighs = res$quant
}
}
#Compatibility placeholders: these names now live inside the helper, but the
#cleanup rm() after this loop's enclosing block still expects them to exist.
Inds = IndStart = highsj = NA
rm(fillStationFlows, res)
#Save the highs and qhighs data to files
#Write each outlier-review table: raw flows and quantiles for the outlier day,
#day after (a*), and day before (b*). File names embed the reference station
#number (column 2 of each table).
outlierTables = list(Flows = highs, Quantiles = qhighs, aFlows = ahighs,
                     aQuantiles = aqhighs, bFlows = bhighs, bQuantiles = bqhighs)
for (tabName in names(outlierTables)){
write.table(outlierTables[[tabName]],
            paste0(getwd(), '/OutlierCheck_', tabName, '_', colnames(outlierTables[[tabName]])[2], ".txt"),
            sep = "\t", row.names = FALSE)
}
rm(outlierTables, tabName)
#Make a lineplot for the quantiles of the outliers across each of the stations in the ROI
#Order the station columns numerically by site number so each station keeps the
#same plotting color in every iteration. Date stays as column 1.
orderStationCols = function(df) df[, c(1, 1 + order(as.numeric(colnames(df)[-1])))]
qhighs = orderStationCols(qhighs)
highs = orderStationCols(highs)
bqhighs = orderStationCols(bqhighs)
bhighs = orderStationCols(bhighs)
aqhighs = orderStationCols(aqhighs)
ahighs = orderStationCols(ahighs)
rm(orderStationCols)
#also order the stations for plotting with same color scheme
#Both branches did the same work on different datasets; select the station set
#once (filtered-name version preferred), then build pstations and the titles.
if (exists(x = "NWIS_ROI_fn")){
roiSet = NWIS_ROI_fn
}else if (exists(x = "NWIS_ROI")){
roiSet = NWIS_ROI
}
if (exists(x = "roiSet")){
pstations = roiSet[order(as.numeric(roiSet$site_no)),]
#figure titles, headed by the reference station's name
refStationNm = roiSet$station_nm[which(roiSet$site_no == StreamStationList[[i]]$site_no[1])]
title1 = paste(refStationNm, '\n 99th Percentile Daily Average Flow')
title2 = paste(refStationNm, '\n Day Before 99th Percentile Daily Average Flow')
title3 = paste(refStationNm, '\n Day After 99th Percentile Daily Average Flow')
rm(roiSet, refStationNm)
}
#Two-panel outlier-review figure: quantile timeseries across stations (left)
#and a gauge map with the reference station in black (right).
#Deduplicates three nearly identical plotting chunks. The originals keyed the
#map's rainbow palette to ncol(qhighs) in all three panels even when plotting
#bqhighs/aqhighs; the station columns are identically named and ordered in all
#three frames, so one shared color vector is computed here and used throughout.
cols = rainbow(n = (ncol(qhighs)-1))
cols[which(colnames(qhighs[-1]) == StreamStationList[[i]]$site_no[1])] = 'black'
plotOutlierPanel = function(fname, qdata, ptitle){
  png(fname, res = 300, units = 'in', width = 12, height = 6)
  layout(rbind(c(1,2)))
  #Plot of outlier timeseries
  matplotDates(x = qdata$Date, y = qdata[,-1], type = 'o', pch = 16, cex = 0.7, col = cols,
               xlab = 'Date', ylab = 'Quantile')
  #Map of sites with the plotted streamflow exceedance site highlighted
  plot(ROI)
  #All gauges
  plot(pstations, pch = 16, add = TRUE, col = rainbow(n = (ncol(qdata)-1)))
  #Gauge selected for the high-percentile flows
  plot(pstations[which(pstations$site_no == StreamStationList[[i]]$site_no[1]),], pch = 16, add = TRUE, col = 'black')
  # Add coordinates
  axis(side = 1)
  axis(side = 2)
  box()
  north.arrow(xb = 370000, yb = 4347000, len = 700, col = 'black', lab = 'N')
  legend('topright', title = 'Streamflow Stations', legend = c('Reference'), pch = 16, col = c('black'), bty = 'n')
  title(main = ptitle)
  dev.off()
}
#Outlier day, day before (b), and day after (a) panels
plotOutlierPanel(paste0('Outlier99Quantile_', StreamStationList[[i]]$site_no[1],'.png'), qhighs, title1)
plotOutlierPanel(paste0('Outlier99bQuantile_', StreamStationList[[i]]$site_no[1],'.png'), bqhighs, title2)
plotOutlierPanel(paste0('Outlier99aQuantile_', StreamStationList[[i]]$site_no[1],'.png'), aqhighs, title3)
rm(plotOutlierPanel)
}
rm(i, j, Inds, IndStart, highs, highsj, qhighs, pstations, ahighs, bhighs, aInds, bInds, cols, aqhighs, bqhighs)
# Make a map of gauge locations colored by their record lengths, corrected for the total amount of missing data----
#The two original branches were identical except for the dataset name; operate
#on whichever ROI station set exists (filtered-name version preferred) and
#write the augmented data back to that name.
if (exists(x = "NWIS_ROI_fn")){
roiName = "NWIS_ROI_fn"
}else if (exists(x = "NWIS_ROI")){
roiName = "NWIS_ROI"
}else{
roiName = NULL
}
if (!is.null(roiName)){
roiSites = get(roiName)
roiSites$RecordLength = roiSites$RecordLengthMinusGaps = NA
#Record length in days: span between each station's first and last observation dates
for (i in seq_along(StreamStationList)){
roiSites$RecordLength[which(as.numeric(roiSites$site_no) == as.numeric(StreamStationList[[i]]$site_no[1]))] = as.numeric((max(StreamStationList[[i]]$Date) - min(StreamStationList[[i]]$Date)))
}
rm(i)
roiSites$RecordLengthMinusGaps = roiSites$RecordLength - roiSites$MissingData_d
#in years
roiSites$RecordLength = roiSites$RecordLength/365.25
roiSites$RecordLengthMinusGaps = roiSites$RecordLengthMinusGaps/365.25
#Color by decades
scaleRange = c(0,70)
scaleBy = 10
#NOTE(review): Pal is presumably read by colFun(), defined elsewhere in this file — confirm
Pal = rev(rainbow((scaleRange[2] - scaleRange[1])/scaleBy))
#NOTE(review): 'Stremflow' typo kept intentionally so any downstream references to this file name still resolve
png('StremflowGauges_RecordLengths.png', res = 300, units = 'in', width = 6, height = 6)
plot(ROI)
# Gauges colored by their record lengths
plot(roiSites, pch = 16, col = colFun(roiSites$RecordLengthMinusGaps), add = TRUE)
# Add coordinates
axis(side = 1)
axis(side = 2)
box()
north.arrow(xb = 370000, yb = 4346000, len = 700, col = 'black', lab = 'N')
legend('right', title = 'Streamflow Station \n Record Lengths \n (years)', legend = seq(scaleRange[1], scaleRange[2]-scaleBy, scaleBy),
       pch = 16, col = colFun(seq(scaleRange[1], scaleRange[2]-scaleBy, scaleBy)), bty = 'n')
dev.off()
#Write the augmented dataset back under its original name
assign(roiName, roiSites)
rm(roiSites)
}
rm(roiName)
# Write streamflow datasets to files----
setwd(wd_sf)
#Write the ROI station layer; when the unfiltered set is in use, also write the
#station layer for the bounding box.
if (exists(x = "NWIS_ROI_fn")){
writeOGR(obj = NWIS_ROI_fn, dsn = getwd(), driver = 'ESRI Shapefile', layer = f_NWIS_ROI_out)
}else if (exists(x = "NWIS_ROI")){
writeOGR(obj = NWIS_ROI, dsn = getwd(), driver = 'ESRI Shapefile', layer = f_NWIS_ROI_out)
writeOGR(obj = NWISstations, dsn = getwd(), driver = 'ESRI Shapefile', layer = f_NWIS_bb_out)
}
#Stream gauges had missing dates added to the files. Write new files.
for (stnID in names(StreamStationList)){
write.table(StreamStationList[[stnID]],
            paste0(getwd(), '/StreamStat_', stnID, f_sf_processKey, ".txt"),
            sep = "\t", row.names = FALSE)
}
rm(stnID)
#Save the full station list for later reuse
list.save(x = StreamStationList, file = f_StreamStationList, type = "YAML")
# DEM for Streamflow----
# Add DEM elevation to the gauge datasets----
# Gather all of the DEM files together and mosaic into one file
#processDEM is defined elsewhere in this file; presumably returns a raster
#mosaic projected to pCRS — TODO confirm
DEM = processDEM(dir_DEMs = dir_DEM, f_DEMs = f_DEM, pCRS = pCRS)
setwd(wd_sf)
#Add the DEM elevation to the gauge dataset by sampling the raster at each
#gauge point (raster::extract returns NA for points outside the raster extent)
#Method 1
if (exists(x = "GaugeLocs_fn")){
GaugeLocs_fn$ElevDEM = raster::extract(x = DEM, y = GaugeLocs_fn)
}
#Method 2
if (exists(x = "GaugeLocs")){
GaugeLocs$ElevDEM = raster::extract(x = DEM, y = GaugeLocs)
}
# Compare the DEM elevation to the listed elevation----
setwd(wd_sf)
#Scatter the USGS-reported elevation against the DEM elevation (converted from
#meters to feet via /0.3048) for every gauge where both values are present.
plotElevComparison = function(gpts, fname){
  ok = which((is.na(gpts$alt_va) == FALSE) & (is.na(gpts$ElevDEM) == FALSE))
  png(fname, res = 300, units = 'in', width = 5, height = 5)
  plot(gpts$alt_va[ok], gpts$ElevDEM[ok]/.3048,
       xlab = 'USGS Reported Elevation (ft)', ylab = 'DEM Elevation (ft)', main = 'Gauge Elevations')
  #1:1 reference line
  lines(c(-100,1100), c(-100,1100), col = 'red')
  dev.off()
}
#Method 1
if (exists(x = "GaugeLocs_fn")){
plotElevComparison(GaugeLocs_fn, 'CompareStreamflowGaugeElevToDEM_fn.png')
}
#Method 2
if (exists(x = "GaugeLocs")){
plotElevComparison(GaugeLocs, 'CompareStreamflowGaugeElevToDEM.png')
}
rm(plotElevComparison)
#Fixme: Correct the elevations for those gauges that are very different than the DEM
# and have collection codes that suggest lower data quality
# Some of the gauges were likely reported in m instead of in ft in the USGS database
#Identify spatially those gauges that are more than X feet different
#plot(GaugeLocs)
#plot(GaugeLocs[which(abs(GaugeLocs$ElevDEM/.3048 - GaugeLocs$alt_va) >= 50),], col = 'red', add =T)
#plot(GaugeLocs[which(abs(GaugeLocs$ElevDEM/.3048 - GaugeLocs$alt_va) >= 100),], col = 'blue', add =T)
#plot(GaugeLocs[which(abs(GaugeLocs$ElevDEM/.3048 - GaugeLocs$alt_va) >= 200),], col = 'green', add =T)
#plot(GaugeLocs_fn)
#plot(GaugeLocs_fn[which(abs(GaugeLocs_fn$ElevDEM/.3048 - GaugeLocs_fn$alt_va) >= 50),], col = 'red', add =T)
#plot(GaugeLocs_fn[which(abs(GaugeLocs_fn$ElevDEM/.3048 - GaugeLocs_fn$alt_va) >= 100),], col = 'blue', add =T)
#plot(GaugeLocs_fn[which(abs(GaugeLocs_fn$ElevDEM/.3048 - GaugeLocs_fn$alt_va) >= 200),], col = 'green', add =T)
# Plot reported vs. DEM elevation of gauges within the ROI----
setwd(wd_sf)
if (exists(x = "GaugeLocs_fn")){
# Method 1 only
#assign elevation to the ROI dataset by matching site numbers.
#match() replaces the original element-wise loop; it is vectorized and yields
#NA for an unmatched site, where the loop's zero-length assignment would error.
NWIS_ROI_fn$ElevDEM = GaugeLocs_fn$ElevDEM[match(NWIS_ROI_fn$site_no, GaugeLocs_fn$site_no)]
png('CompareGaugeElevToDEM_ROI_fn.png', res = 300, units = 'in', width = 5, height = 5)
plot(NWIS_ROI_fn$alt_va, NWIS_ROI_fn$ElevDEM/.3048,
     xlab = 'USGS Reported Elevation (ft)', ylab = 'DEM Elevation (ft)', main = 'Gauge Elevations in ROI')
#1:1 reference line
lines(c(-100,1100), c(-100,1100), col = 'red')
dev.off()
}
if (exists(x = "GaugeLocs")){
# Method 2 only