-
Notifications
You must be signed in to change notification settings - Fork 1
/
2-2_frontier-harmonization.R
116 lines (83 loc) · 4.06 KB
/
2-2_frontier-harmonization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
### 2.2 Frontier Harmonization
# This script harmonizes (or scales) the frontier using a level-wise approach,
# such that mortality rates of lower level causes of death sum to mortality
# rates of higher level causes of death. This script relies on a function
# (`harmonize`) created in `scr/harmonize.R`.
# 1 Loading data ----------------------------------------------------------
# Applying the standard project environment
# NOTE(review): `applyEnv` is defined outside this file; presumably it attaches
# the packages (dplyr verbs, `%>%`) and project helpers (`sarahLoad`,
# `sarahSave`, `saveGGplot`, `harmonize`) used below -- confirm.
applyEnv()
# Loading data
# The rest of the script refers to `cause_hierarchy`, `frontier_base` and
# `frontier_info_1`, so this call is expected to make those three objects
# available in the workspace.
# NOTE(review): assumes `sarahLoad` reads each entry from `folder` and assigns
# it under its base name (e.g. "frontier_info/frontier_info_1" ->
# `frontier_info_1`) -- verify against the helper's definition.
sarahLoad(c("cause_hierarchy", "frontier_base", "frontier_info/frontier_info_1"),
folder = "data/processed")
# Initializing a list of figures for the scaling factor graphs
# One entry per harmonized level ("lvl1", "lvl2", "lvl3") is added in section 2
# and the whole list is written out with `saveGGplot`.
figs <- list()
# 2 Harmonizing the frontier ----------------------------------------------

# Using the `harmonize` function from `scr/harmonize.R`.
# `harmonize(level, data, harmonized)` is called exactly once per level and
# both of its outputs are read from that single result:
#   $harmonized          rows for that level, appended to `harmonized`
#   $`scaling factors`   figure for that level, stored in `figs`
# Taking both from the same call guarantees that the saved figure describes
# the scaling that produced the data, and avoids running the harmonization
# twice per level.
# NOTE(review): the exact structure of the returned list is defined in
# `scr/harmonize.R`; only the two elements above are used here.

# Adding cause parents and levels
data <- frontier_base %>%
  left_join(
    cause_hierarchy %>%
      select(ghecause, parent_ghecause, parent_causename, level),
    by = "ghecause"
  )

# * 2.1 Level 0 -----------------------------------------------------------

# Reference level (all cause): left unscaled, every lower level is scaled
# towards it
harmonized <- data %>%
  filter(level == 0)

# * 2.2 Level 1 -----------------------------------------------------------

# Scaling level 1 frontiers so they sum to the level 0 frontier
res_lvl1 <- harmonize(1, data, harmonized)
lvl1 <- res_lvl1$harmonized

# Figure
figs[["lvl1"]] <- res_lvl1$`scaling factors`

# Extending the reference table only after both outputs have been collected
harmonized <- bind_rows(harmonized, lvl1)

# * 2.3 Level 2 -----------------------------------------------------------

# Scaling level 2 frontiers so they sum to level 1 frontiers
res_lvl2 <- harmonize(2, data, harmonized)
lvl2 <- res_lvl2$harmonized

# Figure
figs[["lvl2"]] <- res_lvl2$`scaling factors`

harmonized <- bind_rows(harmonized, lvl2)

# * 2.4 Level 3 -----------------------------------------------------------

# Scaling level 3 frontiers so they sum to level 2 frontiers
res_lvl3 <- harmonize(3, data, harmonized)
lvl3 <- res_lvl3$harmonized

# Figure
figs[["lvl3"]] <- res_lvl3$`scaling factors`

harmonized <- bind_rows(harmonized, lvl3)

# Saving harmonization scaling factors
saveGGplot(
  figs,
  "2-2_frontier-harmonization-scaling-factors.pdf",
  folder = "output/figures/process",
  width = 10, height = 6, multipage = TRUE
)
# * 2.5 Checking harmonization --------------------------------------------

# Final harmonized frontier: identifying columns plus the frontier value
frontier_harmonized <- harmonized %>%
  select(year, sex, age, ghecause, causename, definition, frontier) %>%
  arrange(definition, year, age, ghecause, sex) %>%
  ungroup()

# Label -> name of the logical column in `cause_hierarchy` that selects the
# causes to be summed for that level.
# NOTE(review): "mece" presumably stands for "mutually exclusive, collectively
# exhaustive" -- confirm against the cause hierarchy documentation.
levels <- c(
  "Level 1" = "mece_lvl1",
  "Level 2" = "mece_lvl2",
  "Level 3" = "mece_lvl3"
)

# Level 0 (all cause) frontier that each set of flagged causes should sum back
# to. It does not depend on the level being checked, so it is built once.
reference_frontier <- data %>%
  filter(level == 0) %>%
  dplyr::select(year, age, definition, reference = frontier)

# Rows with a concerning scaling factor, one list element per level label
concerns <- list()

for (label in names(levels)) {
  # Sum the harmonized frontier over the causes flagged for this level and
  # compare it with the level 0 reference.
  # NOTE(review): `sex` is not part of the grouping or of the join to the
  # reference, so this assumes a single `sex` value per (year, age,
  # definition) -- confirm.
  check <- frontier_harmonized %>%
    left_join(
      cause_hierarchy %>%
        dplyr::select(ghecause, mece = dplyr::all_of(levels[[label]])),
      by = "ghecause"
    ) %>%
    filter(mece) %>%
    group_by(year, age, definition) %>%
    dplyr::summarize(lower_summed = sum(frontier), .groups = "drop") %>%
    left_join(reference_frontier, by = c("year", "age", "definition")) %>%
    mutate(sf = reference / lower_summed)

  # A cell is concerning when the implied scaling factor is more than 1% away
  # from 1, or when it could not be computed at all (missing reference or
  # missing frontier gives NA). NaN, i.e. 0 / 0, is not flagged: both sides
  # are zero and therefore agree.
  concern <- check %>%
    filter(sf < 0.99 | sf > 1.01 | (is.na(sf) & !is.nan(sf))) %>%
    arrange(desc(sf))

  if (nrow(concern) > 0) {
    warning(paste("Concerning scaling factors:", label))
    concerns[[label]] <- concern
  }
}

# __+ frontier_harmonized -------------------------------------------------
sarahSave("frontier_harmonized", folder = "data/processed")
# 3 Adding to frontier_info dataframe -------------------------------------

# Columns identifying one frontier value in both tables
info_keys <- c("year", "age", "sex", "ghecause", "causename", "definition")

# Harmonized frontier with its value column named after this processing step
harmonized_frontier <- frontier_harmonized %>%
  dplyr::rename(harmonized = frontier)

# Keeping every row from either table, then recording the factor that takes
# the base frontier to the harmonized frontier. Factors that come out as NaN
# or infinite are replaced by 1.
frontier_info_2 <- full_join(frontier_info_1, harmonized_frontier, by = info_keys) %>%
  mutate(
    harmonize.sf = harmonized / base,
    harmonize.sf = ifelse(
      is.nan(harmonize.sf) | is.infinite(harmonize.sf),
      1,
      harmonize.sf
    )
  ) %>%
  arrange(definition, age, ghecause, sex, year) %>%
  ungroup()

# __+ frontier_info_2 -----------------------------------------------------
sarahSave("frontier_info_2", folder = "data/processed/frontier_info")