From ca234a6f9a1c935a546daf165e2ceb77757b11e3 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Tue, 26 Jun 2018 14:46:14 -0400 Subject: [PATCH] export self-adjoin + v0.9.0 --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 6 +- R/alluvial-data.r | 105 +++++++++++++++++---------------- R/geom-flow.r | 16 +++-- R/geom-utils.r | 30 ---------- R/lode-guidance-functions.r | 21 ++++--- R/self-adjoin.r | 68 +++++++++++++++++++++ _pkgdown.yml | 1 + inst/examples/ex-self-adjoin.r | 17 ++++++ man/alluvial-data.Rd | 53 +++++++++-------- man/lode-guidance-functions.Rd | 11 +++- man/self-adjoin.Rd | 64 ++++++++++++++++++++ 13 files changed, 269 insertions(+), 126 deletions(-) create mode 100644 R/self-adjoin.r create mode 100644 inst/examples/ex-self-adjoin.r create mode 100644 man/self-adjoin.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 41c45c70..fc33487c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ggalluvial Type: Package Title: Alluvial Diagrams in 'ggplot2' -Version: 0.8.1 +Version: 0.9.0 Date: 2018-05-30 Authors@R: person('Jason Cory', 'Brunson', email = 'cornelioid@gmail.com', role = c('aut', 'cre')) diff --git a/NAMESPACE b/NAMESPACE index 51894a10..57656c7f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ export(lode_leftward) export(lode_rightleft) export(lode_rightward) export(lode_zigzag) +export(self_adjoin) export(stat_alluvium) export(stat_flow) export(stat_stratum) diff --git a/NEWS.md b/NEWS.md index 02b3c104..bc98b15d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# ggalluviala 0.8.1 +# ggalluvial 0.9.0 ## `geom_alluvium()` patch @@ -16,6 +16,10 @@ The deprecated parameters `axis_width` (all geom layers) and `ribbon_bend` (`geo A vignette illustrating two methods for labeling small strata, using other **ggplot2** extensions, is included. +## `self_adjoin()` export + +The internal function `self_adjoin()`, invoked by `geom_flow()`, is revised, exported, documented, and exemplified. 
+ # ggalluvial 0.8.0 ## Stat layer functionality diff --git a/R/alluvial-data.r b/R/alluvial-data.r index fdc7d66d..07ede2ee 100644 --- a/R/alluvial-data.r +++ b/R/alluvial-data.r @@ -1,5 +1,5 @@ #' Check for alluvial structure and convert between alluvial formats -#' +#' #' Alluvial diagrams consist of multiple horizontally-distributed columns (axes) #' representing factor variables, vertical divisions (strata) of these axes #' representing these variables' values; and splines (alluvial flows) connecting @@ -19,58 +19,59 @@ #' set \code{axes} of fields encodes its values at each axis variable. #' \code{is_alluvia_form} tests for this structure. #' } -#' +#' #' \code{to_lodes_form} takes a data frame with several designated variables to #' be used as axes in an alluvial diagram, and reshapes the data frame so that -#' the axis variable names constitute a new factor variable and their values -#' comprise another. Other variables' values will be repeated, and a -#' row-grouping variable can be introduced. This function invokes +#' the axis variable names constitute a new factor variable and their values +#' comprise another. Other variables' values will be repeated, and a +#' row-grouping variable can be introduced. This function invokes #' \code{\link[tidyr]{gather}}. -#' +#' #' \code{to_alluvia_form} takes a data frame with axis and axis value variables #' to be used in an alluvial diagram, and reshape the data frame so that the #' axes constitute separate variables whose values are given by the value #' variable. This function invokes \code{\link[tidyr]{spread}}. -#' +#' #' @name alluvial-data #' @import tidyselect +#' @family alluvial data manipulation #' @param data A data frame. #' @param logical Deprecated. Whether to return a logical value or a character #' string indicating the type of alluvial structure ("none", "lodes", or #' "alluvia"). #' @param silent Whether to print messages. 
-#' @param key,value,id In \code{to_lodes_form}, handled as in -#' \code{\link[tidyr]{gather}} and used to name the new axis (key), stratum -#' (value), and alluvium (identifying) variables. In \code{to_alluvia_form}, -#' handled as in \code{\link[tidyr]{spread}} and used to identify the fields -#' of \code{data} to be used as the axis (key), stratum (value), and alluvium +#' @param key,value,id In \code{to_lodes_form}, handled as in +#' \code{\link[tidyr]{gather}} and used to name the new axis (key), stratum +#' (value), and alluvium (identifying) variables. In \code{to_alluvia_form}, +#' handled as in \code{\link[tidyr]{spread}} and used to identify the fields +#' of \code{data} to be used as the axis (key), stratum (value), and alluvium #' (identifying) variables. -#' @param axes In \code{*_alluvia_form}, handled as in -#' \code{\link[dplyr]{select}} and used to identify the field(s) of +#' @param axes In \code{*_alluvia_form}, handled as in +#' \code{\link[dplyr]{select}} and used to identify the field(s) of #' \code{data} to be used as axes. -#' @param ... Used in \code{is_alluvia_form} and \code{to_lodes_form} as in -#' \code{\link[dplyr]{select}} to determine axis variables, as an alternative +#' @param ... Used in \code{is_alluvia_form} and \code{to_lodes_form} as in +#' \code{\link[dplyr]{select}} to determine axis variables, as an alternative #' to \code{axes}. Ignored when \code{axes} is provided. -#' @param weight Optional field of \code{data}, handled using -#' \code{\link[rlang]{enquo}}, to be used as heights or depths of the alluvia +#' @param weight Optional field of \code{data}, handled using +#' \code{\link[rlang]{enquo}}, to be used as heights or depths of the alluvia #' or lodes. -#' @param diffuse Fields of \code{data}, handleded using -#' \code{\link[tidyselect]{vars_select}}, to merge into the reshapen data by -#' \code{id}. They must be a subset of the axis variables. 
Alternatively, a -#' logical value indicating whether to merge all (\code{TRUE}) or none +#' @param diffuse Fields of \code{data}, handled using +#' \code{\link[tidyselect]{vars_select}}, to merge into the reshapen data by +#' \code{id}. They must be a subset of the axis variables. Alternatively, a +#' logical value indicating whether to merge all (\code{TRUE}) or none #' (\code{FALSE}) of the axis variables. #' @param distill A logical value indicating whether to include variables, other -#' than those passed to \code{key} and \code{value}, that vary within values +#' than those passed to \code{key} and \code{value}, that vary within values #' of \code{id}. Alternatively, a function (or its name) to be used to distill -#' each such variable to a single value. In addition to existing functions, -#' \code{distill} accepts the character values \code{"first"} (used if -#' \code{distill} is \code{TRUE}), \code{"last"}, and \code{"most"} (which +#' each such variable to a single value. In addition to existing functions, +#' \code{distill} accepts the character values \code{"first"} (used if +#' \code{distill} is \code{TRUE}), \code{"last"}, and \code{"most"} (which #' returns the modal value). -#' @param discern Logical value indicating whether to suffix values of the -#' variables used as axes that appear at more than one variable in order to -#' distinguish their factor levels. This forces the levels of the combined +#' @param discern Logical value indicating whether to suffix values of the +#' variables used as axes that appear at more than one variable in order to +#' distinguish their factor levels. This forces the levels of the combined #' factor variable \code{value} to be in the order of the axes. 
#' @example inst/examples/ex-alluvial-data.r @@ -81,21 +82,21 @@ is_lodes_form <- function(data, weight = NULL, logical = TRUE, silent = FALSE) { if (!isTRUE(logical)) deprecate_parameter("logical") - + key_var <- vars_pull(names(data), !!rlang::enquo(key)) value_var <- vars_pull(names(data), !!rlang::enquo(value)) id_var <- vars_pull(names(data), !!rlang::enquo(id)) - + if (any(duplicated(cbind(data[c(key_var, id_var)])))) { if (!silent) warning("Duplicated id-axis pairings.") } - + n_pairs <- dplyr::n_distinct(data[key_var]) * dplyr::n_distinct(data[id_var]) if (nrow(data) < n_pairs) { if (!silent) warning("Missing id-axis pairings.") } - + # if `weight` is not `NULL`, use NSE to identify `weight_var` if (!is.null(rlang::enexpr(weight))) { weight_var <- vars_select(names(data), !!rlang::enquo(weight)) @@ -108,7 +109,7 @@ is_lodes_form <- function(data, return(if (logical) TRUE else "lodes") } } - + if (logical) TRUE else "lodes" } @@ -119,7 +120,7 @@ is_alluvia_form <- function(data, weight = NULL, logical = TRUE, silent = FALSE) { if (!isTRUE(logical)) deprecate_parameter("logical") - + if (is.null(rlang::enexpr(weight))) { weight_var <- NULL } else { @@ -132,7 +133,7 @@ is_alluvia_form <- function(data, return(if (logical) FALSE else "none") } } - + if (!is.null(rlang::enexpr(axes))) { axes <- data_at_vars(data, axes) } else { @@ -143,13 +144,13 @@ is_alluvia_form <- function(data, axes <- unname(vars_select(names(data), !!!quos)) } } - + n_alluvia <- nrow(dplyr::distinct(data[axes])) n_combns <- do.call(prod, lapply(data[axes], dplyr::n_distinct)) if (n_alluvia < n_combns) { if (!silent) message("Missing alluvia for some stratum combinations.") } - + if (logical) TRUE else "alluvia" } @@ -159,11 +160,11 @@ to_lodes_form <- function(data, ..., axes = NULL, key = "x", value = "stratum", id = "alluvium", diffuse = FALSE, discern = FALSE) { - + key_var <- rlang::quo_name(rlang::enexpr(key)) value_var <- rlang::quo_name(rlang::enexpr(value)) id_var <- 
rlang::quo_name(rlang::enexpr(id)) - + if (!is.null(rlang::enexpr(axes))) { axes <- data_at_vars(data, axes) } else { @@ -174,11 +175,11 @@ to_lodes_form <- function(data, axes <- unname(vars_select(names(data), !!!quos)) } } - + stopifnot(is_alluvia_form(data, axes, silent = TRUE)) - + if (!is.data.frame(data)) data <- as.data.frame(data) - + if (is.logical(rlang::enexpr(diffuse))) { diffuse <- if (diffuse) axes else NULL } else { @@ -187,7 +188,7 @@ to_lodes_form <- function(data, stop("All `diffuse` variables must be `axes` variables.") } } - + # combine factor levels cat_levels <- unname(unlist(lapply(lapply(data[axes], as.factor), levels))) if (any(duplicated(cat_levels)) & is.null(discern)) { @@ -200,14 +201,14 @@ to_lodes_form <- function(data, } else { strata <- unique(unname(cat_levels)) } - + # format data in preparation for `gather()` data[[id_var]] <- 1:nrow(data) if (!is.null(diffuse)) { diffuse_data <- data[, c(id_var, diffuse), drop = FALSE] } for (i in axes) data[[i]] <- as.character(data[[i]]) - + # `gather()` by `axes` res <- tidyr::gather(data, key = !!key_var, value = !!value_var, @@ -218,7 +219,7 @@ to_lodes_form <- function(data, if (!is.null(diffuse)) { res <- merge(diffuse_data, res, by = id_var, all.x = FALSE, all.y = TRUE) } - + res } @@ -227,13 +228,13 @@ to_lodes_form <- function(data, to_alluvia_form <- function(data, key, value, id, distill = FALSE) { - + key_var <- vars_pull(names(data), !!rlang::enquo(key)) value_var <- vars_pull(names(data), !!rlang::enquo(value)) id_var <- vars_pull(names(data), !!rlang::enquo(id)) - + stopifnot(is_lodes_form(data, key_var, value_var, id_var, silent = TRUE)) - + # handle any variables that vary within `id`s uniq_id <- length(unique(data[[id_var]])) uniq_data <- unique(data[setdiff(names(data), c(key_var, value_var))]) @@ -269,14 +270,14 @@ to_alluvia_form <- function(data, } else { distill <- NULL } - + # `spread()` by designated `key` and `value` res <- tidyr::spread(data, key = !!key_var, value = 
!!value_var) # recombine with `distill_data` if (!is.null(distill)) { res <- merge(distill_data, res, by = id_var, all.x = FALSE, all.y = TRUE) } - + res } diff --git a/R/geom-flow.r b/R/geom-flow.r index d9492622..c071b64e 100644 --- a/R/geom-flow.r +++ b/R/geom-flow.r @@ -91,14 +91,18 @@ GeomFlow <- ggproto( "colour", "fill", "alpha")) flow_fore <- if (aes.flow != "backward") flow_aes else NULL flow_back <- if (aes.flow != "forward") flow_aes else NULL - data <- self_adjoin(data, "x", "alluvium", pair = flow_pos, - keep0 = flow_fore, keep1 = flow_back) + data <- self_adjoin( + data = data, key = "x", by = "alluvium", + link = flow_pos, + keep.x = flow_fore, keep.y = flow_back, + suffix = c(".0", ".1") + ) # aesthetics (in prescribed order) aesthetics <- intersect(.color_diff_aesthetics, names(data)) # arrange data by aesthetics for consistent (reverse) z-ordering data <- data[do.call(order, lapply( - data[, c("link", aesthetics)], + data[, c("pair", aesthetics)], function(x) factor(x, levels = unique(x)) )), ] @@ -106,9 +110,9 @@ GeomFlow <- ggproto( xspls <- plyr::alply(data, 1, function(row) { # spline paths and aesthetics - xspl <- knots_to_xspl(row$xmax0, row$xmin1, - row$ymin0, row$ymax0, row$ymin1, row$ymax1, - row$knot.pos0, row$knot.pos1) + xspl <- knots_to_xspl(row$xmax.0, row$xmin.1, + row$ymin.0, row$ymax.0, row$ymin.1, row$ymax.1, + row$knot.pos.0, row$knot.pos.1) aes <- as.data.frame(row[flow_aes], stringsAsFactors = FALSE)[rep(1, 8), ] f_data <- cbind(xspl, aes) diff --git a/R/geom-utils.r b/R/geom-utils.r index 28008f25..26c3beee 100644 --- a/R/geom-utils.r +++ b/R/geom-utils.r @@ -7,36 +7,6 @@ rect_to_poly <- function(xmin, xmax, ymin, ymax) { ) } -# self-adjoin a dataset, pairing some fields and holding others from one end -self_adjoin <- function(data, key, also.by, - pair = NULL, keep0 = NULL, keep1 = NULL) { - # ensure that `key` is coercible to numeric - if (is.character(data[[key]])) data[[key]] <- as.factor(data[[key]]) - # self-(inner 
)join position aesthetics by numeric-coerced `key` and `also.by` - adj <- dplyr::inner_join( - transform(data, - link = as.numeric(data[[key]]))[, c("link", also.by, pair)], - transform(data, - link = as.numeric(data[[key]]) - 1)[, c("link", also.by, pair)], - by = c("link", also.by), - suffix = c("0", "1") - ) - # side-join non-position aesthetics - if (!is.null(keep0)) adj <- dplyr::left_join( - adj, - transform(data, - link = as.numeric(data[[key]]))[, c("link", also.by, keep0)], - by = c("link", also.by) - ) - if (!is.null(keep1)) adj <- dplyr::left_join( - adj, - transform(data, - link = as.numeric(data[[key]]) - 1)[, c("link", also.by, keep1)], - by = c("link", also.by) - ) - adj -} - # x-spline coordinates from 2 x bounds, 4 y bounds, and knot position knots_to_xspl <- function(x0, x1, ymin0, ymax0, ymin1, ymax1, kp0, kp1) { x_oneway <- c(x0, x0 + kp0, x1 - kp1, x1) diff --git a/R/lode-guidance-functions.r b/R/lode-guidance-functions.r index 6dc8c9a5..9d0b8f83 100644 --- a/R/lode-guidance-functions.r +++ b/R/lode-guidance-functions.r @@ -1,8 +1,13 @@ #' Lode guidance functions -#' -#' Each function orders the numbers 1 through \code{n}, starting at index -#' \code{i}. The choice of function made in \code{\link{stat_alluvium}} -#' determines the order in which the other axes contribute to the sorting of +#' +#' These functions control the order of lodes within strata in an alluvial +#' diagram. They are invoked by \code{\link{stat_alluvium}} and can be passed to +#' the \code{lode.guidance} parameter. +#' + +#' Each function orders the numbers 1 through \code{n}, starting at index +#' \code{i}. The choice of function made in \code{\link{stat_alluvium}} +#' determines the order in which the other axes contribute to the sorting of #' lodes within each index axis. 
After starting at \code{i}, the functions order #' the remaining axes as follows: #' \itemize{ @@ -22,20 +27,20 @@ NULL #' @rdname lode-guidance-functions #' @export lode_zigzag <- function(n, i) { - + # radii r1 <- i - 1 r2 <- n - i r <- min(r1, r2) - + # attempt cohesion in the direction of the closer end leftward <- (i <= n / 2) - + # setup sgn <- if(r1 == r2) 0 else (r2 - r1) / abs(r2 - r1) rem <- (i + sgn * (r + 1)):((n+1)/2 + sgn * (n-1)/2) zz <- (1 - 2 * leftward) * c(1, -1) - + # order c(i, if(r == 0) c() else sapply(1:r, function(j) i + j * zz), diff --git a/R/self-adjoin.r b/R/self-adjoin.r new file mode 100644 index 00000000..d45ac485 --- /dev/null +++ b/R/self-adjoin.r @@ -0,0 +1,68 @@ +#' Adjoin a dataset to itself +#' +#' This function binds a dataset to itself along adjacent pairs of a \code{key} +#' variable. It is invoked by \code{\link{geom_flow}} to convert data in lodes +#' form to something similar to alluvia form. +#' + +#' \code{self_adjoin} invokes \code{\link[dplyr]{join}} functions in order to +#' convert a dataset with measures along a discrete \code{key} variable into a +#' dataset consisting of column bindings of these measures (by any \code{by} +#' variables) along adjacent values of \code{key}. +#' @name self-adjoin +#' @import tidyselect +#' @family alluvial data manipulation +#' @param data A data frame in lodes form (repeated measures data; see +#' \code{\link{alluvial-data}}). +#' @param key Column of \code{data} indicating sequential collection; handled as +#' in \code{\link[tidyr]{spread}}. +#' @param by Character vector of variables to self-adjoin by; passed to +#' \code{\link[dplyr]{join}} functions. +#' @param link Character vector of variables to adjoin. Will be replaced by +#' pairs of variables suffixed by \code{suffix}. +#' @param keep.x,keep.y Character vector of variables to associate with the +#' first (respectively, second) copy of \code{data} after adjoining. 
These +#' variables can overlap with each other but cannot overlap with \code{by} or +#' \code{link}. +#' @param suffix Suffixes to add to the adjoined \code{link} variables; passed +#' to \code{\link[dplyr]{join}} functions. +#' @example inst/examples/ex-self-adjoin.r +#' @export +self_adjoin <- function( + data, key, by = NULL, + link = NULL, + keep.x = NULL, keep.y = NULL, + suffix = c(".x", ".y") +) { + # ensure that `key` is coercible to numeric + key_var <- tidyselect::vars_pull(names(data), !!rlang::enquo(key)) + key_num <- data[[key_var]] + if (is.character(key_num)) key_num <- as.factor(key_num) + key_num <- as.numeric(key_num) + + # self-(inner )join `link` variables by `key` and `by` + adj <- dplyr::inner_join( + transform(data, pair = key_num)[, c("pair", by, link)], + transform(data, pair = key_num - 1)[, c("pair", by, link)], + by = c("pair", by), + suffix = suffix + ) + + # bind `keep.*` variables + if (!is.null(keep.x)) { + adj <- dplyr::left_join( + adj, + transform(data, pair = key_num)[, c("pair", by, keep.x)], + by = c("pair", by) + ) + } + if (!is.null(keep.y)) { + adj <- dplyr::left_join( + adj, + transform(data, pair = key_num - 1)[, c("pair", by, keep.y)], + by = c("pair", by) + ) + } + + adj +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 7ef5d9e5..f5317747 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -3,6 +3,7 @@ reference: desc: Illustrate, test for, and convert between alluvial formats contents: - '`alluvial-data`' + - '`self-adjoin`' - '`vaccinations`' - '`majors`' - title: Plot layers diff --git a/inst/examples/ex-self-adjoin.r b/inst/examples/ex-self-adjoin.r new file mode 100644 index 00000000..9f9b1f58 --- /dev/null +++ b/inst/examples/ex-self-adjoin.r @@ -0,0 +1,17 @@ +# self-adjoin `majors` data +data(majors) +major_changes <- self_adjoin(majors, key = semester, + by = "student", link = c("semester", "curriculum")) +major_changes$change <- major_changes$curriculum.x == major_changes$curriculum.y +head(major_changes) + +# 
self-adjoin `vaccinations` data +data(vaccinations) +vaccination_steps <- self_adjoin(vaccinations, key = survey, by = "subject", + link = c("survey", "response"), + keep.x = c("freq", "a")) +head(vaccination_steps) +vaccination_steps <- self_adjoin(vaccinations, key = survey, by = "subject", + link = c("survey", "response"), + keep.x = c("freq", "a"), keep.y = "a") +head(vaccination_steps) diff --git a/man/alluvial-data.Rd b/man/alluvial-data.Rd index be24a97a..56cc9de8 100644 --- a/man/alluvial-data.Rd +++ b/man/alluvial-data.Rd @@ -22,15 +22,15 @@ to_alluvia_form(data, key, value, id, distill = FALSE) \arguments{ \item{data}{A data frame.} -\item{key, value, id}{In \code{to_lodes_form}, handled as in -\code{\link[tidyr]{gather}} and used to name the new axis (key), stratum -(value), and alluvium (identifying) variables. In \code{to_alluvia_form}, -handled as in \code{\link[tidyr]{spread}} and used to identify the fields -of \code{data} to be used as the axis (key), stratum (value), and alluvium +\item{key, value, id}{In \code{to_lodes_form}, handled as in +\code{\link[tidyr]{gather}} and used to name the new axis (key), stratum +(value), and alluvium (identifying) variables. In \code{to_alluvia_form}, +handled as in \code{\link[tidyr]{spread}} and used to identify the fields +of \code{data} to be used as the axis (key), stratum (value), and alluvium (identifying) variables.} -\item{weight}{Optional field of \code{data}, handled using -\code{\link[rlang]{enquo}}, to be used as heights or depths of the alluvia +\item{weight}{Optional field of \code{data}, handled using +\code{\link[rlang]{enquo}}, to be used as heights or depths of the alluvia or lodes.} \item{logical}{Deprecated. 
Whether to return a logical value or a character @@ -39,31 +39,31 @@ string indicating the type of alluvial structure ("none", "lodes", or \item{silent}{Whether to print messages.} -\item{...}{Used in \code{is_alluvia_form} and \code{to_lodes_form} as in -\code{\link[dplyr]{select}} to determine axis variables, as an alternative +\item{...}{Used in \code{is_alluvia_form} and \code{to_lodes_form} as in +\code{\link[dplyr]{select}} to determine axis variables, as an alternative to \code{axes}. Ignored when \code{axes} is provided.} -\item{axes}{In \code{*_alluvia_form}, handled as in -\code{\link[dplyr]{select}} and used to identify the field(s) of +\item{axes}{In \code{*_alluvia_form}, handled as in +\code{\link[dplyr]{select}} and used to identify the field(s) of \code{data} to be used as axes.} -\item{diffuse}{Fields of \code{data}, handleded using -\code{\link[tidyselect]{vars_select}}, to merge into the reshapen data by -\code{id}. They must be a subset of the axis variables. Alternatively, a -logical value indicating whether to merge all (\code{TRUE}) or none +\item{diffuse}{Fields of \code{data}, handleded using +\code{\link[tidyselect]{vars_select}}, to merge into the reshapen data by +\code{id}. They must be a subset of the axis variables. Alternatively, a +logical value indicating whether to merge all (\code{TRUE}) or none (\code{FALSE}) of the axis variables.} -\item{discern}{Logical value indicating whether to suffix values of the -variables used as axes that appear at more than one variable in order to -distinguish their factor levels. This forces the levels of the combined +\item{discern}{Logical value indicating whether to suffix values of the +variables used as axes that appear at more than one variable in order to +distinguish their factor levels. 
This forces the levels of the combined factor variable \code{value} to be in the order of the axes.} \item{distill}{A logical value indicating whether to include variables, other -than those passed to \code{key} and \code{value}, that vary within values +than those passed to \code{key} and \code{value}, that vary within values of \code{id}. Alternatively, a function (or its name) to be used to distill -each such variable to a single value. In addition to existing functions, -\code{distill} accepts the character values \code{"first"} (used if -\code{distill} is \code{TRUE}), \code{"last"}, and \code{"most"} (which +each such variable to a single value. In addition to existing functions, +\code{distill} accepts the character values \code{"first"} (used if +\code{distill} is \code{TRUE}), \code{"last"}, and \code{"most"} (which returns the modal value).} } \description{ @@ -90,9 +90,9 @@ types of alluvial structure: \details{ \code{to_lodes_form} takes a data frame with several designated variables to be used as axes in an alluvial diagram, and reshapes the data frame so that -the axis variable names constitute a new factor variable and their values -comprise another. Other variables' values will be repeated, and a -row-grouping variable can be introduced. This function invokes +the axis variable names constitute a new factor variable and their values +comprise another. Other variables' values will be repeated, and a +row-grouping variable can be introduced. This function invokes \code{\link[tidyr]{gather}}. 
\code{to_alluvia_form} takes a data frame with axis and axis value variables @@ -164,3 +164,6 @@ ggplot(majors[majors$semester \%in\% paste0("CURR", c(1, 7, 13)), ], geom_stratum(width = 2/5, discern = TRUE) + geom_text(stat = "stratum", discern = TRUE) } +\seealso{ +Other alluvial data manipulation: \code{\link{self-adjoin}} +} diff --git a/man/lode-guidance-functions.Rd b/man/lode-guidance-functions.Rd index 9e45f606..6e51b51a 100644 --- a/man/lode-guidance-functions.Rd +++ b/man/lode-guidance-functions.Rd @@ -25,9 +25,14 @@ lode_leftright(n, i) \item{i}{Numeric, a positive integer at most \code{n}} } \description{ -Each function orders the numbers 1 through \code{n}, starting at index -\code{i}. The choice of function made in \code{\link{stat_alluvium}} -determines the order in which the other axes contribute to the sorting of +These functions control the order of lodes within strata in an alluvial +diagram. They are invoked by \code{\link{stat_alluvium}} and can be passed to +the \code{lode.guidance} parameter. +} +\details{ +Each function orders the numbers 1 through \code{n}, starting at index +\code{i}. The choice of function made in \code{\link{stat_alluvium}} +determines the order in which the other axes contribute to the sorting of lodes within each index axis. 
After starting at \code{i}, the functions order the remaining axes as follows: \itemize{ diff --git a/man/self-adjoin.Rd b/man/self-adjoin.Rd new file mode 100644 index 00000000..2961efcc --- /dev/null +++ b/man/self-adjoin.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/self-adjoin.r +\name{self-adjoin} +\alias{self-adjoin} +\alias{self_adjoin} +\title{Adjoin a dataset to itself} +\usage{ +self_adjoin(data, key, by = NULL, link = NULL, keep.x = NULL, + keep.y = NULL, suffix = c(".x", ".y")) +} +\arguments{ +\item{data}{A data frame in lodes form (repeated measures data; see +\code{\link{alluvial-data}}).} + +\item{key}{Column of \code{data} indicating sequential collection; handled as +in \code{\link[tidyr]{spread}}.} + +\item{by}{Character vector of variables to self-adjoin by; passed to +\code{\link[dplyr]{join}} functions.} + +\item{link}{Character vector of variables to adjoin. Will be replaced by +pairs of variables suffixed by \code{suffix}.} + +\item{keep.x, keep.y}{Character vector of variables to associate with the +first (respectively, second) copy of \code{data} after adjoining. These +variables can overlap with each other but cannot overlap with \code{by} or +\code{link}.} + +\item{suffix}{Suffixes to add to the adjoined \code{link} variables; passed +to \code{\link[dplyr]{join}} functions.} +} +\description{ +This function binds a dataset to itself along adjacent pairs of a \code{key} +variable. It is invoked by \code{\link{geom_flow}} to convert data in lodes +form to something similar to alluvia form. +} +\details{ +\code{self_adjoin} invokes \code{\link[dplyr]{join}} functions in order to +convert a dataset with measures along a discrete \code{key} variable into a +dataset consisting of column bindings of these measures (by any \code{by} +variables) along adjacent values of \code{key}. 
+} +\examples{ +# self-adjoin `majors` data +data(majors) +major_changes <- self_adjoin(majors, key = semester, + by = "student", link = c("semester", "curriculum")) +major_changes$change <- major_changes$curriculum.x == major_changes$curriculum.y +head(major_changes) + +# self-adjoin `vaccinations` data +data(vaccinations) +vaccination_steps <- self_adjoin(vaccinations, key = survey, by = "subject", + link = c("survey", "response"), + keep.x = c("freq", "a")) +head(vaccination_steps) +vaccination_steps <- self_adjoin(vaccinations, key = survey, by = "subject", + link = c("survey", "response"), + keep.x = c("freq", "a"), keep.y = "a") +head(vaccination_steps) +} +\seealso{ +Other alluvial data manipulation: \code{\link{alluvial-data}} +}