Add custom functions for DEA calculations (#82)

Add custom functions for DEA calculations Add custom functions for efficiency, super efficiency, slack and peer calculations. This removes Benchmarking as a hard dependency. The commit implies some minor changes in the slack table shown in the Analyse tab, but should otherwise not effect the UI. Includes: * New exported function compute_dea() * New internal function compute_efficiency() * New internal function compute_super_efficiency() * New internal function compute_slack() * New internal function get_peers() * New test datasets and extensive unit tests for these functions * Bug fix for rounding error in the UI Closes #80 Closes #84 --------- Co-authored-by: Ove Haugland Jakobsen <ohj@mac.com>
Riksrevisjonen · Apr 15, 2024 · 8ea62d2 · 8ea62d2
1 parent 3bb56d8
commit 8ea62d2
Show file tree

Hide file tree

Showing 27 changed files with 2,118 additions and 99 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -11,3 +11,4 @@ dev
 ^CONTRIBUTING.md$
 ^DOCKERFILE$
 ^README.Rmd$
+^tests/script
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,44 +3,44 @@ Title: Perform productivity analyses in a web browser
 Version: 0.3.0.9000
 Authors@R: c(
     person("Ove Haugland", "Jakobsen", role = c("aut", "cre"), email = "ohj@mac.com"),
+    person("Aleksander", "Eilertsen", role = "aut", email = "ale@riksrevisjonen.no"),
     person("Jan Roar", "Beckstrøm", role = "ctb", email = "jrb@riksrevisjonen.no"),
     person("Lars Skaage", "Engebretsen", role = "ctb", email = "lse@riksrevisjonen.no"),
     person("Jonas", "Månsson", role = "ctb", email = "jonas.mansson@riksrevisjonen.no"),
-    person("Aleksander", "Eilertsen", role = "ctb", email = "ale@riksrevisjonen.no"),
     person("Joachim", "Sandnes", role = "ctb", email = "jsn@riksrevisjonen.no"),
     person(family = "Riksrevisjonen", role = "cph"))
-Description: What the package does (one paragraph).
+Description: Application to conduct productivity and efficiency (DEA) analysis. 
 License: GPL-3 | file LICENSE
 URL: https://riksrevisjonen.github.io/pioneeR/, https://github.com/Riksrevisjonen/pioneeR
 BugReports: https://github.com/Riksrevisjonen/pioneeR/issues
 Depends:
     R (>= 4.1.0),
     shiny (>= 1.7.0)
 Imports:
-    Benchmarking (>= 0.30),
+    lpSolveAPI (>= 5.5.2),
     haven (>= 2.5.0),
     markdown,
     productivity (>= 1.1.0),
     readxl,
     writexl,
     bslib (>= 0.5.1),
     ggplot2 (>= 3.3.0),
-    reactable (>= 0.4.0)
+    reactable (>= 0.4.0),
+    cli (>= 3.6.0)
 Suggests:
-    cli,
+    Benchmarking (>= 0.30),
     data.table,
-    lpSolve,
-    lpSolveAPI,
     rmarkdown,
     knitr,
     scales,
     rlang,
     testthat (>= 3.0.0),
-    tibble
+    tibble,
+    withr
 Language: en-GB
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 VignetteBuilder: knitr
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,8 @@
 
 S3method(summary_tbl_dea,Farrell)
 S3method(summary_tbl_dea,numeric)
+S3method(summary_tbl_dea,pioneer_dea)
+export(compute_dea)
 export(compute_scale_efficiency)
 export(create_matrix)
 export(runPioneeR)

diff --git a/NEWS.md b/NEWS.md
@@ -1,11 +1,18 @@
 # pioneeR 0.3.0.9000
 
-- Fixed bug where checkbox for timeseries would be available before any data is uploaded (#74)
+## Changes & improvements
+
 - Updated documentation for `run_pioneer()`
+- Added custom functions for DEA calculations in order to remove `{Benchmarking}` as a dependency (#80)
+
+## Bug fixes
+
+- Fixed bug where checkbox for timeseries would be available before any data is uploaded (#74)
+- Fixed a few bugs where the UI did not correctly reflect the user supplied input for rounding (#84)
 
 # pioneeR 0.3.0
 
-## Breaking changes:
+## Breaking changes
 
 - pioneeR now depends on bslib 0.5.1 or higher. Version 0.6.0 or higher is recommended, and support for version 0.5.1 will be deprecated in the next release
 

diff --git a/R/compute_dea.R b/R/compute_dea.R
@@ -0,0 +1,91 @@
+#' Compute DEA
+#'
+#' DEA analysis.
+#'
+#' @param data Dataset to analyse.
+#' @param id A string with the DMU id or name variable.
+#' @param input A character vector with input variables.
+#' @param output A character vector with output variables.
+#' @param rts Returns to scale.
+#' \tabular{rlll}{
+#'    \tab crs \tab\tab Constant returns to scale, convexity and free disposability. \cr
+#'    \tab vrs \tab\tab Variable returns to scale, convexity and free disposability. \cr
+#'    \tab drs \tab\tab Decreasing returns to scale, convexity, down-scaling and free disposability. \cr
+#'    \tab irs \tab\tab Increasing returns to scale, (up-scaling, but not down-scaling), convexity and free disposability.
+#' }
+#' @param orientation Model orientation.
+#' @param super If `TRUE` super efficiency scores are calculated.
+#' @param slack If `TRUE` slack values are calculated.
+#' @param peers If `TRUE` peers are added to the response.
+#' @return list
+#' @export
+compute_dea <- function(
+    data,
+    id,
+    input,
+    output,
+    rts = c('crs', 'vrs', 'drs', 'irs'),
+    orientation = c('in', 'out'),
+    super = FALSE,
+    slack = FALSE,
+    peers = FALSE) {
+
+  rts <- match.arg(rts)
+  orientation <- match.arg(orientation)
+
+  # Prepare and validate input
+  x <- as.matrix(data[input])
+  y <- as.matrix(data[output])
+  ids <- data[[id]]
+  check_data(x, y)
+
+  # Calculate DEA metrics
+  res <- compute_dea_(
+    x, y, ids,
+    rts = rts,
+    orientation = orientation,
+    super = super,
+    slack = slack,
+    peers = peers
+  )
+
+  res
+
+}
+
+#' compute_dea_ (internal helper)
+#' @inheritParams compute_dea
+#' @inheritParams compute_efficiency
+#' @return list
+#' @noRd
+compute_dea_ <- function(x, y, ids, rts, orientation, super, slack, peers) {
+
+  # Set initial values
+  super_res <- NULL
+  slack_res <- NULL
+  peers_res <- NULL
+
+  eff_res <- compute_efficiency(x, y, rts = rts, orientation = orientation)
+  if (super) super_res <- compute_super_efficiency(x, y, rts = rts, orientation = orientation)
+  if (slack) slack_res <- compute_slack(x, y, eff_res)
+  if (peers) {
+    peers_res <- get_peers(eff_res, ids)
+    peers_res <- peers_res[2:ncol(peers_res)]
+  }
+
+  res <- list(
+    efficiency = eff_res$values,
+    super_efficiency = super_res$values,
+    slack = slack_res$values
+  )
+  res <- do.call('cbind', res)
+  if (peers) res <- cbind(res, peers_res)
+
+  out <- structure(
+    list(
+      results = data.frame(dmu = ids, res),
+      info = eff_res$info
+    ), class = 'pioneer_dea'
+  )
+
+}
diff --git a/R/dea-methods.R b/R/dea-methods.R
@@ -0,0 +1,182 @@
+#' Compute efficiency
+#'
+#' Compute DEA efficiency scores.
+#'
+#' @inheritParams compute_dea
+#' @param x A matrix with input values.
+#' @param y A matrix with output values.
+#' @param xref A matrix with input values for the reference technology. Defaults to `NULL`.
+#' @param yref A matrix with output values for the reference technology. Defaults to `NULL`.
+#' @param values_only Only return the calculated values.
+#' @return list
+#' @noRd
+compute_efficiency <- function(
+    x, y,
+    xref = NULL,
+    yref = NULL,
+    rts,
+    orientation,
+    values_only = FALSE) {
+  # Get dims
+  dims <- get_dims(x, y, xref, yref, rts = rts, slack = FALSE)
+  # Create linear program model
+  lp <- create_lp(
+    x = x, y = y,
+    xref = xref, yref = yref,
+    rts = rts,
+    orientation = orientation,
+    m = dims$n_inputs,
+    n = dims$n_outputs,
+    n_units = dims$n_units,
+    n_constraints = dims$n_constraints,
+    n_vars = dims$n_vars,
+    slack = FALSE)
+  # Solve model
+  res <- solve_lp(
+    lp, x, y,
+    orientation = orientation,
+    m = dims$n_inputs,
+    n = dims$n_outputs)
+  # Adjust values
+  res <- adjust_values(res$values, res$lambda, res$eps)
+  # Return
+  res <- create_dea_output(
+    res,
+    rts = rts,
+    orientation = orientation,
+    dims = dims,
+    values_only = values_only)
+  res
+}
+
+#' Compute super efficiency
+#'
+#' Compute DEA super efficiency scores.
+#'
+#' @inheritParams compute_efficiency
+#' @noRd
+compute_super_efficiency <- function(
+    x, y,
+    rts,
+    orientation) {
+  dims <- get_dims(x, y, rts = rts, slack = FALSE)
+  lambda <- matrix(0, dims$n_units, dims$n_units)
+  values <- rep(NA_real_, dims$n_units)
+  for (i in seq_len(dims$n_units)) {
+    # Create linear program model
+    lp <- create_lp(
+      x[-i,,drop=FALSE],
+      y[-i,,drop=FALSE],
+      rts = rts,
+      orientation = orientation,
+      m = dims$n_inputs,
+      n = dims$n_outputs,
+      n_units = dims$n_units - 1,
+      n_constraints = dims$n_constraints,
+      n_vars = dims$n_vars - 1,
+      slack = FALSE)
+    # Solve model
+    tmp <- solve_lp_single(
+      x[i,,drop=FALSE],
+      y[i,,drop=FALSE],
+      lp = lp,
+      orientation = orientation,
+      m = dims$n_inputs,
+      n = dims$n_outputs)[[1]]
+    values[i] <- tmp$value
+    lambda[i,-i] <- tmp$solution
+  }
+  eps <- lpSolveAPI::lp.control(lp)$epsilon['epsint']
+  res <- adjust_values(values, lambda, eps)
+  # Return
+  res <- create_dea_output(
+    res,
+    rts = rts,
+    orientation = orientation,
+    dims = dims,
+    values_only = FALSE)
+  res
+}
+
+#' Compute slack
+#'
+#' Compute slack for a DEA efficiency analysis.
+#'
+#' @inheritParams compute_efficiency
+#' @param model Output of `compute_efficiency()`.
+#' @return list
+#' @noRd
+compute_slack <- function(x, y, model) {
+  # Get dims
+  dims <- get_dims(x, y, rts = model$info$rts, slack = TRUE)
+  # Scale inputs and outputs if needed
+  scale <- scale_vars(
+    x, y,
+    m = dims$n_inputs,
+    n = dims$n_outputs,
+    n_units = dims$n_units)
+  # Create linear program model
+  lp <- create_lp(
+    scale$x, scale$y,
+    rts = model$info$rts,
+    orientation = model$info$orientation,
+    m = dims$n_inputs,
+    n = dims$n_outputs,
+    n_units = dims$n_units,
+    n_constraints = dims$n_constraints,
+    n_vars = dims$n_vars,
+    n_lambda = dims$n_lambda,
+    slack = TRUE)
+  # Solve model
+  res <- solve_lp_slack(
+    lp,
+    scale$x, scale$y,
+    values = model$unadj_values, # slack is calculated based on unadjusted eff.scores
+    orientation = model$info$orientation,
+    m = dims$n_inputs,
+    n = dims$n_outputs,
+    n_units = dims$n_units,
+    n_vars = dims$n_vars)
+  # Adjust values
+  res <- adjust_slack(
+    lp,
+    values = res$values,
+    sx = res$sx,
+    sy = res$sy,
+    lambda = res$lambda,
+    scaling = scale,
+    m = dims$n_inputs,
+    n = dims$n_outputs,
+    n_units = dims$n_units)
+  # Return
+  res <- create_dea_output(
+    res,
+    rts = model$info$rts,
+    orientation = model$info$orientation,
+    dims = dims,
+    values_only = FALSE)
+  res
+}
+
+#' Get peers
+#'
+#' Get peers for each DMU.
+#'
+#' @param model Output of `compute_efficiency()`.
+#' @param ids A vector with DMU names or ids.
+#' @param threshold Minimum weight for extracted peers. Defaults to 0.
+#' @return data.frame
+#' @noRd
+get_peers <- function(model, ids, threshold = 0) {
+  lambda <- model$lambda
+  pt_ <- apply(lambda, 1, function(x) {which(x > threshold)})
+  if (dim(lambda)[1] == 1) pt_ <- list(c(pt_))  # Only 1 DMU
+  bench <- t(mapply(function(x) x[1:max(sapply(pt_, length))], pt_))
+  maxp <- max(sapply(pt_, length))
+  if (maxp == 0 | is.na(maxp)) maxp <- 1
+  dim(bench) <- c(dim(lambda)[1], maxp)
+  bench <- matrix(ids[bench], nrow=nrow(bench))
+  colnames(bench) <- paste0('peer', 1:ncol(bench))
+  bench <- data.frame(cbind(dmu = ids, bench))
+  bench
+}