From c7e5c063496c7fbe59357391959e1ae3d817bc59 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 28 Jul 2021 21:19:54 -0400 Subject: [PATCH 01/27] Note warning --- README.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.Rmd b/README.Rmd index 24a1f13..611aa90 100644 --- a/README.Rmd +++ b/README.Rmd @@ -65,9 +65,9 @@ Because [there are many Dataverse installations](https://dataverse.org/), all fu Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` -3. Hard-code a default server in your own environment. Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`. +3. Hard-code a default server in your own environment. Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`. Note that doing this may make your scripts not replicable to other people who do not have access to the environment. -In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. +In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. ### Data Download From 01f5f02fc9b0a792c2e3daefe27eaca9e0372629 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 28 Jul 2021 21:20:13 -0400 Subject: [PATCH 02/27] Descriptive --- README.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.Rmd b/README.Rmd index 611aa90..5423162 100644 --- a/README.Rmd +++ b/README.Rmd @@ -40,7 +40,7 @@ remotes::install_github("iqss/dataverse-client-r") library("dataverse") ``` -#### Keys +#### API Access Keys Many features of the Dataverse API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account or API key -- you wil not need to worry about this. 
From b80e5b74794eaf54e0b5bdc0c96d3befa7b594e3 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Sat, 1 Jan 2022 22:21:29 +0900 Subject: [PATCH 03/27] Sentences --- README.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.Rmd b/README.Rmd index 71c7db4..23fa2c5 100644 --- a/README.Rmd +++ b/README.Rmd @@ -65,9 +65,9 @@ Because [there are many Dataverse installations](https://dataverse.org/), all fu Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` -3. Hard-code a default server in your own environment. Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`. Note that doing this may make your scripts not replicable to other people who do not have access to the environment. +3. Hard-code a default server in your own environment. Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`. However, doing this may make your scripts not replicable to other people who do not have access to the environment. -In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. +In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path. ### Data Download From 7a05564b54ddb1bd35fb8d768976a95643d2c5ac Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 5 Jan 2022 22:18:53 -0800 Subject: [PATCH 04/27] Prevent WIP R files getting built --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 17082bc..a3b9f6e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -23,3 +23,4 @@ man-roxygen/* ^CRAN-RELEASE$ ^\.github$ rhub-checks +/*.R From 316d67733d1329fb3323a8002b2294e4f9832585 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 6 Jan 2022 15:29:38 -0800 Subject: [PATCH 05/27] Use different name for output vs. 
function --- R/get_file_by_id.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index ba48827..081c848 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -44,10 +44,10 @@ get_file_by_id <- function( } # ping get_file_metadata to see if file is ingested - is_ingested <- is_ingested(fileid, server = server) + ingested <- is_ingested(fileid, server = server) # update archival if not specified - if (isFALSE(is_ingested)) + if (isFALSE(ingested)) original <- NA # create query ----- @@ -58,11 +58,11 @@ get_file_by_id <- function( # format only matters in ingested datasets, # For non-ingested files (rds/docx), we need to NOT specify a format # also for bundle, only change url - if (is_ingested & format != "bundle") + if (ingested & format != "bundle") query$format <- match.arg(format) # if the original is not desired, we need to NOT specify a format - if (is_ingested & (isFALSE(original) || is.na(original) || is.null(original))) + if (ingested & (isFALSE(original) || is.na(original) || is.null(original))) query$format <- NULL # part of URL depending on DOI, bundle, or file From 2b5d9769560816a2ebdeb452f192d9b80e121d01 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 6 Jan 2022 17:37:56 -0800 Subject: [PATCH 06/27] syntax - no commas --- tests/testthat/tests-get_dataframe-dataframe-basketball.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/tests-get_dataframe-dataframe-basketball.R b/tests/testthat/tests-get_dataframe-dataframe-basketball.R index aa5dcd0..52e6c86 100644 --- a/tests/testthat/tests-get_dataframe-dataframe-basketball.R +++ b/tests/testthat/tests-get_dataframe-dataframe-basketball.R @@ -24,7 +24,7 @@ test_that("roster-by-doi", { actual <- get_dataframe_by_doi( - filedoi = expected_ds$roster$dataFile$persistentId, # A value like "doi:10.70122/FK2/HXJVJU/SA3Z2V", + filedoi = expected_ds$roster$dataFile$persistentId # A value like 
"doi:10.70122/FK2/HXJVJU/SA3Z2V", ) expect_equal(actual, expected_file) @@ -38,7 +38,7 @@ test_that("roster-by-id", { actual <- get_dataframe_by_id( - fileid = expected_ds$roster$dataFile$id, # A value like 1734005 + fileid = expected_ds$roster$dataFile$id # A value like 1734005 ) expect_equal(actual, expected_file) From f13246a362121d80062c1e74b7817730db06dac4 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 6 Jan 2022 17:39:00 -0800 Subject: [PATCH 07/27] Attempt at fixing #80 minimally. Will cause some unexpected reading issues in testthat/tests-get_dataframe-dataframe-basketball.R --- R/get_dataframe.R | 4 ---- R/get_file_by_id.R | 15 ++++++++------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index 0316d21..c3576d7 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -132,10 +132,6 @@ get_dataframe_by_id <- function( # if not ingested, then whether to take the original is not relevant. ingested <- is_ingested(fileid, ...) - if (isFALSE(ingested)) { - original <- NA - } - if (is.null(.f) & isTRUE(ingested) & isFALSE(original)) { message("Downloading ingested version of data with readr::read_tsv. 
To download the original version and remove this message, set original = TRUE.\n") .f <- readr::read_tsv diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index 081c848..be87ebd 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -46,10 +46,6 @@ get_file_by_id <- function( # ping get_file_metadata to see if file is ingested ingested <- is_ingested(fileid, server = server) - # update archival if not specified - if (isFALSE(ingested)) - original <- NA - # create query ----- query <- list() if (!is.null(vars)) @@ -57,14 +53,19 @@ get_file_by_id <- function( # format only matters in ingested datasets, # For non-ingested files (rds/docx), we need to NOT specify a format - # also for bundle, only change url - if (ingested & format != "bundle") - query$format <- match.arg(format) # if the original is not desired, we need to NOT specify a format if (ingested & (isFALSE(original) || is.na(original) || is.null(original))) query$format <- NULL + # also for bundle, only change url + if (ingested & format != "bundle") + query$format <- match.arg(format) + + # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113) + if (isTRUE(original) & format == "original") + query$format <- format + # part of URL depending on DOI, bundle, or file if (use_persistent_id) { u_part <- "access/datafile/:persistentId?persistentId=" From 118d327a1b0f96beb348fd227515b4b697352af8 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 6 Jan 2022 17:42:17 -0800 Subject: [PATCH 08/27] Proper regex --- .Rbuildignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index a3b9f6e..ebaa95f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -23,4 +23,4 @@ man-roxygen/* ^CRAN-RELEASE$ ^\.github$ rhub-checks -/*.R +/.*\.R$ From 83d1b6cf0d0c6b73e7d7b560103ac6296740d48c Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Fri, 7 Jan 2022 10:26:43 -0800 Subject: [PATCH 09/27] Tweak placement of code chunks --- 
vignettes/C-download.Rmd | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd index c8a26e0..637be8a 100644 --- a/vignettes/C-download.Rmd +++ b/vignettes/C-download.Rmd @@ -19,6 +19,13 @@ This vignette shows how to download data from Dataverse using the dataverse pack The Dataverse entry for this study is persistently retrievable by a "Digital Object Identifier (DOI)": https://doi.org/10.7910/DVN/ARKOTI and the citation on the Dataverse Page includes a "[Universal Numeric Fingerprint (UNF)](https://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files. +The following examples will draw from the Harvard Dataverse, so it is convenient to set this as a default environment variable. + +```{r} +Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") +``` + + ## Retrieving Metadata @@ -27,7 +34,6 @@ We will download these files and examine them directly in R using the **datavers ```{r} library("dataverse") library("tibble") # to see dataframes in tidyverse-form -Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` The `get_dataset()` function lists all of the files in the dataset along with a considerable amount of metadata about each. (Recall that in Dataverse, `dataset` is a collection of files, not a single file.) We can see a quick glance at these files using: @@ -78,7 +84,8 @@ head(energy) ## Retrieving Custom Data Fromats (RDS, Stata, SPSS) -If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [ingested](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format. +If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. 
(2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [inge +sted](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format. ```{r, message=FALSE} argentina_tab <- get_dataframe_by_name( From fea854ea3d89a59a89aae9b6df308c5335bcd9b6 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Fri, 7 Jan 2022 10:32:52 -0800 Subject: [PATCH 10/27] Correct statement about current default --- R/get_dataframe.R | 5 +++-- man/get_dataframe.Rd | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index c3576d7..10961dc 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -19,8 +19,9 @@ #' file, then `.f` should be `readRDS` or `readr::read_rds`. It can be a custom #' function defined by the user. See examples for details. #' -#' @param original A logical, defaulting to `TRUE`. Whether to read the ingested, -#' archival version of the datafile if one exists. The archival versions are tab-delimited +#' @param original A logical, whether to read the ingested, +#' archival version of the datafile if one exists. If `TRUE`, users should supply +#' a function to use to read in the original. The archival versions are tab-delimited #' `.tab` files so if `original = FALSE`, `.f` is set to `readr::read_tsv`. #' #' @inheritDotParams get_file diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index cbcd3c5..b9ce9ff 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -31,8 +31,9 @@ must choose the appropriate function: for example if the target is a .rds file, then \code{.f} should be \code{readRDS} or \code{readr::read_rds}. It can be a custom function defined by the user. See examples for details.} -\item{original}{A logical, defaulting to \code{TRUE}. Whether to read the ingested, -archival version of the datafile if one exists. 
The archival versions are tab-delimited +\item{original}{A logical, whether to read the ingested, +archival version of the datafile if one exists. If \code{TRUE}, users should supply +a function to use to read in the original. The archival versions are tab-delimited \code{.tab} files so if \code{original = FALSE}, \code{.f} is set to \code{readr::read_tsv}.} \item{...}{ From 863dddfce9a5ddc2caa30781e4cf6296e1545ee0 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Fri, 7 Jan 2022 10:33:23 -0800 Subject: [PATCH 11/27] Previous version somehow ignoring functions --- .Rbuildignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index ebaa95f..273e558 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -23,4 +23,4 @@ man-roxygen/* ^CRAN-RELEASE$ ^\.github$ rhub-checks -/.*\.R$ +/Untitled.+\.R$ From 98fd69dd7910387ff7de3898fb3a38be2e697ffe Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Fri, 7 Jan 2022 23:38:11 -0800 Subject: [PATCH 12/27] Revert f13246a362121d80062c1e74b7817730db06dac4 --- R/get_dataframe.R | 5 +++++ R/get_file_by_id.R | 11 ++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index 10961dc..955c265 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -133,6 +133,11 @@ get_dataframe_by_id <- function( # if not ingested, then whether to take the original is not relevant. ingested <- is_ingested(fileid, ...) + + if (isFALSE(ingested)) { + original <- NA + } + if (is.null(.f) & isTRUE(ingested) & isFALSE(original)) { message("Downloading ingested version of data with readr::read_tsv. 
To download the original version and remove this message, set original = TRUE.\n") .f <- readr::read_tsv diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index be87ebd..36dfdc6 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -46,6 +46,11 @@ get_file_by_id <- function( # ping get_file_metadata to see if file is ingested ingested <- is_ingested(fileid, server = server) + # update archival if not specified + if (isFALSE(ingested)) + original <- NA + + # create query ----- query <- list() if (!is.null(vars)) @@ -62,9 +67,9 @@ get_file_by_id <- function( if (ingested & format != "bundle") query$format <- match.arg(format) - # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113) - if (isTRUE(original) & format == "original") - query$format <- format + # # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113) + # if (isTRUE(original) & format == "original") + # query$format <- format # part of URL depending on DOI, bundle, or file if (use_persistent_id) { From 5ffbc110f12c77f5b7aff04f3aaaca5761e27643 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Fri, 7 Jan 2022 23:43:13 -0800 Subject: [PATCH 13/27] First attempt for #113 (though see https://github.com/IQSS/dataverse-client-r/issues/113#issuecomment-1007523529), retry for #80 --- R/utils.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/utils.R b/R/utils.R index cb0bdec..b412d97 100644 --- a/R/utils.R +++ b/R/utils.R @@ -106,13 +106,13 @@ is_ingested <- server = Sys.getenv("DATAVERSE_SERVER"), ... 
) { - ping_metadata <- tryCatch( - { - get_file_metadata(fileid, key = key, server = server) - }, - error = function(e) e - ) - !inherits(ping_metadata, "error") # if error, not ingested + file_info <- suppressMessages(dataverse_search(fileid = fileid, server = server, key = key, ...)) + + if (nrow(file_info) > 1) + warning("More than 1 file found for `is_ingested`, search may be unreliable.") + + # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested + (!is.null(file_info$unf[1]) & !is.na(file_info$unf[1])) } From e32d354a1302bc659100bb12a228b04e7f000481 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 20:29:40 -0800 Subject: [PATCH 14/27] Implement suggestion to prepend with quotes or datafile_. https://github.com/IQSS/dataverse-client-r/issues/113#issuecomment-1011208445 --- R/utils.R | 246 ++++++++++++++++++++++++--------------------- man/is_ingested.Rd | 14 ++- 2 files changed, 141 insertions(+), 119 deletions(-) diff --git a/R/utils.R b/R/utils.R index b412d97..33e4ad8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,181 +1,199 @@ # dataverse_id method dataverse_id <- function(x, ...) { - UseMethod('dataverse_id', x) + UseMethod('dataverse_id', x) } dataverse_id.default <- function(x, ...) { - x + x } dataverse_id.character <- function(x, ...) { - get_dataverse(x, ..., check = FALSE)$id + get_dataverse(x, ..., check = FALSE)$id } dataverse_id.dataverse <- function(x, ...) { - x$id + x$id } # dataset_id method dataset_id <- function(x, ...) { - UseMethod('dataset_id', x) + UseMethod('dataset_id', x) } dataset_id.default <- function(x, ...) { - x + x } dataset_id.character <- function(x, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) 
{ - x <- prepend_doi(x) - u <- paste0(api_url(server), "datasets/:persistentId?persistentId=", x) - r <- tryCatch(httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...), - error = function(e) { - stop("Could not retrieve Dataset ID from persistent identifier!") - }) - jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))[["data"]][["id"]] + x <- prepend_doi(x) + u <- paste0(api_url(server), "datasets/:persistentId?persistentId=", x) + r <- tryCatch(httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...), + error = function(e) { + stop("Could not retrieve Dataset ID from persistent identifier!") + }) + jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))[["data"]][["id"]] } dataset_id.dataverse_dataset <- function(x, ...) { - x$id + x$id } # get fileid from a dataset DOI or dataset ID get_fileid <- function(x, ...) { - UseMethod('get_fileid', x) + UseMethod('get_fileid', x) } get_fileid.numeric <- function(x, file, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) { - files <- dataset_files(x, key = key, server = server, ...) - ids <- unlist(lapply(files, function(x) x[["datafile"]][["id"]])) - if (is.numeric(file)) { - w <- which(ids %in% file) - if (!length(w)) { - stop("File not found") - } - id <- ids[w] - } else { - ns <- unlist(lapply(files, `[[`, "label")) - w <- which(ns %in% file) - if (!length(w)) { - stop("File not found") - } - id <- ids[w] + files <- dataset_files(x, key = key, server = server, ...) + ids <- unlist(lapply(files, function(x) x[["datafile"]][["id"]])) + if (is.numeric(file)) { + w <- which(ids %in% file) + if (!length(w)) { + stop("File not found") } - id + id <- ids[w] + } else { + ns <- unlist(lapply(files, `[[`, "label")) + w <- which(ns %in% file) + if (!length(w)) { + stop("File not found") + } + id <- ids[w] + } + id } get_fileid.character <- function(x, file, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) 
{ - files <- dataset_files(prepend_doi(x), key = key, server = server, ...) - ids <- unlist(lapply(files, function(x) x[["dataFile"]][["id"]])) - if (is.numeric(file)) { - w <- which(ids %in% file) - if (!length(w)) { - stop("File not found") - } - id <- ids[w] - } else { - ns <- unlist(lapply(files, `[[`, "label")) - w <- which(ns %in% file) - if (!length(w)) { - stop("File not found") - } - id <- ids[w] + files <- dataset_files(prepend_doi(x), key = key, server = server, ...) + ids <- unlist(lapply(files, function(x) x[["dataFile"]][["id"]])) + if (is.numeric(file)) { + w <- which(ids %in% file) + if (!length(w)) { + stop("File not found") + } + id <- ids[w] + } else { + ns <- unlist(lapply(files, `[[`, "label")) + w <- which(ns %in% file) + if (!length(w)) { + stop("File not found") } - id + id <- ids[w] + } + id } get_fileid.dataverse_file <- function(x, ...) { - x[["dataFile"]][["id"]] + x[["dataFile"]][["id"]] } +# Ingested +is_ingested <- function(x, ...) { + UseMethod('is_ingested', x) +} + #' Identify if file is an ingested file #' -#' @param fileid A numeric fileid or file-specific DOI +#' @param x A numeric fileid or file-specific DOI #' @param ... 
Arguments passed on to `get_file` (no effect here) #' @template envvars #' #' @examples #' \dontrun{ -#' # https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/X5MUPQ/T0KKUZ +#' # https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPIAXE #' # nlsw88.tab -#' is_ingested(fileid = "doi:10.70122/FK2/X5MUPQ/T0KKUZ", +#' is_ingested(x = "doi:10.70122/FK2/PPIAXE/MHDB0O", +#' server = "demo.dataverse.org") +#' is_ingested(x = 1734017, #' server = "demo.dataverse.org") #' #' # nlsw88_rds-export.rds -#' is_ingested(fileid = "doi:10.70122/FK2/PPIAXE/SUCFNI", +#' is_ingested(x = "doi:10.70122/FK2/PPIAXE/SUCFNI", +#' server = "demo.dataverse.org") +#' is_ingested(x = 1734016, #' server = "demo.dataverse.org") #'} -is_ingested <- - function( - fileid, - key = Sys.getenv("DATAVERSE_KEY"), - server = Sys.getenv("DATAVERSE_SERVER"), - ... - ) { - file_info <- suppressMessages(dataverse_search(fileid = fileid, server = server, key = key, ...)) +is_ingested <- function( + x, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER"), + ...) 
{ + + is_number <- is.numeric(x) - if (nrow(file_info) > 1) - warning("More than 1 file found for `is_ingested`, search may be unreliable.") + if (is_number) { + x_query <- paste0("datafile_", x) + file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key, ...)) + } else { + # expect doi + x_query <- paste0("\"", x, "\"") + file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key, ...)) + } - # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested - (!is.null(file_info$unf[1]) & !is.na(file_info$unf[1])) + if (nrow(file_info) > 1) + warning("More than 1 file found for `is_ingested`, search may be unreliable.") + + # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested + (!is.null(file_info$unf[1]) && !is.na(file_info$unf[1])) } + # other functions prepend_doi <- function(dataset) { - if (grepl("^hdl", dataset)) { - dataset <- dataset - } else if (grepl("^doi:", dataset)) { - dataset <- dataset - } else if (grepl("^DOI:", dataset)) { - dataset <- paste0("doi:", strsplit(dataset, "DOI:", fixed = TRUE)[[1]][2]) - } else if (!grepl("^doi:", dataset)) { - if (grepl("dx\\.doi\\.org", dataset) | grepl("^http", dataset)) { - dataset <- httr::parse_url(dataset)$path - } - dataset <- paste0("doi:", dataset) - } else { - dataset <- dataset + if (grepl("^hdl", dataset)) { + dataset <- dataset + } else if (grepl("^doi:", dataset)) { + dataset <- dataset + } else if (grepl("^DOI:", dataset)) { + dataset <- paste0("doi:", strsplit(dataset, "DOI:", fixed = TRUE)[[1]][2]) + } else if (!grepl("^doi:", dataset)) { + if (grepl("dx\\.doi\\.org", dataset) | grepl("^http", dataset)) { + dataset <- httr::parse_url(dataset)$path } - dataset + dataset <- paste0("doi:", dataset) + } else { + dataset <- dataset + } + dataset } api_url <- function(server = Sys.getenv("DATAVERSE_SERVER"), prefix 
= "api/") { - if (is.null(server) || server == "") { - stop("'server' is missing with no default set in DATAVERSE_SERVER environment variable.") - } - server_parsed <- httr::parse_url(server) - if (is.null(server_parsed[["hostname"]]) || server_parsed[["hostname"]] == "") { - server_parsed[["hostname"]] <- server - } - if (is.null(server_parsed[["port"]]) || server_parsed[["port"]] == "") { - domain <- server_parsed[["hostname"]] - } else { - domain <- paste0(server_parsed[["hostname"]], ":", server_parsed[["port"]]) - } - return(paste0("https://", domain, "/", prefix)) + if (is.null(server) || server == "") { + stop("'server' is missing with no default set in DATAVERSE_SERVER environment variable.") + } + server_parsed <- httr::parse_url(server) + if (is.null(server_parsed[["hostname"]]) || server_parsed[["hostname"]] == "") { + server_parsed[["hostname"]] <- server + } + if (is.null(server_parsed[["port"]]) || server_parsed[["port"]] == "") { + domain <- server_parsed[["hostname"]] + } else { + domain <- paste0(server_parsed[["hostname"]], ":", server_parsed[["port"]]) + } + return(paste0("https://", domain, "/", prefix)) } # parse dataset response into list/dataframe parse_dataset <- function(out) { - out <- jsonlite::fromJSON(out)$data - if ("latestVersion" %in% names(out)) { - class(out$latestVersion) <- "dataverse_dataset_version" - } - if ("metadataBlocks" %in% names(out) && "citation" %in% out$metadata) { - class(out$metadata$citation) <- "dataverse_dataset_citation" - } - # cleanup response - file_df <- try(out$files$dataFile, silent = TRUE) - if (inherits(file_df, "try-error") || is.null(file_df)) { - file_df <- try(out$files$datafile, silent = TRUE) - out$files$datafile <- NULL - } else { - out$files$dataFile <- NULL - } - - # remove duplicate column - if ("description" %in% colnames(file_df) & "description" %in% colnames(out$files)) { - out$files[["description"]] <- NULL - } - - out$files <- cbind(out$files, file_df) - structure(out, class = 
"dataverse_dataset") + out <- jsonlite::fromJSON(out)$data + if ("latestVersion" %in% names(out)) { + class(out$latestVersion) <- "dataverse_dataset_version" + } + if ("metadataBlocks" %in% names(out) && "citation" %in% out$metadata) { + class(out$metadata$citation) <- "dataverse_dataset_citation" + } + # cleanup response + file_df <- try(out$files$dataFile, silent = TRUE) + if (inherits(file_df, "try-error") || is.null(file_df)) { + file_df <- try(out$files$datafile, silent = TRUE) + out$files$datafile <- NULL + } else { + out$files$dataFile <- NULL + } + + # remove duplicate column + if ("description" %in% colnames(file_df) & "description" %in% colnames(out$files)) { + out$files[["description"]] <- NULL + } + + out$files <- cbind(out$files, file_df) + structure(out, class = "dataverse_dataset") } diff --git a/man/is_ingested.Rd b/man/is_ingested.Rd index 23b5699..7aff42b 100644 --- a/man/is_ingested.Rd +++ b/man/is_ingested.Rd @@ -5,14 +5,14 @@ \title{Identify if file is an ingested file} \usage{ is_ingested( - fileid, + x, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... ) } \arguments{ -\item{fileid}{A numeric fileid or file-specific DOI} +\item{x}{A numeric fileid or file-specific DOI} \item{key}{A character string specifying a Dataverse server API key. If one is not specified, functions calling authenticated API endpoints will fail. 
@@ -34,13 +34,17 @@ Identify if file is an ingested file } \examples{ \dontrun{ -# https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/X5MUPQ/T0KKUZ +# https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPIAXE # nlsw88.tab -is_ingested(fileid = "doi:10.70122/FK2/X5MUPQ/T0KKUZ", +is_ingested(x = "doi:10.70122/FK2/PPIAXE/MHDB0O", + server = "demo.dataverse.org") +is_ingested(x = 1734017, server = "demo.dataverse.org") # nlsw88_rds-export.rds -is_ingested(fileid = "doi:10.70122/FK2/PPIAXE/SUCFNI", +is_ingested(x = "doi:10.70122/FK2/PPIAXE/SUCFNI", + server = "demo.dataverse.org") +is_ingested(x = 1734016, server = "demo.dataverse.org") } } From 340ef5d7173465132152c714cff2845dd550baf5 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 20:40:13 -0800 Subject: [PATCH 15/27] Revert to old version, looking at master --- R/get_file_by_id.R | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index 36dfdc6..0cb4bf9 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -50,26 +50,22 @@ get_file_by_id <- function( if (isFALSE(ingested)) original <- NA - # create query ----- query <- list() + + # variables if (!is.null(vars)) query$vars <- paste0(vars, collapse = ",") # format only matters in ingested datasets, - # For non-ingested files (rds/docx), we need to NOT specify a format - - # if the original is not desired, we need to NOT specify a format - if (ingested & (isFALSE(original) || is.na(original) || is.null(original))) - query$format <- NULL - + # For non-ingested files (e.g. 
rds/docx), we need to NOT specify a format # also for bundle, only change url if (ingested & format != "bundle") query$format <- match.arg(format) - # # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113) - # if (isTRUE(original) & format == "original") - # query$format <- format + # if the original is not desired, we need to NOT specify a format + if (ingested & (isFALSE(original) || is.na(original) || is.null(original))) + query$format <- NULL # part of URL depending on DOI, bundle, or file if (use_persistent_id) { From 2e95ba137733d8cd451d259416217515aff075fb Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 20:44:24 -0800 Subject: [PATCH 16/27] Put error when search does not turn up anything in #113 --- R/utils.R | 3 +++ tests/testthat/tests-get_file.R | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index 33e4ad8..b7d0e3e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -125,6 +125,9 @@ is_ingested <- function( file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key, ...)) } + if (length(file_info) == 0) { + stop("File information not found on Dataverse API") + } if (nrow(file_info) > 1) warning("More than 1 file found for `is_ingested`, search may be unreliable.") diff --git a/tests/testthat/tests-get_file.R b/tests/testthat/tests-get_file.R index 1b58695..8bad37e 100644 --- a/tests/testthat/tests-get_file.R +++ b/tests/testthat/tests-get_file.R @@ -52,6 +52,5 @@ test_that("download multiple files with file id - with folders", { test_that("More informative error message when file does not exist", { testthat::skip_on_cran() # wrong server - expect_error(get_file(2972336, server = "demo.dataverse.org"), - regexp = "API endpoint does not exist on this server") + expect_error(get_file(2972336, server = "demo.dataverse.org"), regexp = "API") }) From 608cdc6923e1ba0ea4db02752019461fd2f9be11 Mon Sep 17 00:00:00 2001 From: 
Shiro Kuriwaki Date: Wed, 12 Jan 2022 20:57:27 -0800 Subject: [PATCH 17/27] To avoid leakage from test --- vignettes/C-download.Rmd | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd index 637be8a..b97528d 100644 --- a/vignettes/C-download.Rmd +++ b/vignettes/C-download.Rmd @@ -19,12 +19,20 @@ This vignette shows how to download data from Dataverse using the dataverse pack The Dataverse entry for this study is persistently retrievable by a "Digital Object Identifier (DOI)": https://doi.org/10.7910/DVN/ARKOTI and the citation on the Dataverse Page includes a "[Universal Numeric Fingerprint (UNF)](https://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files. -The following examples will draw from the Harvard Dataverse, so it is convenient to set this as a default environment variable. +The following examples will draw from the Harvard Dataverse, so it is convenient to set this as a default environment variable. ```{r} Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") ``` +This is equivalent to setting `server = "dataverse.harvard.edu"` in every `dataverse` function each time. Note that if you rely on an environment variable like the above, the `Sys.setenv()` call is necessary to make your code reproducible. + +For downloading a public dataset, no API Key is needed. 
+ +```{r} +Sys.setenv("DATAVERSE_KEY" = "") +``` + ## Retrieving Metadata From 3d0726e57e9c38bad064d4dda2c880a7b2bfddc8 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 21:06:15 -0800 Subject: [PATCH 18/27] Prevents other arguments from being interpreted as search terms --- R/utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index b7d0e3e..82a9ca7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -118,11 +118,11 @@ is_ingested <- function( if (is_number) { x_query <- paste0("datafile_", x) - file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key, ...)) + file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key)) } else { # expect doi x_query <- paste0("\"", x, "\"") - file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key, ...)) + file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key)) } if (length(file_info) == 0) { From f6c9148c55af93cb498d0c91bc8898b865d979e7 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 21:08:48 -0800 Subject: [PATCH 19/27] In case key is needed --- R/get_file_by_id.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index 0cb4bf9..dbee4b9 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -44,7 +44,7 @@ get_file_by_id <- function( } # ping get_file_metadata to see if file is ingested - ingested <- is_ingested(fileid, server = server) + ingested <- is_ingested(fileid, server = server, key = key) # update archival if not specified if (isFALSE(ingested)) From 5b1aa0d627964d369a97c78611cd78acd4c39e09 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 21:17:49 -0800 Subject: [PATCH 20/27] Make progress conditional on size #108 --- R/get_file_by_id.R | 16 
++++++++++++++-- R/utils.R | 33 +++++++++++++++++++++++++++++++-- man/files.Rd | 6 ++++-- man/get_filesize.Rd | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 6 deletions(-) create mode 100644 man/get_filesize.Rd diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index dbee4b9..4bf7d14 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -5,7 +5,9 @@ #' no ingested version, is set to NA. Note in `get_dataframe_*`, #' `original` is set to FALSE by default. Either can be changed. #' @param fileid A numeric ID internally used for `get_file_by_id`. Can be a vector for multiple files. -#' @param progress Whether to show a progress bar of the download. Defaults to `FALSE`. +#' @param progress Whether to show a progress bar of the download. +#' If not specified, will be set to `TRUE` for a file larger than 100MB. To fix +#' a value, set `FALSE` or `TRUE`. #' #' @export get_file_by_id <- function( @@ -14,7 +16,7 @@ get_file_by_id <- function( format = c("original", "bundle"), vars = NULL, original = TRUE, - progress = FALSE, + progress = NULL, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... @@ -46,6 +48,16 @@ get_file_by_id <- function( # ping get_file_metadata to see if file is ingested ingested <- is_ingested(fileid, server = server, key = key) + # if progress = NULL, determine progress by size + if (is.null(progress)) { + bytesize <- get_filesize(fileid, server = server, key = key) + if (bytesize > 1e8) { + progress <- TRUE + } else { + progress <- FALSE + } + } + # update archival if not specified if (isFALSE(ingested)) original <- NA diff --git a/R/utils.R b/R/utils.R index 82a9ca7..62cbedc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -92,7 +92,7 @@ is_ingested <- function(x, ...) { #' @param x A numeric fileid or file-specific DOI #' @param ... 
Arguments passed on to `get_file` (no effect here) #' @template envvars -#' +#' @return Length-1 logical, `TRUE` if it is ingested and `FALSE` otherwise #' @examples #' \dontrun{ #' # https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPIAXE @@ -132,11 +132,40 @@ is_ingested <- function( warning("More than 1 file found for `is_ingested`, search may be unreliable.") # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested - (!is.null(file_info$unf[1]) && !is.na(file_info$unf[1])) + return(!is.null(file_info$unf[1]) && !is.na(file_info$unf[1])) } +#' Get File size of file +#' +#' @param x A numeric fileid or file-specific DOI +#' @template envvars +#' @return number of bytes as a numeric +#' @keywords internal +get_filesize <- function( + x, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER")) { + + is_number <- is.numeric(x) + + if (is_number) { + x_query <- paste0("datafile_", x) + file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key)) + } else { + # expect doi + x_query <- paste0("\"", x, "\"") + file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key)) + } + + if (length(file_info) == 0) { + stop("File information not found on Dataverse API") + } + if (nrow(file_info) > 1) + warning("More than 1 file found for `is_ingested`, search may be unreliable.") + return(file_info$size_in_bytes) +} # other functions prepend_doi <- function(dataset) { diff --git a/man/files.Rd b/man/files.Rd index 303b0e9..67e017f 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -35,7 +35,7 @@ get_file_by_id( format = c("original", "bundle"), vars = NULL, original = TRUE, - progress = FALSE, + progress = NULL, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ... 
@@ -102,7 +102,9 @@ nlsw88.tab, use the ingested version.)} \item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.} -\item{progress}{Whether to show a progress bar of the download. Defaults to \code{FALSE}.} +\item{progress}{Whether to show a progress bar of the download. +If not specified, will be set to \code{TRUE} for a file larger than 100MB. To fix +a value, set \code{FALSE} or \code{TRUE}.} \item{filedoi}{A DOI for a single file (not the entire dataset), of the form \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}. diff --git a/man/get_filesize.Rd b/man/get_filesize.Rd new file mode 100644 index 0000000..76b3c1e --- /dev/null +++ b/man/get_filesize.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_filesize} +\alias{get_filesize} +\title{Get File size of file} +\usage{ +get_filesize( + x, + key = Sys.getenv("DATAVERSE_KEY"), + server = Sys.getenv("DATAVERSE_SERVER") +) +} +\arguments{ +\item{x}{A numeric fileid or file-specific DOI} + +\item{key}{A character string specifying a Dataverse server API key. If one +is not specified, functions calling authenticated API endpoints will fail. +Keys can be specified atomically or globally using +\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.} + +\item{server}{A character string specifying a Dataverse server. +Multiple Dataverse installations exist, with \code{"dataverse.harvard.edu"} being the +most major. The server can be defined each time within a function, or it can +be set as a default via an environment variable. 
To set a default, run +\code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")} +or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron} +file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} +} +\description{ +Get File size of file +} +\keyword{internal} From 4dba7067d51fc79fe544b38f0327f5a935d68451 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 22:00:23 -0800 Subject: [PATCH 21/27] Add NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 97aa2e8..b1bd00b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * Add progress bar for all downloads (#108) * Minor documentation improvements (#64, #107) +* Faster method for detecting ingest (#113) and robustness to ingested files without a metadata file due to errors (#80) * No longer relies on `foreign` (#34) # CHANGES in dataverse 0.3.9 From 46426c4164bb1bc4fad534be7c88ca7db9381a39 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 22:00:32 -0800 Subject: [PATCH 22/27] Update roxygen --- man/get_filesize.Rd | 3 +++ man/is_ingested.Rd | 3 +++ 2 files changed, 6 insertions(+) diff --git a/man/get_filesize.Rd b/man/get_filesize.Rd index 76b3c1e..0cd370e 100644 --- a/man/get_filesize.Rd +++ b/man/get_filesize.Rd @@ -26,6 +26,9 @@ be set as a default via an environment variable. 
To set a default, run or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron} file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.} } +\value{ +number of bytes as a numeric +} \description{ Get File size of file } diff --git a/man/is_ingested.Rd b/man/is_ingested.Rd index 7aff42b..dedef22 100644 --- a/man/is_ingested.Rd +++ b/man/is_ingested.Rd @@ -29,6 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu \item{...}{Arguments passed on to \code{get_file} (no effect here)} } +\value{ +Length-1 logical, \code{TRUE} if it is ingested and \code{FALSE} otherwise +} \description{ Identify if file is an ingested file } From 4d122a9ac6fc479ebbb74d460c88fd43b8a1d091 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 22:06:23 -0800 Subject: [PATCH 23/27] Protect against null bytesize --- R/get_file_by_id.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R index 4bf7d14..1c5e9e3 100644 --- a/R/get_file_by_id.R +++ b/R/get_file_by_id.R @@ -51,7 +51,7 @@ get_file_by_id <- function( # if progress = NULL, determine progress by size if (is.null(progress)) { bytesize <- get_filesize(fileid, server = server, key = key) - if (bytesize > 1e8) { + if (isTRUE(bytesize > 1e8)) { progress <- TRUE } else { progress <- FALSE From 138363a94a1f21ec5a44bdace2fcf6ee1f3a4355 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 22:25:58 -0800 Subject: [PATCH 24/27] Properly comment example --- R/get_dataframe.R | 4 ++-- man/get_dataframe.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index 955c265..6ab87bb 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -91,8 +91,8 @@ #' writeBin(as_binary, path(temp, "county.RData")) #' load(path(temp, "county.RData")) #' -#' If you are certain each RData contains only one object, one could define a custom 
-#' custom function used in https://stackoverflow.com/a/34926943 +#' # If you are certain each RData contains only one object, one could define a +#' # custom function used in https://stackoverflow.com/a/34926943 #' load_object <- function(file) { #' tmp <- new.env() #' load(file = file, envir = tmp) diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index b9ce9ff..25c50d7 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -147,8 +147,8 @@ temp <- tempdir() writeBin(as_binary, path(temp, "county.RData")) load(path(temp, "county.RData")) -If you are certain each RData contains only one object, one could define a custom -custom function used in https://stackoverflow.com/a/34926943 +# If you are certain each RData contains only one object, one could define a +# custom function used in https://stackoverflow.com/a/34926943 load_object <- function(file) { tmp <- new.env() load(file = file, envir = tmp) From 2023edcd1019a1fc382a7c17507311a9bf92048f Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 22:52:36 -0800 Subject: [PATCH 25/27] No linebreak --- vignettes/C-download.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd index b97528d..758fd94 100644 --- a/vignettes/C-download.Rmd +++ b/vignettes/C-download.Rmd @@ -92,8 +92,7 @@ head(energy) ## Retrieving Custom Data Fromats (RDS, Stata, SPSS) -If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [inge -sted](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format. +If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. 
(2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [ingested](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format. ```{r, message=FALSE} argentina_tab <- get_dataframe_by_name( From a488d3c187739ef5bae2778f587d33550144303d Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Wed, 12 Jan 2022 23:00:00 -0800 Subject: [PATCH 26/27] CRAN comments for 0.3.10 --- cran-comments.md | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/cran-comments.md b/cran-comments.md index 85d6d5e..5441abf 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,26 +1,19 @@ Description ----------------------------------------------- -The `dataverse` package was archived on 2021-04-21 after we could not attend -to a failing test in time. This is a submission to effectively revive the package. +This version updates an expired API token for testing purposes, which started +causing errors and prompted an email from the CRAN Team on 2021-12-31. -Shiro Kuriwaki will be maintainer for this new submission (for 0.3.9 and going -forward), as the automatic `NOTE` will flag. We communicated this to the CRAN -team on 2021-07-18. The previous maintainer, Will Beasley, will continue to -contribute to the package as author. -The nature of the test failure was that the APIs were relying on a nightly CRAN -build. After a few unreliable connections after many successful tests, some of the -servers eventually returned an error. After Brian Ripley's instructions, we -decided we shouldn't rely on nightly CRAN builds for testing the package's -interactions with servers, even if the servers are reachable. All those tests -are now skipped on CRAN (with `testthat::skip_on_cran()`) and instead will run on -nightly GitHub Actions builds. 
+Because daily tests for the full download are done separately on GitHub Actions, +this test is not as relevant for CRAN, so we updated the token and +also skipped the check on CRAN. We also closed 8 issues and feature +extensions recorded on GitHub. Thank you for taking the time to review the submission. -- Shiro Kuriwaki +Shiro Kuriwaki Test environments @@ -40,6 +33,5 @@ Test environments R CMD check results ----------------------------------------------- -* One NOTE for a new maintainer. * No ERRORs, WARNINGs on any builds. * In the past, there has been a note about possibly misspelled words "APIs" and "Dataverse". If they appear on your machines, both spellings are intentional. From f696f744fb8ee2e41bada189e00dbe2546b8b2c2 Mon Sep 17 00:00:00 2001 From: Shiro Kuriwaki Date: Thu, 13 Jan 2022 07:39:42 -0800 Subject: [PATCH 27/27] v2 --- .github/workflows/R-CMD-check-daily.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check-daily.yaml b/.github/workflows/R-CMD-check-daily.yaml index a2c7453..22b7bbe 100644 --- a/.github/workflows/R-CMD-check-daily.yaml +++ b/.github/workflows/R-CMD-check-daily.yaml @@ -2,7 +2,7 @@ # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions on: schedule: - - cron: "20 3 * * *" # Run every morning at 3:20am UTC + - cron: "20 3 * * *" # Run every morning at 3:20am UTC # - cron: "7 1 * * *" # Run every morning at 1:07am UTC (~8pm central) name: R-CMD-check-daily @@ -14,7 +14,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-pandoc@master - name: Query dependencies