From c7e5c063496c7fbe59357391959e1ae3d817bc59 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 28 Jul 2021 21:19:54 -0400
Subject: [PATCH 01/27] Note warning

---
 README.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 24a1f13..611aa90 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -65,9 +65,9 @@ Because [there are many Dataverse installations](https://dataverse.org/), all fu
 Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
 ```
 
-3. Hard-code a default server in your own environment.  Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`.
+3. Hard-code a default server in your own environment.  Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`. Note that doing this may make your scripts not replicable to other people who do not have access to the environment.
 
-In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc. 
+In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc.  
 
 ### Data Download
 

From 01f5f02fc9b0a792c2e3daefe27eaca9e0372629 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 28 Jul 2021 21:20:13 -0400
Subject: [PATCH 02/27] Descriptive

---
 README.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.Rmd b/README.Rmd
index 611aa90..5423162 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -40,7 +40,7 @@ remotes::install_github("iqss/dataverse-client-r")
 library("dataverse")
 ```
 
-#### Keys
+#### API Access Keys
 
 Many features of the Dataverse API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account or API key -- you wil not need to worry about this. 
 

From b80e5b74794eaf54e0b5bdc0c96d3befa7b594e3 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Sat, 1 Jan 2022 22:21:29 +0900
Subject: [PATCH 03/27] Sentences

---
 README.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 71c7db4..23fa2c5 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -65,9 +65,9 @@ Because [there are many Dataverse installations](https://dataverse.org/), all fu
 Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
 ```
 
-3. Hard-code a default server in your own environment.  Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`. Note that doing this may make your scripts not replicable to other people who do not have access to the environment.
+3. Hard-code a default server in your own environment.  Direct your `.Renviron` file directly or open it by `usethis::edit_r_environ()`. Then enter `DATAVERSE_SERVER = "dataverse.harvard.edu"`. However, doing this may make your scripts not replicable to other people who do not have access to the environment.
 
-In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path, etc.  
+In all cases, values should be the Dataverse server, without the "https" prefix or the "/api" URL path.
 
 ### Data Download
 

From 7a05564b54ddb1bd35fb8d768976a95643d2c5ac Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 5 Jan 2022 22:18:53 -0800
Subject: [PATCH 04/27] Prevent WIP R files getting built

---
 .Rbuildignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.Rbuildignore b/.Rbuildignore
index 17082bc..a3b9f6e 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -23,3 +23,4 @@ man-roxygen/*
 ^CRAN-RELEASE$
 ^\.github$
 rhub-checks
+/*.R

From 316d67733d1329fb3323a8002b2294e4f9832585 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Thu, 6 Jan 2022 15:29:38 -0800
Subject: [PATCH 05/27] Use different name for output vs. function

---
 R/get_file_by_id.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index ba48827..081c848 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -44,10 +44,10 @@ get_file_by_id <- function(
     }
 
     # ping get_file_metadata to see if file is ingested
-    is_ingested <- is_ingested(fileid, server = server)
+    ingested <- is_ingested(fileid, server = server)
 
     # update archival if not specified
-    if (isFALSE(is_ingested))
+    if (isFALSE(ingested))
       original <- NA
 
     # create query -----
@@ -58,11 +58,11 @@ get_file_by_id <- function(
     # format only matters in ingested datasets,
     # For non-ingested files (rds/docx), we need to NOT specify a format
     # also for bundle, only change url
-    if (is_ingested & format != "bundle")
+    if (ingested & format != "bundle")
       query$format <- match.arg(format)
 
     # if the original is not desired, we need to NOT specify a format
-    if (is_ingested & (isFALSE(original) || is.na(original) || is.null(original)))
+    if (ingested & (isFALSE(original) || is.na(original) || is.null(original)))
       query$format <- NULL
 
     # part of URL depending on DOI, bundle, or file

From 2b5d9769560816a2ebdeb452f192d9b80e121d01 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Thu, 6 Jan 2022 17:37:56 -0800
Subject: [PATCH 06/27] syntax - no commas

---
 tests/testthat/tests-get_dataframe-dataframe-basketball.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/tests-get_dataframe-dataframe-basketball.R b/tests/testthat/tests-get_dataframe-dataframe-basketball.R
index aa5dcd0..52e6c86 100644
--- a/tests/testthat/tests-get_dataframe-dataframe-basketball.R
+++ b/tests/testthat/tests-get_dataframe-dataframe-basketball.R
@@ -24,7 +24,7 @@ test_that("roster-by-doi", {
 
   actual <-
     get_dataframe_by_doi(
-      filedoi  = expected_ds$roster$dataFile$persistentId, # A value like "doi:10.70122/FK2/HXJVJU/SA3Z2V",
+      filedoi  = expected_ds$roster$dataFile$persistentId # A value like "doi:10.70122/FK2/HXJVJU/SA3Z2V",
     )
 
   expect_equal(actual, expected_file)
@@ -38,7 +38,7 @@ test_that("roster-by-id", {
 
   actual <-
     get_dataframe_by_id(
-      fileid   = expected_ds$roster$dataFile$id, # A value like 1734005
+      fileid   = expected_ds$roster$dataFile$id # A value like 1734005
     )
 
   expect_equal(actual, expected_file)

From f13246a362121d80062c1e74b7817730db06dac4 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Thu, 6 Jan 2022 17:39:00 -0800
Subject: [PATCH 07/27] Attempt at fixing #80 minimally.

Will cause some unexpected reading issues in testthat/tests-get_dataframe-dataframe-basketball.R
---
 R/get_dataframe.R  |  4 ----
 R/get_file_by_id.R | 15 ++++++++-------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/R/get_dataframe.R b/R/get_dataframe.R
index 0316d21..c3576d7 100644
--- a/R/get_dataframe.R
+++ b/R/get_dataframe.R
@@ -132,10 +132,6 @@ get_dataframe_by_id <- function(
   # if not ingested, then whether to take the original is not relevant.
   ingested <- is_ingested(fileid, ...)
 
-  if (isFALSE(ingested)) {
-    original <- NA
-  }
-
   if (is.null(.f) & isTRUE(ingested) & isFALSE(original)) {
     message("Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.\n")
     .f <- readr::read_tsv
diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index 081c848..be87ebd 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -46,10 +46,6 @@ get_file_by_id <- function(
     # ping get_file_metadata to see if file is ingested
     ingested <- is_ingested(fileid, server = server)
 
-    # update archival if not specified
-    if (isFALSE(ingested))
-      original <- NA
-
     # create query -----
     query <- list()
     if (!is.null(vars))
@@ -57,14 +53,19 @@ get_file_by_id <- function(
 
     # format only matters in ingested datasets,
     # For non-ingested files (rds/docx), we need to NOT specify a format
-    # also for bundle, only change url
-    if (ingested & format != "bundle")
-      query$format <- match.arg(format)
 
     # if the original is not desired, we need to NOT specify a format
     if (ingested & (isFALSE(original) || is.na(original) || is.null(original)))
       query$format <- NULL
 
+    # also for bundle, only change url
+    if (ingested & format != "bundle")
+      query$format <- match.arg(format)
+
+    # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113)
+    if (isTRUE(original) & format == "original")
+      query$format <- format
+
     # part of URL depending on DOI, bundle, or file
     if (use_persistent_id) {
       u_part <- "access/datafile/:persistentId?persistentId="

From 118d327a1b0f96beb348fd227515b4b697352af8 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Thu, 6 Jan 2022 17:42:17 -0800
Subject: [PATCH 08/27] Proper regex

---
 .Rbuildignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index a3b9f6e..ebaa95f 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -23,4 +23,4 @@ man-roxygen/*
 ^CRAN-RELEASE$
 ^\.github$
 rhub-checks
-/*.R
+/.*\.R$

From 83d1b6cf0d0c6b73e7d7b560103ac6296740d48c Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Fri, 7 Jan 2022 10:26:43 -0800
Subject: [PATCH 09/27] Tweak placement of code chunks

---
 vignettes/C-download.Rmd | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd
index c8a26e0..637be8a 100644
--- a/vignettes/C-download.Rmd
+++ b/vignettes/C-download.Rmd
@@ -19,6 +19,13 @@ This vignette shows how to download data from Dataverse using the dataverse pack
 
 The Dataverse entry for this study is persistently retrievable by a "Digital Object Identifier (DOI)": https://doi.org/10.7910/DVN/ARKOTI and the citation on the Dataverse Page includes a "[Universal Numeric Fingerprint (UNF)](https://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files.
 
+The following examples will draw from the Harvard Dataverse, so it is convenient to set this as a default environment variable.
+
+```{r}
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+```
+
+
 
 ## Retrieving Metadata
 
@@ -27,7 +34,6 @@ We will download these files and examine them directly in R using the **datavers
 ```{r}
 library("dataverse")
 library("tibble") # to see dataframes in tidyverse-form
-Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
 ```
 
 The `get_dataset()` function lists all of the files in the dataset along with a considerable amount of metadata about each. (Recall that in Dataverse, `dataset` is a collection of files, not a single file.)  We can see a quick glance at these files using:
@@ -78,7 +84,8 @@ head(energy)
 ## Retrieving Custom Data Fromats (RDS, Stata, SPSS)
 
 
-If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [ingested](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format.
+If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [inge
+sted](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format.
 
 ```{r,  message=FALSE}
 argentina_tab <- get_dataframe_by_name(

From fea854ea3d89a59a89aae9b6df308c5335bcd9b6 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Fri, 7 Jan 2022 10:32:52 -0800
Subject: [PATCH 10/27] Correct statement about current default

---
 R/get_dataframe.R    | 5 +++--
 man/get_dataframe.Rd | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/get_dataframe.R b/R/get_dataframe.R
index c3576d7..10961dc 100644
--- a/R/get_dataframe.R
+++ b/R/get_dataframe.R
@@ -19,8 +19,9 @@
 #'  file, then `.f` should be `readRDS` or `readr::read_rds`. It can be a custom
 #'  function defined by the user. See examples for details.
 #'
-#' @param original A logical, defaulting to `TRUE`. Whether to read the ingested,
-#' archival version of the datafile if one exists. The archival versions are tab-delimited
+#' @param original A logical, whether to read the ingested,
+#' archival version of the datafile if one exists. If `TRUE`, users should supply
+#' a function to use to read in the original. The archival versions are tab-delimited
 #' `.tab` files so if `original = FALSE`, `.f` is set to `readr::read_tsv`.
 #'
 #' @inheritDotParams get_file
diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd
index cbcd3c5..b9ce9ff 100644
--- a/man/get_dataframe.Rd
+++ b/man/get_dataframe.Rd
@@ -31,8 +31,9 @@ must choose the appropriate function: for example if the target is a .rds
 file, then \code{.f} should be \code{readRDS} or \code{readr::read_rds}. It can be a custom
 function defined by the user. See examples for details.}
 
-\item{original}{A logical, defaulting to \code{TRUE}. Whether to read the ingested,
-archival version of the datafile if one exists. The archival versions are tab-delimited
+\item{original}{A logical, whether to read the ingested,
+archival version of the datafile if one exists. If \code{TRUE}, users should supply
+a function to use to read in the original. The archival versions are tab-delimited
 \code{.tab} files so if \code{original = FALSE}, \code{.f} is set to \code{readr::read_tsv}.}
 
 \item{...}{

From 863dddfce9a5ddc2caa30781e4cf6296e1545ee0 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Fri, 7 Jan 2022 10:33:23 -0800
Subject: [PATCH 11/27] Previous version somehow ignoring functions

---
 .Rbuildignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index ebaa95f..273e558 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -23,4 +23,4 @@ man-roxygen/*
 ^CRAN-RELEASE$
 ^\.github$
 rhub-checks
-/.*\.R$
+/Untitled.+\.R$

From 98fd69dd7910387ff7de3898fb3a38be2e697ffe Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Fri, 7 Jan 2022 23:38:11 -0800
Subject: [PATCH 12/27] Revert f13246a362121d80062c1e74b7817730db06dac4

---
 R/get_dataframe.R  |  5 +++++
 R/get_file_by_id.R | 11 ++++++++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/R/get_dataframe.R b/R/get_dataframe.R
index 10961dc..955c265 100644
--- a/R/get_dataframe.R
+++ b/R/get_dataframe.R
@@ -133,6 +133,11 @@ get_dataframe_by_id <- function(
   # if not ingested, then whether to take the original is not relevant.
   ingested <- is_ingested(fileid, ...)
 
+
+  if (isFALSE(ingested)) {
+    original <- NA
+  }
+
   if (is.null(.f) & isTRUE(ingested) & isFALSE(original)) {
     message("Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.\n")
     .f <- readr::read_tsv
diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index be87ebd..36dfdc6 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -46,6 +46,11 @@ get_file_by_id <- function(
     # ping get_file_metadata to see if file is ingested
     ingested <- is_ingested(fileid, server = server)
 
+    # update archival if not specified
+    if (isFALSE(ingested))
+      original <- NA
+
+
     # create query -----
     query <- list()
     if (!is.null(vars))
@@ -62,9 +67,9 @@ get_file_by_id <- function(
     if (ingested & format != "bundle")
       query$format <- match.arg(format)
 
-    # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113)
-    if (isTRUE(original) & format == "original")
-      query$format <- format
+    # # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113)
+    # if (isTRUE(original) & format == "original")
+    #   query$format <- format
 
     # part of URL depending on DOI, bundle, or file
     if (use_persistent_id) {

From 5ffbc110f12c77f5b7aff04f3aaaca5761e27643 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Fri, 7 Jan 2022 23:43:13 -0800
Subject: [PATCH 13/27] First attempt for #113 (though see
 https://github.com/IQSS/dataverse-client-r/issues/113#issuecomment-1007523529),
 retry for #80

---
 R/utils.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index cb0bdec..b412d97 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -106,13 +106,13 @@ is_ingested <-
     server  = Sys.getenv("DATAVERSE_SERVER"),
     ...
   ) {
-    ping_metadata <- tryCatch(
-      {
-        get_file_metadata(fileid, key = key, server = server)
-      },
-      error = function(e) e
-    )
-    !inherits(ping_metadata, "error") # if error, not ingested
+    file_info <- suppressMessages(dataverse_search(fileid = fileid, server = server, key = key, ...))
+
+    if (nrow(file_info) > 1)
+      warning("More than 1 file found for `is_ingested`, search may be unreliable.")
+
+    # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested
+    (!is.null(file_info$unf[1]) & !is.na(file_info$unf[1]))
 }
 
 

From e32d354a1302bc659100bb12a228b04e7f000481 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 20:29:40 -0800
Subject: [PATCH 14/27] Implement suggestion to prepend with quotes or
 datafile_.
 https://github.com/IQSS/dataverse-client-r/issues/113#issuecomment-1011208445

---
 R/utils.R          | 246 ++++++++++++++++++++++++---------------------
 man/is_ingested.Rd |  14 ++-
 2 files changed, 141 insertions(+), 119 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index b412d97..33e4ad8 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,181 +1,199 @@
 # dataverse_id method
 dataverse_id <- function(x, ...) {
-    UseMethod('dataverse_id', x)
+  UseMethod('dataverse_id', x)
 }
 dataverse_id.default <- function(x, ...) {
-    x
+  x
 }
 dataverse_id.character <- function(x, ...) {
-    get_dataverse(x, ..., check = FALSE)$id
+  get_dataverse(x, ..., check = FALSE)$id
 }
 dataverse_id.dataverse <- function(x, ...) {
-    x$id
+  x$id
 }
 
 # dataset_id method
 dataset_id <- function(x, ...) {
-    UseMethod('dataset_id', x)
+  UseMethod('dataset_id', x)
 }
 dataset_id.default <- function(x, ...) {
-    x
+  x
 }
 dataset_id.character <- function(x, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
-    x <- prepend_doi(x)
-    u <- paste0(api_url(server), "datasets/:persistentId?persistentId=", x)
-    r <- tryCatch(httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...),
-                  error = function(e) {
-                    stop("Could not retrieve Dataset ID from persistent identifier!")
-                  })
-    jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))[["data"]][["id"]]
+  x <- prepend_doi(x)
+  u <- paste0(api_url(server), "datasets/:persistentId?persistentId=", x)
+  r <- tryCatch(httr::GET(u, httr::add_headers("X-Dataverse-key" = key), ...),
+                error = function(e) {
+                  stop("Could not retrieve Dataset ID from persistent identifier!")
+                })
+  jsonlite::fromJSON(httr::content(r, as = "text", encoding = "UTF-8"))[["data"]][["id"]]
 }
 dataset_id.dataverse_dataset <- function(x, ...) {
-    x$id
+  x$id
 }
 
 # get fileid from a dataset DOI or dataset ID
 get_fileid <- function(x, ...) {
-    UseMethod('get_fileid', x)
+  UseMethod('get_fileid', x)
 }
 
 get_fileid.numeric <- function(x, file, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
-    files <- dataset_files(x, key = key, server = server, ...)
-    ids <- unlist(lapply(files, function(x) x[["datafile"]][["id"]]))
-    if (is.numeric(file)) {
-        w <- which(ids %in% file)
-        if (!length(w)) {
-            stop("File not found")
-        }
-        id <- ids[w]
-    } else {
-        ns <- unlist(lapply(files, `[[`, "label"))
-        w <- which(ns %in% file)
-        if (!length(w)) {
-            stop("File not found")
-        }
-        id <- ids[w]
+  files <- dataset_files(x, key = key, server = server, ...)
+  ids <- unlist(lapply(files, function(x) x[["datafile"]][["id"]]))
+  if (is.numeric(file)) {
+    w <- which(ids %in% file)
+    if (!length(w)) {
+      stop("File not found")
     }
-    id
+    id <- ids[w]
+  } else {
+    ns <- unlist(lapply(files, `[[`, "label"))
+    w <- which(ns %in% file)
+    if (!length(w)) {
+      stop("File not found")
+    }
+    id <- ids[w]
+  }
+  id
 }
 
 get_fileid.character <- function(x, file, key = Sys.getenv("DATAVERSE_KEY"), server = Sys.getenv("DATAVERSE_SERVER"), ...) {
-    files <- dataset_files(prepend_doi(x), key = key, server = server, ...)
-    ids <- unlist(lapply(files, function(x) x[["dataFile"]][["id"]]))
-    if (is.numeric(file)) {
-        w <- which(ids %in% file)
-        if (!length(w)) {
-            stop("File not found")
-        }
-        id <- ids[w]
-    } else {
-        ns <- unlist(lapply(files, `[[`, "label"))
-        w <- which(ns %in% file)
-        if (!length(w)) {
-            stop("File not found")
-        }
-        id <- ids[w]
+  files <- dataset_files(prepend_doi(x), key = key, server = server, ...)
+  ids <- unlist(lapply(files, function(x) x[["dataFile"]][["id"]]))
+  if (is.numeric(file)) {
+    w <- which(ids %in% file)
+    if (!length(w)) {
+      stop("File not found")
+    }
+    id <- ids[w]
+  } else {
+    ns <- unlist(lapply(files, `[[`, "label"))
+    w <- which(ns %in% file)
+    if (!length(w)) {
+      stop("File not found")
     }
-    id
+    id <- ids[w]
+  }
+  id
 }
 
 get_fileid.dataverse_file <- function(x, ...) {
-    x[["dataFile"]][["id"]]
+  x[["dataFile"]][["id"]]
 }
 
 
+# Ingested
+is_ingested <- function(x, ...) {
+  UseMethod('is_ingested', x)
+}
+
 #' Identify if file is an ingested file
 #'
-#' @param fileid A numeric fileid or file-specific DOI
+#' @param x A numeric fileid or file-specific DOI
 #' @param ... Arguments passed on to `get_file` (no effect here)
 #' @template envvars
 #'
 #' @examples
 #' \dontrun{
-#' # https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/X5MUPQ/T0KKUZ
+#' # https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPIAXE
 #' # nlsw88.tab
-#' is_ingested(fileid = "doi:10.70122/FK2/X5MUPQ/T0KKUZ",
+#' is_ingested(x = "doi:10.70122/FK2/PPIAXE/MHDB0O",
+#'             server = "demo.dataverse.org")
+#' is_ingested(x = 1734017,
 #'             server = "demo.dataverse.org")
 #'
 #' # nlsw88_rds-export.rds
-#' is_ingested(fileid = "doi:10.70122/FK2/PPIAXE/SUCFNI",
+#' is_ingested(x = "doi:10.70122/FK2/PPIAXE/SUCFNI",
+#'             server = "demo.dataverse.org")
+#' is_ingested(x = 1734016,
 #'             server = "demo.dataverse.org")
 #'}
-is_ingested <-
-  function(
-    fileid,
-    key     = Sys.getenv("DATAVERSE_KEY"),
-    server  = Sys.getenv("DATAVERSE_SERVER"),
-    ...
-  ) {
-    file_info <- suppressMessages(dataverse_search(fileid = fileid, server = server, key = key, ...))
+is_ingested <- function(
+  x,
+  key = Sys.getenv("DATAVERSE_KEY"),
+  server  = Sys.getenv("DATAVERSE_SERVER"),
+  ...) {
+
+  is_number <- is.numeric(x)
 
-    if (nrow(file_info) > 1)
-      warning("More than 1 file found for `is_ingested`, search may be unreliable.")
+  if (is_number) {
+    x_query <- paste0("datafile_", x)
+    file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key, ...))
+  } else {
+    # expect doi
+    x_query <- paste0("\"", x, "\"")
+    file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key, ...))
+  }
 
-    # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested
-    (!is.null(file_info$unf[1]) & !is.na(file_info$unf[1]))
+  if (nrow(file_info) > 1)
+    warning("More than 1 file found for `is_ingested`, search may be unreliable.")
+
+  # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested
+  (!is.null(file_info$unf[1]) && !is.na(file_info$unf[1]))
 }
 
 
 
+
 # other functions
 prepend_doi <- function(dataset) {
-    if (grepl("^hdl", dataset)) {
-        dataset <- dataset
-    } else if (grepl("^doi:", dataset)) {
-        dataset <- dataset
-    } else if (grepl("^DOI:", dataset)) {
-        dataset <- paste0("doi:", strsplit(dataset, "DOI:", fixed = TRUE)[[1]][2])
-    } else if (!grepl("^doi:", dataset)) {
-        if (grepl("dx\\.doi\\.org", dataset) | grepl("^http", dataset)) {
-            dataset <- httr::parse_url(dataset)$path
-        }
-        dataset <- paste0("doi:", dataset)
-    } else {
-        dataset <- dataset
+  if (grepl("^hdl", dataset)) {
+    dataset <- dataset
+  } else if (grepl("^doi:", dataset)) {
+    dataset <- dataset
+  } else if (grepl("^DOI:", dataset)) {
+    dataset <- paste0("doi:", strsplit(dataset, "DOI:", fixed = TRUE)[[1]][2])
+  } else if (!grepl("^doi:", dataset)) {
+    if (grepl("dx\\.doi\\.org", dataset) | grepl("^http", dataset)) {
+      dataset <- httr::parse_url(dataset)$path
     }
-    dataset
+    dataset <- paste0("doi:", dataset)
+  } else {
+    dataset <- dataset
+  }
+  dataset
 }
 
 api_url <- function(server = Sys.getenv("DATAVERSE_SERVER"), prefix = "api/") {
-    if (is.null(server) || server == "") {
-        stop("'server' is missing with no default set in DATAVERSE_SERVER environment variable.")
-    }
-    server_parsed <- httr::parse_url(server)
-    if (is.null(server_parsed[["hostname"]]) || server_parsed[["hostname"]] == "") {
-        server_parsed[["hostname"]] <- server
-    }
-    if (is.null(server_parsed[["port"]]) || server_parsed[["port"]] == "") {
-        domain <- server_parsed[["hostname"]]
-    } else {
-        domain <- paste0(server_parsed[["hostname"]], ":", server_parsed[["port"]])
-    }
-    return(paste0("https://", domain, "/", prefix))
+  if (is.null(server) || server == "") {
+    stop("'server' is missing with no default set in DATAVERSE_SERVER environment variable.")
+  }
+  server_parsed <- httr::parse_url(server)
+  if (is.null(server_parsed[["hostname"]]) || server_parsed[["hostname"]] == "") {
+    server_parsed[["hostname"]] <- server
+  }
+  if (is.null(server_parsed[["port"]]) || server_parsed[["port"]] == "") {
+    domain <- server_parsed[["hostname"]]
+  } else {
+    domain <- paste0(server_parsed[["hostname"]], ":", server_parsed[["port"]])
+  }
+  return(paste0("https://", domain, "/", prefix))
 }
 
 # parse dataset response into list/dataframe
 parse_dataset <- function(out) {
-    out <- jsonlite::fromJSON(out)$data
-    if ("latestVersion" %in% names(out)) {
-        class(out$latestVersion) <- "dataverse_dataset_version"
-    }
-    if ("metadataBlocks" %in% names(out) && "citation" %in% out$metadata) {
-        class(out$metadata$citation) <- "dataverse_dataset_citation"
-    }
-    # cleanup response
-    file_df <- try(out$files$dataFile, silent = TRUE)
-    if (inherits(file_df, "try-error") || is.null(file_df)) {
-        file_df <- try(out$files$datafile, silent = TRUE)
-        out$files$datafile <- NULL
-    } else {
-        out$files$dataFile <- NULL
-    }
-
-    # remove duplicate column
-    if ("description" %in% colnames(file_df) & "description" %in% colnames(out$files)) {
-        out$files[["description"]] <- NULL
-    }
-
-    out$files <- cbind(out$files, file_df)
-    structure(out, class = "dataverse_dataset")
+  out <- jsonlite::fromJSON(out)$data
+  if ("latestVersion" %in% names(out)) {
+    class(out$latestVersion) <- "dataverse_dataset_version"
+  }
+  if ("metadataBlocks" %in% names(out) && "citation" %in% out$metadata) {
+    class(out$metadata$citation) <- "dataverse_dataset_citation"
+  }
+  # cleanup response
+  file_df <- try(out$files$dataFile, silent = TRUE)
+  if (inherits(file_df, "try-error") || is.null(file_df)) {
+    file_df <- try(out$files$datafile, silent = TRUE)
+    out$files$datafile <- NULL
+  } else {
+    out$files$dataFile <- NULL
+  }
+
+  # remove duplicate column
+  if ("description" %in% colnames(file_df) & "description" %in% colnames(out$files)) {
+    out$files[["description"]] <- NULL
+  }
+
+  out$files <- cbind(out$files, file_df)
+  structure(out, class = "dataverse_dataset")
 }
 
diff --git a/man/is_ingested.Rd b/man/is_ingested.Rd
index 23b5699..7aff42b 100644
--- a/man/is_ingested.Rd
+++ b/man/is_ingested.Rd
@@ -5,14 +5,14 @@
 \title{Identify if file is an ingested file}
 \usage{
 is_ingested(
-  fileid,
+  x,
   key = Sys.getenv("DATAVERSE_KEY"),
   server = Sys.getenv("DATAVERSE_SERVER"),
   ...
 )
 }
 \arguments{
-\item{fileid}{A numeric fileid or file-specific DOI}
+\item{x}{A numeric fileid or file-specific DOI}
 
 \item{key}{A character string specifying a Dataverse server API key. If one
 is not specified, functions calling authenticated API endpoints will fail.
@@ -34,13 +34,17 @@ Identify if file is an ingested file
 }
 \examples{
 \dontrun{
-# https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/X5MUPQ/T0KKUZ
+# https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPIAXE
 # nlsw88.tab
-is_ingested(fileid = "doi:10.70122/FK2/X5MUPQ/T0KKUZ",
+is_ingested(x = "doi:10.70122/FK2/PPIAXE/MHDB0O",
+            server = "demo.dataverse.org")
+is_ingested(x = 1734017,
             server = "demo.dataverse.org")
 
 # nlsw88_rds-export.rds
-is_ingested(fileid = "doi:10.70122/FK2/PPIAXE/SUCFNI",
+is_ingested(x = "doi:10.70122/FK2/PPIAXE/SUCFNI",
+            server = "demo.dataverse.org")
+is_ingested(x = 1734016,
             server = "demo.dataverse.org")
 }
 }

From 340ef5d7173465132152c714cff2845dd550baf5 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 20:40:13 -0800
Subject: [PATCH 15/27] Revert to old version, looking at master

---
 R/get_file_by_id.R | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index 36dfdc6..0cb4bf9 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -50,26 +50,22 @@ get_file_by_id <- function(
     if (isFALSE(ingested))
       original <- NA
 
-
     # create query -----
     query <- list()
+
+    # variables
     if (!is.null(vars))
       query$vars <- paste0(vars, collapse = ",")
 
     # format only matters in ingested datasets,
-    # For non-ingested files (rds/docx), we need to NOT specify a format
-
-    # if the original is not desired, we need to NOT specify a format
-    if (ingested & (isFALSE(original) || is.na(original) || is.null(original)))
-      query$format <- NULL
-
+    # For non-ingested files (e.g. rds/docx), we need to NOT specify a format
     # also for bundle, only change url
     if (ingested & format != "bundle")
       query$format <- match.arg(format)
 
-    # # for when ingest fails (https://github.com/IQSS/dataverse-client-r/issues/113)
-    # if (isTRUE(original) & format == "original")
-    #   query$format <- format
+    # if the original is not desired, we need to NOT specify a format
+    if (ingested & (isFALSE(original) || is.na(original) || is.null(original)))
+      query$format <- NULL
 
     # part of URL depending on DOI, bundle, or file
     if (use_persistent_id) {

From 2e95ba137733d8cd451d259416217515aff075fb Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 20:44:24 -0800
Subject: [PATCH 16/27] Put error when search does not turn up anything in #113

---
 R/utils.R                       | 3 +++
 tests/testthat/tests-get_file.R | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 33e4ad8..b7d0e3e 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -125,6 +125,9 @@ is_ingested <- function(
     file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key, ...))
   }
 
+  if (length(file_info) == 0) {
+    stop("File information not found on Dataverse API")
+  }
   if (nrow(file_info) > 1)
     warning("More than 1 file found for `is_ingested`, search may be unreliable.")
 
diff --git a/tests/testthat/tests-get_file.R b/tests/testthat/tests-get_file.R
index 1b58695..8bad37e 100644
--- a/tests/testthat/tests-get_file.R
+++ b/tests/testthat/tests-get_file.R
@@ -52,6 +52,5 @@ test_that("download multiple files with file id - with folders", {
 test_that("More informative error message when file does not exist", {
   testthat::skip_on_cran()
   # wrong server
-  expect_error(get_file(2972336, server = "demo.dataverse.org"),
-               regexp = "API endpoint does not exist on this server")
+  expect_error(get_file(2972336, server = "demo.dataverse.org"), regexp = "API")
 })

From 608cdc6923e1ba0ea4db02752019461fd2f9be11 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 20:57:27 -0800
Subject: [PATCH 17/27] To avoid leakage from test

---
 vignettes/C-download.Rmd | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd
index 637be8a..b97528d 100644
--- a/vignettes/C-download.Rmd
+++ b/vignettes/C-download.Rmd
@@ -19,12 +19,20 @@ This vignette shows how to download data from Dataverse using the dataverse pack
 
 The Dataverse entry for this study is persistently retrievable by a "Digital Object Identifier (DOI)": https://doi.org/10.7910/DVN/ARKOTI and the citation on the Dataverse Page includes a "[Universal Numeric Fingerprint (UNF)](https://guides.dataverse.org/en/latest/developers/unf/index.html)": `UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==`, which provides a versioned, multi-file hash for the entire study, which contains 32 files.
 
-The following examples will draw from the Harvard Dataverse, so it is convenient to set this as a default environment variable.
+The following examples will draw from the Harvard Dataverse, so it is convenient to set this as a default environment variable. 
 
 ```{r}
 Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
 ```
 
+This is equivalent to passing `server = "dataverse.harvard.edu"` to every `dataverse` function call. Note that if you rely on an environment variable like this, the `Sys.setenv()` call above must be kept in your script for your code to be reproducible.
+
+For downloading a public dataset, no API Key is needed.
+
+```{r}
+Sys.setenv("DATAVERSE_KEY" = "")
+```
+
 
 
 ## Retrieving Metadata

From 3d0726e57e9c38bad064d4dda2c880a7b2bfddc8 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 21:06:15 -0800
Subject: [PATCH 18/27] Prevents other arguments from being interpreted as
 search terms

---
 R/utils.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index b7d0e3e..82a9ca7 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -118,11 +118,11 @@ is_ingested <- function(
 
   if (is_number) {
     x_query <- paste0("datafile_", x)
-    file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key, ...))
+    file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key))
   } else {
     # expect doi
     x_query <- paste0("\"", x, "\"")
-    file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key, ...))
+    file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key))
   }
 
   if (length(file_info) == 0) {

From f6c9148c55af93cb498d0c91bc8898b865d979e7 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 21:08:48 -0800
Subject: [PATCH 19/27] In case key is needed

---
 R/get_file_by_id.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index 0cb4bf9..dbee4b9 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -44,7 +44,7 @@ get_file_by_id <- function(
     }
 
     # ping get_file_metadata to see if file is ingested
-    ingested <- is_ingested(fileid, server = server)
+    ingested <- is_ingested(fileid, server = server, key = key)
 
     # update archival if not specified
     if (isFALSE(ingested))

From 5b1aa0d627964d369a97c78611cd78acd4c39e09 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 21:17:49 -0800
Subject: [PATCH 20/27] Make progress conditional on size #108

---
 R/get_file_by_id.R  | 16 ++++++++++++++--
 R/utils.R           | 33 +++++++++++++++++++++++++++++++--
 man/files.Rd        |  6 ++++--
 man/get_filesize.Rd | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 6 deletions(-)
 create mode 100644 man/get_filesize.Rd

diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index dbee4b9..4bf7d14 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -5,7 +5,9 @@
 #' no ingested version, is set to NA. Note in `get_dataframe_*`,
 #' `original` is set to FALSE by default. Either can be changed.
 #' @param fileid A numeric ID internally used for `get_file_by_id`. Can be a vector for multiple files.
-#' @param progress Whether to show a progress bar of the download. Defaults to `FALSE`.
+#' @param progress Whether to show a progress bar of the download.
+#'   If not specified, will be set to `TRUE` for a file larger than 100MB. To fix
+#'   a value, set `FALSE` or `TRUE`.
 #'
 #' @export
 get_file_by_id <- function(
@@ -14,7 +16,7 @@ get_file_by_id <- function(
   format          = c("original", "bundle"),
   vars            = NULL,
   original        = TRUE,
-  progress        = FALSE,
+  progress        = NULL,
   key             = Sys.getenv("DATAVERSE_KEY"),
   server          = Sys.getenv("DATAVERSE_SERVER"),
   ...
@@ -46,6 +48,16 @@ get_file_by_id <- function(
     # ping get_file_metadata to see if file is ingested
     ingested <- is_ingested(fileid, server = server, key = key)
 
+    # if progress = NULL, determine progress by size
+    if (is.null(progress)) {
+      bytesize <- get_filesize(fileid, server = server, key = key)
+      if (bytesize > 1e8) {
+        progress <- TRUE
+      } else {
+        progress <- FALSE
+      }
+    }
+
     # update archival if not specified
     if (isFALSE(ingested))
       original <- NA
diff --git a/R/utils.R b/R/utils.R
index 82a9ca7..62cbedc 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -92,7 +92,7 @@ is_ingested <- function(x, ...) {
 #' @param x A numeric fileid or file-specific DOI
 #' @param ... Arguments passed on to `get_file` (no effect here)
 #' @template envvars
-#'
+#' @return Length-1 logical, `TRUE` if it is ingested and `FALSE` otherwise
 #' @examples
 #' \dontrun{
 #' # https://demo.dataverse.org/file.xhtml?persistentId=doi:10.70122/FK2/PPIAXE
@@ -132,11 +132,40 @@ is_ingested <- function(
     warning("More than 1 file found for `is_ingested`, search may be unreliable.")
 
   # if UNF (https://guides.dataverse.org/en/latest/developers/unf/index.html) is not null, it is ingested
-  (!is.null(file_info$unf[1]) && !is.na(file_info$unf[1]))
+  return(!is.null(file_info$unf[1]) && !is.na(file_info$unf[1]))
 }
 
 
+#' Get File size of file
+#'
+#' @param x A numeric fileid or file-specific DOI
+#' @template envvars
+#' @return number of bytes as a numeric
+#' @keywords internal
+get_filesize <- function(
+  x,
+  key = Sys.getenv("DATAVERSE_KEY"),
+  server  = Sys.getenv("DATAVERSE_SERVER")) {
+
+    # look the file up via the search API, by numeric file id or by file DOI
+    if (is.numeric(x)) {
+      x_query <- paste0("datafile_", x)
+      file_info <- suppressMessages(dataverse_search(id = x_query, type = "file", server = server, key = key))
+    } else {
+      # expect doi
+      x_query <- paste0("\"", x, "\"")
+      file_info <- suppressMessages(dataverse_search(filePersistentId = x_query, type = "file", server = server, key = key))
+    }
+
+    if (length(file_info) == 0) {
+      stop("File information not found on Dataverse API")
+    }
+    if (nrow(file_info) > 1)
+      warning("More than 1 file found for `get_filesize`, search may be unreliable.")
 
+    # use only the first match (as `is_ingested` does) so the result is a scalar
+    return(file_info$size_in_bytes[1])
+}
 
 # other functions
 prepend_doi <- function(dataset) {
diff --git a/man/files.Rd b/man/files.Rd
index 303b0e9..67e017f 100644
--- a/man/files.Rd
+++ b/man/files.Rd
@@ -35,7 +35,7 @@ get_file_by_id(
   format = c("original", "bundle"),
   vars = NULL,
   original = TRUE,
-  progress = FALSE,
+  progress = NULL,
   key = Sys.getenv("DATAVERSE_KEY"),
   server = Sys.getenv("DATAVERSE_SERVER"),
   ...
@@ -102,7 +102,9 @@ nlsw88.tab, use the ingested version.)}
 
 \item{fileid}{A numeric ID internally used for \code{get_file_by_id}. Can be a vector for multiple files.}
 
-\item{progress}{Whether to show a progress bar of the download. Defaults to \code{FALSE}.}
+\item{progress}{Whether to show a progress bar of the download.
+If not specified, will be set to \code{TRUE} for a file larger than 100MB. To fix
+a value, set \code{FALSE} or \code{TRUE}.}
 
 \item{filedoi}{A DOI for a single file (not the entire dataset), of the form
 \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}.
diff --git a/man/get_filesize.Rd b/man/get_filesize.Rd
new file mode 100644
index 0000000..76b3c1e
--- /dev/null
+++ b/man/get_filesize.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{get_filesize}
+\alias{get_filesize}
+\title{Get File size of file}
+\usage{
+get_filesize(
+  x,
+  key = Sys.getenv("DATAVERSE_KEY"),
+  server = Sys.getenv("DATAVERSE_SERVER")
+)
+}
+\arguments{
+\item{x}{A numeric fileid or file-specific DOI}
+
+\item{key}{A character string specifying a Dataverse server API key. If one
+is not specified, functions calling authenticated API endpoints will fail.
+Keys can be specified atomically or globally using
+\code{Sys.setenv("DATAVERSE_KEY" = "examplekey")}.}
+
+\item{server}{A character string specifying a Dataverse server.
+Multiple Dataverse installations exist, with \code{"dataverse.harvard.edu"} being the
+most major. The server can be defined each time within a function, or it can
+be set as a default via an environment variable. To set a default, run
+\code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")}
+or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron}
+file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.}
+}
+\description{
+Get File size of file
+}
+\keyword{internal}

From 4dba7067d51fc79fe544b38f0327f5a935d68451 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 22:00:23 -0800
Subject: [PATCH 21/27] Add NEWS

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 97aa2e8..b1bd00b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,7 @@
 
 * Add progress bar for all downloads (#108)
 * Minor documentation improvements (#64, #107)
+* Faster method for detecting ingest (#113) and robustness to ingested files without a metadata file due to errors (#80)
 * No longer relies on `foreign` (#34)
 
 # CHANGES in dataverse 0.3.9

From 46426c4164bb1bc4fad534be7c88ca7db9381a39 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 22:00:32 -0800
Subject: [PATCH 22/27] Update roxygen

---
 man/get_filesize.Rd | 3 +++
 man/is_ingested.Rd  | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/man/get_filesize.Rd b/man/get_filesize.Rd
index 76b3c1e..0cd370e 100644
--- a/man/get_filesize.Rd
+++ b/man/get_filesize.Rd
@@ -26,6 +26,9 @@ be set as a default via an environment variable. To set a default, run
 or add \verb{DATAVERSE_SERVER = "dataverse.harvard.edu} in one's \code{.Renviron}
 file (\code{usethis::edit_r_environ()}), with the appropriate domain as its value.}
 }
+\value{
+number of bytes as a numeric
+}
 \description{
 Get File size of file
 }
diff --git a/man/is_ingested.Rd b/man/is_ingested.Rd
index 7aff42b..dedef22 100644
--- a/man/is_ingested.Rd
+++ b/man/is_ingested.Rd
@@ -29,6 +29,9 @@ file (\code{usethis::edit_r_environ()}), with the appropriate domain as its valu
 
 \item{...}{Arguments passed on to \code{get_file} (no effect here)}
 }
+\value{
+Length-1 logical, \code{TRUE} if it is ingested and \code{FALSE} otherwise
+}
 \description{
 Identify if file is an ingested file
 }

From 4d122a9ac6fc479ebbb74d460c88fd43b8a1d091 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 22:06:23 -0800
Subject: [PATCH 23/27] Protect against null bytesize

---
 R/get_file_by_id.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/get_file_by_id.R b/R/get_file_by_id.R
index 4bf7d14..1c5e9e3 100644
--- a/R/get_file_by_id.R
+++ b/R/get_file_by_id.R
@@ -51,7 +51,7 @@ get_file_by_id <- function(
     # if progress = NULL, determine progress by size
     if (is.null(progress)) {
       bytesize <- get_filesize(fileid, server = server, key = key)
-      if (bytesize > 1e8) {
+      if (isTRUE(bytesize > 1e8)) {
         progress <- TRUE
       } else {
         progress <- FALSE

From 138363a94a1f21ec5a44bdace2fcf6ee1f3a4355 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 22:25:58 -0800
Subject: [PATCH 24/27] Properly comment example

---
 R/get_dataframe.R    | 4 ++--
 man/get_dataframe.Rd | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/get_dataframe.R b/R/get_dataframe.R
index 955c265..6ab87bb 100644
--- a/R/get_dataframe.R
+++ b/R/get_dataframe.R
@@ -91,8 +91,8 @@
 #' writeBin(as_binary, path(temp, "county.RData"))
 #' load(path(temp, "county.RData"))
 #'
-#' If you are certain each RData contains only one object, one could define a custom
-#' custom function used in https://stackoverflow.com/a/34926943
+#' # If you are certain each RData contains only one object, one could define a
+#' # custom function used in https://stackoverflow.com/a/34926943
 #' load_object <- function(file) {
 #'   tmp <- new.env()
 #'   load(file = file, envir = tmp)
diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd
index b9ce9ff..25c50d7 100644
--- a/man/get_dataframe.Rd
+++ b/man/get_dataframe.Rd
@@ -147,8 +147,8 @@ temp <- tempdir()
 writeBin(as_binary, path(temp, "county.RData"))
 load(path(temp, "county.RData"))
 
-If you are certain each RData contains only one object, one could define a custom
-custom function used in https://stackoverflow.com/a/34926943
+# If you are certain each RData contains only one object, one could define a
+# custom function used in https://stackoverflow.com/a/34926943
 load_object <- function(file) {
   tmp <- new.env()
   load(file = file, envir = tmp)

From 2023edcd1019a1fc382a7c17507311a9bf92048f Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 22:52:36 -0800
Subject: [PATCH 25/27] No linebreak

---
 vignettes/C-download.Rmd | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd
index b97528d..758fd94 100644
--- a/vignettes/C-download.Rmd
+++ b/vignettes/C-download.Rmd
@@ -92,8 +92,7 @@ head(energy)
 ## Retrieving Custom Data Fromats (RDS, Stata, SPSS)
 
 
-If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [inge
-sted](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format.
+If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [ingested](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format.
 
 ```{r,  message=FALSE}
 argentina_tab <- get_dataframe_by_name(

From a488d3c187739ef5bae2778f587d33550144303d Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Wed, 12 Jan 2022 23:00:00 -0800
Subject: [PATCH 26/27] CRAN comments for 0.3.10

---
 cran-comments.md | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/cran-comments.md b/cran-comments.md
index 85d6d5e..5441abf 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,26 +1,19 @@
 Description
 -----------------------------------------------
 
-The `dataverse` package was archived on 2021-04-21 after we could not attend 
-to a failing test in time. This is a submission to effectively revive the package.
+This version updates an expired API token for testing purposes, which started
+causing errors and prompted an email from the CRAN Team on 2021-12-31.
 
-Shiro Kuriwaki will be maintainer for this new submission (for 0.3.9 and going 
-forward), as the automatic `NOTE` will flag.  We communicated this to the CRAN 
-team on 2021-07-18. The previous maintainer, Will Beasley, will continue to 
-contribute to the package as author. 
 
-The nature of the test failure was that the APIs were relying on a nightly CRAN 
-build. After a few unreliable connections after many successful tests, some of the 
-servers eventually returned an error. After Brian Ripley's instructions,  we 
-decided we shouldn't rely  on nightly CRAN builds for testing the package's 
-interactions with servers, even if the servers are reachable. All those tests 
-are now skipped on CRAN (with `testthat::skip_on_cran()`) and instead will run on 
-nightly GitHub Actions builds.
+Because daily tests of the full download are run separately on GitHub Actions,
+this test is not as relevant for CRAN, so we updated the token and
+also skip the check when running on CRAN.  We also closed 8 issues and feature
+extensions recorded on GitHub.
 
 
 Thank you for taking the time to review the submission.
 
-- Shiro Kuriwaki
+Shiro Kuriwaki
 
 
 Test environments
@@ -40,6 +33,5 @@ Test environments
 R CMD check results
 -----------------------------------------------
 
-* One NOTE for a new maintainer. 
 * No ERRORs, WARNINGs on any builds.
 * In the past, there has been a note about possibly misspelled words "APIs" and "Dataverse".  If they appear on your machines, both spellings are intentional.

From f696f744fb8ee2e41bada189e00dbe2546b8b2c2 Mon Sep 17 00:00:00 2001
From: Shiro Kuriwaki <shirokuriwaki@gmail.com>
Date: Thu, 13 Jan 2022 07:39:42 -0800
Subject: [PATCH 27/27] v2

---
 .github/workflows/R-CMD-check-daily.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/R-CMD-check-daily.yaml b/.github/workflows/R-CMD-check-daily.yaml
index a2c7453..22b7bbe 100644
--- a/.github/workflows/R-CMD-check-daily.yaml
+++ b/.github/workflows/R-CMD-check-daily.yaml
@@ -2,7 +2,7 @@
 # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
 on:
   schedule:
-    - cron: "20 3 * * *" # Run every morning at 3:20am UTC 
+    - cron: "20 3 * * *" # Run every morning at 3:20am UTC
     # - cron: "7 1 * * *" # Run every morning at 1:07am UTC (~8pm central)
 
 name: R-CMD-check-daily
@@ -14,7 +14,7 @@ jobs:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
     steps:
       - uses: actions/checkout@v2
-      - uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-r@v2
       - uses: r-lib/actions/setup-pandoc@master
 
       - name: Query dependencies