Skip to content

Commit

Permalink
Merge pull request #114 from IQSS/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
kuriwaki authored Jan 13, 2022
2 parents 4775a92 + f696f74 commit d496001
Show file tree
Hide file tree
Showing 15 changed files with 280 additions and 162 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ man-roxygen/*
^CRAN-RELEASE$
^\.github$
rhub-checks
/Untitled.+\.R$
4 changes: 2 additions & 2 deletions .github/workflows/R-CMD-check-daily.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
schedule:
- cron: "20 3 * * *" # Run every morning at 3:20am UTC
- cron: "20 3 * * *" # Run every morning at 3:20am UTC
# - cron: "7 1 * * *" # Run every morning at 1:07am UTC (~8pm central)

name: R-CMD-check-daily
Expand All @@ -14,7 +14,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2
- uses: r-lib/actions/setup-pandoc@master

- name: Query dependencies
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

* Add progress bar for all downloads (#108)
* Minor documentation improvements (#64, #107)
* Faster method for detecting ingest (#113) and robustness to ingested files without a metadata file due to errors (#80)
* No longer relies on `foreign` (#34)

# CHANGES in dataverse 0.3.9
Expand Down
10 changes: 6 additions & 4 deletions R/get_dataframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
#' file, then `.f` should be `readRDS` or `readr::read_rds`. It can be a custom
#' function defined by the user. See examples for details.
#'
#' @param original A logical, defaulting to `TRUE`. Whether to read the ingested,
#' archival version of the datafile if one exists. The archival versions are tab-delimited
#' @param original A logical, whether to read the original file rather than the
#' ingested, archival version of the datafile if one exists. If `TRUE`, users should
#' supply a function (`.f`) to use to read in the original. The archival versions are
#' tab-delimited `.tab` files so if `original = FALSE`, `.f` is set to `readr::read_tsv`.
#'
#' @inheritDotParams get_file
Expand Down Expand Up @@ -90,8 +91,8 @@
#' writeBin(as_binary, path(temp, "county.RData"))
#' load(path(temp, "county.RData"))
#'
#' If you are certain each RData contains only one object, one could define a custom
#' custom function used in https://stackoverflow.com/a/34926943
#' # If you are certain each RData contains only one object, one could define a
#' # custom function used in https://stackoverflow.com/a/34926943
#' load_object <- function(file) {
#' tmp <- new.env()
#' load(file = file, envir = tmp)
Expand Down Expand Up @@ -132,6 +133,7 @@ get_dataframe_by_id <- function(
# if not ingested, then whether to take the original is not relevant.
ingested <- is_ingested(fileid, ...)


if (isFALSE(ingested)) {
original <- NA
}
Expand Down
28 changes: 21 additions & 7 deletions R/get_file_by_id.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#' no ingested version, is set to NA. Note in `get_dataframe_*`,
#' `original` is set to FALSE by default. Either can be changed.
#' @param fileid A numeric ID internally used for `get_file_by_id`. Can be a vector for multiple files.
#' @param progress Whether to show a progress bar of the download. Defaults to `FALSE`.
#' @param progress Whether to show a progress bar of the download.
#' If not specified, will be set to `TRUE` for a file larger than 100MB. To
#' override this default, explicitly set `progress` to `FALSE` or `TRUE`.
#'
#' @export
get_file_by_id <- function(
Expand All @@ -14,7 +16,7 @@ get_file_by_id <- function(
format = c("original", "bundle"),
vars = NULL,
original = TRUE,
progress = FALSE,
progress = NULL,
key = Sys.getenv("DATAVERSE_KEY"),
server = Sys.getenv("DATAVERSE_SERVER"),
...
Expand Down Expand Up @@ -44,25 +46,37 @@ get_file_by_id <- function(
}

# ping get_file_metadata to see if file is ingested
is_ingested <- is_ingested(fileid, server = server)
ingested <- is_ingested(fileid, server = server, key = key)

# if progress = NULL, determine progress by size
if (is.null(progress)) {
bytesize <- get_filesize(fileid, server = server, key = key)
if (isTRUE(bytesize > 1e8)) {
progress <- TRUE
} else {
progress <- FALSE
}
}

# update archival if not specified
if (isFALSE(is_ingested))
if (isFALSE(ingested))
original <- NA

# create query -----
query <- list()

# variables
if (!is.null(vars))
query$vars <- paste0(vars, collapse = ",")

# format only matters in ingested datasets,
# For non-ingested files (rds/docx), we need to NOT specify a format
# For non-ingested files (e.g. rds/docx), we need to NOT specify a format
# also for bundle, only change url
if (is_ingested & format != "bundle")
if (ingested & format != "bundle")
query$format <- match.arg(format)

# if the original is not desired, we need to NOT specify a format
if (is_ingested & (isFALSE(original) || is.na(original) || is.null(original)))
if (ingested & (isFALSE(original) || is.na(original) || is.null(original)))
query$format <- NULL

# part of URL depending on DOI, bundle, or file
Expand Down
Loading

0 comments on commit d496001

Please sign in to comment.