1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
#' @title Import list of data frames
#' @description Use [import()] to import a list of data frames from a vector of file names or from a multi-object file (Excel workbook, .Rdata file, compressed directory in a zip file or tar archive, or HTML file)
#' @param file A character string containing a single file name for a multi-object file (e.g., Excel workbook, zip file, tar archive, or HTML file), or a vector of file paths for multiple files to be imported.
#' @param which If `file` is a single file path, this specifies which objects should be extracted (passed to [import()]'s `which` argument). Ignored otherwise.
#' @param rbind A logical indicating whether to pass the import list of data frames through [data.table::rbindlist()].
#' @param rbind_label If `rbind = TRUE`, a character string specifying the name of a column to add to the data frame indicating its source file.
#' @param rbind_fill If `rbind = TRUE`, a logical indicating whether to set the `fill = TRUE` (and fill missing columns with `NA`).
#' @param \dots Additional arguments passed to [import()]. Behavior may be unexpected if files are of different formats.
#' @inheritParams import
#' @inheritSection import Trust
#' @inheritSection import Which
#' @inherit import references
#' @return If `rbind=FALSE` (the default), a list of a data frames. Otherwise, that list is passed to [data.table::rbindlist()] with `fill = TRUE` and returns a data frame object of class set by the `setclass` argument; if this operation fails, the list is returned.
#' @details When file is a vector of file paths and any files are missing, those files are ignored (with warnings) and this function will not raise any error. For compressed files, the file name must also contain information about the file format of all compressed files, e.g. `files.csv.zip` for this function to work.
#' @examples
#' ## For demo, a temp. file path is created with the file extension .xlsx
#' xlsx_file <- tempfile(fileext = ".xlsx")
#' export(
#' list(
#' mtcars1 = mtcars[1:10, ],
#' mtcars2 = mtcars[11:20, ],
#' mtcars3 = mtcars[21:32, ]
#' ),
#' xlsx_file
#' )
#'
#' # import a single file from multi-object workbook
#' import(xlsx_file, sheet = "mtcars1")
#' # import all worksheets, the return value is a list
#' import_list(xlsx_file)
#'
#' # import and rbind all worksheets, the return value is a data frame
#' import_list(xlsx_file, rbind = TRUE)
#' @seealso [import()], [export_list()], [export()]
#' @export
import_list <- function(file, setclass = getOption("rio.import.class", "data.frame"), which, rbind = FALSE,
rbind_label = "_file", rbind_fill = TRUE, ...) {
.check_file(file, single_only = FALSE)
## special cases
if (length(file) == 1) {
x <- .read_file_as_list(file = file, which = which, setclass = setclass, rbind = rbind, rbind_label = rbind_label, ...)
} else {
## note the plural
x <- .read_multiple_files_as_list(files = file, setclass = setclass, rbind = rbind, rbind_label = rbind_label, ...)
}
## optionally rbind
if (isTRUE(rbind)) {
if (length(x) == 1) {
x <- x[[1L]]
} else {
x2 <- try(data.table::rbindlist(x, fill = rbind_fill), silent = TRUE)
if (inherits(x2, "try-error")) {
warning("Attempt to rbindlist() the data did not succeed. List returned instead.", call. = FALSE)
return(x)
}
x <- x2
}
x <- set_class(x, class = setclass)
}
return(x)
}
.strip_exts <- function(file) {
vapply(file, function(x) tools::file_path_sans_ext(basename(x)), character(1))
}
.read_multiple_files_as_list <- function(files, setclass, rbind, rbind_label, ...) {
names(files) <- .strip_exts(files)
x <- lapply(files, function(thisfile) {
out <- try(import(thisfile, setclass = setclass, ...), silent = TRUE)
if (inherits(out, "try-error")) {
warning(sprintf("Import failed for %s", thisfile), call. = FALSE)
##out <- NULL
return(NULL)
} else if (isTRUE(rbind)) {
out[[rbind_label]] <- thisfile
}
structure(out, filename = thisfile)
})
names(x) <- names(files)
return(x)
}
.read_file_as_list <- function(file, which, setclass, rbind, rbind_label, ...) {
## respect the `format` argument, if given
if ("format" %in% ...names()) {
format <- list(...)[["format"]]
} else {
format <- get_info(file)$format
}
if (R.utils::isUrl(file)) {
file <- remote_to_local(file)
}
if (format == "rdata") {
return(.import.rio_rdata(file = file, .return_everything = TRUE, ...))
}
archive_format <- find_compress(file)
if (!format %in% c("html", "xlsx", "xls", "xlsm", "ods", "fods") && !archive_format$compress %in% c("zip", "tar", "tar.gz", "tar.bz2")) {
which <- 1
whichnames <- NULL
}
## getting list of `whichnames`
if (format == "html") {
.check_pkg_availability("xml2")
tables <- xml2::xml_find_all(xml2::read_html(unclass(file)), ".//table")
if (missing(which)) {
which <- seq_along(tables)
}
whichnames <- vapply(xml2::xml_attrs(tables[which]),
function(x) if ("class" %in% names(x)) x["class"] else "",
FUN.VALUE = character(1)
)
names(which) <- whichnames
}
if (format %in% c("xls", "xlsx", "xlsm","ods", "fods")) {
## .check_pkg_availability("readxl")
sheet_func <- readxl::excel_sheets
if (format %in% c("ods", "fods")) {
.check_pkg_availability("readODS")
sheet_func <- readODS::list_ods_sheets
}
whichnames <- sheet_func(path = file)
if (missing(which)) {
which <- seq_along(whichnames)
names(which) <- whichnames
} else if (is.character(which)) {
whichnames <- which
} else {
whichnames <- whichnames[which]
}
}
if (archive_format$compress %in% c("zip", "tar", "tar.gz", "tar.bz2")) {
whichnames <- .list_archive(file, archive_format$compress)
if (missing(which)) {
which <- seq_along(whichnames)
names(which) <- .strip_exts(whichnames)
} else if (is.character(which)) {
whichnames <- whichnames[whichnames %in% which]
} else {
names(which) <- .strip_exts(whichnames)
}
}
## reading all `whichnames`
x <- lapply(which, function(thiswhich) {
out <- try(import(file, setclass = setclass, which = thiswhich, ...), silent = TRUE)
if (inherits(out, "try-error")) {
warning(sprintf("Import failed for %s from %s", thiswhich, file))
out <- NULL
} else if (isTRUE(rbind) && length(which) > 1) {
out[[rbind_label]] <- thiswhich
}
out
})
names(x) <- whichnames
return(x)
}
|