1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
# Inner function to read pdf:
#
# Converts each PDF in `x` to plain text with the external `pdftotext`
# command-line tool and returns the extracted text.
#
# Args:
#   x: Character vector of paths to PDF files.
#
# Returns:
#   Character vector with one element per entry of `x`. An element is ""
#   (and a warning "Failure in file <path>" is raised) when the conversion
#   of that file did not succeed.
#
# Side effects:
#   Leaves a text file next to each converted PDF: `file.pdf` -> `file.txt`;
#   a path without a `.pdf` extension gets `.txt` appended.
getPDF <- function(x)
{
  txtfiles <- character(length(x))
  # system() goes through a shell on Unix-alikes but not on Windows, so the
  # quoting style has to follow the platform.
  quoteType <- if (.Platform$OS.type == "windows") "cmd" else "sh"
  # seq_along() rather than 1:length(x): the latter iterates over c(1, 0)
  # when `x` is empty.
  for (i in seq_along(x))
  {
    # Name the output file explicitly instead of guessing what pdftotext
    # picked. The old lower-case-only substitution left e.g. "a.PDF"
    # unchanged, so the PDF itself was read back as if it were the text.
    if (grepl("\\.pdf$", x[i], ignore.case = TRUE)) {
      fileName <- sub("\\.pdf$", ".txt", x[i], ignore.case = TRUE)
    } else {
      fileName <- paste0(x[i], ".txt")
    }
    # shQuote() keeps paths containing spaces, quotes or shell
    # metacharacters from breaking (or injecting into) the command line.
    status <- system(paste(
      "pdftotext -q -enc ASCII7",
      shQuote(x[i], type = quoteType),
      shQuote(fileName, type = quoteType)
    ))
    # Require a zero exit status as well as the file: a text file left over
    # from an earlier run must not be mistaken for a fresh conversion.
    if (identical(as.integer(status), 0L) && file.exists(fileName)) {
      txtfiles[i] <- readChar(fileName, file.info(fileName)$size)
    } else {
      warning(paste("Failure in file", x[i]))
      txtfiles[i] <- ""
    }
  }
  return(txtfiles)
}
## Function to check directory of PDFs:
##
## Collects every file whose name ends in ".pdf" below `dir`, extracts the
## text of each one with getPDF() and passes the texts to statcheck().
##
## Args:
##   dir:    Directory to search. When missing, a directory chooser dialog
##           (tk_choose.dir) is opened.
##   subdir: Passed to list.files() as `recursive`; TRUE also searches
##           sub-directories.
##   ...:    Forwarded unchanged to statcheck().
##
## Returns:
##   Whatever statcheck() returns for the character vector of texts, which
##   is named by the PDF file names without their ".pdf" extension.
##
## Raises:
##   An error when no directory was chosen or when no PDF is found.
checkPDFdir <-
  function(dir,
           subdir = TRUE,
           ...) {
    if (missing(dir)) {
      dir <- tk_choose.dir()
      # tcltk documents a character NA when the dialog is cancelled; stop
      # here with a clear message instead of handing NA to list.files().
      if (length(dir) == 0 || is.na(dir[1]))
        stop("No directory selected")
    }
    # list.files() applies `pattern` to the file name, so anchoring it here
    # replaces the former second grepl() pass over the full paths.
    files <-
      list.files(dir,
                 pattern = "\\.pdf$",
                 full.names = TRUE,
                 recursive = subdir)
    if (length(files) == 0)
      stop("No PDF found")
    txts <- character(length(files))
    message("Importing PDF files...")
    pb <- txtProgressBar(max = length(files), style = 3)
    # `finally` closes the progress bar even when a conversion errors, so
    # the console is not left in the middle of a bar.
    tryCatch(
      for (i in seq_along(files))
      {
        txts[i] <- getPDF(files[i])
        setTxtProgressBar(pb, i)
      },
      finally = close(pb)
    )
    names(txts) <- gsub("\\.pdf$", "", basename(files))
    return(statcheck(txts, ...))
  }
## Function to check given PDFs:
##
## Extracts the text of each PDF in `files` with getPDF() and passes the
## texts to statcheck().
##
## Args:
##   files: Character vector of paths to PDF files. When missing, a file
##          chooser dialog (tk_choose.files) is opened.
##   ...:   Forwarded unchanged to statcheck().
##
## Returns:
##   Whatever statcheck() returns for the character vector of texts, which
##   is named by the PDF file names without their ".pdf" extension.
checkPDF <-
  function(files, ...) {
    if (missing(files))
      files <- tk_choose.files()
    # vapply() pins the result to a character vector; sapply() returned an
    # empty list() when no files were given.
    txts <- vapply(files, getPDF, character(1), USE.NAMES = FALSE)
    names(txts) <-
      gsub("\\.pdf$", "", basename(files), perl = TRUE)
    return(statcheck(txts, ...))
  }
|