Back to Code Snippets


Uncompresses a zip archive and converts each CSV file therein to Parquet (R)

Execute this R

library(tidyverse)
library(duckdb)
# Convert every CSV file contained in a zip archive to a Parquet file.
#
# Inputs (edit these two paths):
#   zip_file      - path to the zip archive to read.
#   output_folder - directory that receives one `<name>.parquet` per CSV.
#
# The conversion is done entirely inside DuckDB via `COPY ... TO ... (FORMAT
# PARQUET)`, so the CSV contents are streamed by the database engine and never
# materialised as an R data frame.
zip_file <- "path/to/your/zipfile.zip"
output_folder <- "path/to/output/folder/"

# Unzip the archive to a temp dir
temp_dir <- tempfile()
unzip(zip_file, exdir = temp_dir)

# Make sure the destination exists; the Parquet write fails otherwise.
dir.create(output_folder, recursive = TRUE, showWarnings = FALSE)

# Get list of CSV files in the temporary directory. Archives often wrap their
# contents in a top-level folder, so search recursively; also accept `.CSV`.
# NOTE: output names are derived from the base file name only, so two CSVs with
# the same name in different sub-folders will overwrite each other.
csv_files <- list.files(
  temp_dir,
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE,
  ignore.case = TRUE
)

# One in-memory DuckDB connection is enough for all files.
con <- DBI::dbConnect(duckdb::duckdb())

# `tryCatch(finally = )` guarantees the connection is closed and the temp
# directory removed even when one of the conversions raises an error
# (`on.exit()` is not usable at the top level of a script).
tryCatch(
  {
    # Loop through CSV files and convert to parquet file using DuckDB
    for (csv_file in csv_files) {
      parquet_file <- file.path(
        output_folder,
        paste0(tools::file_path_sans_ext(basename(csv_file)), ".parquet")
      )
      # dbQuoteString() turns the paths into properly escaped SQL literals,
      # so file names containing quotes cannot break the statement.
      DBI::dbExecute(
        con,
        paste0(
          "COPY (SELECT * FROM read_csv_auto(",
          DBI::dbQuoteString(con, csv_file),
          ")) TO ",
          DBI::dbQuoteString(con, parquet_file),
          " (FORMAT PARQUET)"
        )
      )
    }
  },
  finally = {
    DBI::dbDisconnect(con, shutdown = TRUE)
    unlink(temp_dir, recursive = TRUE)
  }
)

Copy code

Kyle Lundstedt

Copy code

Expand

Share link