Intermediate

File I/O

Read and write CSV, Excel, JSON, and RDS files, and connect to databases using R.

Reading CSV Files

# Base R - read.csv
# Each call below reads the same file and reassigns df; they differ only in
# which arguments are spelled out.
df <- read.csv("data.csv")
# stringsAsFactors = FALSE has been the default since R 4.0, so this argument
# only matters when the code must also run on older R versions.
df <- read.csv("data.csv", stringsAsFactors = FALSE)
# header = TRUE and sep = "," are already the defaults of read.csv(); they are
# written out here to make the parsing rules explicit.
df <- read.csv("data.csv", header = TRUE, sep = ",")

# readr (tidyverse) - faster and more consistent
library(readr)
df <- read_csv("data.csv")

# Advantages of readr::read_csv:
# - Returns a tibble (enhanced data frame)
# - Faster for large files
# - Better column type guessing
# - Never converts strings to factors (base R only stopped doing this by
#   default in R 4.0)
# - Shows a progress bar for large files

# Read with specific column types
# cols() maps column names to parsers; columns not listed are still guessed.
df <- read_csv("data.csv", col_types = cols(
  id = col_integer(),
  name = col_character(),
  value = col_double()
))

Reading Excel Files

library(readxl)

# Read first sheet
# With no sheet argument, read_excel() reads the first sheet of the workbook.
df <- read_excel("data.xlsx")

# Read specific sheet
# The sheet can be selected either by its name or by its position.
df <- read_excel("data.xlsx", sheet = "Sheet2")
df <- read_excel("data.xlsx", sheet = 2)

# Read specific range
# The range is given in spreadsheet notation (top-left cell:bottom-right cell).
df <- read_excel("data.xlsx", range = "A1:D50")

# List all sheets
# Useful for discovering valid values for the sheet argument; the result is
# not assigned here, so it is only shown at the console.
excel_sheets("data.xlsx")

Writing Files

# Base R
# row.names = FALSE stops write.csv() from adding a column of row names.
write.csv(df, "output.csv", row.names = FALSE)

# readr (faster, no row names by default)
# Requires library(readr). This writes to the same path as the call above,
# so it replaces that file.
write_csv(df, "output.csv")

# Tab-separated
# Same as write_csv() but uses a tab as the field delimiter.
write_tsv(df, "output.tsv")

RDS Format (R Native)

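RDS is R's native binary format for storing a single R object. It preserves data types and attributes exactly (for example factors, dates, and classes) and is typically much faster to read and write than CSV for large datasets.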

# Save any R object to RDS
# One .rds file holds exactly one object; the object's name is not stored.
saveRDS(df, "data.rds")

# Read it back
# readRDS() returns the object, so it can be assigned to any name.
df <- readRDS("data.rds")

# Save multiple objects to .RData
# save() stores each object together with its name. `model` and `results`
# are not created in this snippet and are assumed to exist already.
save(df, model, results, file = "workspace.RData")
# load() recreates the objects under their original names in the current
# environment, silently replacing any existing objects with the same names.
load("workspace.RData")  # Restores all saved objects

JSON

library(jsonlite)

# Read JSON
# fromJSON() parses the file and simplifies the result to R vectors and
# data frames where the JSON structure allows it.
data <- fromJSON("data.json")

# Read from URL
# The same function accepts a URL; this call needs network access and
# reassigns `data`.
data <- fromJSON("https://api.example.com/data")

# Convert R object to JSON
# toJSON() returns the JSON text as an in-memory string, e.g. for sending in
# a request body or printing.
json_str <- toJSON(df, pretty = TRUE)

# Write JSON to a file
# write_json() serialises and writes in one step, passing extra arguments
# such as pretty = TRUE on to toJSON(). Prefer it over base write(), which is
# a column-oriented wrapper around cat() intended for vectors and matrices.
write_json(df, "output.json", pretty = TRUE)

Database Connections

library(DBI)
library(RSQLite)

# Connect to SQLite database
# (RSQLite creates the database file if it does not exist yet.)
con <- dbConnect(SQLite(), "my_database.sqlite")

# Write a data frame to a table
# dbWriteTable() raises an error when the table already exists (unless
# overwrite = TRUE or append = TRUE is passed), so guard the call to keep
# the script re-runnable without touching existing data.
if (!dbExistsTable(con, "employees")) {
  dbWriteTable(con, "employees", df)
}

# Query the database
# dbGetQuery() sends the SQL and returns the matching rows as a data frame.
# For values that come from user input, pass them via `params =` with
# placeholders instead of pasting them into the SQL string.
result <- dbGetQuery(con, "SELECT * FROM employees WHERE age > 30")

# List tables
dbListTables(con)

# Always disconnect when done
# Inside a function, register this with on.exit(dbDisconnect(con), add = TRUE)
# right after connecting so it also runs when an error occurs.
dbDisconnect(con)

Working Directories

# Check current working directory
# Relative paths such as "data.csv" are resolved against this directory.
getwd()

# Set working directory
# "/path/to/project" is a placeholder. setwd() changes state for the whole
# R session, so every later relative path is affected.
setwd("/path/to/project")

# List files in directory
# With no arguments, list.files() lists the current working directory.
list.files()
# pattern is a regular expression: "\\.csv$" matches names ending in ".csv".
list.files(pattern = "\\.csv$")  # Only .csv files

# Check if a file exists
# Returns TRUE or FALSE; handy as a guard before attempting to read a file.
file.exists("data.csv")

✅

Best practice: Use RStudio Projects (.Rproj files) instead of setwd(). Projects automatically set the working directory to the project root, making your code portable and reproducible.

← Previous Data Structures Next → Best Practices