library(tidyverse)
library(patchwork)
19 数据可视化基础
好的图表胜过千言万语。本章介绍 ggplot2 的语法体系和科研论文级图表的制作方法。
19.1 ggplot2 语法体系
ggplot2 基于“图形语法”(Grammar of Graphics),将图表分解为几个核心组件:
ggplot(data, aes(x, y)) + # 数据和映射
geom_xxx() + # 几何对象(画什么)
scale_xxx() + # 标度(怎么映射)
labs() + # 标签
theme() # 主题(外观)
Note
aes() 是什么?
aes() 是 aesthetic mapping(美学映射)的缩写,它告诉 ggplot2 如何将数据变量映射到图形属性上:
- aes(x = soil_ph) → 将 soil_ph 映射到 x 轴
- aes(color = habitat) → 将 habitat 映射到颜色
- aes(size = elevation) → 将 elevation 映射到点的大小
关键区别:写在 aes() 里面的属性由数据驱动(每个数据点可以不同),写在 aes() 外面的属性是固定值(所有点相同)。例如 geom_point(aes(color = habitat)) 按生境着色,而 geom_point(color = "red") 所有点都是红色。
Tip
+ 运算符
ggplot2 使用 + 将图层逐层叠加。每个 + 后面添加一个新的组件(几何对象、标度、主题等)。注意 + 必须放在行末,不能放在下一行的开头:
# 正确
ggplot(data, aes(x, y)) +
geom_point()
# 错误(+ 不能在行首)
ggplot(data, aes(x, y))
+ geom_point()
19.2 示例数据
# Simulated ecological survey used by every example in this chapter.
# The seed is fixed so that the random draws below are reproducible.
set.seed(2027)
n <- 100
ecology <- tibble(
  # Counts drawn from a Poisson distribution
  species_richness = rpois(n, lambda = 15),
  soil_ph = rnorm(n, mean = 6.2, sd = 0.8),
  # Log-normal draws: right-skewed, with a median of 25
  organic_carbon = rlnorm(n, meanlog = log(25), sdlog = 0.4),
  elevation = runif(n, min = 200, max = 1200),
  # Habitats are sampled with unequal probabilities
  habitat = sample(
    c("森林", "草地", "湿地", "灌丛"),
    size = n,
    replace = TRUE,
    prob = c(0.4, 0.25, 0.15, 0.2)
  ),
  season = sample(c("春", "夏", "秋", "冬"), size = n, replace = TRUE)
)
19.3 常用图表类型
19.3.1 散点图
# Scatter plot of species richness against soil pH, overlaid with a linear
# fit and its confidence band.
ecology |>
  ggplot(aes(x = soil_ph, y = species_richness)) +
  geom_point(size = 2, alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(
    title = "土壤 pH 与物种丰富度的关系",
    x = "土壤 pH",
    y = "物种丰富度"
  ) +
  theme_minimal()
19.3.2 分组散点图
# Same relationship split by habitat. Colour is mapped in the top-level
# aes(), so both the points and the per-habitat regression lines inherit it.
ecology |>
  ggplot(aes(x = soil_ph, y = species_richness, color = habitat)) +
  geom_point(size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "土壤 pH",
    y = "物种丰富度",
    color = "生境类型"
  ) +
  theme_minimal()
19.3.3 箱线图
# Box plot of species richness per habitat with the raw observations
# jittered on top.
ggplot(ecology, aes(x = habitat, y = species_richness, fill = habitat)) +
  # geom_jitter() below already draws every observation, so the box plot's
  # own outlier markers are hidden; otherwise each outlier appears twice.
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  # Jitter horizontally only: height = 0 keeps each point at its true
  # richness value instead of adding random vertical noise to the counts.
  geom_jitter(width = 0.2, height = 0, alpha = 0.3, size = 1) +
  labs(x = "", y = "物种丰富度", title = "不同生境的物种丰富度") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  # The x axis already names the habitats, so the fill legend is redundant
  theme(legend.position = "none")
19.3.4 直方图与密度图
# Left panel: overall distribution of organic carbon as a 20-bin histogram.
p1 <- ecology |>
  ggplot(aes(x = organic_carbon)) +
  geom_histogram(
    bins = 20,
    fill = "steelblue",
    color = "white",
    alpha = 0.7
  ) +
  labs(title = "直方图", x = "有机碳 (g/kg)", y = "频次") +
  theme_minimal()

# Right panel: one semi-transparent density curve per habitat.
p2 <- ecology |>
  ggplot(aes(x = organic_carbon, fill = habitat)) +
  geom_density(alpha = 0.5) +
  labs(title = "密度图", x = "有机碳 (g/kg)", y = "密度", fill = "生境") +
  theme_minimal()

# patchwork: `+` combines the two plots into one figure
p1 + p2
19.3.5 柱状图
# Mean richness per habitat with its standard error (sd / sqrt(group size)),
# drawn as bars ordered from the highest to the lowest mean.
ecology |>
  group_by(habitat) |>
  summarise(
    mean_richness = mean(species_richness),
    se_richness = sd(species_richness) / sqrt(n())
  ) |>
  ggplot(aes(
    # Negating the mean makes reorder() sort the bars in descending order
    x = reorder(habitat, -mean_richness),
    y = mean_richness,
    fill = habitat
  )) +
  geom_col(alpha = 0.8) +
  geom_errorbar(
    aes(
      ymin = mean_richness - se_richness,
      ymax = mean_richness + se_richness
    ),
    width = 0.2
  ) +
  labs(x = "", y = "平均物种丰富度 (± SE)") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(legend.position = "none")
19.3.6 热力图(相关性矩阵)
# Correlation matrix of the four numeric variables
cor_matrix <- cor(
  select(ecology, species_richness, soil_ph, organic_carbon, elevation)
)

# Reshape the matrix to long format (one row per variable pair) and draw it
# as a tile heat map with the rounded coefficient printed in each cell.
cor_matrix |>
  as_tibble(rownames = "var1") |>
  pivot_longer(cols = -var1, names_to = "var2", values_to = "cor") |>
  ggplot(aes(x = var1, y = var2, fill = cor)) +
  geom_tile() +
  geom_text(aes(label = round(cor, 2)), size = 4) +
  # Diverging palette centred on 0 and fixed to the full [-1, 1] range
  scale_fill_gradient2(
    low = "steelblue",
    mid = "white",
    high = "coral",
    midpoint = 0,
    limits = c(-1, 1)
  ) +
  labs(title = "变量相关性矩阵", x = "", y = "", fill = "相关系数") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
19.4 科研论文级图表
19.4.1 自定义主题
#' Publication-style ggplot2 theme
#'
#' Starts from `theme_bw()` and layers a small set of overrides on top:
#' no minor grid lines, light grey major grid lines, borderless facet strips
#' and legend background, a bold title, and text sizes derived from
#' `base_size`.
#'
#' @param base_size Base font size passed to `theme_bw()`. The plot title is
#'   set 2 larger and the axis tick labels 1 smaller than this value.
#' @return A ggplot2 theme object that can be added to a plot with `+`.
theme_paper <- function(base_size = 12) {
  paper_overrides <- theme(
    # Grid
    panel.grid.major = element_line(color = "grey92"),
    panel.grid.minor = element_blank(),
    # Backgrounds without borders
    strip.background = element_rect(fill = "grey95", color = NA),
    legend.background = element_rect(fill = "white", color = NA),
    # Text sizes relative to base_size
    plot.title = element_text(face = "bold", size = base_size + 2),
    axis.title = element_text(size = base_size),
    axis.text = element_text(size = base_size - 1)
  )

  theme_bw(base_size = base_size) + paper_overrides
}
19.4.2 多面板组合图
# Scatter panel: richness against elevation. Colour is mapped only in the
# point layer, so the single black regression line is fitted to all
# habitats together.
p_scatter <- ecology |>
  ggplot(aes(x = elevation, y = species_richness)) +
  geom_point(aes(color = habitat), alpha = 0.6) +
  geom_smooth(method = "lm", color = "black", linewidth = 0.8) +
  labs(
    x = "海拔 (m)",
    y = "物种丰富度",
    color = "生境"
  ) +
  theme_paper()

# Box-plot panel: organic carbon per habitat; the legend is dropped because
# the x axis already names the habitats.
p_box <- ecology |>
  ggplot(aes(x = habitat, y = organic_carbon, fill = habitat)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_brewer(palette = "Set2") +
  labs(x = "", y = "有机碳 (g/kg)") +
  theme_paper() +
  theme(legend.position = "none")
# Combine the two panels, give them relative widths of 1.5 : 1, and tag
# them (a), (b).
p_scatter + p_box +
  plot_layout(widths = c(1.5, 1)) +
  plot_annotation(
    tag_levels = "a",
    tag_prefix = "(",
    tag_suffix = ")"
  )
19.4.3 导出高质量图片
# Keep the combined figure in a variable so the same object is written to
# every format. The layout matches the on-screen composition (relative
# widths 1.5 : 1), so the exported figure looks like the one displayed.
p_final <- p_scatter + p_box +
  plot_annotation(tag_levels = "a", tag_prefix = "(", tag_suffix = ")") +
  plot_layout(widths = c(1.5, 1))

# Make sure the target folder exists before saving; ggsave() can fail when
# the directory is missing.
fig_dir <- "output/figures"
dir.create(fig_dir, recursive = TRUE, showWarnings = FALSE)

# PDF (vector, preferred for manuscripts). The labels contain Chinese text,
# which the default pdf() device cannot render without extra font setup, so
# use the Cairo device whenever this R build supports it. dpi is omitted
# because it does not affect vector output at a size given in inches.
pdf_device <- if (capabilities("cairo")) cairo_pdf else "pdf"
ggsave(
  file.path(fig_dir, "fig1_diversity.pdf"),
  plot = p_final, device = pdf_device, width = 10, height = 5
)

# PNG (raster, for slides and web pages)
ggsave(
  file.path(fig_dir, "fig1_diversity.png"),
  plot = p_final, width = 10, height = 5, dpi = 300
)

# TIFF (required by some journals); LZW compression is lossless
ggsave(
  file.path(fig_dir, "fig1_diversity.tiff"),
  plot = p_final, width = 10, height = 5, dpi = 600,
  compression = "lzw"
)
Tip
期刊图表要求
- 分辨率:通常 ≥ 300 dpi(线图 600 dpi)
- 格式:PDF/EPS(矢量)或 TIFF(位图)
- 字体大小:图中文字 ≥ 8pt
- 配色:考虑色盲友好(避免红绿搭配,推荐 viridis 色板)
- 图注:在图下方,不在图内
19.5 色盲友好配色:viridis
viridis 色板在亮度上均匀变化,对色盲用户友好,打印为灰度图时也能区分。ggplot2 内置了 viridis 系列函数:
# Discrete variable: scale_color_viridis_d()
p1 <- ecology |>
  ggplot(aes(x = soil_ph, y = species_richness, color = habitat)) +
  geom_point(size = 2, alpha = 0.7) +
  scale_color_viridis_d(option = "D") +
  labs(
    title = "viridis 离散配色",
    x = "土壤 pH",
    y = "物种丰富度",
    color = "生境"
  ) +
  theme_minimal()

# Continuous variable: scale_color_viridis_c()
p2 <- ecology |>
  ggplot(aes(x = soil_ph, y = species_richness, color = elevation)) +
  geom_point(size = 2, alpha = 0.7) +
  scale_color_viridis_c(option = "C") +
  labs(
    title = "viridis 连续配色",
    x = "土壤 pH",
    y = "物种丰富度",
    color = "海拔 (m)"
  ) +
  theme_minimal()

# Show both palettes side by side
p1 + p2
Tip
viridis 色板选项
option 参数可选 "A"(magma)、"B"(inferno)、"C"(plasma)、"D"(viridis,默认)、"E"(cividis)。科研论文推荐使用 "D" 或 "E"。
19.6 课后练习
- 用 iris 数据集绘制花瓣长度 vs 花瓣宽度的散点图,按物种着色
- 绘制三个物种花萼长度的箱线图,添加抖动点
- 创建一个 2×2 的组合图(散点图 + 箱线图 + 直方图 + 密度图)
- 应用自定义主题,导出为 PDF
- 尝试使用 scale_color_viridis_d() 替换默认配色