rm(list = ls())
library(dplyr)
library(broom)
library(ggplot2)# 设置工作目录
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Multiple Linear Regression")# 读取多样性数据
diversity_data <- read.table("alpha_diversity.txt", header = TRUE, sep = "\t")# 读取分组数据
group_data <- read.table("group.txt", header = TRUE, sep = "\t")# 合并两个数据集
merged_data <- merge(diversity_data, group_data, by.x = "SampleID", by.y = "SampleID")# 确保因变量为数值型,自变量为因子型
merged_data$Gene <- as.factor(merged_data$Gene)# 生成所有模型(包含交互项)
diversity_metrics <- c("Shannon", "Simpson", "Pielou")
results <- lapply(diversity_metrics, function(metric) {# 构建回归模型公式,包含基因型和时间的交互项formula <- as.formula(paste(metric, "~ Gene"))# 拟合线性模型model <- lm(formula, data = merged_data)# 结果整理并加上指标名称tidy(model) %>%mutate(Metric = metric) # 添加指标名称
})# 合并所有结果
final_table <- bind_rows(results)# 筛选需要的列并重命名
final_table <- final_table %>%select(Metric, term, estimate, std.error, p.value) %>%rename(Variable = term, β = estimate, SE = std.error, p = p.value)# 输出结果表格
print(final_table)# 将结果保存为CSV文件
write.csv(final_table, "alpha_regression_results.csv", row.names = TRUE)
#########################################################
# 定义一个函数来提取模型评估参数
extract_metrics <- function(model, metric_name) {model_summary <- summary(model)data.frame(Metric = metric_name,Residual_SE = model_summary$sigma,R_squared = model_summary$r.squared,Adjusted_R_squared = model_summary$adj.r.squared,F_statistic = model_summary$fstatistic[1],F_p_value = pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE))
}# 对多个模型应用
metrics_list <- lapply(diversity_metrics, function(metric) {formula <- as.formula(paste(metric, "~ Gene"))model <- lm(formula, data = merged_data)extract_metrics(model, metric)
})# 合并结果到一个表格
metrics_table <- do.call(rbind, metrics_list)# 查看结果
print(metrics_table)write.csv(metrics_table, "alpha_metrics_table.csv", row.names = FALSE)