rm(list = ls())
library(readr) # 读取 CSV 文件
library(dplyr) # 数据操作
library(tidyr) # 数据整理
library(brms) # 贝叶斯回归
library(tibble)
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Bayesian linear regression")
# 数据导入
# 读取矩阵和分组文件
bray_matrix <- read.csv("bray.csv", row.names = 1)
jaccard_matrix <- read.csv("jaccard.csv", row.names = 1)
group_info <- read_delim("group.txt", delim = "\t")# --- 1. 计算组内稳定性 ---
calculate_stability <- function(matrix_data, group_info) {long_data <- matrix_data %>%as.matrix() %>%as.table() %>%as.data.frame() %>%setNames(c("Sample1", "Sample2", "Distance")) %>%left_join(group_info, by = c("Sample1" = "SampleID")) %>%rename(Time1 = Time) %>% # 使用时间 (Time) 替换基因 (Gene)left_join(group_info, by = c("Sample2" = "SampleID")) %>%rename(Time2 = Time) %>% # 使用时间 (Time) 替换基因 (Gene)filter(Time1 == Time2, Distance != 0) %>% # 只保留组内非零距离mutate(Stability = 1 - Distance)return(long_data)
}stability_long <- calculate_stability(bray_matrix, group_info)# --- 2. 转换距离矩阵为全样本长格式,并引入分组信息 ---
convert_to_long <- function(matrix_data, group_info) {long_data <- matrix_data %>%as.matrix() %>%as.table() %>%as.data.frame() %>%setNames(c("Sample1", "Sample2", "Distance")) %>%left_join(group_info, by = c("Sample1" = "SampleID")) %>%rename(Time1 = Time) %>% # 使用时间 (Time) 替换基因 (Gene)left_join(group_info, by = c("Sample2" = "SampleID")) %>%rename(Time2 = Time) %>% # 使用时间 (Time) 替换基因 (Gene)filter(Distance != 0) # 移除自己与自己的比较return(long_data)
}bray_long <- convert_to_long(bray_matrix, group_info)
jaccard_long <- convert_to_long(jaccard_matrix, group_info)# --- 3. 贝叶斯Beta回归分析函数 ---
perform_bayesian_beta_regression <- function(data, response, predictors) {formula <- as.formula(paste(response, "~", paste(predictors, collapse = "+")))# 使用 brms 进行贝叶斯Beta回归model <- brm(formula, data = data, family = "beta", # 指定 Beta 分布prior = c(prior(normal(0, 5), class = "b"), # 为回归系数设置先验prior(student_t(3, 0, 10), class = "Intercept") # 为截距设置先验),chains = 4, # 链条数量iter = 2000, # 迭代次数warmup = 1000 # 热身迭代次数)# 提取回归结果result <- summary(model)$fixed# 创建结果表result_table <- data.frame(Variable = rownames(result),Beta = result[, "Estimate"], # 回归系数SE = result[, "Est.Error"], # 标准误差`2.5%` = result[, "l-95% CI"], # 2.5% 置信区间下限`97.5%` = result[, "u-95% CI"], # 97.5% 置信区间上限Rhat = result[, "Rhat"], # Gelman-Rubin 收敛诊断Bulk_ESS = result[, "Bulk_ESS"], # 样本有效大小 (Bulk ESS)Tail_ESS = result[, "Tail_ESS"] # 尾部有效大小 (Tail ESS))return(result_table)
}# --- 4. 进行回归分析 ---
# (1) 稳定性回归分析
stability_results <- perform_bayesian_beta_regression(stability_long, "Stability", c("Time1"))# (2) Bray–Curtis 距离回归分析
bray_results <- perform_bayesian_beta_regression(bray_long, "Distance", c("Time1"))# (3) Jaccard 距离回归分析
jaccard_results <- perform_bayesian_beta_regression(jaccard_long, "Distance", c("Time1"))# --- 5. 合并结果表 ---
final_table <- bind_rows(mutate(stability_results, Metric = "Stability (1 - Bray–Curtis, Within Group)"),mutate(bray_results, Metric = "Bray–Curtis distance"),mutate(jaccard_results, Metric = "Jaccard Index distance")
) %>%select(Metric, everything())# 打印结果
print(final_table)write.table(final_table, "Bayesian_Beta_Regression_Time.txt", row.names = TRUE, col.names = TRUE, sep = "\t", quote = FALSE)######################################################
# --- 6. 根据 Time 计算均值和标准差 ---
calculate_group_stats <- function(data) {stats <- data %>%group_by(Time1) %>% # 使用时间 (Time1) 分组summarise(Mean = mean(Stability, na.rm = TRUE),SD = sd(Stability, na.rm = TRUE))return(stats)
}group_stats <- calculate_group_stats(stability_long)# --- 7. PERMANOVA 检验 ---
library(vegan) # 加载 vegan 包用于 PERMANOVAperform_permanova <- function(data, response, grouping_var, n_permutations = 999) {# 使用距离矩阵进行 PERMANOVA 检验distance_matrix <- dist(data[[response]]) # 计算距离矩阵permanova_result <- adonis2(distance_matrix ~ data[[grouping_var]], permutations = n_permutations)return(permanova_result)
}# 执行 PERMANOVA 检验
permanova_result <- perform_permanova(stability_long, "Stability", "Time1", n_permutations = 999)# --- 8. 保存均值和标准差到文件 ---
write.table(group_stats, "Stability_Stats_Time.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)# --- 9. 保存 PERMANOVA 检验结果到文件 ---
write.table(permanova_result, "permanova_result_time.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)# 提取回归结果并查看列名
result <- summary(model)$fixed
print(colnames(result)) # 查看返回结果的列名