欢迎关注我的CSDN:https://spike.blog.csdn.net/
本文地址:https://spike.blog.csdn.net/article/details/134884889
在蛋白质复合物中,通过链间距离,可以计算出在接触面的残基与接触面的面积,使用 BioPython 库 与 SASA (Solvent Accessible Surface Area,溶剂可及表面积) 库。SASA 计算主要以 水分子 是否通过为基础,水分子的半径是1.4 A,直径是 2.8 A,所以以 2.8 A 的距离,判断链间是否连接。
相关函数:
- 获取 蛋白质复合物 的链列表,即
get_chain_ids()
- 获取 残基-原子 字典,注意原子名称与 PDB 的名称可能不同,即
get_atom_list()
- 获取 全原子 列表,即
get_all_atoms()
- 判断是否原子之间是否相连,即
is_contact()
- 获取全部的接触面,原子计算残基维度,即
get_contacts()
- 打印 ChimeraX 的显示命令,即
get_chimerax_cmd()
- 端到端的计算接触残基,即
cal_chain_interface_residue()
- 计算 SASA 面积,即
cal_sasa()
- 获取复合物多链的子结构,即
get_sub_structure()
- 端到端的计算接触面积,即
cal_chain_interface_area()
以 PDB 8j18 为例,计算链 A 与 链 R 的接触残基与面积:
源码如下:
#!/usr/bin/env python
# -- coding: utf-8 --
"""
Copyright (c) 2022. All rights reserved.
Created by C. L. Wang on 2023/12/8
"""import osfrom Bio.PDB import NeighborSearch
from Bio.PDB import PDBParser, Selection
from Bio.PDB.Chain import Chain
from Bio.PDB.Model import Model
from Bio.PDB.Structure import Structurefrom root_dir import DATA_DIRclass MainComplexChainDist(object):"""复合物,多链距离计算"""def __init__(self):pass@staticmethoddef get_chain_ids(structure):chain_ids = []for chain in structure.get_chains():chain_ids.append(chain.id)return chain_ids@staticmethoddef get_atom_list(structure, chains):"""获取 atom list 列表注意: 输出的 atom 名字与 PDB 文件中的 atom 名字略有差异,例如8jtl 第1个,PRO: CG 对应 OG1,CD 对应 CG28jtl 第2个,VAL: CG1 对应 CD1,CG2 对应 CD2"""output = dict()for chain in structure:if chain.id in chains:for residue in chain.get_residues():het_flag, res_seq, i_code = residue.get_id()the_id = (chain.id + "_" + str(res_seq) + "_" + i_code).strip()for atom in residue.get_unpacked_list():if het_flag == ' ':if the_id in output:output[the_id].append(atom)else:output[the_id] = [atom]return output# Filter out all the atoms from the chain,residue map given by residue_map@staticmethoddef get_all_atoms(residue_map):all_atoms_out = []for residue in residue_map:for atom in residue_map[residue]:all_atoms_out.append(atom)# Set the b-factor to zero for coloring by contactsatom.set_bfactor(0.0)return all_atoms_out@staticmethoddef is_contact(res_1, other_atoms, cutoff):for atom in res_1:ns = NeighborSearch(other_atoms)center = atom.get_coord()neighbors = ns.search(center, cutoff) # 5.0 for distance in angstromresidue_list = Selection.unfold_entities(neighbors, 'R') # R for residuesif len(residue_list) > 0:return Truereturn False@classmethoddef get_contacts(cls, struc, all_atoms, verbose, cutoff):progress = 0contacts = []for residue in struc:progress += 1# if len(verbose) > 0:# print(verbose, progress, "out of", len(struc))atom_list = struc[residue]outcome = cls.is_contact(atom_list, all_atoms, cutoff)if outcome:contacts.append(residue)return contacts@staticmethoddef get_chimerax_cmd(contacts, chain_id, pdb_id="2"):"""计算命令"""tmp_list = []for item in contacts:req_id = int(item.split("_")[1])tmp_list.append(req_id)contacts_gap = []prev, post = -1, -1for i in tmp_list:if prev == -1:prev = post = icontinueif i - post == 1:post = ielse:contacts_gap.append(f"#{pdb_id}/{chain_id}:{prev}-{post}")prev = post = icmd = "select " + " ".join(contacts_gap)return cmd@classmethoddef cal_chain_interface_residue(cls, structure, chain1, chain2, cutoff=7.5):"""计算复合物结构的链内距离"""chain_ids = cls.get_chain_ids(structure)# ['A', 'B', 'G', 'R', 'S']print(f"[Info] chain_ids: {chain_ids}")assert chain1 in chain_ids and chain2 in chain_ids# C 表示 chain,即将 structure 展开 chain 的形式# 同时支持 A, R, C, M, Sstructure_c = Selection.unfold_entities(structure, target_level="C")atom_dict1 = cls.get_atom_list(structure_c, chains=[chain1])atom_dict2 = cls.get_atom_list(structure_c, chains=[chain2])print(f"[Info] res_num: {len(atom_dict1.keys())}")all_atoms_1 = cls.get_all_atoms(atom_dict1)all_atoms_2 = cls.get_all_atoms(atom_dict2)cutoff = cutoffcontacts_1 = cls.get_contacts(atom_dict1, all_atoms_2, "First molecule, residue ", cutoff)contacts_2 = cls.get_contacts(atom_dict2, all_atoms_1, "Second molecule, residue ", cutoff)print(f"[Info] contacts_1: {len(contacts_1)}")print(f"[Info] contacts_2: {len(contacts_2)}")return contacts_1, contacts_2@staticmethoddef cal_sasa(structure):"""计算单体的表面积"""import freesasastructure = freesasa.structureFromBioPDB(structure)result = freesasa.calc(structure)# classArea = freesasa.classifyResults(result, structure)return result.totalArea()@staticmethoddef get_sub_structure(structure, chain_list):"""获取 PDB 结构的子结构"""new_structure = Structure(0)new_model = Model(0)# 遍历模型中的所有链for chain in structure[0]:# 获取链的IDtmp_chain_id = chain.get_id()if tmp_chain_id not in chain_list:continue# 创建一个新的结构对象,只包含当前链new_chain = Chain(tmp_chain_id)new_chain.child_list = chain.child_listnew_model.add(new_chain)new_structure.add(new_model)return new_structure@classmethoddef cal_chain_interface_area(cls, structure, chain1, chain2):sub_all_structure = cls.get_sub_structure(structure, [chain1, chain2])sub1_structure = cls.get_sub_structure(structure, [chain1])sub2_structure = cls.get_sub_structure(structure, [chain2])v1_area = cls.cal_sasa(sub_all_structure)v2_area = cls.cal_sasa(sub1_structure) + cls.cal_sasa(sub2_structure)inter_area = max(int((v2_area - v1_area) // 2), 0)return inter_areadef process(self, input_path, chain1, chain2):"""核心逻辑"""print(f"[Info] input_path: {input_path}")print(f"[Info] chain1: {chain1}, chain2: {chain2}")chain1 = chain1.upper()chain2 = chain2.upper()parser = PDBParser(QUIET=True)structure = parser.get_structure("def", input_path)contacts_1, contacts_2 = self.cal_chain_interface_residue(structure, chain1, chain2, cutoff=7.5)cmd1 = self.get_chimerax_cmd(contacts_1, chain1)cmd2 = self.get_chimerax_cmd(contacts_2, chain2)print(f"[Info] chimerax: {cmd1}")print(f"[Info] chimerax: {cmd2}")inter_area = self.cal_chain_interface_area(structure, chain1, chain2)print(f"[Info] inter_area: {inter_area}")def main():ccd = MainComplexChainDist()# input_path = os.path.join(DATA_DIR, "templates", "8p3l.pdb")# ccd.process(input_path, "A", "D") # 2562# ccd.process(input_path, "A", "J") # 490input_path = os.path.join(DATA_DIR, "templates", "8j18.pdb")# ccd.process(input_path, "R", "S") # 0ccd.process(input_path, "A", "R") # 1114if __name__ == '__main__':main()