|
|
import os |
|
|
import re |
|
|
import pandas as pd |
|
|
import MDAnalysis as mda |
|
|
from MDAnalysis.analysis import pca, align, rms |
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
import seaborn as sns |
|
|
import warnings |
|
|
import argparse |
|
|
warnings.filterwarnings("ignore") |
|
|
|
|
|
|
|
|
def _scatter_to_png(frames, png_path):
    """Scatter PC1 vs PC2 of every DataFrame in *frames* on one new figure.

    The figure is written to *png_path*, then shown, then closed so that no
    pyplot state leaks into the next plot.
    """
    fig, ax = plt.subplots()
    for frame in frames:
        ax.scatter(frame['PC1'], frame['PC2'], marker='o')
    # Save BEFORE show: with an interactive backend a blocking show() closes
    # the figure, and a savefig issued afterwards writes an empty image.
    fig.savefig(png_path)
    plt.show()
    plt.close(fig)


def cal_PCA(md_pdb_path, ref_path, pred_pdb_path, n_components=2):
    """Project an MD ensemble and predicted ensembles onto the MD principal components.

    The MD ensemble is aligned to the reference structure on the N/CA/C atoms,
    a PCA is fitted on those atoms, and every predicted ensemble is aligned to
    the same reference and projected into the PC space fitted on the MD data.

    Args:
        md_pdb_path: Path of the (multi-model) PDB holding the MD ensemble.
            It is used both as topology and as trajectory.
        ref_path: Path of the reference PDB every ensemble is aligned to.
        pred_pdb_path: Mapping ``{label: pdb_path}`` of predicted ensembles.
        n_components: Number of principal components to keep. Must be at
            least 2 because PC1 and PC2 are plotted.

    Raises:
        ValueError: If ``n_components`` is smaller than 2.

    Side effects:
        * ``<md_dir>/<md_stem>_md_pca.csv`` / ``.png`` -- MD projection.
        * ``<pred_dir>/<pred_stem>_<label>_pca.csv`` / ``.png`` for every
          entry of ``pred_pdb_path``. The CSV holds the prediction only; the
          PNG shows the MD projection (first series) with the prediction
          drawn on top (second series).
        ``<stem>`` is the file name up to its first ``.``.
        Progress information is printed to stdout and each figure is shown
        via ``plt.show()``.
    """
    if n_components < 2:
        # Fail before the expensive alignment/PCA instead of with a KeyError
        # on 'PC2' when plotting.
        raise ValueError(
            f"n_components must be >= 2 to plot PC1 vs PC2, got {n_components}"
        )

    # One selection string for alignment, PCA fit and projection so the atom
    # sets can never drift apart.
    selection = 'name CA or name C or name N'
    columns = [f'PC{i + 1}' for i in range(n_components)]

    print("")
    print('filename=', os.path.basename(ref_path))

    u = mda.Universe(md_pdb_path, md_pdb_path)
    u_ref = mda.Universe(ref_path, ref_path)

    # Superimpose every MD frame on the reference; in_memory=True makes the
    # aligned coordinates the ones seen by the PCA below.
    align.AlignTraj(u, u_ref, select=selection, in_memory=True).run()

    # align=False: the trajectory has already been aligned above.
    pc = pca.PCA(
        u,
        select=selection,
        align=False,
        mean=None,
        n_components=n_components,
    ).run()

    backbone = u.select_atoms(selection)
    n_bb = len(backbone)
    print('There are {} backbone atoms in the analysis'.format(n_bb))

    # NOTE(review): newer MDAnalysis releases expose this as
    # pc.results.cumulated_variance -- confirm against the pinned version.
    for i in range(n_components):
        print(f"Cumulated variance {i+1}: {pc.cumulated_variance[i]:.3f}")

    transformed = pc.transform(backbone, n_components=n_components)
    print(transformed.shape)

    md_df = pd.DataFrame(transformed, columns=columns)

    output_dir = os.path.dirname(md_pdb_path)
    output_filename = os.path.basename(md_pdb_path).split('.')[0]
    md_df.to_csv(os.path.join(output_dir, f'{output_filename}_md_pca.csv'))
    _scatter_to_png(
        [md_df],
        os.path.join(output_dir, f'{output_filename}_md_pca.png'),
    )

    for k, v in pred_pdb_path.items():
        u_pred = mda.Universe(v, v)
        align.AlignTraj(u_pred, u_ref, select=selection, in_memory=True).run()

        pred_backbone = u_pred.select_atoms(selection)
        # Project with the PCA fitted on the MD data so both ensembles share
        # the same coordinate system.
        pred_transformed = pc.transform(pred_backbone, n_components=n_components)
        pred_df = pd.DataFrame(pred_transformed, columns=columns)

        output_dir = os.path.dirname(v)
        output_filename = os.path.basename(v).split('.')[0]
        pred_df.to_csv(os.path.join(output_dir, f'{output_filename}_{k}_pca.csv'))
        # Draw the MD projection as reference for EVERY predictor. Previously
        # it leaked into the first predictor's plot only, and only on
        # backends where show() does not close the figure.
        _scatter_to_png(
            [md_df, pred_df],
            os.path.join(output_dir, f'{output_filename}_{k}_pca.png'),
        )
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: run cal_PCA for every PDB file found in the
    # MD target directory, pairing it with the same-named file in the
    # reference directory and in each prediction directory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--pred_pdb_dir",
        type=str,
        default="./inference/test/pred_merge_results",
        help="directory holding the predicted ensembles (one PDB per target)",
    )
    parser.add_argument(
        "--target_dir",
        type=str,
        default="./inference/test/target_dir",
        help="directory holding the MD ensembles; its *.pdb files drive the run",
    )
    parser.add_argument(
        "--crystal_dir",
        type=str,
        default="./inference/test/crystal_dir",
        help="directory holding the reference structures used for alignment",
    )
    args = parser.parse_args()

    # label -> directory of that predictor's output files
    pred_pdb_path_org = {
        'P2DFlow': args.pred_pdb_dir,
    }
    md_pdb_path_org = args.target_dir
    ref_path_org = args.crystal_dir

    # sorted() gives a reproducible processing order; os.listdir() order is
    # arbitrary.
    for pdb_name in sorted(os.listdir(md_pdb_path_org)):
        # Match on the real extension. The former re.search('\.pdb', ...)
        # used an invalid escape in a non-raw string and also accepted names
        # that merely contain ".pdb" (e.g. "x.pdb.bak").
        if not pdb_name.endswith('.pdb'):
            continue

        # The same file name is expected in every directory.
        pred_pdb_path = {
            label: os.path.join(pred_dir, pdb_name)
            for label, pred_dir in pred_pdb_path_org.items()
        }
        md_pdb_path = os.path.join(md_pdb_path_org, pdb_name)
        ref_path = os.path.join(ref_path_org, pdb_name)

        cal_PCA(md_pdb_path, ref_path, pred_pdb_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|