Source code for jade.antibody.cdr_data.CDRDataTypes

import copy

#PyIgClassify
from jade.antibody.CDRClusterer import *
from jade.basic.sequence import fasta

#Modules
from jade.antibody.cdr_data.CDRData import *
from jade.basic.sequence.ClustalRunner import *
from jade.basic.structure.BioPose import *
from jade.basic.structure.Structure import AntibodyStructure

class CDRLengthData(CDRData):
    """
    CDR data set registered under the name "length".

    Native values are the per-CDR lengths reported by CDRClusterer for the
    native PDB, stored as ints.
    """

    def __init__(self, native_path, is_camelid = False):
        """
        :param native_path: path to the native PDB; forwarded to CDRData.
        :param is_camelid: forwarded unchanged to CDRData.
        """
        CDRData.__init__(self, "length", native_path, is_camelid)

    def add_data(self, strategy, con):
        """
        Load the "length" values for a strategy.

        Delegates to ``_get_add_data`` (not defined in this class).

        :param strategy: strategy identifier passed through to ``_get_add_data``.
        :param con: passed through to ``_get_add_data``
            (presumably a database connection -- confirm against CDRData).
        """
        self._get_add_data(strategy, con, "length")

    def _setup_native_data(self, pdb_path):
        """Populate ``self.native_data`` from pdb_path; do nothing if it is empty."""
        if not pdb_path:
            return None
        self._set_native_data_from_biopose(pdb_path)

    def _set_native_data_from_biopose(self, pdb_path):
        """
        Build the native CDRDataInfo by asking CDRClusterer for each CDR length.

        :param pdb_path: path of the native structure, loaded through BioPose.
        """
        native_data = CDRDataInfo(self.name, "native", pdb_path)
        pose = BioPose(pdb_path)
        clusterer = CDRClusterer(pose)
        for cdr in self.cdrs:
            # get_length's return is coerced to int before it is stored.
            length = int(clusterer.get_length(cdr))
            native_data.set_value(cdr, length)
        self.native_data = native_data
class CDRClusterData(CDRData):
    """
    CDR data set registered under the name "cluster".

    Native values are the first element of what
    ``CDRClusterer.get_fullcluster`` returns for each CDR.
    """

    def __init__(self, native_path, is_camelid = False):
        """Register this data set as "cluster"; arguments go straight to CDRData."""
        CDRData.__init__(self, "cluster", native_path, is_camelid)

    def add_data(self, strategy, con):
        """Load the "fullcluster" values for ``strategy`` via ``_get_add_data``."""
        self._get_add_data(strategy, con, "fullcluster")

    def _setup_native_data(self, pdb_path):
        """Fill in the native data when a PDB path was supplied; always returns None."""
        if pdb_path:
            self._set_native_data_from_biopose(pdb_path)
        return None

    def _set_native_data_from_biopose(self, pdb_path):
        """Store the native cluster of every CDR in ``self.native_data``."""
        info = CDRDataInfo(self.name, "native", pdb_path)
        native_pose = BioPose(pdb_path)
        cdr_clusterer = CDRClusterer(native_pose)
        for cdr_name in self.cdrs:
            # The dihedral list is cleared before each CDR is clustered.
            cdr_clusterer.dihedrals = []
            full_cluster = cdr_clusterer.get_fullcluster(cdr_name)
            info.set_value(cdr_name, full_cluster[0])
        self.native_data = info
class CDRSequenceData(CDRData):
    """
    CDR data set registered under the name "sequence".

    Native values are the per-CDR sequences returned by
    ``AntibodyStructure.get_cdr_seq`` for the native pose.
    """

    def __init__(self, native_path, is_camelid = False):
        """Register this data set as "sequence"; arguments go straight to CDRData."""
        CDRData.__init__(self, "sequence", native_path, is_camelid)

    def add_data(self, strategy, con):
        """Load the "sequence" values for ``strategy`` via ``_get_add_data``."""
        self._get_add_data(strategy, con, "sequence")

    def _setup_native_data(self, pdb_path):
        """Fill in the native data when a PDB path was supplied; always returns None."""
        if pdb_path:
            self._set_native_data_from_biopose(pdb_path)
        return None

    def _set_native_data_from_biopose(self, pdb_path):
        """Store the native sequence of every CDR in ``self.native_data``."""
        info = CDRDataInfo(self.name, "native", pdb_path)
        native_pose = BioPose(pdb_path)
        antibody = AntibodyStructure()
        # NOTE(review): the clusterer is constructed but never used here; the
        # call is kept because its constructor is not visible in this file.
        unused_clusterer = CDRClusterer(native_pose)
        for cdr_name in self.cdrs:
            info.set_value(cdr_name, antibody.get_cdr_seq(native_pose, cdr_name))
        self.native_data = info
class CDRdSASAData(CDRData):
    """
    CDR data set registered under the name "dSASA".

    Values are loaded from the "ab_ag_dSASA" field
    (presumably antibody-antigen buried surface area -- confirm against the schema).
    """

    def __init__(self, native_path, is_camelid = False):
        """Register this data set as "dSASA"; arguments go straight to CDRData."""
        CDRData.__init__(
            self,
            "dSASA",
            native_path,
            is_camelid,
        )

    def add_data(self, strategy, con):
        """Load the "ab_ag_dSASA" values for ``strategy`` via ``_get_add_data``."""
        self._get_add_data(
            strategy,
            con,
            "ab_ag_dSASA",
        )
class CDRAlignedSequenceData(CDRSequenceData):
    """
    Uses Clustal Omega to align sequences from each database.

    Every call to add_data aligns the sequences of that strategy (per CDR) and
    overwrites the values in ``all_data`` / ``native_data`` with the aligned
    strings. The unaligned values are kept in ``original_seq_data`` and
    ``old_native_data`` so they can be re-aligned across strategies by
    get_concatonated_map.
    """

    def __init__(self, individual_clustal_outdir, combined_clustal_outdir, native_path, is_camelid = False):
        """
        :param individual_clustal_outdir: directory for per-strategy FASTA/alignment files.
        :param combined_clustal_outdir: directory for the cross-strategy FASTA/alignment files.
        :param native_path: forwarded to CDRSequenceData.
        :param is_camelid: forwarded to CDRSequenceData.
        """
        CDRSequenceData.__init__(self, native_path, is_camelid)
        self.name = "aligned_sequence"
        self.ind_clustal_outdir = individual_clustal_outdir
        self.com_clustal_outdir = combined_clustal_outdir
        self.original_seq_data = defaultdict()

        # Cached result of get_concatonated_map and the arguments it was built with.
        self.concatonated_data = None
        self._concatonated_key = None

        # Always defined: the FASTA writers test this attribute even when no
        # native structure was given.
        self.old_native_data = None
        if self.native_data:
            self.old_native_data = copy.deepcopy(self.native_data)

    def add_data(self, strategy, con):
        """
        Load the sequences of a strategy, keep an unaligned copy, then align them.

        Invalidates any cached concatenated map.
        """
        CDRSequenceData.add_data(self, strategy, con)
        self.original_seq_data[strategy] = copy.deepcopy(self.all_data[strategy])
        self._run_clustal_set_data(strategy)
        self.concatonated_data = None
        self._concatonated_key = None

    def get_concatonated_map(self, only_cdr = None, decoy_list = None, use_saved_data = True):
        """
        Align the sequences of all loaded strategies together, one alignment per CDR.

        :param only_cdr: if given, only this CDR is aligned and the result is keyed
            by ``(aligned_sequence, decoy)`` tuples instead of by decoy.
        :param decoy_list: if given, only decoys contained in it are included.
        :param use_saved_data: return the cached map when it was built with the
            same ``only_cdr`` / ``decoy_list``.
        :return: mapping of decoy (or tuple, see above) to CDRDataInfo holding the
            aligned sequences. A "native" entry is present when the alignment
            file contains a native row.
        """
        cache_key = (only_cdr, tuple(decoy_list) if decoy_list else None)
        if use_saved_data and self.concatonated_data and self._concatonated_key == cache_key:
            return self.concatonated_data

        # Intermediate map is keyed by decoy name: the FASTA writer and the
        # Clustal parser both look entries up by the name written to the file.
        selected = defaultdict()
        for strategy in self.original_seq_data:
            for decoy in self.original_seq_data[strategy]:
                if decoy_list and decoy not in decoy_list:
                    continue
                selected[decoy] = self.original_seq_data[strategy][decoy]

        self._ensure_dir(self.com_clustal_outdir)

        final_result_data = defaultdict()
        for cdr in self.cdrs:
            if only_cdr and cdr != only_cdr:
                continue

            fasta_path = self._make_fasta_for_concatonated(selected, cdr, "concatonated")
            clustal_path = self._run_clustal(fasta_path, self.com_clustal_outdir)
            new_data = self._parse_clustal_for_concatonated(selected, cdr, clustal_path)
            print("Created Clustal file for reference: "+clustal_path)

            for decoy in new_data:
                if only_cdr:
                    cdr_decoy = (new_data[decoy].get_value_for_cdr(cdr), decoy)
                else:
                    cdr_decoy = decoy

                if cdr_decoy not in final_result_data:
                    final_result_data[cdr_decoy] = new_data[decoy]
                else:
                    # set_value is the setter used for every other write in this file.
                    final_result_data[cdr_decoy].set_value(cdr, new_data[decoy].get_value_for_cdr(cdr))

        self.concatonated_data = final_result_data
        self._concatonated_key = cache_key
        return final_result_data

    def _ensure_dir(self, path):
        """Create the directory ``path`` if it does not exist yet."""
        if not os.path.exists(path):
            os.makedirs(path)

    def _run_clustal(self, fasta_path, outdir):
        """
        Run ClustalRunner on fasta_path and return the path of the .aln file.

        The alignment is named after the FASTA file with its last extension
        replaced by ".aln" and is written into outdir.
        """
        clustal_outname = ".".join(os.path.basename(fasta_path).split(".")[0:-1])+".aln"
        runner = ClustalRunner(fasta_path)
        runner.set_extra_options("--force")
        runner.set_hard_wrap(500)
        runner.output_alignment(outdir, clustal_outname)
        return outdir+"/"+clustal_outname

    def _run_clustal_set_data(self, strategy):
        """Align every CDR of one strategy and store the aligned sequences."""
        self._ensure_dir(self.ind_clustal_outdir)

        for cdr in self.cdrs:
            print("working on: "+strategy+" "+cdr)
            fasta_path = self._make_fasta(cdr, strategy)
            clustal_path = self._run_clustal(fasta_path, self.ind_clustal_outdir)
            self._parse_clustal_set_data(cdr, strategy, clustal_path)
            print("Created Clustal file for reference: "+clustal_path)

    def _write_fasta(self, outpath, cdr, records):
        """
        Write the native sequence (if any) followed by one entry per record.

        :param outpath: FASTA file to create.
        :param cdr: CDR whose sequence is written.
        :param records: iterable of ``(decoy, CDRDataInfo)`` pairs.
        :return: outpath
        """
        # The with-block closes the file even when sys.exit fires mid-loop.
        with open(outpath, 'w') as outfile:
            if self.old_native_data:
                seq = self.old_native_data.get_value_for_cdr(cdr)
                fasta.write_fasta(seq, "native", outfile)

            for decoy, data in records:
                if not isinstance(data, CDRDataInfo):
                    # Exit with a message (non-zero status) instead of a silent exit.
                    sys.exit("Unexpected data type for decoy: "+str(decoy))
                seq = data.get_value_for_cdr(cdr)
                fasta.write_fasta(seq, decoy, outfile)
        return outpath

    def _make_fasta_for_concatonated(self, concatonated_data, cdr, name):
        """
        Makes Fasta of the unaligned sequences of all selected decoys, returns path
        """
        outpath = self.com_clustal_outdir+"/list_"+name+"_"+cdr+".fasta"
        records = (
            (decoy, self.original_seq_data[concatonated_data[decoy].strategy][decoy])
            for decoy in concatonated_data
        )
        return self._write_fasta(outpath, cdr, records)

    def _make_fasta(self, cdr, strategy):
        """
        Makes Fasta for clustal, returns path
        """
        outpath = self.ind_clustal_outdir+"/all_"+strategy+"_"+cdr+".fasta"
        strategy_data = self.original_seq_data[strategy]
        records = ((decoy, strategy_data[decoy]) for decoy in strategy_data)
        return self._write_fasta(outpath, cdr, records)

    def _read_clustal_rows(self, clustal_path):
        """
        Return the ``(name, aligned_sequence)`` pairs of a Clustal alignment file.

        The first two lines are skipped and lines with fewer than two
        whitespace-separated fields are ignored. Exits if the file is missing.
        """
        if not os.path.exists(clustal_path):
            sys.exit("clustal path not good")

        rows = []
        with open(clustal_path, 'r') as infile:
            infile.readline()
            infile.readline()
            for line in infile:
                fields = line.split()
                if len(fields) < 2:
                    continue
                rows.append((fields[0], fields[1]))
        return rows

    def _parse_clustal_for_concatonated(self, concatonated_data, cdr, clustal_path):
        """
        Build new CDRDataInfo objects from a combined alignment.

        Rows whose name is neither "native" nor a key of concatonated_data
        are skipped; this is also what drops a trailing conservation line.

        :return: mapping of decoy name to CDRDataInfo holding the aligned value for cdr.
        """
        new_data = defaultdict()
        for decoy, aligned_seq in self._read_clustal_rows(clustal_path):
            if decoy == "native":
                strategy = "native"
            elif decoy in concatonated_data:
                strategy = concatonated_data[decoy].strategy
            else:
                continue

            if decoy not in new_data:
                new_data[decoy] = CDRDataInfo(self.name, strategy, decoy)
            new_data[decoy].set_value(cdr, aligned_seq)
        return new_data

    def _parse_clustal_set_data(self, cdr, strategy, clustal_path):
        """
        Overwrite the stored sequences of one strategy with their aligned form.

        A "native" row updates ``self.native_data``; rows naming a decoy of the
        strategy update ``self.all_data[strategy]``; anything else is skipped.
        """
        for decoy, aligned_seq in self._read_clustal_rows(clustal_path):
            if decoy == "native":
                self.native_data.set_value(cdr, aligned_seq)

            if decoy not in self.original_seq_data[strategy]:
                continue

            self.all_data[strategy][decoy].set_value(cdr, aligned_seq)