Source code for jade.RAbD_BM.tools_features_db

import sys
import os
import sqlite3
import pandas
from collections import defaultdict

from jade.RAbD_BM.AnalysisInfo import AnalysisInfo
from jade.RAbD_BM.AnalysisInfo import NativeInfo

#Tools for the features database.


[docs]def get_cdr_cluster_df(db_path): """ Get a dataframe with typical cluster info in it, which was generated by the features reporter framework. :param db_con: sqlite3.con :rtype: pandas.DataFrame """ query="SELECT "\ "structures.input_tag," \ "cdr_clusters.CDR," \ "cdr_clusters.fullcluster," \ "cdr_clusters.length," \ "cdr_clusters.normDis_deg," \ "cdr_clusters.sequence "\ "FROM " \ "cdr_clusters," \ "structures "\ "WHERE " \ "cdr_clusters.struct_id = structures.struct_id " con = sqlite3.connect(db_path) df = pandas.read_sql_query(query, con) df.to_csv(os.path.basename(db_path)+".csv") con.close()
return df
[docs]def get_all_entries(df, pdbid, cdr): """ Get all entries of a given PDBID and CDR. :param df: pandas.DataFrame :rtype: pandas.DataFrame """
return df[(df['input_tag'].str.contains(pdbid)) & (df['CDR'] == cdr)]
[docs]def get_total_entries(df, pdbid, cdr): """ Get the total number of entries of the particular CDR and PDBID in the database :param df: pandas.DataFrame :rtype: int """
return len(get_all_entries(df, pdbid, cdr)) ############### Data ###############
[docs]def get_cluster(df, pdbid, cdr): """ Get the fullcluster from the dataframe for native or experimental data :param df: pandas.DataFrame :rtype: str """ d = df[(df['input_tag'].str.contains(pdbid) ) & (df['CDR'] == cdr)]
return str(d.iloc[0]['fullcluster'])
[docs]def get_length(df, pdbid, cdr): """ Get the length from the dataframe for native or experimental data :param df: pandas.DataFrame :rtype: int """ d = df[(df['input_tag'].str.contains(pdbid)) & (df['CDR'] == cdr)] #print d
return d.iloc[0]['length'] ############### Matching ###############
[docs]def get_length_matches(df, pdbid, cdr, length): """ Get a dataframe of the matching ("Recovered") rows (DataFrame). :param df: pandas.DataFrame :param length: int :rtype: pandas.DataFrame """
return df[(df['input_tag'].str.contains(pdbid)) & (df['CDR'] == cdr) & (df['length'] == length )]
[docs]def get_cluster_matches(df, pdbid, cdr, cluster): """ Get a dataframe of the matching ("Recovered") rows (DataFrame). :param df: pandas.DataFrame :rtype: pandas.DataFrame: """
return df[(df['input_tag'].str.contains(pdbid)) & (df['CDR'] == cdr) & (df['fullcluster'] == cluster )] ############### Recovery ###############
[docs]def get_length_recovery(df, pdbid, cdr, length): """ Get the number of matches in the df and pdbid to the cdr and length :param df: pandas.DataFrame :param length: int :rtype: int """
return len(get_length_matches(df, pdbid, cdr, length)['length'])
[docs]def get_cluster_recovery(df, pdbid, cdr, cluster): """ Get the number of matches in the df and pdbid to the cdr and cluster :param df: pandas.DataFrame :rtype: int """
return len(get_cluster_matches(df, pdbid, cdr, cluster)['fullcluster'])