Source code for abpytools.features.regions

from abpytools.core.chain_collection import ChainCollection
from abpytools.core.cache import Cache
import numpy as np


class ChainDomains(ChainCollection):
    """
    ChainCollection specialisation that splits every chain into its CDR and
    framework regions and reports the sequence and length of each region.

    CDR results are memoised in ``self._cache``; framework results are
    recomputed on every call.
    """

    # Region names used as keys into antibody.ab_regions() and into the
    # dictionaries returned by the *_sequences methods.
    _CDR_NAMES = ('CDR1', 'CDR2', 'CDR3')
    _FRAMEWORK_NAMES = ('FR1', 'FR2', 'FR3', 'FR4')

    def __init__(self, antibody_objects=None, path=None, verbose=True, show_progressbar=True, n_threads=10):
        """
        :param antibody_objects: chain objects forwarded to ChainCollection; when
                                 truthy they are used directly and ``path`` is ignored
        :param path: forwarded to ChainCollection.load_from_file when no
                     antibody_objects are given
        :param verbose: forwarded to ChainCollection.load_from_file
        :param show_progressbar: forwarded to ChainCollection.load_from_file
        :param n_threads: forwarded to ChainCollection.load_from_file
        """
        super().__init__(antibody_objects=antibody_objects)

        if antibody_objects:
            self.load()
        else:
            # No objects supplied: read them with load_from_file and run the
            # constructor again with the result, so that the branch above
            # is taken on the second pass.
            # NOTE(review): if load_from_file returns something falsy this
            # re-enters this branch again -- confirm that cannot happen.
            self.__init__(antibody_objects=ChainCollection.load_from_file(path=path,
                                                                          verbose=verbose,
                                                                          show_progressbar=show_progressbar,
                                                                          n_threads=n_threads))

        self._cache = Cache(max_cache_size=5)

    def cdr_lengths(self):
        """
        method to obtain cdr_lengths

        The matrix is computed once and then served from the cache.

        :return: m by n integer matrix with CDR lengths, where m is the number of
                 antibodies in ChainCollection and n is three, corresponding to
                 the three CDRs.
        """
        if 'cdr_lengths' not in self._cache:
            # Builtin int replaces the np.int alias, which was removed in
            # NumPy 1.24; the resulting dtype is the same.
            cdr_length_matrix = np.zeros((self.n_ab, 3), dtype=int)
            cdr_sequences = self.cdr_sequences()

            for m, antibody in enumerate(self.antibody_objects):
                for n, cdr in enumerate(self._CDR_NAMES):
                    cdr_length_matrix[m, n] = len(cdr_sequences[antibody.name][cdr])

            self._cache.update(key='cdr_lengths', data=cdr_length_matrix)

        return self._cache['cdr_lengths']

    def cdr_sequences(self):
        """
        method that returns sequences of each cdr

        The result is computed once and then served from the cache.

        :return: dictionary keyed by antibody name; each value is a dictionary
                 with keys 'CDR1', 'CDR2' and 'CDR3' containing a string with the
                 respective amino acid sequence
        """
        if 'cdr_sequences' not in self._cache:
            cdr_sequences = {}

            for antibody in self.antibody_objects:
                dict_i = {}
                for cdr in self._CDR_NAMES:
                    # index=0 selects the CDR entry of antibody.ab_regions()
                    self.sequence_splitter_helper(antibody=antibody,
                                                  region=cdr,
                                                  index=0,
                                                  dict_i=dict_i)
                cdr_sequences[antibody.name] = dict_i

            self._cache.update(key='cdr_sequences', data=cdr_sequences)

        return self._cache['cdr_sequences']

    def framework_length(self):
        """
        method to obtain framework lengths (not cached)

        :return: m by n integer matrix with framework lengths, where m is the
                 number of antibodies in ChainCollection and n is four,
                 corresponding to 'FR1', 'FR2', 'FR3' and 'FR4'.
        """
        # Builtin int replaces the np.int alias, which was removed in NumPy 1.24.
        framework_length_matrix = np.zeros((self.n_ab, 4), dtype=int)
        fr_sequences = self.framework_sequences()

        for m, antibody in enumerate(self.antibody_objects):
            for n, framework in enumerate(self._FRAMEWORK_NAMES):
                framework_length_matrix[m, n] = len(fr_sequences[antibody.name][framework])

        return framework_length_matrix

    def framework_sequences(self):
        """
        method that returns sequences of each framework region (not cached)

        :return: dictionary keyed by antibody name; each value is a dictionary
                 with keys 'FR1', 'FR2', 'FR3' and 'FR4' containing a string with
                 the respective amino acid sequence
        """
        framework_sequences = {}

        for antibody in self.antibody_objects:
            dict_i = {}
            for framework in self._FRAMEWORK_NAMES:
                # index=1 selects the framework entry of antibody.ab_regions()
                self.sequence_splitter_helper(antibody=antibody,
                                              region=framework,
                                              index=1,
                                              dict_i=dict_i)
            framework_sequences[antibody.name] = dict_i

        return framework_sequences

    @staticmethod
    def sequence_splitter_helper(antibody, region, index, dict_i):
        """
        Extract one region of an antibody sequence and store it in ``dict_i``.

        :param antibody: object exposing ``ab_regions()`` and ``sequence``
        :param region: region name used as key, e.g. 'CDR1' or 'FR2'
        :param index: position in the result of ``antibody.ab_regions()`` that
                      holds the mapping containing ``region``
        :param dict_i: dictionary updated in place with ``dict_i[region]`` set to
                       the region's sequence string
        :return: None
        """
        indices = antibody.ab_regions()[index][region]
        dict_i[region] = ''.join(antibody.sequence[i] for i in indices)