Source code for abpytools.analysis.analysis_helper_functions

import warnings
import _pickle as cPickle
from abpytools.home import Home
from ..utils.python_config import PythonConfig
import matplotlib.pyplot as plt

SUPPORTED_SUBSITUTION_MATRICES = ['BLOSUM45', 'BLOSUM62', 'BLOSUM80']


[docs]def load_alignment_algorithm(algorithm): if algorithm.lower() == 'needleman_wunsch': return needleman_wunsch else: raise ValueError("Unknown algorithm")
[docs]def load_substitution_matrix(substitution_matrix): abpytools_directory = Home().homedir if substitution_matrix in SUPPORTED_SUBSITUTION_MATRICES: with open('{}/data/{}.txt'.format(abpytools_directory, substitution_matrix), 'rb') as f: matrix = cPickle.load(f) else: raise ValueError("Unknown substitution matrix") return matrix
[docs]def needleman_wunsch(seq_1, seq_2, substitution_matrix, indel=-1): if indel >= 0: f = "Indel must be negative, setting indel to {}.".format(-indel) warnings.warn(f) indel = -indel # initialise matrix init_matrix = init_score_matrix(seq_1=seq_1, seq_2=seq_2, indel=indel) scores, traceback_matrix = calculate_scores(matrix=init_matrix, seq_1=seq_1, seq_2=seq_2, substitution_matrix=substitution_matrix, gap_penalty=indel) seq_2_aligned = traceback(traceback_matrix=traceback_matrix, seq_1=seq_1, seq_2=seq_2) return seq_2_aligned, scores[-1][-1]
[docs]def init_score_matrix(seq_1, seq_2, indel): """ - score matrix initialisation with two sequences - pure python, i.e. no numpy Example init_score_matrix('SEND', 'AND', -1): [[0, -1, -2], [-1, 0, 0], [-2, 0, 0], [-3, 0, 0]] """ init_matrix = [[x * indel] + [0] * len(seq_1) if x > 0 else list(range(0, (len(seq_1) + 1) * indel, indel)) for x in range(len(seq_2) + 1)] return init_matrix
[docs]def calculate_scores(matrix, seq_1, seq_2, substitution_matrix, gap_penalty): traceback_matrix = [['up'] + [''] * len(seq_1) if x > 0 else ['left'] * (len(seq_1) + 1) for x in range(len(seq_2) + 1)] traceback_matrix[0][0] = 'done' for i in range(1, len(matrix)): for j in range(1, len(matrix[i])): q_diag = matrix[i - 1][j - 1] + int(substitution_matrix[(seq_2[i - 1], seq_1[j - 1])]) q_up = matrix[i - 1][j] + gap_penalty q_left = matrix[i][j - 1] + gap_penalty results = [q_diag, q_up, q_left] matrix[i][j] = max(results) max_index = results.index(matrix[i][j]) if max_index == 0: traceback_matrix[i][j] = 'diag' elif max_index == 1: traceback_matrix[i][j] = 'up' else: traceback_matrix[i][j] = 'left' return matrix, traceback_matrix
[docs]def traceback(traceback_matrix, seq_1, seq_2): row = -1 column = -1 current = traceback_matrix[row][column] # iter_seq_1 = iter(seq_1[::-1]) # iter_seq_2 = iter(seq_2[::-1]) # seq_1 = seq_1[::-1] # seq_2 = seq_2[::-1] # aligned_seq_1 = list() aligned_seq_2 = list() while current != 'done': # is aligned if current == 'diag': # aligned_seq_1.append(next(iter_seq_1)) # aligned_seq_2.append(next(iter_seq_2)) # aligned_seq_1.append(seq_1[column]) aligned_seq_2.append(seq_2[row]) row -= 1 column -= 1 # leave a gap elif current == 'left': # aligned_seq_1.append(next(iter_seq_1)) # aligned_seq_1.append(seq_1[column]) aligned_seq_2.append('-') column -= 1 # leave a gap else: # aligned_seq_1.append(next(iter_seq_1)) # aligned_seq_1.append(seq_1[column]) aligned_seq_2.append('-') row -= 1 current = traceback_matrix[row][column] return ''.join(aligned_seq_2)[::-1]
[docs]def switch_interactive_mode(save=False): ipython_config = PythonConfig() if ipython_config.ipython_info == 'notebook' and save is False: if ipython_config.matplotlib_interactive is False: # turns on interactive mode plt.ion()