from abpytools.core.formats.utils import get_protobuf_numbering_scheme, get_numbering_scheme_from_protobuf
from abpytools.core.chain import Chain
def _name_wrapper(func):
"""
Internal usage only counter decorator to format sequence names.
Args:
func:
Returns:
"""
def helper(*args):
helper.idi += 1
name = func(*args)
if isinstance(name, str):
name = name.replace("%%IDI%%", str(helper.idi))
else:
names = list()
for name_i in name:
names.append(name_i.replace("%%IDI%%", str(helper.idi)))
name = names
return name
helper.idi = 0
return helper
@_name_wrapper
def _chain_name_getter(antibody_object):
"""
Internal function that sets the name of Chain objects.
This is required because it is possible to work with a ChainCollection
where a Chain does not have a name. However, a name is
required internally to store data on disk.
Args:
antibody_object (Chain):
Returns:
"""
# if antibody does not have name, generate name:
# ID_chain_idi, where chain is heavy/light, idi is i = [1,..,N]
if len(antibody_object.name) > 0:
name = antibody_object.name
else:
name = f"ID_{antibody_object.chain}_%%IDI%%"
return name
@_name_wrapper
def _fab_name_getter(name_i, light_chain, heavy_chain):
"""
Internal function that sets the name of Fab objects
when they are stored on disk.
Args:
name_i (str):
light_chain (Chain):
heavy_chain (Chain):
Returns:
"""
# if antibody does not have name, generate name:
# ID_chain_idi, where chain is heavy/light, idi is i = [1,..,N]
if len(name_i) > 0:
names = name_i
else:
names = []
for chain in [light_chain, heavy_chain]:
if len(chain.name) > 0:
names.append(chain.name)
else:
names.append(f"ID_{chain.chain}_%%IDI%%")
return names
##############################################################################
# SAVERS
##############################################################################
[docs]def pb2_add_chain(chain_object, proto_parser):
"""
Populates a protobuf ProtoChain message from Chain object
and adds it to ChainCollectionProto
Args:
chain_object (Chain):
proto_parser (ChainCollectionProto):
Returns:
"""
proto_antibody = proto_parser.chains.add()
add_Chain_to_protobuf(chain_object, proto_antibody)
[docs]def add_Chain_to_protobuf(antibody_obj, proto_obj):
"""
Helper function to populate a ProtoChain message.
Args:
antibody_obj (Chain):
proto_obj (ChainProto):
Returns:
"""
proto_obj.name = _chain_name_getter(antibody_obj)
proto_obj.sequence = antibody_obj.sequence
proto_obj.numbering_scheme = get_protobuf_numbering_scheme(antibody_obj.numbering_scheme)
proto_obj.numbering.extend(antibody_obj.numbering)
proto_obj.chain_type = antibody_obj.chain
proto_obj.MW = antibody_obj.mw
for x in {**antibody_obj.ab_regions()[0], **antibody_obj.ab_regions()[1]}.items():
proto_obj_cdr = proto_obj.region.add()
proto_obj_cdr.region_name = x[0]
proto_obj_cdr.region_positions.extend(x[1])
proto_obj.pI = antibody_obj.pI
##############################################################################
# PARSERS
##############################################################################
[docs]def fasta_ChainCollection_parser(raw_fasta, numbering_scheme):
names = list()
sequences = list()
antibody_objects = list()
for line in raw_fasta:
if line.startswith(">"):
names.append(line.replace("\n", "")[1:])
# if line is empty skip line
elif line.isspace():
pass
else:
sequences.append(line.replace("\n", ""))
if len(names) != len(sequences):
raise ValueError("Error reading file: make sure it is FASTA format")
for name, sequence in zip(names, sequences):
antibody_objects.append(Chain(name=name, sequence=sequence,
numbering_scheme=numbering_scheme))
return antibody_objects
[docs]def pb2_FabCollection_parser(proto_parser):
from abpytools.core.fab import Fab
fab_objects = list()
for fab_i in proto_parser.fabs:
light_chain = pb2_Chain_parser(fab_i.light_chain)
heavy_chain = pb2_Chain_parser(fab_i.heavy_chain)
fab = Fab(light_chain=light_chain, heavy_chain=heavy_chain, name=fab_i.name)
fab_objects.append(fab)
return fab_objects
[docs]def pb2_ChainCollection_parser(proto_parser):
antibody_objects = list()
names = list()
for chain_i in proto_parser.chains:
antibody_i = pb2_Chain_parser(chain_i)
antibody_i._loading_status = 'Loaded'
names.append(chain_i.name)
antibody_objects.append(antibody_i)
return antibody_objects
[docs]def pb2_Chain_parser(proto_chain):
"""
Populate Chain object from protobuf file
Args:
proto_chain (ChainProto):
Returns:
"""
chain_obj = Chain(name=proto_chain.name, sequence=proto_chain.sequence)
if proto_chain.numbering_scheme:
chain_obj._numbering_scheme = get_numbering_scheme_from_protobuf(proto_chain.numbering_scheme)
if proto_chain.numbering:
# convert google.protobuf.pyext._message.RepeatedScalarContainer to list
chain_obj.numbering = list(proto_chain.numbering)
else:
chain_obj.numbering = chain_obj.ab_numbering()
if proto_chain.chain_type:
chain_obj._chain = proto_chain.chain_type
else:
chain_obj._chain = Chain.determine_chain_type(proto_chain.numbering)
if proto_chain.MW:
chain_obj.mw = proto_chain.MW
else:
chain_obj.mw = chain_obj.ab_molecular_weight()
if proto_chain.region:
regions = dict()
for region in proto_chain.region:
regions[region.region_name] = region.region_positions
chain_obj.CDR = regions
else:
chain_obj.CDR = chain_obj.ab_regions()
if proto_chain.pI:
chain_obj.pI = proto_chain.pI
else:
chain_obj.pI = chain_obj.ab_pi()
return chain_obj
[docs]def json_FabCollection_parser(raw_data):
from abpytools.core.fab import Fab
fab_objects = list()
ordered_names = raw_data.pop('ordered_names')
for key_i in ordered_names:
light_chain_i = json_Chain_parser(raw_data[key_i][0], raw_data[key_i][0]["name"])
heavy_chain_i = json_Chain_parser(raw_data[key_i][1], raw_data[key_i][1]["name"])
fab_i = Fab(light_chain=light_chain_i, heavy_chain=heavy_chain_i, name=key_i)
fab_objects.append(fab_i)
return fab_objects
[docs]def json_ChainCollection_parser(raw_data):
antibody_objects = list()
ordered_names = raw_data.pop('ordered_names')
for key_i in ordered_names:
chain_i = json_Chain_parser(raw_data[key_i], key_i)
antibody_objects.append(chain_i)
return antibody_objects
[docs]def json_Chain_parser(antibody_dict, name):
antibody_i = Chain(name=name, sequence=antibody_dict['sequence'])
antibody_i.numbering = antibody_dict['numbering']
antibody_i._chain = antibody_dict['chain']
antibody_i.mw = antibody_dict['MW']
antibody_i.CDR = antibody_dict["CDR"]
antibody_i._numbering_scheme = antibody_dict["numbering_scheme"]
antibody_i.pI = antibody_dict["pI"]
antibody_i._loading_status = 'Loaded'
return antibody_i