Module CLI.utils.data_loaders
Expand source code
from typing import List, Union, Dict
import numpy as np
from . import aa_letters
def seq_to_one_hot(sequence: str, aa_key: Dict[str, int]) -> np.ndarray:
    """
    One-hot encode a single sequence.

    Parameters
    ----------
    sequence : str
        Sequence to encode; each character is looked up in ``aa_key``.
    aa_key : Dict[str, int]
        Maps every allowed letter to its column index in the output.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sequence), len(aa_key))`` filled with zeros,
        except for a 1 at ``[j, aa_key[sequence[j]]]`` for every position
        ``j``. An empty ``sequence`` yields an array with zero rows.

    Raises
    ------
    SystemExit
        If ``sequence`` contains a letter that is not a key of ``aa_key``.
        The triggering ``KeyError`` is carried as the exit code, after
        "Invalid sequence letter" has been printed.
    """
    arr: np.ndarray = np.zeros((len(sequence), len(aa_key)))
    for j, c in enumerate(sequence):
        try:
            column = aa_key[c]
        except KeyError as err:
            print("Invalid sequence letter")
            # Raise SystemExit directly rather than calling the ``exit``
            # builtin: that helper is injected by the ``site`` module for
            # interactive sessions and is missing under ``python -S`` or in
            # frozen interpreters, where the call would be a NameError.
            raise SystemExit(err) from err
        arr[j, column] = 1
    return arr
def to_one_hot(seqlist: Union[str, List[str]], alphabet: List[str] = aa_letters) -> np.ndarray:
    """
    One-hot encode one sequence or a list of sequences.

    Parameters
    ----------
    seqlist : Union[str, List[str]]
        A single sequence, or a list of sequences. Because the per-sequence
        encodings are combined with ``np.stack``, a list must be non-empty
        and all of its sequences must have the same length; otherwise
        ``np.stack`` raises ``ValueError``.
    alphabet : List[str]
        Allowed letters; a letter's position in this list is its column
        index in the encoding. Defaults to ``aa_letters``.

    Returns
    -------
    numpy.ndarray
        For a single string, the 2-D result of ``seq_to_one_hot``. For a
        list, the per-sequence results stacked along a new leading axis.
    """
    aa_key: Dict[str, int] = {letter: index for index, letter in enumerate(alphabet)}
    # isinstance (not an exact type comparison) so str subclasses are encoded
    # as one sequence instead of being iterated character by character.
    if isinstance(seqlist, str):
        return seq_to_one_hot(seqlist, aa_key)
    return np.stack([seq_to_one_hot(prot, aa_key) for prot in seqlist])
Functions
def seq_to_one_hot(sequence: str, aa_key: Dict[str, int]) -> numpy.ndarray
Parameters
sequence : str
aa_key : Dict[str, int]
Returns
numpy.ndarray
Expand source code
def seq_to_one_hot(sequence: str, aa_key: Dict[str, int]) -> np.ndarray:
    """
    Parameters
    ----------
    sequence : str
    aa_key : Dict[str, int]
    Returns
    -------
    numpy.ndarray
    """
    arr: np.ndarray = np.zeros((len(sequence), len(aa_key)))
    j: int
    for j, c in enumerate(sequence):
        err: KeyError
        try:
            arr[j, aa_key[c]] = 1
        except KeyError as err:
            print("Invalid sequence letter")
            exit(err)
    return arr

def to_one_hot(seqlist: Union[str, List[str]], alphabet: List[str] = ['-', 'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']) -> numpy.ndarray
Parameters
seqlist : List[str]
alphabet : List[str]
Returns
numpy.ndarray
Expand source code
def to_one_hot(seqlist: Union[str, List[str]], alphabet: List[str] = aa_letters) -> np.ndarray:
    """
    Parameters
    ----------
    seqlist : List[str]
    alphabet : List[str]
    Returns
    -------
    numpy.ndarray
    """
    aa_key: Dict[str, int] = {l: i for i, l in enumerate(alphabet)}
    if type(seqlist) == str:
        return seq_to_one_hot(seqlist, aa_key)
    else:
        encoded_seqs: List[np.ndarray] = []
        prot: str
        for prot in seqlist:
            encoded_seqs.append(seq_to_one_hot(prot, aa_key))
        return np.stack(encoded_seqs)