Module CLI.load_files
Expand source code
from typing import List, TextIO
import numpy as np
import sys
if sys.version_info < (3, 9):
# importlib.resources either doesn't exist or lacks the files()
# function, so use the PyPI version:
import importlib_resources
else:
# importlib.resources has files(), so use that:
import importlib.resources as importlib_resources
# Resource root of the "CLI" package, resolved through importlib resources.
pkg = importlib_resources.files("CLI")
# Directory inside the package that holds the latent space files
# (read by load_family, get_ls_list and is_pf).
lspath = pkg / 'Latent_spaces'
# Location of the packaged "seq_lengths.csv"; not referenced by the code visible here.
s_length = pkg / "seq_lengths.csv"
def load_family(fname: str) -> np.ndarray:
    """ Load data from protein family name

    The name is first resolved inside the packaged latent space directory
    (``lspath``). If that file cannot be parsed, ``fname`` is handed to
    ``load_ls_file`` and treated as the path of a user-supplied file.

    Parameters
    ----------
    fname : str
        Name of protein family

    Returns
    -------
    numpy.ndarray
        The family's latent space data

    Notes
    -----
    Only ``ValueError`` is handled here; any other error raised by
    ``numpy.loadtxt`` for the packaged path propagates to the caller.
    """
    try:
        return np.loadtxt(lspath / fname)
    # if a file with that name exists but isn't valid, treat it as a new filename
    except ValueError:
        # The fallback result has to be returned explicitly; without the
        # ``return`` the caller received None instead of the loaded array.
        return load_ls_file(fname)
def load_ls_file(fname: str) -> np.ndarray:
    """ Load data from new latent space file

    Parameters
    ----------
    fname : str
        Name of file containing latent space data

    Returns
    -------
    numpy.ndarray
        Latent space data from file

    Raises
    ------
    SystemExit
        With exit status 2 if the file content cannot be parsed as numbers,
        or carrying the I/O error if the file doesn't exist or can't be read.
    """
    # try to read the file
    try:
        return np.loadtxt(fname)
    # if it isn't a valid latent space, give an error and quit
    except ValueError:
        # f-string instead of ``+`` so a path-like ``fname`` cannot trigger a
        # TypeError while the error is being reported.
        print(f"Invalid file: {fname}")
        print("Files should contain 30 floats, each float in a separate line.")
        # sys.exit rather than the bare ``exit`` helper: the latter is only
        # injected by the ``site`` module and is meant for interactive use.
        sys.exit(2)
    # if the file doesn't exist or can't be read
    except IOError as err:
        sys.exit(err)
def load_sequence(fname: str) -> str:
    """ Load sequence from file

    Parameters
    ----------
    fname : str
        Name of a file containing a new sequence

    Returns
    -------
    str
        New sequence (the complete, unmodified content of the file)

    Raises
    ------
    SystemExit
        Carrying the I/O error if the file doesn't exist or can't be read.
    """
    try:
        # Open read-only ("r", not "r+") so that files without write
        # permission can still be loaded; the context manager closes the
        # handle, which the previous implementation left open.
        with open(fname, "r") as seq_file:
            return seq_file.read()
    # if the file doesn't exist or can't be read
    except IOError as err:
        # sys.exit rather than the ``site``-provided ``exit`` helper, which is
        # intended for interactive sessions and may be unavailable.
        sys.exit(err)
def get_ls_list() -> List[str]:
    """ List the entries of the packaged latent space directory

    Returns
    -------
    List[str]
        Name of every entry found directly inside ``lspath``, in the order
        produced by ``lspath.iterdir()``
    """
    return [entry.name for entry in lspath.iterdir()]
def is_pf(fname: str) -> bool:
    """ Tell whether a name belongs to a known protein family

    Parameters
    ----------
    fname : str
        Candidate family name, without the '.txt' extension

    Returns
    -------
    bool
        True if ``fname + '.txt'`` is a file inside ``lspath``, False otherwise
    """
    candidate = lspath / (fname + '.txt')
    return candidate.is_file()
# Names of the entries in the packaged latent space directory, collected once
# when this module is imported.
latent_space_list: List[str] = get_ls_list()
Functions
def get_ls_list() ‑> List[str]-
Returns
List[str]- List of protein family filenames
Expand source code
def get_ls_list() -> List[str]: """ Returns ------- List[str] List of protein family filenames """ ls_list: List[str] = [] for f in lspath.iterdir(): ls_list.append(f.name) return ls_list def is_pf(fname: str) ‑> bool-
Check if filename is the name of a known protein family
Parameters
fname:str- Filename
Returns
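bool- True if a file named fname + '.txt' exists in the packaged latent space directory, False otherwise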
Expand source code
def is_pf(fname: str) -> bool: """ Check if filename is the name of a known protein family Parameters ---------- fname : str Filename Returns ------- bool """ if (lspath / (fname + '.txt')).is_file(): return True return False def load_family(fname: str) ‑> numpy.ndarray-
Load data from protein family name
Parameters
fname:str- Name of protein family
Returns
numpy.ndarray- The family's latent space data
Expand source code
def load_family(fname: str) -> np.ndarray: """ Load data from protein family name Parameters ---------- fname : str Name of protein family Returns ------- numpy.ndarray The family's latent space data """ try: return np.loadtxt(lspath / fname) # if a file with that name exists but isn't valid, treat it as a new filename except ValueError: load_ls_file(fname) def load_ls_file(fname: str) ‑> numpy.ndarray-
Load data from new latent space file
Parameters
fname:str- Name of file containing latent space data
Returns
numpy.ndarray- Latent space data from file
Expand source code
def load_ls_file(fname: str) -> np.ndarray: """ Load data from new latent space file Parameters ---------- fname : str Name of file containing latent space data Returns ------- numpy.ndarray Latent space data from file """ # try to read the file try: return np.loadtxt(fname) # if it isn't a valid latent space, give an error and quit except ValueError: print("Invalid file: " + fname) print("Files should contain 30 floats, each float in a separate line.") exit(2) # if the file doesn't exist or can't be read except IOError as err: exit(err) def load_sequence(fname: str) ‑> str-
Load sequence from file
Parameters
fname:str- Name of a file containing a new sequence
Returns
str- New sequence
Expand source code
def load_sequence(fname: str) -> str: """ Load sequence from file Parameters ---------- fname : str Name of a file containing a new sequence Returns ------- str New sequence """ try: seq_file: TextIO = open(fname, "r+") return seq_file.read() # if the file doesn't exist or can't be read except IOError as err: exit(err)