"""
Core databank class and system initialization function.
Imported by :mod:`fairmd.lipids.databankLibrary` by default.
Can be imported without additional libraries to scan Databank system file tree!
"""
import os
import sys
import typing
from collections.abc import MutableMapping, Sequence
import yaml
from fairmd.lipids import FMDL_SIMU_PATH
from fairmd.lipids.molecules import Lipid, Molecule, NonLipid, lipids_set, molecules_set
[docs]
class System(MutableMapping):
"""
Main Databank single object.
It is an extension of a dictionary with additional functionality.
"""
def __init__(self, data: dict | typing.Mapping) -> None:
"""
Initialize the container for storing simulation record.
:param data: README-type dictionary.
:raises TypeError: If `data` is neither a `dict` nor another mapping type.
:raises ValueError: If a molecule key in the "COMPOSITION" data does not
belong to the predefined set of lipids or molecules.
"""
self._store: dict = {}
if isinstance(data, dict):
self._store.update(data)
elif isinstance(data, typing.Mapping):
self._store.update(dict(data))
else:
expect_type_msg = "Expected dict or Mapping"
raise TypeError(expect_type_msg)
self._content = {}
for k, v in self["COMPOSITION"].items():
mol = None
if k in lipids_set:
mol = Lipid(k)
elif k in molecules_set:
mol = NonLipid(k)
else:
mol_not_found_msg = f"Molecule {k} is not in the set of lipids or molecules."
raise ValueError(mol_not_found_msg)
mol.register_mapping(v["MAPPING"])
self._content[k] = mol
def __getitem__(self, key: str): # noqa: ANN204
return self._store[key]
def __setitem__(self, key: str, value) -> None:
self._store[key] = value
def __delitem__(self, key: str) -> None:
del self._store[key]
def __iter__(self) -> typing.Iterator:
return iter(self._store)
def __len__(self) -> int:
return len(self._store)
@property
def readme(self) -> dict:
"""Get the README dictionary of the system in true dict format.
:return: dict-type README (dict)
"""
return self._store
@property
def content(self) -> dict[str, Molecule]:
"""Returns dictionary of molecule objects."""
return self._content
@property
def lipids(self) -> dict[str, Lipid]:
"""Returns dictionary of lipid molecule objects."""
return {k: v for k, v in self._content.items() if k in lipids_set}
@property
def n_lipids(self) -> int:
"""Returns total number of lipid molecules in the system."""
total = 0
for k, v in self["COMPOSITION"].items():
if k in lipids_set:
total += sum(v["COUNT"])
return total
[docs]
def membrane_composition(self, basis: typing.Literal["molar", "mass"] = "molar") -> dict[str, float]:
"""Return the composition of the membrane in system.
:param which: Type of composition to return. Options are:
- "molar": compute molar fraction
- "mass": compute mass fraction
:return: dictionary (universal molecule name -> value)
"""
if basis not in ["molar", "mass"]:
msg = "Basis must be 'molar' or 'mass'"
raise ValueError(msg)
comp: dict[str, float] = {}
for k, v in self["COMPOSITION"].items():
if k not in lipids_set:
continue
count = sum(v["COUNT"])
comp[k] = count
n_lipids = self.n_lipids
if basis == "molar":
for k in comp:
comp[k] /= n_lipids
else: # (basis == "mass")
total_mass = 0.0
for k in comp: # noqa: PLC0206 (modify dict while iterating)
mol: Lipid = self._content[k]
mw = mol.metadata["bioschema_properties"]["molecularWeight"]
comp[k] *= mw
total_mass += comp[k]
for k in comp:
comp[k] /= total_mass
return comp
[docs]
def get_hydration(self, basis: typing.Literal["number", "mass"] = "number") -> float:
"""Get system hydration."""
if basis not in ["number", "mass"]:
msg = "Basis must be 'molar' or 'mass'"
raise ValueError(msg)
if basis == "number":
if "SOL" not in self["COMPOSITION"]:
msg = "Cannot compute hydration for implicit water (system #{}).".format(self["ID"])
raise ValueError(msg)
hyval = self["COMPOSITION"]["SOL"]["COUNT"] / self.n_lipids
else: # basis == "mass"
msg = "Mass hydration is not implemented yet."
raise NotImplementedError(msg)
return hyval
def __repr__(self) -> str:
return f"System({self._store['ID']}): {self._store['path']}"
[docs]
class SystemsCollection(Sequence[System]):
    """Immutable collection of system dicts. Can be accessed by ID using loc()."""

    def __init__(self, iterable: typing.Sequence[System] = ()) -> None:
        """
        Build the collection and its ID lookup table.

        :param iterable: Systems to store. A snapshot is taken, so later changes
                         to the passed container do not affect the collection
                         (or invalidate its ID index).
        """
        self._data: tuple[System, ...] = tuple(iterable)
        # Maps system ID -> position in ``_data``; systems without an "ID" key
        # are reachable by position only. For duplicate IDs the last one wins.
        self._idx: dict = {
            system["ID"]: position
            for position, system in enumerate(self._data)
            if "ID" in system
        }

    def __getitem__(self, key):
        return self._data[key]

    def __len__(self) -> int:
        return len(self._data)

    def loc(self, sid: int) -> System:
        """Locate system by its ID.

        :param sid: System ID
        :return: System object with ID `sid`
        :raises KeyError: If no stored system has the ID `sid`.
        """
        return self._data[self._idx[sid]]
[docs]
class Databank:
    """
    Representation of all simulation in the NMR lipids databank.

    Systems are discovered under ``FMDL_SIMU_PATH`` (imported from
    :mod:`fairmd.lipids`): every ``README.yaml`` found in that directory tree
    is loaded as one :class:`System`. Example usage to loop over systems:

    .. code-block:: python

        db_data = Databank()
        systems = db_data.get_systems()
        for system in systems:
            print(system)
    """

    def __init__(self) -> None:
        """Load all systems from ``FMDL_SIMU_PATH`` and report the folder used."""
        self.path = FMDL_SIMU_PATH
        self._systems: SystemsCollection = SystemsCollection(self.__load_systems__())
        print("Databank initialized from the folder:", os.path.realpath(self.path))

    def __load_systems__(self) -> list[System]:
        """
        Walk the databank folder and build a System for every README.yaml.

        READMEs that cannot be read or converted into a :class:`System` are
        reported on stderr and skipped, so one broken record does not prevent
        the rest of the databank from loading.

        :return: list of successfully loaded systems; each one gets a "path"
                 entry holding its README location relative to the databank
                 root with the file name removed.
        """
        readme_name = "README.yaml"
        systems: list[System] = []
        rpath = os.path.realpath(self.path)
        for subdir, _dirs, files in os.walk(rpath):
            if readme_name not in files:
                continue
            filepath = os.path.join(subdir, readme_name)
            ydict = {}
            try:
                with open(filepath, encoding="utf-8") as yaml_file:
                    # NOTE(review): FullLoader accepts more than plain data types;
                    # if READMEs only hold plain YAML, yaml.safe_load would be the
                    # safer choice - confirm before switching.
                    ydict.update(yaml.load(yaml_file, Loader=yaml.FullLoader))
                content = System(ydict)
            except (FileNotFoundError, PermissionError) as e:
                sys.stderr.write(
                    "\n!!README LOAD ERROR!!\n"
                    f"Problems while loading on of the files required for the system: {e}\n"
                    f"System path: {subdir}\n"
                    f"System: {ydict!s}\n"
                )
            except Exception as e:
                # Deliberately broad: any malformed record is reported and skipped.
                sys.stderr.write(
                    "\n!!README LOAD ERROR!!\n"
                    f"Unexpected error: {e}\n"
                    f"System: {ydict!s}\n"
                )
            else:
                relpath = os.path.relpath(filepath, rpath)
                # Strip only the file name; the trailing path separator is kept.
                content["path"] = relpath[: -len(readme_name)]
                systems.append(content)
        return systems

    def get_systems(self) -> SystemsCollection:
        """List all systems in the FAIRMD Lipids."""
        return self._systems
[docs]
def initialize_databank() -> SystemsCollection:
    """
    Initialize the FAIRMD Lipids databank and return its systems.

    A :class:`Databank` is created (which loads every system it can find)
    and its collection of systems is handed back.

    :return: collection of systems, one per successfully loaded README.yaml file.
    """
    return Databank().get_systems()
# TODO: print_README is not used anywhere in the project; consider removing it.
[docs]
def print_README(system: str | typing.Mapping) -> None: # noqa: N802
"""
Print the content of ``system`` dictionary in human readable format.
:param system: FAIRMD Lipids dictionary defining a simulation or "example".
"""
if system == "example":
current_folder = os.path.dirname(os.path.realpath(__file__))
readme_path = os.path.join(current_folder, "SchemaValidation", "Schema", "READMEexplanations.yaml")
with open(readme_path) as file:
readme_file = yaml.safe_load(file)
else:
readme_file = system
for key in readme_file:
print("\033[1m" + key + ":" + "\033[0m")
print(" ", readme_file[key])