Source code for fairmd.lipids.core

"""
Core databank class and system initialization function.

Imported by :mod:`fairmd.lipids.databankLibrary` by default.
Can be imported without additional libraries to scan Databank system file tree!
"""

import os
import sys
import typing
from collections.abc import MutableMapping, Sequence

import yaml

from fairmd.lipids import FMDL_SIMU_PATH
from fairmd.lipids.molecules import Lipid, Molecule, NonLipid, lipids_set, molecules_set


class System(MutableMapping):
    """
    Main Databank single object.

    It is an extension of a dictionary with additional functionality: besides
    the README key/value pairs it keeps one molecule object per entry of the
    ``COMPOSITION`` section.
    """

    def __init__(self, data: dict | typing.Mapping) -> None:
        """
        Initialize the container for storing simulation record.

        :param data: README-type dictionary.

        :raises TypeError: If `data` is neither a `dict` nor another mapping type.
        :raises KeyError: If `data` has no "COMPOSITION" entry or a composition
                          entry has no "MAPPING" field.
        :raises ValueError: If a molecule key in the "COMPOSITION" data does not
                            belong to the predefined set of lipids or molecules.
        """
        # ``dict`` is itself a Mapping, so a single check covers both cases.
        if not isinstance(data, typing.Mapping):
            expect_type_msg = "Expected dict or Mapping"
            raise TypeError(expect_type_msg)
        # Shallow copy: the System owns its top-level dictionary.
        self._store: dict = dict(data)

        self._content = {}
        for k, v in self["COMPOSITION"].items():
            if k in lipids_set:
                mol = Lipid(k)
            elif k in molecules_set:
                mol = NonLipid(k)
            else:
                mol_not_found_msg = f"Molecule {k} is not in the set of lipids or molecules."
                raise ValueError(mol_not_found_msg)
            mol.register_mapping(v["MAPPING"])
            self._content[k] = mol

    def __getitem__(self, key: str):  # noqa: ANN204
        return self._store[key]

    def __setitem__(self, key: str, value) -> None:
        self._store[key] = value

    def __delitem__(self, key: str) -> None:
        del self._store[key]

    def __iter__(self) -> typing.Iterator:
        return iter(self._store)

    def __len__(self) -> int:
        return len(self._store)

    @property
    def readme(self) -> dict:
        """Get the README dictionary of the system in true dict format.

        :return: dict-type README (dict). This is the internal dictionary
                 itself, not a copy.
        """
        return self._store

    @property
    def content(self) -> dict[str, Molecule]:
        """Returns dictionary of molecule objects."""
        return self._content

    @property
    def lipids(self) -> dict[str, Lipid]:
        """Returns dictionary of lipid molecule objects."""
        return {k: v for k, v in self._content.items() if k in lipids_set}

    @property
    def n_lipids(self) -> int:
        """Returns total number of lipid molecules in the system."""
        # "COUNT" of a lipid is summed, i.e. it is treated as an iterable of numbers.
        return sum(sum(v["COUNT"]) for k, v in self["COMPOSITION"].items() if k in lipids_set)

    def membrane_composition(self, basis: typing.Literal["molar", "mass"] = "molar") -> dict[str, float]:
        """Return the composition of the membrane in system.

        Only molecules belonging to the lipid set are taken into account.

        :param basis: Type of composition to return. Options are:
                      - "molar": compute molar fraction
                      - "mass": compute mass fraction

        :return: dictionary (universal molecule name -> value)
        :raises ValueError: If `basis` is neither "molar" nor "mass".
        """
        if basis not in ["molar", "mass"]:
            msg = "Basis must be 'molar' or 'mass'"
            raise ValueError(msg)

        # Start from the per-lipid molecule counts.
        comp: dict[str, float] = {k: sum(v["COUNT"]) for k, v in self["COMPOSITION"].items() if k in lipids_set}

        if basis == "mass":
            # Weight every count by the molecular weight from the molecule metadata.
            for k in comp:  # noqa: PLC0206 (modify dict while iterating)
                mol: Lipid = self._content[k]
                comp[k] *= mol.metadata["bioschema_properties"]["molecularWeight"]

        # Normalise by the total (number of lipids or total lipid mass).
        # With no lipids the dictionary is empty and nothing is divided.
        total = sum(comp.values())
        for k in comp:
            comp[k] /= total
        return comp

    def get_hydration(self, basis: typing.Literal["number", "mass"] = "number") -> float:
        """Get system hydration.

        :param basis: "number" returns the "SOL" count divided by the number of
                      lipids; "mass" is not implemented yet.

        :return: hydration value
        :raises ValueError: If `basis` is invalid or the system has no "SOL"
                            entry in its composition (implicit water).
        :raises NotImplementedError: If `basis` is "mass".
        """
        if basis not in ["number", "mass"]:
            msg = "Basis must be 'number' or 'mass'"
            raise ValueError(msg)
        if basis == "mass":
            msg = "Mass hydration is not implemented yet."
            raise NotImplementedError(msg)

        if "SOL" not in self["COMPOSITION"]:
            msg = f"Cannot compute hydration for implicit water (system #{self['ID']})."
            raise ValueError(msg)
        return self["COMPOSITION"]["SOL"]["COUNT"] / self.n_lipids

    def __repr__(self) -> str:
        return f"System({self._store['ID']}): {self._store['path']}"
class SystemsCollection(Sequence[System]):
    """Immutable collection of system dicts. Can be accessed by ID using loc()."""

    def __init__(self, iterable: typing.Sequence[System] | None = None) -> None:
        """
        Wrap a sequence of systems and index it by system ID.

        :param iterable: Sequence of systems; it is stored by reference, not
                         copied. ``None`` (default) creates an empty collection.
        """
        # A fresh list per instance instead of a shared mutable default argument.
        self._data: typing.Sequence[System] = [] if iterable is None else iterable
        self.__get_index_byid()

    def __get_index_byid(self) -> None:
        """Build the ID -> position lookup used by :meth:`loc`."""
        # Systems without an "ID" key are skipped; for duplicated IDs the last
        # position wins.
        self._idx: dict = {system["ID"]: pos for pos, system in enumerate(self._data) if "ID" in system}

    def __getitem__(self, key):
        return self._data[key]

    def __len__(self) -> int:
        return len(self._data)

    def loc(self, sid: int) -> System:
        """Locate system by its ID.

        :param sid: System ID

        :return: System object with ID `sid`
        :raises KeyError: If no system with ID `sid` is in the collection.
        """
        return self._data[self._idx[sid]]
class Databank:
    """
    Representation of all simulations in the FAIRMD Lipids databank.

    The constructor takes no arguments: the ``path`` attribute is set to
    ``FMDL_SIMU_PATH`` and every ``README.yaml`` found below it is loaded
    as a :class:`System`.

    Example usage to loop over systems:

    .. code-block:: python

        db_data = Databank()
        systems = db_data.get_systems()
        for system in systems:
            print(system)
    """

    def __init__(self) -> None:
        """Load all systems found under ``FMDL_SIMU_PATH``."""
        self.path = FMDL_SIMU_PATH
        loaded = self.__load_systems__()
        self._systems: SystemsCollection = SystemsCollection(loaded)
        print("Databank initialized from the folder:", os.path.realpath(self.path))

    def __load_systems__(self) -> list[System]:
        """
        Walk the simulations folder and build a System per ``README.yaml``.

        Files that cannot be read or parsed are reported on stderr and skipped;
        they never abort the scan.

        :return: list of successfully loaded systems, each with its "path" entry
                 set to the README's folder relative to the databank root.
        """
        readme_name = "README.yaml"
        systems: list[System] = []
        rpath = os.path.realpath(self.path)
        for subdir, _dirs, files in os.walk(rpath):
            for filename in files:
                if filename != readme_name:
                    continue
                filepath = os.path.join(subdir, filename)
                ydict = {}
                try:
                    # Explicit encoding: do not depend on the platform locale.
                    with open(filepath, encoding="utf-8") as yaml_file:
                        # NOTE(review): FullLoader is not the restricted safe loader;
                        # consider yaml.safe_load if READMEs can be untrusted.
                        ydict.update(yaml.load(yaml_file, Loader=yaml.FullLoader))
                    content = System(ydict)
                except (FileNotFoundError, PermissionError) as e:
                    sys.stderr.write(f"""
!!README LOAD ERROR!!
Problems while loading on of the files required for the system: {e}
System path: {subdir}
System: {ydict!s}\n""")
                except Exception as e:
                    # Deliberately broad: one broken README must not stop the scan.
                    sys.stderr.write(f"""
!!README LOAD ERROR!!
Unexpected error: {e}
System: {ydict!s}\n""")
                else:
                    relpath = os.path.relpath(filepath, rpath)
                    # Strip the file name only; the trailing separator is kept.
                    content["path"] = relpath[: -len(readme_name)]
                    systems.append(content)
        return systems

    def get_systems(self) -> SystemsCollection:
        """List all systems in the FAIRMD Lipids."""
        return self._systems
def initialize_databank() -> SystemsCollection:
    """
    Initialize the FAIRMD Lipids databank.

    Builds a :class:`Databank` and hands back the systems it loaded.

    :return: collection of systems, one per loaded README.yaml file.
    """
    return Databank().get_systems()
# TODO: is not used at all in the project!!