"""Module for handling experimental data entries in the databank.
This module defines classes to represent different types of experimental datasets,
they inherit from a common abstract base class :class:`Experiment` which implements
internal ``SampleComposition`` class for retrieving sample composition information.
Two concrete experiment types are implemented: :class:`OPExperiment` for ssNMR order parameter data
and :class:`FFExperiment` for form factor data.
"""
import json
import os
import typing
from abc import abstractmethod
from typing import Any, Literal
import numpy as np
import yaml
from fairmd.lipids import FMDL_EXP_PATH
from fairmd.lipids._base import CollectionSingleton, SampleComposition
from fairmd.lipids.molecules import Lipid, NonLipid
[docs]
class ExperimentError(Exception):
"""Experiment-related exception"""
[docs]
class Experiment(SampleComposition):
"""Abstract base class representing an experimental dataset in the databank.
Implements important methods from :class:`SampleComposition`: :meth:`get_hydration`,
:meth:`membrane_composition`, and :meth:`solution_composition`. Additionally, defines
abstract properties for accessing experiment data (:attr:`data`) and metadata.
"""
_exp_id: str
_metadata: dict | None = None
_data: dict | None = None
def __init__(self, exp_id: str, path: str):
"""
Initialize the Experiment object.
:param exp_id: The unique identifier for the experiment.
:param path: The absolute path to the experiment's directory.
"""
self._exp_id = exp_id
self._path = path
self._populate_meta_data()
self._initialize_content()
def _populate_meta_data(self) -> None:
"""Populate metadata from the README.yaml file."""
self._metadata = {}
meta_path = os.path.join(self.path, "README.yaml")
if os.path.isfile(meta_path):
with open(meta_path) as yaml_file:
self._metadata = yaml.load(yaml_file, Loader=yaml.FullLoader)
# remove None entries
_keys_to_check = list(self._metadata.keys())
for k in _keys_to_check:
if self._metadata[k] is None:
del self._metadata[k]
else:
msg = f"Metadata file (README.yaml) not found for experiment '{self._exp_id}'."
raise FileNotFoundError(msg)
@property
def metadata(self) -> dict:
"""Access the experiment's metadata."""
if self._metadata is None:
self._populate_meta_data()
return self._metadata
def __getitem__(self, key: str):
return self.metadata[key]
def __repr__(self):
return f"{type(self).__name__}(id='{self.exp_id}')"
def __eq__(self, other):
return isinstance(other, type(self)) and self.exp_id == other.exp_id
def __hash__(self):
return hash(self.exp_id)
@property
def readme(self) -> dict:
"""Provides access to the experiment's metadata (for backward compatibility)."""
return self.metadata
@property
def exp_id(self) -> str:
"""The unique identifier of the experiment."""
return self._exp_id
@property
def path(self) -> str:
"""The absolute path to the experiment's directory."""
return self._path
@property
@abstractmethod
def data(self) -> dict:
"""Provide access to the experiment's data."""
[docs]
@abstractmethod
def verify_data(self) -> None:
"""Verify the integrity and consistency of the experiment's data."""
@property
@abstractmethod
def exptype(self) -> str:
"""The type of the experiment."""
[docs]
@classmethod
def target_folder(cls) -> str:
"""Get target folder name for the experiment type."""
msg = "This method should be implemented in subclasses."
raise NotImplementedError(msg)
# Implementation of SampleComposition interface
def _initialize_content(self) -> None:
null_concentration_threshold = 1e-7 # 0.1 uM
self._content = {}
for k in self.metadata["MOLAR_FRACTIONS"]:
lip = Lipid(k)
lip.register_mapping()
self._content[k] = lip
for k, v in self.metadata.get("ION_CONCENTRATIONS", {}).items():
if np.abs(float(v)) < null_concentration_threshold:
continue
self._content[k] = NonLipid(k)
for k in self.metadata.get("COUNTER_IONS", {}):
if k not in self._content:
self._content[k] = NonLipid(k)
[docs]
def get_hydration(self, basis: typing.Literal["number", "mass"] = "number") -> float:
if basis == "mass":
return float(self.metadata["TOTAL_HYDRATION"])
if basis == "number":
tlc = self.metadata["TOTAL_LIPID_CONCENTRATION"]
if tlc == "full hydration":
return 70
return 55.5 / float(tlc) # water per lipid from outdated field
msg = "Basis must be 'molar' or 'mass'"
raise ValueError(msg)
[docs]
def membrane_composition(self, basis: typing.Literal["molar", "mass"] = "molar") -> dict[str, float]:
if basis == "molar":
return self.metadata["MOLAR_FRACTIONS"]
if basis == "mass":
comp: dict[str, float] = {}
total_mass = 0.0
for k, v in self.metadata["MOLAR_FRACTIONS"].items():
mol: Lipid = self._content[k]
mw = float(mol.metadata["bioschema_properties"]["molecularWeight"])
comp[k] = v * mw
total_mass += comp[k]
for k in comp:
comp[k] /= total_mass
return comp
msg = "Basis must be 'molar' or 'mass'"
raise ValueError(msg)
[docs]
def solution_composition(self, basis="molar"):
if basis == "molar":
concs = {}
_tlc = 55.5 / self.get_hydration("number")
for k, v in self.metadata.get("ION_CONCENTRATIONS", {}).items():
if k not in self.solubles:
continue
concs[k] = float(v)
for k, v in self.metadata.get("COUNTER_IONS", {}).items():
if k not in concs:
concs[k] = 0.0
concs[k] += _tlc * self.membrane_composition("molar")[v]
return concs
if basis == "mass":
msg = "Mass basis for solution composition is not implemented yet"
raise NotImplementedError(msg)
msg = "Basis must be 'molar' or 'mass' (not implemented yet)"
raise ValueError(msg)
[docs]
class OPExperiment(Experiment):
"""Represents an ssNMR order parameter experiment.
:attr:`data` provides access to a dictionary where keys are lipid names and values are dictionaries
mapping atom unique names to their order parameters.
:meth:`verify_data` checks that all atom unique names in the data correspond to known atoms in the lipid mapping.
"""
@property
def data(self) -> dict:
if self._data is None:
self._data = {}
for fname in os.listdir(self.path):
if fname.endswith("_OrderParameters.json"):
molecule_name = fname.replace("_OrderParameters.json", "")
if molecule_name not in self.lipids:
msg = f"Data for non-existing molecule {molecule_name} in {self.exp_id}!"
raise ExperimentError(msg)
fpath = os.path.join(self.path, fname)
with open(fpath) as json_file:
_tmpdic = json.load(json_file)
self._data[molecule_name] = {k: v[0] for k, v in _tmpdic.items()}
if not self._data:
msg = f"No order parameter data files found for experiment '{self.exp_id}'."
raise ExperimentError(msg)
return self._data
[docs]
def verify_data(self) -> None:
for ln, opdic in self.data.items():
lipid = self.lipids[ln]
for uname_pair in opdic:
if uname_pair.split(" ")[0] not in lipid.mapping_dict:
msg = (
"Order parameter data contains unknown atom "
f"'{uname_pair.split(' ')[0]}' for lipid '{ln}' in experiment '{self.exp_id}'."
)
raise ExperimentError(msg)
@property
def exptype(self) -> str:
return "OrderParameters"
[docs]
@classmethod
def target_folder(cls) -> str:
return "OrderParameters"
[docs]
class FFExperiment(Experiment):
"""Represent a SAXS form-factor experiment.
:attr:`data` provides access to a table with the first column is Q(A-1),
second is the intensity, and third is error.
:meth:`verify_data` is not currently implemented for FF.
"""
@property
def data(self) -> dict:
if self._data is None:
self._data = {}
for fname in os.listdir(self.path):
if fname.endswith("_FormFactor.json"):
fpath = os.path.join(self.path, fname)
with open(fpath) as json_file:
self._data = json.load(json_file)
break # Assuming one form factor file per experiment
if not self._data:
msg = f"No form factor data file found for experiment '{self.exp_id}'."
raise ExperimentError(msg)
return self._data
[docs]
def verify_data(self) -> None:
pass
@property
def exptype(self) -> str:
return "FormFactors"
[docs]
@classmethod
def target_folder(cls) -> str:
return "FormFactors"
[docs]
class ExperimentCollection(CollectionSingleton[Experiment]):
"""A collection of experiments."""
def _test_item_type(self, item: Any) -> bool:
return isinstance(item, Experiment)
def _get_item_id(self, item: Experiment) -> str:
return item.exp_id
[docs]
@staticmethod
def load_from_data(exp_type: Literal["OPExperiment", "FFExperiment"] = "OPExperiment") -> "ExperimentCollection":
"""Load experiment data from the designated directory."""
exp_types = {
"OPExperiment": OPExperiment,
"FFExperiment": FFExperiment,
}
if exp_type not in exp_types:
msg = "..."
raise ValueError(msg)
collection = ExperimentCollection()
for exp_cls in [exp_types[exp_type]]:
path = os.path.join(FMDL_EXP_PATH, exp_cls.target_folder())
if not os.path.isdir(path):
continue
for subdir, _, files in os.walk(path):
if "README.yaml" in files:
# exp_id is the directory name relative to the exp_type directory
exp_id = os.path.relpath(subdir, path)
try:
exp = exp_cls(exp_id, subdir)
collection.add(exp)
except FileNotFoundError:
# Log this error?
pass
return collection