Source code for fairmd.lipids.auxiliary.opconvertor

"""
Auxiliary functions for converting OP data.

Helps to build a nicely formatted OP dictionary from raw OP data.
Fragmentation is handled.
"""

import re

from natsort import natsorted

from fairmd.lipids.molecules import Lipid



[docs]
class NamingRegistry:
    """Registry for naming conventions."""

    _registry: list = []

    @classmethod
    def _register(cls, name: str, func) -> None:
        """Register a naming convention function.

        :param name: Name of the fragment.
        :param func: Function implementing the convention.
        """
        cls._registry += [(name, func)]


[docs]
    @classmethod
    def apply(cls, opdic: dict) -> None:
        """Make whatever required to apply naming conventions to fragmentized dictionary.

        :param opdic: Fragmented dictionary.
        """
        if not cls._registry:
            cls._initialize()
        cls._apply_naming(opdic)
        cls._apply_sorting(opdic)


    @classmethod
    def _apply_sorting(cls, opdic: dict) -> None:
        """Sort every fragment list by C atom number."""
        for frag_name in opdic:
            opdic[frag_name] = natsorted(opdic[frag_name], key=lambda x: x["C"] + "__" + x["H"])

    @classmethod
    def _apply_naming(cls, opdic: dict) -> None:
        """Apply naming conventions to the fragmented dictionary."""
        for frag_func in cls._registry:
            frag_name, func = frag_func
            if frag_name in opdic:
                for i in range(len(opdic[frag_name])):
                    opdic[frag_name][i] = func(opdic[frag_name][i])
            elif frag_name == "_all_":
                for f in opdic:
                    for i in range(len(opdic[f])):
                        opdic[f][i] = func(opdic[f][i])

    # initialize the registry
    @classmethod
    def _initialize(cls):
        def _snX_c_renamer(row: dict) -> dict:  # noqa: N802
            match = re.match(r"M_G[12]C([0-9]{1,2})_M", row["C"])
            if match and len(match.groups()) == 1:
                idx = int(match[1])
                row["C"] = str(idx - 1)
            return row

        cls._register("sn-1", _snX_c_renamer)
        cls._register("sn-2", _snX_c_renamer)

        def _fa_c_renamer(row: dict) -> dict:
            match = re.match(r"M_N1C([0-9]{1,2})_M", row["C"])
            if match and len(match.groups()) == 1:
                idx = int(match[1])
                row["C"] = str(idx)
            return row

        cls._register("fa", _fa_c_renamer)

        def _gbb_c_renamer(row: dict) -> dict:
            match = re.match(r"M_G([1-3])_M", row["C"])
            if match and len(match.groups()) == 1:
                idx = int(match[1])
                row["C"] = f"g{idx}"
            return row

        cls._register("glycerol backbone", _gbb_c_renamer)

        def _headgroup_c_renamer(row: dict) -> dict:
            if row["C"] == "M_G3C4_M":
                row["C"] = "α"
            elif row["C"] == "M_G3C5_M":
                row["C"] = "β"
            elif re.match(r"M_G3N6C[1-3]_M", row["C"]):
                row["C"] = "γ"
            return row

        cls._register("headgroup", _headgroup_c_renamer)

        def _h_renamer(row: dict) -> dict:
            match = re.match(r"M_.+H([1-4])_M", row["H"])
            if match and len(match.groups()) == 1:
                idx = int(match[1])
                row["H"] = str(idx)
            return row

        cls._register("_all_", _h_renamer)

        def _plain_c_renamer(row: dict) -> dict:
            match = re.match(r"M_C([0-9]+)_M", row["C"])
            if match and len(match.groups()) == 1:
                idx = int(match[1])
                row["C"] = str(idx)
            return row

        cls._register("_all_", _plain_c_renamer)




[docs]
def build_nice_OPdict(src: dict, lipid: Lipid) -> dict:
    """Build nicely formatted OP dictionary from raw OP data.

    Handles fragmentation of lipids.

    :param src: raw OP data (dict)
    :param lipid: Lipid object
    :return: nicely formatted OP dictionary
    """

    # Helper function to convert NaN to None for better
    # JSON compatibility in output
    def _rnan(x: float) -> float | None:
        return None if x != x else x

    def _fragmentize(src: dict, mdict: dict) -> dict:
        r = {}
        for apair, opvals in src.items():
            atom_c, atom_h = apair.split(" ")
            if atom_c not in mdict:
                msg = f"Atom {atom_c} not found in mapping dictionary."
                raise ValueError(msg)
            frag_c = mdict[atom_c].get("FRAGMENT", "total")
            if frag_c not in r:
                r[frag_c] = []
            r[frag_c].append(
                {
                    "C": atom_c,
                    "H": atom_h,
                    "OP": opvals[0],
                    "STD": _rnan(opvals[1]) if len(opvals) > 1 else None,
                },
            )
            r[frag_c].sort(key=lambda x: x["C"])
        return r

    nice_OPdict: dict = _fragmentize(src, lipid.mapping_dict)
    # rename C and H atoms for registry fragments
    NamingRegistry.apply(nice_OPdict)
    return nice_OPdict