Source code for searchDATABANK

import os
import yaml
import json

from databankLibrary import lipids_dict
from tqdm import tqdm

databank_path = '../../Data/Simulations'
expbank_path = '../../Data/experiments'

lipid_numbers_list =  lipids_dict.keys() # should contain all lipid names
ions_list = ['POT', 'SOD', 'CLA', 'CAL'] # should contain names of all ions

LIP_CONC_REL_THRESHOLD = 0.15 # relative acceptable error for determination of 
                              # the hydration in ssNMR

[docs] class Data: def __init__(self, molecule, data_path): self.molecule = molecule self.data = {} self.__load_data__(data_path) def __load_data__(self,data_path): with open(data_path) as json_file: self.data = json.load(json_file)
[docs] class Simulation: def __init__(self, readme, data, indexingPath): self.readme = readme self.data = {} self.indexingPath = indexingPath
[docs] def getLipids(self, molecules=lipid_numbers_list): lipids = [] for key in self.readme['COMPOSITION'].keys(): if key in molecules: lipids.append(key) return lipids
[docs] def getIons(self, ions): simIons = [] for key in self.readme['COMPOSITION'].keys(): if key in ions: simIons.append(key) return simIons
#fraction of each lipid with respect to total amount of lipids
[docs] def molarFraction(self, molecule,molecules=lipid_numbers_list): #only for lipids sum_lipids = 0 number = sum(self.readme['COMPOSITION'][molecule]['COUNT']) for key in self.readme['COMPOSITION'].keys(): if key in molecules: sum_lipids += sum(self.readme['COMPOSITION'][key]['COUNT']) return number / sum_lipids
# concentration of other molecules than lipids # change name to ionConcentration()
[docs] def ionConcentration(self, molecule, exp_counter_ions): lipids1 = self.getLipids() c_water = 55.5 N_water = self.readme['COMPOSITION']['SOL']['COUNT'] try: N_molecule = self.readme['COMPOSITION'][molecule]['COUNT'] #number of ions except KeyError: N_molecule = 0 lipids2 = [] if exp_counter_ions and N_molecule != 0: for lipid in lipids1: if molecule in exp_counter_ions.keys() and lipid == exp_counter_ions[molecule]: N_lipid = self.readme['COMPOSITION'][lipid]['COUNT'] # print(molecule + " " + lipid) # print(self.readme) lipids2.append(sum(N_lipid)) N_molecule = N_molecule - sum(lipids2) # print(N_molecule) c_molecule = (N_molecule * c_water) / N_water #print(c_molecule) return c_molecule
[docs] def totalLipidConcentration(self): lipids = self.getLipids() c_water = 55.5 N_water = self.readme['COMPOSITION']['SOL']['COUNT'] N_lipids = 0 for lipid in lipids: N_lipids += sum(self.readme['COMPOSITION'][lipid]['COUNT']) try: if (N_water / N_lipids) > 25 : tot_lipid_c = 'full hydration' # print('full hydration') else: tot_lipid_c = (N_lipids * c_water) / N_water except ZeroDivisionError: print(self.readme) return tot_lipid_c
##################
[docs] class Experiment: def __init__(self, readme, data, dataPath,exptype): self.readme = readme self.data = data #object Data self.dataPath = dataPath self.exptype = exptype
[docs] def getLipids(self, molecules=lipid_numbers_list): lipids = [] for key in molecules: try: if key in self.readme['MOLAR_FRACTIONS'].keys(): lipids.append(key) except KeyError: continue return lipids
[docs] def getIons(self, ions): expIons = [] for key in ions: try: if self.readme['ION_CONCENTRATIONS'][key] != 0: expIons.append(key) except KeyError: continue try: if key in self.readme['COUNTER_IONS'].keys(): expIons.append(key) except AttributeError: continue return expIons
[docs] def loadSimulations(): """ Generates the list of Simulation objects. Go through all README.yaml files. """ print("Build simulation tree index...", end='') rmIdx = [] for subdir, dirs, files in os.walk(databank_path): for fn in files: if fn == 'README.yaml': rmIdx.append(subdir) print('%d READMEs loaded.' % len(rmIdx) ) print("Loading information about every simulation in the bank.") simulations = [] for subdir in tqdm(rmIdx, "Simulations"): READMEfilepathSimulation = os.path.join(subdir, 'README.yaml') # it exists because we collected only dirs with README.yaml with open(READMEfilepathSimulation) as yaml_file_sim: readmeSim = yaml.load(yaml_file_sim, Loader=yaml.FullLoader) try: if readmeSim['WARNINGS']['NOWATER']: continue except: pass indexingPath = os.path.relpath(subdir, start=databank_path) simOPdata = [] # order parameter files for each type of lipid simData = {} for filename in os.listdir(subdir): filepath = os.path.join(subdir, filename) if filename == 'fourierFromFinalDensity.json': ## TODO: WHAT IS THAT??? simData['FormFactors'] = Data("system", filepath) elif filename.endswith('OrderParameters.json'): lipid_name = filename.replace('OrderParameters.json', '') # print(dataPath) op_data = Data(lipid_name, filepath) simOPdata.append(op_data) simData['OrderParameters'] = simOPdata simulations.append(Simulation(readmeSim, simData, indexingPath)) return simulations
[docs] def loadExperiments(experimentType): """ Loops over the experiment entries in the experiment databank and read experiment readme and order parameter files into objects. """ if experimentType == 'OrderParameters': dataFile = '_Order_Parameters.json' elif experimentType == 'FormFactors': dataFile = '_FormFactor.json' else: raise NotImplementedError("Only OrderParameters and FormFactors types are implemented.") print("Build experiments [%s] index..." % experimentType, end='') rmIdx = [] path = os.path.join(expbank_path, experimentType) for subdir, dirs, files in os.walk(path): for fn in files: if fn == 'README.yaml': rmIdx.append(subdir) print('%d READMEs loaded.' % len(rmIdx) ) print("Loading data for each experiment.") experiments = [] for subdir in tqdm(rmIdx, desc='Experiment'): READMEfilepathExperiment = os.path.join(subdir, 'README.yaml') with open(READMEfilepathExperiment) as yaml_file_exp: readmeExp = yaml.load(yaml_file_exp, Loader=yaml.FullLoader) opData = {} for fname in os.listdir(subdir): dataPath = os.path.join(subdir, fname) if fname.endswith(dataFile): molecule_name = "" if experimentType == "OrderParameters": molecule_name = fname.replace(dataFile,'') elif experimentType == "FormFactors": molecule_name = 'system' expData = Data(molecule_name, dataPath) experiments.append(Experiment(readmeExp, expData, subdir, experimentType)) return experiments
[docs] def findPairs(experiments, simulations): pairs = [] for simulation in tqdm(simulations, desc='Simulation'): sim_lipids = simulation.getLipids() sim_total_lipid_concentration = simulation.totalLipidConcentration() sim_ions = simulation.getIons(ions_list) t_sim = simulation.readme['TEMPERATURE'] #calculate molar fractions from simulation sim_molar_fractions = {} for lipid in sim_lipids: sim_molar_fractions[lipid] = simulation.molarFraction(lipid) for experiment in experiments: # check lipid composition matches the simulation exp_lipids = experiment.getLipids() exp_total_lipid_concentration = experiment.readme['TOTAL_LIPID_CONCENTRATION'] exp_ions = experiment.getIons(ions_list) exp_counter_ions = experiment.readme['COUNTER_IONS'] # calculate simulation ion concentrations sim_concentrations = {} for molecule in ions_list: sim_concentrations[molecule] = simulation.ionConcentration(molecule, exp_counter_ions) # continue if lipid compositions are the same if set(sim_lipids) == set(exp_lipids): # compare molar fractions mf_ok = 0 for key in sim_lipids: if ( (experiment.readme['MOLAR_FRACTIONS'][key] >= sim_molar_fractions[key] - 0.03) and (experiment.readme['MOLAR_FRACTIONS'][key] <= sim_molar_fractions[key]+ 0.03) ): mf_ok +=1 # compare ion concentrations c_ok = 0 if set(sim_ions) == set(exp_ions): for key in sim_ions: if ( (experiment.readme['ION_CONCENTRATIONS'][key] >= sim_concentrations[key] - 0.05) and (experiment.readme['ION_CONCENTRATIONS'][key] <= sim_concentrations[key] + 0.05) ): c_ok += 1 switch = 0 if ( (type(exp_total_lipid_concentration) == float) and (type(sim_total_lipid_concentration) == float) ): if ( (exp_total_lipid_concentration / sim_total_lipid_concentration > 1 - LIP_CONC_REL_THRESHOLD) and (exp_total_lipid_concentration / sim_total_lipid_concentration < 1 + LIP_CONC_REL_THRESHOLD) ): switch = 1 elif ( (type(exp_total_lipid_concentration) == str) and (type(sim_total_lipid_concentration) == str) ): if exp_total_lipid_concentration == sim_total_lipid_concentration: switch = 1 if switch: #check temperature +/- 2 degrees t_exp = experiment.readme['TEMPERATURE'] if ( (mf_ok == len(sim_lipids)) and (c_ok == len(sim_ions)) and (t_exp >= float(t_sim) - 2.0) and (t_exp <= float(t_sim) + 2.0) ): # !we found the match! pairs.append([simulation, experiment]) # Add path to experiment into simulation README.yaml # many experiment entries can match to same simulation exp_doi = experiment.readme['DOI'] exp_path = os.path.relpath(experiment.dataPath, start=os.path.join(expbank_path, experiment.exptype)) if experiment.exptype == "OrderParameters": lipid = experiment.data.molecule simulation.readme['EXPERIMENT']['ORDERPARAMETER'][lipid][exp_doi] = exp_path elif experiment.exptype == "FormFactors": simulation.readme['EXPERIMENT']['FORMFACTOR'] = exp_path else: continue # sorting experiment lists to keep experimental order strict for _lipid in simulation.readme['EXPERIMENT']['ORDERPARAMETER'].keys(): unsortDict = simulation.readme['EXPERIMENT']['ORDERPARAMETER'][_lipid].copy() if not len(unsortDict): continue sortDict = dict(sorted(unsortDict.items())) simulation.readme['EXPERIMENT']['ORDERPARAMETER'][_lipid] = sortDict.copy() outfileDICT = os.path.join(databank_path, simulation.indexingPath, 'README.yaml') with open(outfileDICT, 'w') as f: yaml.dump(simulation.readme, f, sort_keys=False) return pairs
[docs] def logPairs(pairs, fd): """ Write found correspondences into log file. pairs: [(Simulation, Experiment), ...] fd: file descriptor for writting into """ for p in pairs: sim : Simulation = p[0] exp : Experiment = p[1] sysn = sim.readme['SYSTEM'] simp = sim.indexingPath expp = exp.dataPath expd = exp.readme['DOI'] fd.write(f""" -------------------------------------------------------------------------------- Simulation: - {sysn} - {simp} Experiment: - {expd} - {expp}""") # end for fd.write(""" -------------------------------------------------------------------------------- \n""")
[docs] def main(): """ Main program function. Not for exporting. """ simulations = loadSimulations() # clear all EXPERIMENT sections in all simulations for simulation in simulations: simulation.readme['EXPERIMENT'] = {} simulation.readme['EXPERIMENT']['ORDERPARAMETER']= {} simulation.readme['EXPERIMENT']['FORMFACTOR']= {} for lipid in simulation.getLipids(): simulation.readme['EXPERIMENT']['ORDERPARAMETER'][lipid] = {} outfileDICT = os.path.join(databank_path, simulation.indexingPath, 'README.yaml') with open(outfileDICT, 'w') as f: yaml.dump(simulation.readme, f, sort_keys=False) experimentsOrderParameters = loadExperiments('OrderParameters') experimentsFormFactors = loadExperiments('FormFactors') # Pair each simulation with an experiment with the closest matching temperature and composition with open('search-databank-pairs.log', 'w') as logf: print("Scanning simulation-experiment pairs among order parameter experiments.") pairsOP = findPairs(experimentsOrderParameters, simulations) logf.write("=== OP PAIRS ===\n") logPairs(pairsOP, logf) print("Scanning simulation-experiment pairs among form factor experiments.") pairsFF = findPairs(experimentsFormFactors, simulations) logf.write("=== FF PAIRS ===\n") logPairs(pairsFF, logf) ''' for pair in pairsFF: print('#################') print(pair[0].readme) print(pair[0].indexingPath) print("#") print(pair[1].readme) ''' print("Found order parameter data for " + str(len(pairsOP)) + " pairs") print("Found form factor data for " + str(len(pairsFF)) + " pairs")
if __name__ == "__main__": main()