# Source code for alomancy.core.standard_active_learning

from pathlib import Path

import pandas as pd
from ase import Atoms
from ase.io import read, write
from mace.calculators import MACECalculator

from alomancy.configs.remote_info import get_remote_info
from alomancy.core.base_active_learning import BaseActiveLearningWorkflow
from alomancy.high_accuracy_evaluation.dft.qe_remote_submitter import (
    qe_remote_submitter,
)
from alomancy.high_accuracy_evaluation.dft.run_qe import run_qe
from alomancy.mlip.committee_remote_submitter import committee_remote_submitter
from alomancy.mlip.get_mace_eval_info import (
    get_mace_eval_info,
)
from alomancy.mlip.mace_wfl import mace_fit
from alomancy.structure_generation.find_high_sd_structures import (
    find_high_sd_structures,
)
from alomancy.structure_generation.md.md_remote_submitter import md_remote_submitter
from alomancy.structure_generation.md.md_wfl import run_md
from alomancy.structure_generation.select_initial_structures import (
    select_initial_structures,
)


class ActiveLearningStandardMACE(BaseActiveLearningWorkflow):
    """Standard active-learning workflow built on a MACE committee.

    AL Technique: Committee
    MLIP: MACE
    Structure Generation: MD
    High-Accuracy Evaluation: Quantum Espresso (DFT)

    Every stage reads and writes below ``results/<base_name>/`` and hands its
    actual work to one of the ``*_remote_submitter`` helpers.
    """

    def train_mlip(
        self, base_name: str, mlip_committee_job_dict: dict
    ) -> pd.DataFrame:
        """Fit the MACE committee and return its evaluation summary.

        Args:
            base_name: Name of the sub-directory of ``results`` that holds
                ``train_set.xyz`` and ``test_set.xyz`` for this run.
            mlip_committee_job_dict: Committee job settings. The keys
                ``"name"`` and ``"size_of_committee"`` are read here. A
                missing ``"mace_fit_kwargs"`` entry is filled in with ``{}``
                in place, so the caller's dict is modified.

        Returns:
            The value produced by ``get_mace_eval_info`` for this committee
            (annotated as a ``pandas.DataFrame``).
        """
        workdir = Path("results", base_name)

        # Written back into the caller's dict on purpose: the same dict is
        # forwarded to ``mace_fit`` through ``function_kwargs`` below.
        mlip_committee_job_dict.setdefault("mace_fit_kwargs", {})

        remote_info = get_remote_info(
            mlip_committee_job_dict,
            input_files=[
                str(Path(workdir, "train_set.xyz")),
                str(Path(workdir, "test_set.xyz")),
            ],
        )

        committee_remote_submitter(
            remote_info=remote_info,
            base_name=base_name,
            target_file=f"{mlip_committee_job_dict['name']}_stagetwo_compiled.model",
            # Fixed seed handed to the submitter; presumably the base seed
            # for the committee members -- confirm in the submitter.
            seed=803,
            size_of_committee=mlip_committee_job_dict["size_of_committee"],
            function=mace_fit,
            function_kwargs={
                "mlip_committee_job_dict": mlip_committee_job_dict,
                "workdir_str": str(workdir),
            },
        )

        return get_mace_eval_info(mlip_committee_job_dict=mlip_committee_job_dict)

    def generate_structures(
        self, base_name: str, job_dict: dict, train_atoms_list: list[Atoms]
    ) -> list[Atoms]:
        """Run MD from selected structures and return the high-SD candidates.

        Steps, in order:

        1. Pick starting structures with ``select_initial_structures`` and
           write them to ``<name>_input_structures.xyz``.
        2. Run ``run_md`` through ``md_remote_submitter`` using the ``fit_0``
           committee model.
        3. Read every frame of every returned trajectory file.
        4. Build one CPU/float64 ``MACECalculator`` per remaining committee
           model and pass them to ``find_high_sd_structures``.
        5. Tag each returned structure with a consecutive ``job_id``.

        Args:
            base_name: Name of the sub-directory of ``results`` for this run.
            job_dict: Full job configuration. The ``"structure_generation"``
                and ``"mlip_committee"`` sub-dicts are read here. Missing
                ``"structure_selection_kwargs"`` / ``"run_md_kwargs"``
                entries of ``"structure_generation"`` are filled in with
                ``{}`` in place.
            train_atoms_list: Current training structures, forwarded to
                ``select_initial_structures``.

        Returns:
            The structures returned by ``find_high_sd_structures``, each with
            ``info["job_id"]`` set to its position in the list.
        """
        generation_job = job_dict["structure_generation"]

        selection_kwargs = generation_job.setdefault(
            "structure_selection_kwargs", {}
        )
        input_structures = select_initial_structures(
            base_name=base_name,
            structure_generation_job_dict=generation_job,
            train_atoms_list=train_atoms_list,  # type: ignore
            verbose=self.verbose,
            **selection_kwargs,
        )

        generation_name = generation_job["name"]
        generation_dir = Path("results", base_name, generation_name)
        generation_dir.mkdir(parents=True, exist_ok=True)
        write(
            Path(generation_dir, f"{generation_name}_input_structures.xyz"),
            input_structures,
            format="extxyz",
        )

        committee_name = job_dict["mlip_committee"]["name"]
        committee_dir = Path("results", base_name, committee_name)
        model_file_name = f"{committee_name}_stagetwo.model"
        base_model = Path(committee_dir, "fit_0", model_file_name)
        base_mace_model_path = str(base_model)

        run_md_kwargs = generation_job.setdefault("run_md_kwargs", {})
        function_kwargs = {
            "structure_generation_job_dict": generation_job,
            "total_md_runs": len(input_structures),
            # need to pass model path to preserve consistent dtype
            "model_path": [base_mace_model_path],
            "verbose": self.verbose,
            **run_md_kwargs,
        }

        md_trajectory_paths = md_remote_submitter(
            remote_info=get_remote_info(
                generation_job, input_files=[base_mace_model_path]
            ),
            base_name=base_name,
            target_file=f"{generation_name}.xyz",
            input_atoms_list=input_structures,
            function=run_md,
            function_kwargs=function_kwargs,
        )

        structure_list = []
        for md_trajectory_path in md_trajectory_paths:
            # ":" selects every frame of the trajectory file.
            structure_list.extend(read(md_trajectory_path, ":", format="extxyz"))

        if self.verbose > 0:
            print(len(structure_list), "structures found from trajectory files.")

        # Directory listing order is not guaranteed; sorting keeps the order
        # of the committee calculators reproducible between runs.
        model_paths_list = sorted(committee_dir.glob(f"fit_*/{model_file_name}"))

        # The fit_0 model already drove the MD, so only the remaining
        # committee members are turned into extra calculators.
        list_of_other_calculators = [
            MACECalculator(
                model_paths=[mace_model_path],
                device="cpu",
                default_dtype="float64",
            )
            for mace_model_path in model_paths_list
            if mace_model_path != base_model
        ]

        high_sd_structures = find_high_sd_structures(
            structure_list=structure_list,
            base_name=base_name,
            job_dict=job_dict,
            list_of_other_calculators=list_of_other_calculators,
            verbose=self.verbose,
        )

        # Assign job IDs to high SD structures
        for job_id, structure in enumerate(high_sd_structures):
            structure.info["job_id"] = job_id

        return high_sd_structures

    def high_accuracy_evaluation(
        self,
        base_name: str,
        high_accuracy_eval_job_dict: dict,
        structures: list[Atoms],
    ) -> list[Atoms]:
        """Evaluate ``structures`` with Quantum Espresso and load the results.

        Args:
            base_name: Name of the sub-directory of ``results`` for this run.
            high_accuracy_eval_job_dict: Settings for the evaluation job. The
                ``"name"`` key is read here; the whole dict is forwarded to
                ``run_qe``.
            structures: Structures to evaluate, forwarded to
                ``qe_remote_submitter``.

        Returns:
            One ``Atoms`` object per path returned by ``qe_remote_submitter``,
            in the order the paths were returned.
        """
        high_accuracy_structure_paths = qe_remote_submitter(
            remote_info=get_remote_info(high_accuracy_eval_job_dict, input_files=[]),
            base_name=base_name,
            target_file=f"{high_accuracy_eval_job_dict['name']}.xyz",
            input_atoms_list=structures,
            function=run_qe,
            function_kwargs={
                "high_accuracy_eval_job_dict": high_accuracy_eval_job_dict,
            },
        )

        # ``read`` is called without an index, so ASE's default frame
        # selection applies (the last frame, per the ASE docs -- confirm if
        # an output file can ever hold more than one frame).
        return [
            read(path, format="extxyz") for path in high_accuracy_structure_paths
        ]