from pathlib import Path
import pandas as pd
from ase import Atoms
from ase.io import read, write
from mace.calculators import MACECalculator
from alomancy.configs.remote_info import get_remote_info
from alomancy.core.base_active_learning import BaseActiveLearningWorkflow
from alomancy.high_accuracy_evaluation.dft.qe_remote_submitter import (
qe_remote_submitter,
)
from alomancy.high_accuracy_evaluation.dft.run_qe import run_qe
from alomancy.mlip.committee_remote_submitter import committee_remote_submitter
from alomancy.mlip.get_mace_eval_info import (
get_mace_eval_info,
)
from alomancy.mlip.mace_wfl import mace_fit
from alomancy.structure_generation.find_high_sd_structures import (
find_high_sd_structures,
)
from alomancy.structure_generation.md.md_remote_submitter import md_remote_submitter
from alomancy.structure_generation.md.md_wfl import run_md
from alomancy.structure_generation.select_initial_structures import (
select_initial_structures,
)
[docs]
class ActiveLearningStandardMACE(BaseActiveLearningWorkflow):
    """
    AL Technique: Committee
    MLIP: MACE
    Structure Generation: MD
    High-Accuracy Evaluation: Quantum Espresso (DFT)

    The three methods below implement the individual stages of one
    active-learning iteration. All artefacts are read from / written to
    ``results/<base_name>/...`` relative to the current working directory.
    """

    def train_mlip(self, base_name: str, mlip_committee_job_dict: dict) -> pd.DataFrame:
        """Fit the MACE committee and return its evaluation summary.

        ``mace_fit`` is dispatched through ``committee_remote_submitter`` with
        a fixed seed (803) and the committee size taken from the job dict.
        ``results/<base_name>/train_set.xyz`` and ``test_set.xyz`` are handed
        to ``get_remote_info`` as input files.

        Args:
            base_name: Name of the run directory below ``results``.
            mlip_committee_job_dict: Job settings; ``"name"`` and
                ``"size_of_committee"`` are read here. An empty
                ``"mace_fit_kwargs"`` entry is inserted in place when absent.

        Returns:
            The result of ``get_mace_eval_info`` for this job dict.
        """
        workdir = Path("results", base_name)

        # Inserted in place (not on a copy) because the same dict object is
        # forwarded to the fitting function below.
        mlip_committee_job_dict.setdefault("mace_fit_kwargs", {})

        committee_remote_submitter(
            remote_info=get_remote_info(
                mlip_committee_job_dict,
                input_files=[
                    str(workdir / "train_set.xyz"),
                    str(workdir / "test_set.xyz"),
                ],
            ),
            base_name=base_name,
            target_file=f"{mlip_committee_job_dict['name']}_stagetwo_compiled.model",
            seed=803,
            size_of_committee=mlip_committee_job_dict["size_of_committee"],
            function=mace_fit,
            function_kwargs={
                "mlip_committee_job_dict": mlip_committee_job_dict,
                "workdir_str": str(workdir),
            },
        )

        return get_mace_eval_info(mlip_committee_job_dict=mlip_committee_job_dict)

    def generate_structures(
        self, base_name: str, job_dict: dict, train_atoms_list: list[Atoms]
    ) -> list[Atoms]:
        """Run MD with one committee member and return high-SD structures.

        Steps, in order:

        1. Pick MD starting structures with ``select_initial_structures`` and
           write them to
           ``results/<base_name>/<md name>/<md name>_input_structures.xyz``.
        2. Run ``run_md`` through ``md_remote_submitter`` using the ``fit_0``
           committee model.
        3. Read every frame of every returned trajectory file.
        4. Build one CPU / float64 ``MACECalculator`` for each remaining
           ``fit_*`` model and pass them to ``find_high_sd_structures``.
        5. Tag each returned structure with a sequential ``info["job_id"]``.

        Args:
            base_name: Name of the run directory below ``results``.
            job_dict: Full job settings; the ``"structure_generation"`` and
                ``"mlip_committee"`` sub-dicts are read. Empty
                ``"structure_selection_kwargs"`` and ``"run_md_kwargs"``
                entries are inserted into ``"structure_generation"`` in place
                when absent.
            train_atoms_list: Training structures forwarded to
                ``select_initial_structures``.

        Returns:
            The structures returned by ``find_high_sd_structures``, each with
            ``info["job_id"]`` set to its position in the list.
        """
        md_job = job_dict["structure_generation"]
        md_job.setdefault("structure_selection_kwargs", {})

        input_structures = select_initial_structures(
            base_name=base_name,
            structure_generation_job_dict=md_job,
            train_atoms_list=train_atoms_list,  # type: ignore
            verbose=self.verbose,
            **md_job["structure_selection_kwargs"],
        )

        md_name = md_job["name"]
        md_dir = Path("results", base_name, md_name)
        md_dir.mkdir(parents=True, exist_ok=True)
        write(
            md_dir / f"{md_name}_input_structures.xyz",
            input_structures,
            format="extxyz",
        )

        committee_name = job_dict["mlip_committee"]["name"]
        committee_dir = Path("results", base_name, committee_name)
        # MD is driven by the first committee member only.
        base_mace_model_path = str(
            committee_dir / "fit_0" / f"{committee_name}_stagetwo.model"
        )

        md_job.setdefault("run_md_kwargs", {})
        function_kwargs = {
            "structure_generation_job_dict": md_job,
            "total_md_runs": len(input_structures),
            "model_path": [
                base_mace_model_path
            ],  # need to pass model path to preserve consistent dtype
            "verbose": self.verbose,
            **md_job["run_md_kwargs"],
        }

        md_trajectory_paths = md_remote_submitter(
            remote_info=get_remote_info(md_job, input_files=[base_mace_model_path]),
            base_name=base_name,
            target_file=f"{md_name}.xyz",
            input_atoms_list=input_structures,
            function=run_md,
            function_kwargs=function_kwargs,
        )

        # Flatten all frames (index ":") of all trajectories into one list.
        structure_list = [
            atoms
            for md_trajectory_path in md_trajectory_paths
            for atoms in read(md_trajectory_path, ":", format="extxyz")
        ]

        if self.verbose > 0:
            print(len(structure_list), "structures found from trajectory files.")

        # Every committee model except the one that already drove the MD.
        list_of_other_calculators = [
            MACECalculator(
                model_paths=[mace_model_path],
                device="cpu",
                default_dtype="float64",
            )
            for mace_model_path in committee_dir.glob(
                f"fit_*/{committee_name}_stagetwo.model"
            )
            if str(mace_model_path) != base_mace_model_path
        ]

        high_sd_structures = find_high_sd_structures(
            structure_list=structure_list,
            base_name=base_name,
            job_dict=job_dict,
            list_of_other_calculators=list_of_other_calculators,
            verbose=self.verbose,
        )

        # Assign job IDs to high SD structures
        for job_id, atoms in enumerate(high_sd_structures):
            atoms.info["job_id"] = job_id

        return high_sd_structures

    def high_accuracy_evaluation(
        self,
        base_name: str,
        high_accuracy_eval_job_dict: dict,
        structures: list[Atoms],
    ) -> list[Atoms]:
        """Evaluate ``structures`` with Quantum Espresso and load the results.

        ``run_qe`` is dispatched through ``qe_remote_submitter``; each path it
        returns is then read as an extxyz file.

        Args:
            base_name: Name of the run directory below ``results``.
            high_accuracy_eval_job_dict: Job settings; ``"name"`` is read here
                to build the target file name, and the whole dict is forwarded
                to ``run_qe``.
            structures: Structures to evaluate.

        Returns:
            One entry per returned path, in the order the submitter returned
            them. ``read`` is called without an index, so a single frame is
            loaded per file.
        """
        high_accuracy_structure_paths = qe_remote_submitter(
            remote_info=get_remote_info(high_accuracy_eval_job_dict, input_files=[]),
            base_name=base_name,
            target_file=f"{high_accuracy_eval_job_dict['name']}.xyz",
            input_atoms_list=structures,
            function=run_qe,
            function_kwargs={
                "high_accuracy_eval_job_dict": high_accuracy_eval_job_dict,
            },
        )

        return [
            read(path, format="extxyz") for path in high_accuracy_structure_paths
        ]