Skip to content
Merged
7 changes: 7 additions & 0 deletions backend/find_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ def get_pt_requirement(pt_version: str = "") -> dict:
# under the torch extra rather than the core deps (conda-forge has
# vesin but not vesin-torch).
"vesin[torch]",
# GPU O(N) cell-list neighbor list for large systems. Restricted to
# Python >= 3.11 (the package requires it while deepmd-kit still
# supports 3.10) and to Linux: it is a CUDA package, and its
# dependency warp-lang ships no macosx_x86_64 wheel, which otherwise
# makes the macOS x86_64 wheel build's dependency resolution
# unsatisfiable.
"nvalchemi-toolkit-ops>=0.3.1; python_version >= '3.11' and platform_system == 'Linux'",
*mpi_requirement,
*cibw_requirement,
],
Expand Down
48 changes: 40 additions & 8 deletions deepmd/pt/entrypoints/freeze_pt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,18 @@
from deepmd.dpmodel.utils.region import (
normalize_coord,
)
from deepmd.pt.model.descriptor.sezm_nn.so2 import (
SO2Linear,
)
from deepmd.pt.model.model import (
get_model,
)
from deepmd.pt.train.wrapper import (
ModelWrapper,
)
from deepmd.pt.utils.compile_compat import (
build_inductor_compile_options,
)
from deepmd.pt.utils.env import (
DEVICE,
)
Expand Down Expand Up @@ -219,6 +225,7 @@ def _collect_metadata(
model: torch.nn.Module,
output_keys: list[str],
is_spin: bool | None = None,
do_atomic_virial: bool = False,
) -> dict:
"""Assemble the flat metadata dict expected by :class:`DeepPotPTExpt`.

Expand Down Expand Up @@ -261,6 +268,8 @@ def _collect_metadata(
"mixed_types": bool(model.mixed_types()),
"has_message_passing": _model_has_message_passing(model),
"has_comm_artifact": False,
"do_atomic_virial": bool(do_atomic_virial),
"nnei": int(sum(model.get_sel())),
"has_default_fparam": bool(model.has_default_fparam()),
"default_fparam": _to_py_list(model.get_default_fparam()),
"default_chg_spin": _to_py_list(model.get_default_chg_spin()),
Expand Down Expand Up @@ -468,6 +477,7 @@ def freeze_sezm_to_pt2(
*,
device: torch.device | None = None,
head: str | None = None,
atomic_virial: bool = False,
) -> None:
"""Freeze a SeZM checkpoint into an AOTInductor ``.pt2`` archive.

Expand All @@ -484,10 +494,14 @@ def freeze_sezm_to_pt2(
Model head to export from a multi-task checkpoint. If omitted, the
``Default`` head is used when present; otherwise multi-task checkpoints
must pass an explicit head. Single-task checkpoints must pass ``None``.
atomic_virial
Whether to include per-atom virial outputs in the exported graph.
Disable this for fastest LAMMPS force/energy/total-virial inference.
"""
from torch._inductor import (
aoti_compile_and_package,
)
from torch._inductor import config as inductor_config

target_device = device if device is not None else DEVICE

Expand All @@ -507,16 +521,24 @@ def freeze_sezm_to_pt2(
model.eval()
model.to("cpu")

# The SO(2) linear mixer selects its block-diagonal vs dense matmul from a
# Python device branch that make_fx resolves at trace time. Since tracing
# always runs on CPU, pin the choice to the AOTI target device: non-CPU
# targets bake the block-diagonal contraction (which skips the structural
# off-|m| zeros); CPU targets keep the dense einsum that dodges the Inductor
# AVX2 codegen bug.
force_block_diag = target_device.type != "cpu"
for module in model.modules():
if isinstance(module, SO2Linear):
module._force_block_diag_matmul = force_block_diag

_, sample_inputs_cpu = _resolve_nframes(
model,
nloc=7,
device=torch.device("cpu"),
has_spin=is_spin,
)

# do_atomic_virial=True pulls every key that DeepPotPTExpt may read
# (energy, energy_redu, energy_derv_r, energy_derv_c, energy_derv_c_redu)
# into the traced graph.
if is_spin:
(
ext_coord,
Expand All @@ -537,7 +559,7 @@ def freeze_sezm_to_pt2(
fparam=fparam,
aparam=aparam,
charge_spin=charge_spin,
do_atomic_virial=True,
do_atomic_virial=atomic_virial,
)
else:
(
Expand All @@ -557,7 +579,7 @@ def freeze_sezm_to_pt2(
fparam=fparam,
aparam=aparam,
charge_spin=charge_spin,
do_atomic_virial=True,
do_atomic_virial=atomic_virial,
)

# Output key order is taken from a concrete run; Python dict order
Expand Down Expand Up @@ -587,9 +609,19 @@ def freeze_sezm_to_pt2(
exported = move_to_device_pass(exported, target_device)

out_path_str = str(out_path)
aoti_compile_and_package(exported, package_path=out_path_str)

metadata = _collect_metadata(model, output_keys=output_keys, is_spin=is_spin)
compile_options = build_inductor_compile_options()
# Keep AOTInductor aligned with the eval compile path. ``triton.max_tiles=1``
# keeps data-dependent edge axes on Triton's x grid, whose bound is large
# enough for production-scale neighbor lists.
with inductor_config.patch({**compile_options, "triton.max_tiles": 1}):
aoti_compile_and_package(exported, package_path=out_path_str)

metadata = _collect_metadata(
model,
output_keys=output_keys,
is_spin=is_spin,
do_atomic_virial=atomic_virial,
)
with zipfile.ZipFile(out_path_str, "a") as zf:
zf.writestr("model/extra/metadata.json", json.dumps(metadata))
# The raw training params are preserved so `dp change-bias` and
Expand Down
125 changes: 80 additions & 45 deletions deepmd/pt/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@
GLOBAL_PT_FLOAT_PRECISION,
RESERVED_PRECISION_DICT,
)
from deepmd.pt.utils.nv_nlist import (
NvNeighborList,
is_nv_available,
)
from deepmd.pt.utils.utils import (
to_numpy_array,
to_torch_tensor,
Expand Down Expand Up @@ -255,63 +259,94 @@ def __init__(
def _setup_nlist_backend(self, nlist_backend: str) -> None:
    """Resolve the neighbor-list construction strategy from a user choice.

    ``"native"`` uses the dense all-pairs builder; ``"vesin"`` / ``"nv"``
    force the O(N) ``vesin.torch`` / ``nvalchemiops`` cell list (raising if
    unavailable or the model/inputs are unsupported); ``"auto"`` picks the
    first available O(N) builder (vesin, then nv) and otherwise falls back to
    the native builder. Results are unchanged either way -- only the
    neighbor-search cost differs.

    The outcome is stored in ``self._nlist_builder``: a builder instance when
    an external strategy is selected, ``None`` when the native path is used.

    Parameters
    ----------
    nlist_backend
        One of ``"auto"``, ``"vesin"``, ``"nv"`` or ``"native"``.

    Raises
    ------
    ValueError
        If ``nlist_backend`` is not a known choice; if ``"vesin"``/``"nv"``
        is forced for an unsupported model (spin, hessian, data modifier,
        non-energy) or together with an explicitly supplied ASE neighbor
        list; or if ``"nv"`` is forced while ``DEVICE`` is not CUDA.
    ImportError
        If a forced backend's package is not installed.
    """
    inner = self.dp.model["Default"]
    self_built = getattr(inner, "use_self_built_nlist", None)
    if callable(self_built) and self_built():
        # The model builds its own neighbor list and runs the native path;
        # an external strategy would bypass it, so always use native.
        # NOTE: this return happens before the choice is validated, so an
        # unknown ``nlist_backend`` is ignored (not rejected) for such models.
        log.info(
            "Ignoring nlist_backend=%r: %s uses its own built-in neighbor list.",
            nlist_backend,
            type(inner).__name__,
        )
        self._nlist_builder = None
        return
    if nlist_backend not in ("auto", "vesin", "nv", "native"):
        raise ValueError(
            f"Unknown nlist_backend '{nlist_backend}'; "
            "expected 'auto', 'vesin', 'nv', or 'native'."
        )

    # reason an external strategy cannot be used (None means it can)
    unsupported = None
    if self._has_spin:
        unsupported = "spin models"
    elif self._has_hessian:
        unsupported = "hessian models"
    elif self.modifier is not None:
        # the strategy path runs forward_common_lower directly, bypassing
        # ModelWrapper.forward (which applies the data modifier); fall back
        # to the native path so the modifier is still applied.
        unsupported = "models with a data modifier"
    elif "energy" not in inner.model_output_type():
        # _eval_lower_strategy reconstructs the backend output from the
        # forward_common_lower / communicate keys via _OUTDEF_DP2BACKEND,
        # which matches the model's own translation only for the energy
        # model (e.g. the polar fitting key is "polarizability" but the
        # backend output is "polar"). Restrict strategies to energy models
        # and fall back to native for the other fitting types.
        unsupported = "non-energy models"
    ase_provided = self.neighbor_list is not None

    # ``None`` selects the native path ("native", or "auto" with nothing usable).
    builder = None
    if nlist_backend in ("vesin", "nv"):
        # A forced backend never falls back silently: every obstacle raises.
        if unsupported is not None:
            raise ValueError(
                f"nlist_backend='{nlist_backend}' is not supported for "
                f"{unsupported}; use nlist_backend='native' (or 'auto')."
            )
        if ase_provided:
            raise ValueError(
                f"nlist_backend='{nlist_backend}' conflicts with an "
                "explicitly supplied ASE neighbor_list; pass only one."
            )
        if nlist_backend == "vesin":
            if not is_vesin_torch_available():
                raise ImportError(
                    "nlist_backend='vesin' was requested but 'vesin.torch' "
                    "is not installed. Install it (`pip install "
                    "vesin[torch]`) or use nlist_backend='native' (or 'auto')."
                )
            builder = VesinNeighborList()
        elif DEVICE.type != "cuda":
            # Remaining forced choice is "nv"; the device is checked before
            # the package so CPU users get the device error first.
            raise ValueError(
                "nlist_backend='nv' requires CUDA inference tensors; "
                f"current DEVICE is {DEVICE!s}. Use nlist_backend='native' "
                "(or 'auto') for CPU inference."
            )
        elif not is_nv_available():
            raise ImportError(
                "nlist_backend='nv' was requested but 'nvalchemi-toolkit-ops'"
                " is not installed. Install it (`pip install "
                "nvalchemi-toolkit-ops`) or use nlist_backend='native' "
                "(or 'auto')."
            )
        else:
            builder = NvNeighborList()
    elif nlist_backend == "auto" and unsupported is None and not ase_provided:
        # Pick the first available O(N) builder; nv is GPU-only.
        if is_vesin_torch_available():
            builder = VesinNeighborList()
        elif is_nv_available() and DEVICE.type == "cuda":
            builder = NvNeighborList()
    self._nlist_builder = builder
Comment thread
OutisLi marked this conversation as resolved.

def get_rcut(self) -> float:
"""Get the cutoff radius of this model."""
Expand Down Expand Up @@ -659,8 +694,8 @@ def _eval_model(
do_atomic_virial = any(
x.category == OutputVariableCategory.DERV_C for x in request_defs
)
if self._use_vesin:
batch_output = self._eval_lower_vesin(
if self._nlist_builder is not None:
batch_output = self._eval_lower_strategy(
coord_input,
type_input,
box_input,
Expand Down Expand Up @@ -696,7 +731,7 @@ def _eval_model(
) # this is kinda hacky
return tuple(results)

def _eval_lower_vesin(
def _eval_lower_strategy(
self,
coord: torch.Tensor,
atype: torch.Tensor,
Expand All @@ -706,15 +741,15 @@ def _eval_lower_vesin(
charge_spin: torch.Tensor | None,
do_atomic_virial: bool,
) -> dict[str, torch.Tensor]:
"""Evaluate via the O(N) vesin-built ``(i,j,S)`` extended neighbor list.

Builds the extended representation with the vesin cell list, runs the
model's ``forward_common_lower``, and maps the extended outputs back to
local atoms with ``communicate_extended_output``. Returns a dict keyed
by backend names, matching the normal ``model()`` output so the caller's
extraction is unchanged. ``forward_common_atomic`` sets
``requires_grad`` on the extended coordinates internally, exactly as on
the native path, so forces/virials are produced identically.
"""Evaluate via the selected O(N) ``NeighborList`` strategy.

Builds the extended representation with ``self._nlist_builder`` (vesin or
nv), runs the model's ``forward_common_lower``, and maps the extended
outputs back to local atoms with ``communicate_extended_output``.
Returns a dict keyed by backend names, matching the normal ``model()``
output so the caller's extraction is unchanged. ``requires_grad`` is set
on the extended coordinates internally, exactly as on the native path, so
forces/virials are produced identically.
"""
inner = self.dp.model["Default"]
ext_coord, ext_atype, nlist, mapping = self._nlist_builder.build(
Expand Down
5 changes: 3 additions & 2 deletions deepmd/pt/model/atomic_model/sezm_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,8 +727,9 @@ def _build_dens_fitting_kwargs(self) -> dict[str, Any]:
"""Reconstruct SeZM `dens`-head kwargs from energy head and descriptor."""
descriptor = self.descriptor
kwargs = self._build_ener_fitting_kwargs()
kwargs["condition_lmax"] = int(descriptor.l_schedule[0])
kwargs["latent_lmax"] = int(descriptor.l_schedule[-1])
node_l_schedule = getattr(descriptor, "node_l_schedule", descriptor.l_schedule)
kwargs["condition_lmax"] = int(node_l_schedule[0])
kwargs["latent_lmax"] = int(node_l_schedule[-1])
kwargs["channels"] = int(descriptor.channels)
return kwargs

Expand Down
Loading
Loading