NEQUIPSettings#

pydantic model autoplex.settings.NEQUIPSettings#

Model describing the hyperparameters for the NEQUIP fits.
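
A minimal sketch of overriding a few of these hyperparameters, assuming standard pydantic v2 BaseModel semantics (which the field declarations above suggest); the values below are illustrative:

    from autoplex.settings import NEQUIPSettings

    # Override a few defaults; unspecified fields keep the values
    # documented below.
    settings = NEQUIPSettings(
        r_max=5.0,
        num_layers=5,
        chemical_symbols=["Si", "O"],
    )

    # Assuming pydantic v2: export the settings as a plain dict, using the
    # declared aliases (e.g. 'PolynomialCutoff_p') for the aliased fields.
    config = settings.model_dump(by_alias=True)
    print(config["r_max"])  # 5.0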

field root: str = 'results'#

Root directory

field run_name: str = 'autoplex'#

Name of the run

field seed: int = 123#

Model seed

field dataset_seed: int = 123#

Dataset seed

field append: bool = False#

When true, a restarted run will append to the previous log file

field default_dtype: str = 'float64'#

Default data type

field model_dtype: str = 'float64'#

Model data type

field allow_tf32: bool = True#

Consider setting to false if you plan to mix training/inference over any devices that are not NVIDIA Ampere or later

field r_max: float = 4.0#

Radial cutoff distance

field num_layers: int = 4#

Number of layers

field l_max: int = 2#

Maximum degree of spherical harmonics

field parity: bool = True#

Whether to include features with odd mirror parity; often turning parity off gives equally good results but faster networks

field num_features: int = 32#

Number of features

field nonlinearity_type: Literal['gate', 'norm'] = 'gate'#

Type of nonlinearity; ‘gate’ is recommended

field nonlinearity_scalars: Nonlinearity [Optional]#

Nonlinearity scalars

field nonlinearity_gates: Nonlinearity [Optional]#

Nonlinearity gates

field num_basis: int = 8#

Number of basis functions used in the radial basis

field besselbasis_trainable: bool = True (alias 'BesselBasis_trainable')#

If true, train the Bessel weights

field polynomialcutoff_p: int = 5 (alias 'PolynomialCutoff_p')#

p-exponent used in the polynomial cutoff function; a smaller p corresponds to stronger decay with distance
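
Both aliased fields can be set under their NequIP-style alias names when constructing the model; a short sketch, assuming pydantic's default alias validation:

    from autoplex.settings import NEQUIPSettings

    # Pydantic populates aliased fields by their alias by default, so the
    # NequIP-style names can be passed directly.
    settings = NEQUIPSettings(BesselBasis_trainable=False, PolynomialCutoff_p=6)
    print(settings.besselbasis_trainable)  # False
    print(settings.polynomialcutoff_p)     # 6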

field invariant_layers: int = 2#

Number of radial layers; smaller is faster

field invariant_neurons: int = 64#

Number of hidden neurons in the radial function; smaller is faster

field avg_num_neighbors: None | Literal['auto'] = 'auto'#

Number of neighbors to divide by; None means no normalization, while ‘auto’ computes it from the dataset

field use_sc: bool = True#

Whether to use self-connection; usually gives a big improvement

field dataset: Literal['ase'] = 'ase'#

Type of dataset; can be npz or ase. Note that autoplex only supports ase at this point

field validation_dataset: Literal['ase'] = 'ase'#

Type of validation dataset; can be npz or ase. Note that autoplex only supports ase at this point

field dataset_file_name: str = './train_nequip.extxyz'#

Name of the dataset file

field validation_dataset_file_name: str = './test.extxyz'#

Name of the validation dataset file

field ase_args: dict = {'format': 'extxyz'}#

Any arguments needed by ase.io.read
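
The dict is forwarded to ase.io.read; the default {'format': 'extxyz'} corresponds to a call like the following (standard ASE API):

    from ase.io import read

    # Read all frames from the training file, as with
    # ase_args={'format': 'extxyz'}.
    frames = read("./train_nequip.extxyz", index=":", format="extxyz")
    print(len(frames), "configurations")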

field dataset_key_mapping: dict = {'energy': 'total_energy', 'forces': 'forces'}#

Mapping of keys in the dataset to the expected keys

field validation_dataset_key_mapping: dict = {'energy': 'total_energy', 'forces': 'forces'}#

Mapping of keys in the validation dataset to the expected keys
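
To illustrate the mapping semantics only (this is not NequIP's internal code): keys on the left are the names found in the dataset file, and keys on the right are the names the fit expects.

    # Illustration with made-up values: applying a key mapping to one frame.
    key_mapping = {"energy": "total_energy", "forces": "forces"}
    raw = {"energy": -10.5, "forces": [[0.0, 0.1, 0.0]]}
    mapped = {key_mapping.get(key, key): value for key, value in raw.items()}
    print(mapped)  # {'total_energy': -10.5, 'forces': [[0.0, 0.1, 0.0]]}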

field chemical_symbols: list[str] = []#

List of chemical symbols

field wandb: bool = False#

Use wandb for logging

field verbose: Literal['debug', 'info', 'warning', 'error', 'critical'] = 'info'#

Verbosity level

field log_batch_freq: int = 10#

Batch frequency, how often to print training errors within the same epoch

field log_epoch_freq: int = 1#

Epoch frequency, how often to print training errors

field save_checkpoint_freq: int = -1#

Frequency to save the intermediate checkpoint. No saving of intermediate checkpoints when the value is not positive.

field save_ema_checkpoint_freq: int = -1#

Frequency to save the intermediate EMA checkpoint. No saving of intermediate EMA checkpoints when the value is not positive.

field n_train: int = 1000#

Number of training samples

field n_val: int = 1000#

Number of validation samples

field learning_rate: float = 0.005#

Learning rate

field batch_size: int = 5#

Batch size

field validation_batch_size: int = 10#

Validation batch size

field max_epochs: int = 10000#

Maximum number of epochs

field shuffle: bool = True#

Shuffle the dataset

field metrics_key: str = 'validation_loss'#

Metric used for scheduling and saving the best model

field use_ema: bool = True#

Use exponential moving average on weights for val/test

field ema_decay: float = 0.99#

Exponential moving average decay

field ema_use_num_updates: bool = True#

Use number of updates for EMA decay

field report_init_validation: bool = True#

Report the validation error for the just-initialized model

field early_stopping_patiences: dict = {'validation_loss': 50}#

Stop early if a metric value has stopped decreasing for n epochs

field early_stopping_lower_bounds: dict = {'LR': 1e-05}#

Stop early if a metric value is lower than the given value
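
Both early-stopping dicts can be overridden together; a sketch with illustrative thresholds:

    from autoplex.settings import NEQUIPSettings

    # Stop if the validation loss has not improved for 100 epochs, or once
    # the learning rate drops below 1e-6 (illustrative values).
    settings = NEQUIPSettings(
        early_stopping_patiences={"validation_loss": 100},
        early_stopping_lower_bounds={"LR": 1e-6},
    )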

field loss_coeffs: LossCoeff [Optional]#

Loss coefficients

field metrics_components: list [Optional]#

Metrics components

field optimizer_name: str = 'Adam'#

Optimizer name

field optimizer_amsgrad: bool = True#

Use AMSGrad variant of Adam

field lr_scheduler_name: str = 'ReduceLROnPlateau'#

Learning rate scheduler name

field lr_scheduler_patience: int = 100#

Patience for learning rate scheduler

field lr_scheduler_factor: float = 0.5#

Factor for learning rate scheduler
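
These scheduler fields presumably mirror the arguments of PyTorch's ReduceLROnPlateau; as a sketch, the defaults correspond to a scheduler constructed like this (the dummy parameter exists only to build an optimizer):

    import torch

    # Dummy parameter/optimizer just to show the scheduler construction.
    param = torch.nn.Parameter(torch.zeros(1))
    optimizer = torch.optim.Adam([param], lr=0.005, amsgrad=True)

    # Halve the learning rate after 100 epochs without improvement, matching
    # lr_scheduler_factor=0.5 and lr_scheduler_patience=100.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.5, patience=100
    )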

field per_species_rescale_shifts_trainable: bool = False#

Whether the shifts are trainable. Defaults to False.

field per_species_rescale_scales_trainable: bool = False#

Whether the scales are trainable. Defaults to False.

field per_species_rescale_shifts: float | list[float] | Literal['dataset_per_atom_total_energy_mean', 'dataset_per_species_total_energy_mean'] = 'dataset_per_atom_total_energy_mean'#

The value can be a constant float, an array with one value per species, or a string. If float values are provided, they must be in the same energy units as the training data

field per_species_rescale_scales: float | list[float] | Literal['dataset_forces_absmax', 'dataset_per_atom_total_energy_std', 'dataset_per_species_total_energy_std', 'dataset_per_species_forces_rms'] = 'dataset_per_species_forces_rms'#

The value can be a constant float, an array with one value per species, or a string. If float values are provided, they must be in the same energy units as the training data
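
When explicit values are used instead of the dataset statistics, they are given per species (presumably in the order of chemical_symbols); a sketch with made-up numbers:

    from autoplex.settings import NEQUIPSettings

    # Hypothetical two-species system; the shift values below are made up
    # and must be in the same energy units as the training data.
    settings = NEQUIPSettings(
        chemical_symbols=["Si", "O"],
        per_species_rescale_shifts=[-157.7, -428.9],
        per_species_rescale_scales="dataset_per_species_forces_rms",
    )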