docs(pt): examples for new dpa2 model (#4138)

- small: 3 layers; with three-body embedding; without g2 attention
- medium: 6 layers; with three-body embedding; with g2 attention
- large: 12 layers; with three-body embedding; with g2 attention

## Summary by CodeRabbit

- **New Features**
  - Introduced comprehensive JSON configuration files for the DPA2 model, enhancing setup for molecular simulations.
  - Added detailed README documentation outlining model configurations and input files, aiding user selection based on precision and efficiency needs.
  - Added parameters for three-body interactions to improve model accuracy.
  - Configured learning rate settings and loss function preferences for better training dynamics.
- **Bug Fixes**
  - Expanded test coverage by including multiple input file variations for the DPA2 example, ensuring more robust testing.
- **Documentation**
  - Updated training example reference for clarity and included links to README for input variations.
deepmodeling · Sep 20, 2024 · 83abc7b · 83abc7b
1 parent e1b6aec
commit 83abc7b
Show file tree

Hide file tree

Showing 6 changed files with 260 additions and 9 deletions.
diff --git a/doc/model/dpa2.md b/doc/model/dpa2.md
@@ -6,7 +6,7 @@
 
 The DPA-2 model implementation. See https://arxiv.org/abs/2312.15492 for more details.
 
-Training example: `examples/water/dpa2/input_torch.json`.
+Training example: `examples/water/dpa2/input_torch_medium.json`; see the [README](../../examples/water/dpa2/README.md) for inputs at different levels of model complexity.
 
 ## Data format
 

diff --git a/examples/water/dpa2/README.md b/examples/water/dpa2/README.md
@@ -0,0 +1,15 @@
+## Inputs for DPA-2 model
+
+This directory contains the input files for training the DPA-2 model (currently, only the PyTorch backend is supported). Depending on your precision/efficiency requirements, we provide three inputs with different levels of model complexity:
+
+- `input_torch_small.json`: Our smallest DPA-2 model, optimized for speed.
+- `input_torch_medium.json` (Recommended): Our well-performing DPA-2 model, balancing efficiency and precision. This is a good starting point for most users.
+- `input_torch_large.json`: Our most complex model with the highest precision, suitable for very intricate data structures.
+
+For detailed differences in their configurations, please refer to the table below:
+
+| Input                     | Repformer layers | Three-body embedding in Repinit | Pair-wise attention in Repformer | Tuned sub-structures in [#4089](https://github.com/deepmodeling/deepmd-kit/pull/4089) | Description                                                                  |
+| ------------------------- | ---------------- | ------------------------------- | -------------------------------- | ------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
+| `input_torch_small.json`  | 3                | ✓                               | ✗                                | ✓                                                                                     | Smallest DPA-2 model, optimized for speed.                                   |
+| `input_torch_medium.json` | 6                | ✓                               | ✓                                | ✓                                                                                     | Recommended well-performing DPA-2 model, balancing efficiency and precision. |
+| `input_torch_large.json`  | 12               | ✓                               | ✓                                | ✓                                                                                     | Most complex model with the highest precision.                               |
diff --git a/examples/water/dpa2/input_torch.json b/examples/water/dpa2/input_torch_large.json
@@ -9,16 +9,20 @@
       "type": "dpa2",
       "repinit": {
         "tebd_dim": 8,
-        "rcut": 9.0,
-        "rcut_smth": 8.0,
+        "rcut": 6.0,
+        "rcut_smth": 0.5,
         "nsel": 120,
         "neuron": [
           25,
           50,
           100
         ],
         "axis_neuron": 12,
-        "activation_function": "tanh"
+        "activation_function": "tanh",
+        "three_body_sel": 40,
+        "three_body_rcut": 4.0,
+        "three_body_rcut_smth": 3.5,
+        "use_three_body": true
       },
       "repformer": {
         "rcut": 4.0,
@@ -36,10 +40,16 @@
         "update_g1_has_conv": true,
         "update_g1_has_grrg": true,
         "update_g1_has_drrd": true,
-        "update_g1_has_attn": true,
-        "update_g2_has_g1g1": true,
+        "update_g1_has_attn": false,
+        "update_g2_has_g1g1": false,
         "update_g2_has_attn": true,
-        "attn2_has_gate": true
+        "update_style": "res_residual",
+        "update_residual": 0.01,
+        "update_residual_init": "norm",
+        "attn2_has_gate": true,
+        "use_sqrt_nnei": true,
+        "g1_out_conv": true,
+        "g1_out_mlp": true
       },
       "add_tebd_to_repinit_out": false
     },
@@ -58,7 +68,7 @@
   "learning_rate": {
     "type": "exp",
     "decay_steps": 5000,
-    "start_lr": 0.0002,
+    "start_lr": 0.001,
     "stop_lr": 3.51e-08,
     "_comment": "that's all"
   },

diff --git a/examples/water/dpa2/input_torch_medium.json b/examples/water/dpa2/input_torch_medium.json
@@ -0,0 +1,112 @@
+{
+  "_comment": "that's all",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "dpa2",
+      "repinit": {
+        "tebd_dim": 8,
+        "rcut": 6.0,
+        "rcut_smth": 0.5,
+        "nsel": 120,
+        "neuron": [
+          25,
+          50,
+          100
+        ],
+        "axis_neuron": 12,
+        "activation_function": "tanh",
+        "three_body_sel": 40,
+        "three_body_rcut": 4.0,
+        "three_body_rcut_smth": 3.5,
+        "use_three_body": true
+      },
+      "repformer": {
+        "rcut": 4.0,
+        "rcut_smth": 3.5,
+        "nsel": 40,
+        "nlayers": 6,
+        "g1_dim": 128,
+        "g2_dim": 32,
+        "attn2_hidden": 32,
+        "attn2_nhead": 4,
+        "attn1_hidden": 128,
+        "attn1_nhead": 4,
+        "axis_neuron": 4,
+        "update_h2": false,
+        "update_g1_has_conv": true,
+        "update_g1_has_grrg": true,
+        "update_g1_has_drrd": true,
+        "update_g1_has_attn": false,
+        "update_g2_has_g1g1": false,
+        "update_g2_has_attn": true,
+        "update_style": "res_residual",
+        "update_residual": 0.01,
+        "update_residual_init": "norm",
+        "attn2_has_gate": true,
+        "use_sqrt_nnei": true,
+        "g1_out_conv": true,
+        "g1_out_mlp": true
+      },
+      "add_tebd_to_repinit_out": false
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
+  "training": {
+    "stat_file": "./dpa2.hdf5",
+    "training_data": {
+      "systems": [
+        "../data/data_0",
+        "../data/data_1",
+        "../data/data_2"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "warmup_steps": 0,
+    "gradient_max_norm": 5.0,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 2000,
+    "_comment": "that's all"
+  }
+}
diff --git a/examples/water/dpa2/input_torch_small.json b/examples/water/dpa2/input_torch_small.json
@@ -0,0 +1,112 @@
+{
+  "_comment": "that's all",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "dpa2",
+      "repinit": {
+        "tebd_dim": 8,
+        "rcut": 6.0,
+        "rcut_smth": 0.5,
+        "nsel": 120,
+        "neuron": [
+          25,
+          50,
+          100
+        ],
+        "axis_neuron": 12,
+        "activation_function": "tanh",
+        "three_body_sel": 40,
+        "three_body_rcut": 4.0,
+        "three_body_rcut_smth": 3.5,
+        "use_three_body": true
+      },
+      "repformer": {
+        "rcut": 4.0,
+        "rcut_smth": 3.5,
+        "nsel": 40,
+        "nlayers": 3,
+        "g1_dim": 128,
+        "g2_dim": 32,
+        "attn2_hidden": 32,
+        "attn2_nhead": 4,
+        "attn1_hidden": 128,
+        "attn1_nhead": 4,
+        "axis_neuron": 4,
+        "update_h2": false,
+        "update_g1_has_conv": true,
+        "update_g1_has_grrg": true,
+        "update_g1_has_drrd": true,
+        "update_g1_has_attn": false,
+        "update_g2_has_g1g1": false,
+        "update_g2_has_attn": false,
+        "update_style": "res_residual",
+        "update_residual": 0.01,
+        "update_residual_init": "norm",
+        "attn2_has_gate": true,
+        "use_sqrt_nnei": true,
+        "g1_out_conv": true,
+        "g1_out_mlp": true
+      },
+      "add_tebd_to_repinit_out": false
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
+  "training": {
+    "stat_file": "./dpa2.hdf5",
+    "training_data": {
+      "systems": [
+        "../data/data_0",
+        "../data/data_1",
+        "../data/data_2"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "warmup_steps": 0,
+    "gradient_max_norm": 5.0,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 2000,
+    "_comment": "that's all"
+  }
+}
diff --git a/source/tests/common/test_examples.py b/source/tests/common/test_examples.py
@@ -52,7 +52,9 @@
     p_examples / "dprc" / "generalized_force" / "input.json",
     p_examples / "water" / "se_e2_a" / "input_torch.json",
     p_examples / "water" / "se_atten" / "input_torch.json",
-    p_examples / "water" / "dpa2" / "input_torch.json",
+    p_examples / "water" / "dpa2" / "input_torch_small.json",
+    p_examples / "water" / "dpa2" / "input_torch_medium.json",
+    p_examples / "water" / "dpa2" / "input_torch_large.json",
     p_examples / "property" / "train" / "input_torch.json",
     p_examples / "water" / "se_e3_tebd" / "input_torch.json",
 )