Skip to content

Commit

Permalink
Add the argument tensorboard_freq to control sampling ratio during …
Browse files Browse the repository at this point in the history
…training. (#996)
  • Loading branch information
shishaochen authored Aug 19, 2021
1 parent 4f54de0 commit cf3e7d9
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 3 deletions.
5 changes: 3 additions & 2 deletions deepmd/train/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def _init_param(self, jdata):
self.profiling_file = tr_data.get('profiling_file', 'timeline.json')
self.tensorboard = self.run_opt.is_chief and tr_data.get('tensorboard', False)
self.tensorboard_log_dir = tr_data.get('tensorboard_log_dir', 'log')
self.tensorboard_freq = tr_data.get('tensorboard_freq', 1)
# self.sys_probs = tr_data['sys_probs']
# self.auto_prob_style = tr_data['auto_prob']
self.useBN = False
Expand Down Expand Up @@ -475,11 +476,11 @@ def train (self, train_data = None, valid_data=None) :
train_feed_dict = self.get_feed_dict(train_batch, is_training=True)
# use tensorboard to visualize the training of deepmd-kit
# it will takes some extra execution time to generate the tensorboard data
if self.tensorboard :
if self.tensorboard and (cur_batch % self.tensorboard_freq == 0):
summary, _ = run_sess(self.sess, [summary_merged_op, self.train_op], feed_dict=train_feed_dict,
options=prf_options, run_metadata=prf_run_metadata)
tb_train_writer.add_summary(summary, cur_batch)
else :
else:
run_sess(self.sess, [self.train_op], feed_dict=train_feed_dict,
options=prf_options, run_metadata=prf_run_metadata)
if self.timing_in_training: toc = time.time()
Expand Down
2 changes: 2 additions & 0 deletions deepmd/utils/argcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,7 @@ def training_args(): # ! modified by Ziyao: data configuration isolated.
doc_profiling_file = 'Output file for profiling.'
doc_tensorboard = 'Enable tensorboard'
doc_tensorboard_log_dir = 'The log directory of tensorboard outputs'
doc_tensorboard_freq = 'The frequency of writing tensorboard events.'

arg_training_data = training_data_args()
arg_validation_data = validation_data_args()
Expand All @@ -591,6 +592,7 @@ def training_args(): # ! modified by Ziyao: data configuration isolated.
Argument("profiling_file", str, optional=True, default='timeline.json', doc=doc_profiling_file),
Argument("tensorboard", bool, optional=True, default=False, doc=doc_tensorboard),
Argument("tensorboard_log_dir", str, optional=True, default='log', doc=doc_tensorboard_log_dir),
Argument("tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq),
]

doc_training = 'The training options.'
Expand Down
7 changes: 7 additions & 0 deletions doc/train-input-auto.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1485,3 +1485,10 @@ training:
The log directory of tensorboard outputs

.. _`training/tensorboard_freq`:

tensorboard_freq:
| type: ``int``, optional, default: ``1``
| argument path: ``training/tensorboard_freq``
The frequency of writing tensorboard events.
3 changes: 2 additions & 1 deletion doc/train/tensorboard.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ subsection will enable the tensorboard data analysis. eg. **water_se_a.json**.
"time_training":true,
"tensorboard": true,
"tensorboard_log_dir":"log",
"tensorboard_freq": 1000,
"profiling": false,
"profiling_file":"timeline.json",
"_comment": "that's all"
Expand All @@ -53,7 +54,7 @@ Once you have event files, run TensorBoard and provide the log directory. This
should print that TensorBoard has started. Next, connect to http://tensorboard_server_ip:6006.

TensorBoard requires a logdir to read logs from. For info on configuring TensorBoard, run tensorboard --help.
One can easily change the log name with "tensorboard_log_dir".
One can easily change the log name with "tensorboard_log_dir" and the sampling frequency with "tensorboard_freq".

```bash
tensorboard --logdir path/to/logs
Expand Down

0 comments on commit cf3e7d9

Please sign in to comment.