experiment with distributed strategy

GoogleCloudPlatform · Sep 28, 2021 · eb89841
1 parent 1721db4
commit eb89841
Showing 1 changed file with 8 additions and 5 deletions.
diff --git a/people-and-planet-ai/timeseries-classification/trainer.py b/people-and-planet-ai/timeseries-classification/trainer.py
@@ -192,7 +192,8 @@ def run(
     # For this sample we are using a mirrored distribution strategy,
     # which consists of a single machine with multiple GPUs.
     #   https://blog.tensorflow.org/2020/12/getting-started-with-distributed-tensorflow-on-gcp.html
-    distributed_strategy = tf.distribute.MirroredStrategy()
+    # distributed_strategy = tf.distribute.MirroredStrategy()
+    distributed_strategy = tf.distribute.get_strategy()
 
     # Create the training and evaluation datasets from the TFRecord files.
     logging.info("Creating datasets")
@@ -215,10 +216,12 @@ def run(
     # Train the model.
     logging.info("Training the model")
     model.fit(
-        train_dataset.repeat(),
-        steps_per_epoch=train_steps,
-        validation_data=eval_dataset.repeat(),
-        validation_steps=eval_steps,
+        # train_dataset.repeat(),
+        # steps_per_epoch=train_steps,
+        # validation_data=eval_dataset.repeat(),
+        # validation_steps=eval_steps,
+        train_dataset,
+        validation_data=eval_dataset,
         callbacks=[
             keras.callbacks.TensorBoard(tensorboard_dir, update_freq="batch"),
             keras.callbacks.ModelCheckpoint(