Skip to content

Commit

Permalink
fix travis
Browse files — browse the repository at this point in the history
  • Loading branch information
jrxk committed Nov 9, 2020
1 parent 2082208 commit 8758d09
Show file tree
Hide file tree
Showing 12 changed files with 1,142 additions and 1,113 deletions.
22 changes: 11 additions & 11 deletions examples/text_classification/config_classifier.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Model configuration for the BERT-based IMDB sentiment classifier.
name = "bert_classifier"
hidden_size = 768           # hidden dimension of the encoder output
clas_strategy = "cls_time"  # classification strategy label -- presumably "use the CLS time step"; confirm
dropout = 0.1
num_classes = 2

# These hyperparameter search ranges are consumed by the
# bert_with_hypertuning_main.py example.
hyperparams = {
    "optimizer.warmup_steps": dict(start=10000, end=20000, dtype=int),
    "optimizer.static_lr": dict(start=1e-3, end=1e-2, dtype=float),
}
# Configuration of the BERT classifier head used by the IMDB example.
name = "bert_classifier"
hidden_size = 768  # encoder hidden dimension (768 matches BERT-base -- confirm)
clas_strategy = "cls_time"  # presumably: classify from the CLS-position representation; verify
dropout = 0.1  # dropout rate applied in the classifier
num_classes = 2  # binary sentiment labels
 
# These hyperparameter search ranges are used by the
# bert_with_hypertuning_main.py example.
hyperparams = {
"optimizer.warmup_steps": {"start": 10000, "end": 20000, "dtype": int},
"optimizer.static_lr": {"start": 1e-3, "end": 1e-2, "dtype": float}
}
136 changes: 68 additions & 68 deletions examples/text_classification/config_data.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,68 @@
# Data and training configuration for the IMDB text-classification example.
pickle_data_dir = "data/IMDB"  # directory holding the pickled feature files
max_seq_length = 64
num_classes = 2
num_train_data = 25000

# used for bert executor example
max_batch_tokens = 128

train_batch_size = 32
max_train_epoch = 5
display_steps = 50  # Print training loss every display_steps; -1 to disable

# tbx config
tbx_logging_steps = 5  # log the metrics for tbX visualization
tbx_log_dir = "runs/"
exp_number = 1  # experiment number

eval_steps = 100  # Eval on the dev set every eval_steps; -1 to disable
# Proportion of training to perform linear learning rate warmup for.
# E.g., 0.1 = 10% of training.
warmup_proportion = 0.1
eval_batch_size = 8
test_batch_size = 8

# Feature spec for reading pickled examples: each feature is read as
# dtype `int64`, "stacked_tensor" indicates a fixed length per instance,
# and sequence features are capped at `max_seq_length`.
feature_types = dict(
    input_ids=["int64", "stacked_tensor", max_seq_length],
    input_mask=["int64", "stacked_tensor", max_seq_length],
    segment_ids=["int64", "stacked_tensor", max_seq_length],
    label_ids=["int64", "stacked_tensor"],
)

# Hyperparameters for the training data iterator.
train_hparam = dict(
    allow_smaller_final_batch=False,
    batch_size=train_batch_size,
    dataset=dict(
        data_name="data",
        feature_types=feature_types,
        files="{}/train.pkl".format(pickle_data_dir),
    ),
    shuffle=True,
    shuffle_buffer_size=None,
)

# Hyperparameters for the dev-set iterator (no shuffling).
eval_hparam = dict(
    allow_smaller_final_batch=True,
    batch_size=eval_batch_size,
    dataset=dict(
        data_name="data",
        feature_types=feature_types,
        files="{}/eval.pkl".format(pickle_data_dir),
    ),
    shuffle=False,
)

# Hyperparameters for the test-set iterator (no shuffling).
test_hparam = dict(
    allow_smaller_final_batch=True,
    batch_size=test_batch_size,
    dataset=dict(
        data_name="data",
        feature_types=feature_types,
        files="{}/predict.pkl".format(pickle_data_dir),
    ),
    shuffle=False,
)
# Data and training configuration for the IMDB text-classification example.
pickle_data_dir = "data/IMDB"  # directory holding the pickled feature files
max_seq_length = 64  # maximum token length per example
num_classes = 2  # binary sentiment labels
num_train_data = 25000  # size of the IMDB training split
 
# used for bert executor example
max_batch_tokens = 128
 
train_batch_size = 32
max_train_epoch = 5
display_steps = 50 # Print training loss every display_steps; -1 to disable
 
# tbx config
tbx_logging_steps = 5 # log the metrics for tbX visualization
tbx_log_dir = "runs/"
exp_number = 1 # experiment number
 
eval_steps = 100 # Eval on the dev set every eval_steps; -1 to disable
# Proportion of training to perform linear learning rate warmup for.
# E.g., 0.1 = 10% of training.
warmup_proportion = 0.1
eval_batch_size = 8
test_batch_size = 8
 
feature_types = {
# Reading features from pickled data file.
# E.g., Reading feature "input_ids" as dtype `int64`;
# "FixedLenFeature" indicates its length is fixed for all data instances;
# and the sequence length is limited by `max_seq_length`.
"input_ids": ["int64", "stacked_tensor", max_seq_length],
"input_mask": ["int64", "stacked_tensor", max_seq_length],
"segment_ids": ["int64", "stacked_tensor", max_seq_length],
"label_ids": ["int64", "stacked_tensor"]
}
 
# Iterator hyperparameters for the training split (shuffled, fixed batches).
train_hparam = {
"allow_smaller_final_batch": False,
"batch_size": train_batch_size,
"dataset": {
"data_name": "data",
"feature_types": feature_types,
"files": "{}/train.pkl".format(pickle_data_dir)
},
"shuffle": True,
"shuffle_buffer_size": None
}
 
# Iterator hyperparameters for the dev split (no shuffling).
eval_hparam = {
"allow_smaller_final_batch": True,
"batch_size": eval_batch_size,
"dataset": {
"data_name": "data",
"feature_types": feature_types,
"files": "{}/eval.pkl".format(pickle_data_dir)
},
"shuffle": False
}
 
# Iterator hyperparameters for the test split (no shuffling).
test_hparam = {
"allow_smaller_final_batch": True,
"batch_size": test_batch_size,
"dataset": {
"data_name": "data",
"feature_types": feature_types,
"files": "{}/predict.pkl".format(pickle_data_dir)
},
"shuffle": False
}
52 changes: 34 additions & 18 deletions examples/text_classification/download_imdb.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,34 @@
import os
import sys

def main(arguments):
    """Download and extract the raw IMDB sentiment dataset.

    Fetches ``aclImdb_v1.tar.gz`` from the Stanford mirror into
    ``data/IMDB_raw/``, extracts it there, and deletes the archive.
    Skips everything if ``data/IMDB_raw`` already exists.

    Args:
        arguments: Command-line arguments. Currently unused; kept so the
            existing ``main(sys.argv[1:])`` entry point still works.

    Raises:
        subprocess.CalledProcessError: if the download or extraction
            command exits with a non-zero status.
    """
    import subprocess
    if not os.path.exists("data/IMDB_raw"):
        # os.makedirs instead of a shell `mkdir`: portable, and creates the
        # parent `data/` directory as well if it is missing.
        os.makedirs("data/IMDB_raw")
        # pylint: disable=line-too-long
        # check=True: a failed download/extraction now aborts the script
        # instead of being silently ignored.
        subprocess.run(
            'wget -P data/IMDB_raw/ https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz',
            shell=True, check=True)
        subprocess.run(
            'tar xzvf data/IMDB_raw/aclImdb_v1.tar.gz -C data/IMDB_raw/ && rm data/IMDB_raw/aclImdb_v1.tar.gz',
            shell=True, check=True)


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
# Copyright 2020 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import sys
import subprocess


def main():
    """Download and extract the raw IMDB dataset into ``data/IMDB_raw``."""
    # NOTE(review): indentation was reconstructed from a diff view -- assumed
    # the download/extract steps are all guarded by the existence check
    # (i.e. the whole routine is a no-op once the data is present); confirm.
    if not os.path.exists("data/IMDB_raw"):
        subprocess.run("mkdir data/IMDB_raw", shell=True, check=True)
        # pylint: disable=line-too-long
        # check=True makes a failed download/extraction raise instead of
        # being silently ignored.
        subprocess.run(
            'wget -P data/IMDB_raw/ https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz',
            shell=True, check=True)
        subprocess.run(
            'tar xzvf data/IMDB_raw/aclImdb_v1.tar.gz -C data/IMDB_raw/ && rm data/IMDB_raw/aclImdb_v1.tar.gz',
            shell=True, check=True)


if __name__ == '__main__':
    sys.exit(main())
59 changes: 31 additions & 28 deletions examples/text_classification/main.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,31 @@
# Copyright 2020 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from forte.models.imdb_text_classifier.model import IMDBClassifier
import config_data
import config_classifier

def main(argv=None):
    """Train and evaluate the BERT IMDB text classifier.

    Args:
        argv: Unused; accepted for entry-point compatibility.
    """
    classifier = IMDBClassifier(config_data, config_classifier)
    # Build the pickled feature files on the first run only.
    if not os.path.isfile("data/IMDB/train.pkl"):
        classifier.prepare_data("data/IMDB")
    classifier.run(do_train=True, do_eval=True, do_test=False)


if __name__ == "__main__":
    main()
# Copyright 2020 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os

from forte.models.imdb_text_classifier.model import IMDBClassifier
import config_data
import config_classifier


def main():
    """Train and evaluate the BERT IMDB text classifier."""
    model = IMDBClassifier(config_data, config_classifier)
    # prepare_data presumably pickles the raw dataset into data/IMDB; it is
    # only invoked when train.pkl is missing, i.e. on the first run -- confirm.
    if not os.path.isfile("data/IMDB/train.pkl"):
        model.prepare_data("data/IMDB")
    model.run(do_train=True, do_eval=True, do_test=False)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 8758d09

Please sign in to comment.