Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add tf input examples of converting TFDataSet, Tokenizer and ImageDataGenerator to ndarray #3111

Merged
merged 4 commits into from
Nov 23, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions docs/docs/ClusterServingGuide/OtherFrameworkUsers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ It is recommended to use savedModel format, Frozen Graph is also supported.
* Checkpoint to Frozen Graph:
### Data
To transform the following data types to NumPy ndarrays, see these examples:
* TFDataSet:

* TFDataSet: [l08c08_forecasting_with_lstm.py](https://github.com/intel-analytics/analytics-zoo/tree/master/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l08c08_forecasting_with_lstm.py)
* Tokenizer: [l10c03_nlp_constructing_text_generation_model.py](https://github.com/intel-analytics/analytics-zoo/tree/master/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l10c03_nlp_constructing_text_generation_model.py)
* ImageDataGenerator: [transfer_learning.py](https://github.com/intel-analytics/analytics-zoo/tree/master/docs/docs/ClusterServingGuide/OtherFrameworkUsers/transfer_learning.py)
## Pytorch

## OpenVINO
## OpenVINO
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Related url: https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l08c08_forecasting_with_lstm.ipynb
# Forecasting with LSTM
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

# Get the trend with time and slope
def trend(time, slope=0):
    """Return a linear trend, ``slope * time``.

    Args:
        time: Scalar or NumPy array of time steps.
        slope: Increase per time step; the default 0 gives a flat line.

    Returns:
        The product ``slope * time`` (same shape as ``time`` for arrays).
    """
    return slope * time


# Get a specific pattern, which can be customized
def seasonal_pattern(season_time):
    """Return the value of one seasonal cycle at ``season_time``.

    Values below 0.4 follow ``cos(2 * pi * season_time)``; all other values
    follow the decay ``1 / exp(3 * season_time)``.

    Args:
        season_time: NumPy array (or scalar) giving the position in a cycle.

    Returns:
        NumPy array of pattern values, element-wise for array input.
    """
    return np.where(season_time < 0.4,
                    np.cos(season_time * 2 * np.pi),
                    1 / np.exp(3 * season_time))

# Repeats the same pattern at each period
def seasonality(time, period, amplitude=1, phase=0):
    """Repeat ``seasonal_pattern`` every ``period`` time steps.

    Args:
        time: Scalar or NumPy array of time steps.
        period: Length of one cycle, in the same units as ``time``.
        amplitude: Factor applied to the pattern values.
        phase: Offset added to ``time`` before wrapping into a cycle.

    Returns:
        ``amplitude`` times the pattern evaluated at each wrapped time step.
    """
    # Map every time step to its fractional position within the cycle.
    season_time = ((time + phase) % period) / period
    return amplitude * seasonal_pattern(season_time)

# Obtain a random white noise
def white_noise(time, noise_level=1, seed=None):
    """Return standard-normal noise with one sample per time step.

    Args:
        time: Sized sequence of time steps; only its length is used.
        noise_level: Factor applied to the standard-normal samples.
        seed: Seed for the private ``RandomState``; ``None`` leaves the
            generator unseeded.

    Returns:
        1-D NumPy array of ``len(time)`` noise samples.
    """
    # A private generator keeps the global NumPy random state untouched.
    rnd = np.random.RandomState(seed)
    return rnd.randn(len(time)) * noise_level

# Convert the series to dataset form
def ndarray_to_dataset(ndarray):
    """Wrap ``ndarray`` in a ``tf.data.Dataset`` via ``from_tensor_slices``.

    Args:
        ndarray: Array-like (or tensor) to slice into dataset elements.

    Returns:
        The ``tf.data.Dataset`` built from ``ndarray``.
    """
    return tf.data.Dataset.from_tensor_slices(ndarray)

# Convert the series to dataset with some modifications
def sequential_window_dataset(series, window_size):
    """Turn a series into a dataset of (input, target) window pairs.

    Args:
        series: The time series values (assumed 1-D -- TODO confirm).
        window_size: Number of time steps in each input window.

    Returns:
        A ``tf.data.Dataset`` of ``(input, target)`` pairs, batched one pair
        per batch and prefetched.
    """
    # Add a trailing axis so every time step becomes a length-1 vector.
    series = tf.expand_dims(series, axis=-1)
    ds = ndarray_to_dataset(series)
    # Windows hold window_size + 1 steps and advance by window_size, so
    # consecutive windows share exactly one step; short tails are dropped.
    ds = ds.window(window_size + 1, shift=window_size, drop_remainder=True)
    # Each window is itself a dataset; batch it into a single tensor.
    ds = ds.flat_map(lambda window: window.batch(window_size + 1))
    # Input is the window minus its last step; target is shifted by one step.
    ds = ds.map(lambda window: (window[:-1], window[1:]))
    return ds.batch(1).prefetch(1)

# Convert dataset form to ndarray
def dataset_to_ndarray(dataset):
    """Materialize every element of ``dataset`` into one NumPy array.

    Args:
        dataset: Object exposing ``as_numpy_iterator()`` (for example a
            ``tf.data.Dataset``). Its elements are expected to share one
            shape so that they can be stacked.

    Returns:
        NumPy array whose first axis indexes the dataset elements.
    """
    elements = list(dataset.as_numpy_iterator())
    # np.array builds an array from the data. np.ndarray is the low-level
    # constructor and would interpret its argument as a shape.
    return np.array(elements)

# Generate some raw test data: 4 * 365 + 1 consecutive time steps
time_range=4 * 365 + 1
time = np.arange(time_range)

# Series = constant baseline + linear trend + seasonality with a 365-step period
slope = 0.05
baseline = 10
amplitude = 40
series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)

# Seeded white noise is added in place to the series
noise_level = 5
noise = white_noise(time, noise_level, seed=42)

series += noise

# Seed the TensorFlow and NumPy global generators, then wrap the raw test
# data in a windowed tf.data.Dataset
tf.random.set_seed(42)
np.random.seed(42)

window_size = 30
test_set = sequential_window_dataset(series, window_size)

# Convert the Dataset form data to ndarray
# Alternative left by the author (not used): series[np.newaxis, :, np.newaxis]
# NOTE(review): each dataset element is an (input, target) pair, so test_array
# appears to stack both -- confirm its shape is what model.predict expects.
test_array=dataset_to_ndarray(test_set)

# Load the saved LSTM model ("path/to/model" is a placeholder path)
model=tf.keras.models.load_model("path/to/model")

# Predict with LSTM model
rnn_forecast_nd = model.predict(test_array)
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Related url: https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l10c03_nlp_constructing_text_generation_model.ipynb
# Generating some new lyrics from the trained model

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Other imports for processing data
import string
import numpy as np
import pandas as pd

# DATA PREPROCESSING
# First, download the Song Lyrics dataset (originally from Kaggle) with:
# !wget --no-check-certificate \
# https://drive.google.com/uc?id=1LiJFZd41ofrWoBtW-pMYsfz1w8Ny0Bj8 \
# -O /tmp/songdata.csv

# Then to generate a tokenizer with the songdata.csv
def tokenize_corpus(corpus, num_words=-1):
    """Fit a Keras ``Tokenizer`` on ``corpus`` and return it.

    Args:
        corpus: Iterable of text lines to fit on.
        num_words: Value passed as the tokenizer's ``num_words`` when it is
            greater than -1. With -1 (the default), any other negative
            value, or ``None``, the tokenizer is built with its defaults.

    Returns:
        The fitted ``Tokenizer``.
    """
    # Guard against None so the comparison cannot raise TypeError.
    limit_vocab = num_words is not None and num_words > -1
    tokenizer = Tokenizer(num_words=num_words) if limit_vocab else Tokenizer()
    tokenizer.fit_on_texts(corpus)
    return tokenizer

def create_lyrics_corpus(dataset, field):
    """Build a list of cleaned, non-empty lyric lines from one column.

    Punctuation is removed and the text is lowercased. The cleaned column is
    written back to ``dataset[field]``, so the caller's DataFrame is modified.

    Args:
        dataset: pandas DataFrame holding the lyrics.
        field: Name of the string column containing the lyrics text.

    Returns:
        List of lines with trailing whitespace stripped and empty lines
        dropped.
    """
    # Delete every ASCII punctuation character. A translation table avoids
    # regex escaping and does not depend on the pandas-version-specific
    # default of the ``regex`` argument of ``Series.str.replace``.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    dataset[field] = dataset[field].str.translate(strip_punctuation)
    # Make it lowercase
    dataset[field] = dataset[field].str.lower()
    # Make it one long string to split by line
    lyrics = dataset[field].str.cat()
    # Remove any trailing whitespace, then drop lines that ended up empty
    stripped = [line.rstrip() for line in lyrics.split('\n')]
    return [line for line in stripped if line != '']

# Read the dataset from csv
dataset = pd.read_csv('/tmp/songdata.csv', dtype=str)
# Create the corpus using the 'text' column containing lyrics
corpus = create_lyrics_corpus(dataset, 'text')
# Tokenize the corpus
tokenizer = tokenize_corpus(corpus)

# Get the uniform input length (max_sequence_len) of the model: the token
# count of the longest corpus line, or 0 when the corpus is empty
max_sequence_len = max(
    (len(tokenizer.texts_to_sequences([line])[0]) for line in corpus),
    default=0,
)

# Load the saved model which is trained on the Song Lyrics dataset
# ("path/to/model" is a placeholder path)
model = tf.keras.models.load_model("path/to/model")

# Generate new lyrics with some "seed text"
seed_text = "im feeling chills"  # seed text can be customized
next_words = 100  # number of words appended to the seed text

for _ in range(next_words):
    # Convert the text generated so far to a sequence of word indices
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    # Left-pad the single sequence so the input has a uniform length
    token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
    # Index of the highest-scoring word for the single input row
    predicted = int(np.argmax(model.predict(token_list), axis=-1)[0])
    # Reverse lookup; an index with no associated word adds an empty string
    output_word = tokenizer.index_word.get(predicted, "")
    seed_text += " " + output_word  # add the predicted word to the seed text
print(seed_text)
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Related url: https://github.com/tensorflow/docs/blob/master/site/en/r1/tutorials/images/transfer_learning.ipynb
# Categorize image to cat or dog
import os
import tensorflow.compat.v1 as tf
from tensorflow import keras

# Download the cats-vs-dogs archive and extract it (extract=True);
# zip_file holds the value returned by get_file
zip_file = tf.keras.utils.get_file(origin="https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip",
fname="cats_and_dogs_filtered.zip", extract=True)

# Find the directory of validation set: the archive path without its
# extension, plus 'validation'
# NOTE(review): assumes the archive extracts to a directory of that name -- TODO confirm
base_dir, _ = os.path.splitext(zip_file)
test_dir = os.path.join(base_dir, 'validation')

# Images are resized to image_size x image_size (160x160) when loaded
image_size = 160

# Rescale all images by 1./255 (no other augmentation is configured here)
test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Flow images from test_dir through the generator, one image per batch
test_generator = test_datagen.flow_from_directory(
test_dir,
target_size=(image_size, image_size),
batch_size=1,
class_mode='binary')

# Convert the next data of ImageDataGenerator to ndarray
def convert_to_ndarray(image_generator):
    """Return the first item of the next batch from ``image_generator``.

    Args:
        image_generator: Iterator whose items are indexable batches, for
            example the ``(images, labels)`` batches of a Keras image
            iterator.

    Returns:
        Element 0 of the next batch (the image data for such iterators).

    Raises:
        StopIteration: If the iterator is exhausted.
    """
    # The builtin next() works for any iterator, not only objects that
    # expose a ``.next()`` method.
    return next(image_generator)[0]


def convertToNarray(ImageGenerator):
    """Backward-compatible name for ``convert_to_ndarray``."""
    return convert_to_ndarray(ImageGenerator)

# Load model from its path ("path/to/model" is a placeholder path)
model = tf.keras.models.load_model("path/to/model")

# Convert each image in test_generator to ndarray and predict with model
# NOTE(review): a reviewer asked for a Python-style name (convert_to_ndarray)
# for the conversion helper used below.
max_length = len(test_generator)
for _ in range(max_length):  # number of images to predict can be altered
    test_input = convertToNarray(test_generator)
    prediction = model.predict(test_input)