diff --git a/docs/docs/ClusterServingGuide/OtherFrameworkUsers/README.md b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/README.md
index b2696dfe233..6dcb9aac1e7 100644
--- a/docs/docs/ClusterServingGuide/OtherFrameworkUsers/README.md
+++ b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/README.md
@@ -15,8 +15,9 @@ It is recommended to use savedModel format, Frozen Graph is also supported.
 * Checkpoint to Frozen Graph:
 ### Data
 To transform following data type to Numpy Ndarray
-* TFDataSet:
-
+* TFDataSet: [l08c08_forecasting_with_lstm.py](https://github.com/intel-analytics/analytics-zoo/tree/master/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l08c08_forecasting_with_lstm.py)
+* Tokenizer: [l10c03_nlp_constructing_text_generation_model.py](https://github.com/intel-analytics/analytics-zoo/tree/master/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l10c03_nlp_constructing_text_generation_model.py)
+* ImageDataGenerator: [transfer_learning.py](https://github.com/intel-analytics/analytics-zoo/tree/master/docs/docs/ClusterServingGuide/OtherFrameworkUsers/transfer_learning.py)
 ## Pytorch
 
-## OpenVINO
\ No newline at end of file
+## OpenVINO
diff --git a/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l08c08_forecasting_with_lstm.py b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l08c08_forecasting_with_lstm.py
new file mode 100644
index 00000000000..612017b8252
--- /dev/null
+++ b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l08c08_forecasting_with_lstm.py
@@ -0,0 +1,75 @@
+# Related url: https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l08c08_forecasting_with_lstm.ipynb
+# Forecasting with LSTM
+import numpy as np
+import tensorflow as tf
+import tensorflow.keras as keras
+
+# Get the trend with time and slope
+def trend(time, slope=0):
+    return slope * time
+
+# Get a specific seasonal pattern, which can be customized
+def seasonal_pattern(season_time):
+    return np.where(season_time < 0.4,
+                    np.cos(season_time * 2 * np.pi),
+                    1 / np.exp(3 * season_time))
+
+# Repeat the same pattern at each period
+def seasonality(time, period, amplitude=1, phase=0):
+    season_time = ((time + phase) % period) / period
+    return amplitude * seasonal_pattern(season_time)
+
+# Generate random white noise
+def white_noise(time, noise_level=1, seed=None):
+    rnd = np.random.RandomState(seed)
+    return rnd.randn(len(time)) * noise_level
+
+# Convert the series to dataset form
+def ndarray_to_dataset(ndarray):
+    return tf.data.Dataset.from_tensor_slices(ndarray)
+
+# Window the series into batched (input, target) pairs
+def sequential_window_dataset(series, window_size):
+    series = tf.expand_dims(series, axis=-1)
+    ds = ndarray_to_dataset(series)
+    ds = ds.window(window_size + 1, shift=window_size, drop_remainder=True)
+    ds = ds.flat_map(lambda window: window.batch(window_size + 1))
+    ds = ds.map(lambda window: (window[:-1], window[1:]))
+    return ds.batch(1).prefetch(1)
+
+# Convert dataset form back to ndarray, keeping only the model inputs from
+# each (input, target) pair so the result can be fed to model.predict
+def dataset_to_ndarray(dataset):
+    windows = [window_input for window_input, _ in dataset.as_numpy_iterator()]
+    return np.concatenate(windows, axis=0)
+
+# Generate some raw test data
+time_range = 4 * 365 + 1
+time = np.arange(time_range)
+
+slope = 0.05
+baseline = 10
+amplitude = 40
+series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)
+
+noise_level = 5
+noise = white_noise(time, noise_level, seed=42)
+
+series += noise
+
+# Set random seeds for reproducibility
+tf.random.set_seed(42)
+np.random.seed(42)
+
+window_size = 30
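+# Note: with window_size = 30, sequential_window_dataset() yields
+# (input, target) pairs of shape (1, 30, 1) each, where the target is the
+# input window shifted forward by one time step.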
+test_set = sequential_window_dataset(series, window_size)
+
+# Convert the DataSet form data to ndarray
+# (alternatively, the raw series could be fed directly:
+#  pre_in = series[np.newaxis, :, np.newaxis])
+test_array = dataset_to_ndarray(test_set)
+
+# Load the saved LSTM model
+model = tf.keras.models.load_model("path/to/model")
+
+# Predict with the LSTM model
+rnn_forecast_nd = model.predict(test_array)
diff --git a/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l10c03_nlp_constructing_text_generation_model.py b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l10c03_nlp_constructing_text_generation_model.py
new file mode 100644
index 00000000000..3d27b9a09c4
--- /dev/null
+++ b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/l10c03_nlp_constructing_text_generation_model.py
@@ -0,0 +1,75 @@
+# Related url: https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l10c03_nlp_constructing_text_generation_model.ipynb
+# Generate some new lyrics from the trained model
+
+import tensorflow as tf
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+
+# Other imports for processing data
+import string
+import numpy as np
+import pandas as pd
+
+# DATA PREPROCESSING
+# First, download the Song Lyrics dataset from Kaggle:
+# !wget --no-check-certificate \
+#     https://drive.google.com/uc?id=1LiJFZd41ofrWoBtW-pMYsfz1w8Ny0Bj8 \
+#     -O /tmp/songdata.csv
+
+# Then generate a tokenizer from songdata.csv
+def tokenize_corpus(corpus, num_words=-1):
+    # Fit a Tokenizer on the corpus
+    if num_words > -1:
+        tokenizer = Tokenizer(num_words=num_words)
+    else:
+        tokenizer = Tokenizer()
+    tokenizer.fit_on_texts(corpus)
+    return tokenizer
+
+def create_lyrics_corpus(dataset, field):
+    # Remove all other punctuation
+    dataset[field] = dataset[field].str.replace('[{}]'.format(string.punctuation), '')
+    # Make it lowercase
+    dataset[field] = dataset[field].str.lower()
+    # Make it one long string to split by line
+    lyrics = dataset[field].str.cat()
+    corpus = lyrics.split('\n')
+    # Remove any trailing whitespace
+    for l in range(len(corpus)):
+        corpus[l] = corpus[l].rstrip()
+    # Remove any empty lines
+    corpus = [l for l in corpus if l != '']
+
+    return corpus
+
+# Read the dataset from csv
+dataset = pd.read_csv('/tmp/songdata.csv', dtype=str)
+# Create the corpus using the 'text' column containing lyrics
+corpus = create_lyrics_corpus(dataset, 'text')
+# Tokenize the corpus
+tokenizer = tokenize_corpus(corpus)
+
+# Get the uniform input length (max_sequence_len) of the model
+max_sequence_len = 0
+for line in corpus:
+    token_list = tokenizer.texts_to_sequences([line])[0]
+    max_sequence_len = max(max_sequence_len, len(token_list))
+
+# Load the saved model which was trained on the Song Lyrics dataset
+model = tf.keras.models.load_model("path/to/model")
+
+# Generate new lyrics from some "seed text"
+seed_text = "im feeling chills"  # the seed text can be customized
+next_words = 100  # this defines the length (in words) of the new lyrics
+
+for _ in range(next_words):
+    token_list = tokenizer.texts_to_sequences([seed_text])[0]  # convert the seed text to a token id sequence
+    token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')  # pad the input to a uniform length
+    predicted = np.argmax(model.predict(token_list), axis=-1)  # get the predicted word index
+    output_word = ""
+    for word, index in tokenizer.word_index.items():
+        if index == predicted:
+            output_word = word
+            break
+    seed_text += " " + output_word  # append the predicted word to the seed text
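+# After the loop, seed_text holds the original seed followed by next_words
+# predicted words; decoding is greedy (argmax), so the output is deterministic.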
+print(seed_text)
diff --git a/docs/docs/ClusterServingGuide/OtherFrameworkUsers/transfer_learning.py b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/transfer_learning.py
new file mode 100644
index 00000000000..9777ea70f65
--- /dev/null
+++ b/docs/docs/ClusterServingGuide/OtherFrameworkUsers/transfer_learning.py
@@ -0,0 +1,40 @@
+# Related url: https://github.com/tensorflow/docs/blob/master/site/en/r1/tutorials/images/transfer_learning.ipynb
+# Categorize images as cat or dog
+import os
+import tensorflow.compat.v1 as tf
+from tensorflow import keras
+
+# Obtain data from url: "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip"
+zip_file = tf.keras.utils.get_file(origin="https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip",
+                                   fname="cats_and_dogs_filtered.zip", extract=True)
+
+# Find the directory of the validation set
+base_dir, _ = os.path.splitext(zip_file)
+test_dir = os.path.join(base_dir, 'validation')
+
+# Set image size to 160x160 (3 channels)
+image_size = 160
+
+# Rescale all images by 1./255 (no augmentation for the test set)
+test_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
+
+# Flow images from the validation directory through the generator
+test_generator = test_datagen.flow_from_directory(
+    test_dir,
+    target_size=(image_size, image_size),
+    batch_size=1,
+    class_mode='binary')
+
+# Get the next batch of the ImageDataGenerator as an ndarray
+# (index 0 holds the images, index 1 the labels)
+def convert_to_ndarray(image_generator):
+    return image_generator.next()[0]
+
+# Load the model from its path
+model = tf.keras.models.load_model("path/to/model")
+
+# Convert each image in test_generator to ndarray and predict with the model
+max_length = len(test_generator)
+for i in range(max_length):  # the number of images to predict can be altered
+    test_input = convert_to_ndarray(test_generator)
+    prediction = model.predict(test_input)
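+
+# A minimal sketch of reading out the last prediction, assuming the loaded
+# model ends in a single sigmoid unit (consistent with class_mode='binary'):
+# scores above 0.5 map to class index 1 in test_generator.class_indices.
+labels = {v: k for k, v in test_generator.class_indices.items()}
+print(labels[int(prediction[0][0] > 0.5)])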