-
Notifications
You must be signed in to change notification settings - Fork 2
/
dataLoader.py
48 lines (40 loc) · 1.38 KB
/
dataLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import tensorflow as tf
import numpy as np
from tfRecordTools import *
def sampleFunction(inputFeature):
    """
    Template function showing the docstring layout used in this file.

    Args:
        inputFeature - (np.ndarray) Placeholder input; the body never reads it
    Returns:
        result - (int) Always the constant 55
    """
    result = 55
    return result
def buildReverseWordIndex(dataset):
    """
    Build an index -> word lookup from a dataset's word index, leaving
    room for the special tokens reserved at the start of the dictionary.

    Args:
        dataset - Object exposing get_word_index(), which must return a
                  dict mapping each word to an integer index (presumably
                  a keras dataset module -- confirm against the caller)
    Returns:
        reverseWordIndex - (dict) A dictionary mapping each integer index
                           back to its word, including the special tokens
                           '<PAD>', '<START>', '<UNK>' and '<UNUSED>'
    """
    # Shift every word up by 3 so that the low indices are free for the
    # special tokens below. NOTE(review): this assumes the source index is
    # 1-based; a word stored at index 0 would collide with '<UNUSED>'.
    wordIndex = {word: index + 3
                 for word, index in dataset.get_word_index().items()}
    wordIndex['<PAD>'] = 0
    wordIndex['<START>'] = 1
    wordIndex['<UNK>'] = 2  # unknown
    wordIndex['<UNUSED>'] = 3
    # Invert the mapping: callers decode integer sequences, so they need
    # index -> word rather than word -> index.
    return {index: word for word, index in wordIndex.items()}
def decodeReview(text, reverseWordIndex):
    """
    Decode a sequence of word indices back into a space-separated string,
    using a lookup such as the one returned by buildReverseWordIndex.

    Args:
        text - (np.ndarray) The sequence of integer word indices to decode
        reverseWordIndex - (dict) The index -> word mapping to use
    Returns:
        decodedReview - (string) The decoded review; any index missing
                        from reverseWordIndex is rendered as '?'
    """
    # Unknown indices fall back to '?' instead of raising KeyError.
    return ' '.join(reverseWordIndex.get(index, '?') for index in text)