From a8aac8d7b9667571dbf762bdfa9086a56e98ae5c Mon Sep 17 00:00:00 2001
From: Olcay Taner YILDIZ <olcay.yildiz@ozyegin.edu.tr>
Date: Mon, 14 Sep 2020 23:20:38 +0300
Subject: [PATCH] Started repository translation.

---
 .../contents.xcworkspacedata                  |   2 +-
 Package.swift                                 |   5 +-
 Sources/WordToVec/Iteration.swift             | 124 +++++++++
 Sources/WordToVec/NeuralNetwork.swift         | 239 ++++++++++++++++++
 Sources/WordToVec/Vocabulary.swift            | 186 ++++++++++++++
 Sources/WordToVec/VocabularyWord.swift        | 113 +++++++++
 Sources/WordToVec/WordToVec.swift             |   3 -
 Sources/WordToVec/WordToVecParameter.swift    | 154 +++++++++++
 Tests/WordToVecTests/NeuralNetworkTest.swift  |  44 ++++
 9 files changed, 863 insertions(+), 7 deletions(-)
 create mode 100644 Sources/WordToVec/Iteration.swift
 create mode 100644 Sources/WordToVec/NeuralNetwork.swift
 create mode 100644 Sources/WordToVec/Vocabulary.swift
 create mode 100644 Sources/WordToVec/VocabularyWord.swift
 delete mode 100644 Sources/WordToVec/WordToVec.swift
 create mode 100644 Sources/WordToVec/WordToVecParameter.swift
 create mode 100644 Tests/WordToVecTests/NeuralNetworkTest.swift
diff --git a/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata b/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
index 706eede..919434a 100644
--- a/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
+++ b/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
@@ -2,6 +2,6 @@
 <Workspace
    version = "1.0">
    <FileRef
-      location = "container:../..">
+      location = "self:">
    </FileRef>
 </Workspace>
diff --git a/Package.swift b/Package.swift
index a49b218..0de7ab0 100644
--- a/Package.swift
+++ b/Package.swift
@@ -12,15 +12,14 @@ let package = Package(
             targets: ["WordToVec"]),
     ],
     dependencies: [
-        // Dependencies declare other packages that this package depends on.
-        // .package(url: /* package url */, from: "1.0.0"),
+        .package(name: "Corpus", url: "https://github.com/StarlangSoftware/Corpus-Swift.git", .exact("1.0.2")),
     ],
     targets: [
         // Targets are the basic building blocks of a package. A target can define a module or a test suite.
         // Targets can depend on other targets in this package, and on products in packages which this package depends on.
         .target(
             name: "WordToVec",
-            dependencies: []),
+            dependencies: ["Corpus"]),
         .testTarget(
             name: "WordToVecTests",
             dependencies: ["WordToVec"]),
diff --git a/Sources/WordToVec/Iteration.swift b/Sources/WordToVec/Iteration.swift
new file mode 100644
index 0000000..f8bbe69
--- /dev/null
+++ b/Sources/WordToVec/Iteration.swift
@@ -0,0 +1,124 @@
+//
+//  Iteration.swift
+//  
+//
+//  Created by Olcay Taner YILDIZ on 14.09.2020.
+//
+
+import Foundation
+import Corpus
+
+public class Iteration{
+    
+    private var __wordCount: Int = 0          // words of the sentences completed in the current iteration
+    private var __lastWordCount: Int = 0      // value of __wordCount at the latest alpha update
+    private var __wordCountActual: Int = 0    // words credited to the alpha decay so far
+    private var __iterationCount: Int = 0
+    private var __sentencePosition: Int = 0
+    private var __sentenceIndex: Int = 0
+    private var __startingAlpha: Double
+    private var __alpha: Double
+    private var __corpus: Corpus
+    private var __wordToVecParameter: WordToVecParameter
+
+    /**
+    Creates the training state for the given corpus. Both the starting and the current learning rate
+    are taken from the alpha value of the parameters; all counters start at zero.
+
+    - Parameters:
+        - corpus : Corpus used to train word vectors using Word2Vec algorithm.
+        - wordToVecParameter : Parameters of the Word2Vec algorithm.
+    */
+    public init(corpus: Corpus, wordToVecParameter: WordToVecParameter){
+        __corpus = corpus
+        __wordToVecParameter = wordToVecParameter
+        __startingAlpha = wordToVecParameter.getAlpha()
+        __alpha = wordToVecParameter.getAlpha()
+    }
+
+    /**
+    Accessor for the alpha attribute.
+
+    - Returns: Current learning rate.
+    */
+    public func getAlpha() -> Double{
+        return __alpha
+    }
+
+    /**
+    Accessor for the iterationCount attribute.
+
+    - Returns: Number of completed passes over the corpus.
+    */
+    public func getIterationCount() -> Int{
+        return __iterationCount
+    }
+
+    /**
+    Accessor for the sentenceIndex attribute.
+
+    - Returns: Index of the sentence that is currently processed.
+    */
+    public func getSentenceIndex() -> Int{
+        return __sentenceIndex
+    }
+
+    /**
+    Accessor for the sentencePosition attribute.
+
+    - Returns: Position of the current word inside the current sentence.
+    */
+    public func getSentencePosition() -> Int{
+        return __sentencePosition
+    }
+
+    /**
+    Recomputes alpha once more than 10000 words have been processed since the previous update.
+    */
+    public func alphaUpdate(){
+        let processed = __wordCount - __lastWordCount
+        if processed > 10000{
+            __wordCountActual += processed
+            __lastWordCount = __wordCount
+            let totalWords = Double(__wordToVecParameter.getNumberOfIterations()) *
+                             Double(__corpus.numberOfWords()) + 1.0
+            __alpha = __startingAlpha * (1.0 - Double(__wordCountActual) / totalWords)
+            // Keep the learning rate from decaying below 0.01% of its starting value.
+            __alpha = max(__alpha, __startingAlpha * 0.0001)
+        }
+    }
+
+    /**
+    Advances the reading position by one word and returns the sentence to process next. While the
+    position is still inside the current sentence, that sentence is returned unchanged. Once it runs
+    past the end, the sentence's words are added to the word count and the next sentence of the corpus
+    is returned. After the last sentence, one more iteration is counted, the words not yet included in
+    the running total are added to it, the per-iteration counters are reset, the corpus is shuffled
+    and its first sentence is returned.
+
+    - Parameter currentSentence : Current sentence processed.
+
+    - Returns: currentSentence while it still has unread words; otherwise the next sentence of the
+               corpus (the first sentence of the reshuffled corpus after the last one).
+    */
+    public func sentenceUpdate(currentSentence: Sentence) -> Sentence{
+        __sentencePosition += 1
+        if __sentencePosition < currentSentence.wordCount(){
+            return currentSentence
+        }
+        __wordCount += currentSentence.wordCount()
+        __sentenceIndex += 1
+        __sentencePosition = 0
+        if __sentenceIndex == __corpus.sentenceCount(){
+            // Credit the words read since the last alpha update before the per-iteration counters
+            // are reset; otherwise they never reach __wordCountActual and alpha decays too slowly.
+            __wordCountActual += __wordCount - __lastWordCount
+            __iterationCount += 1
+            __wordCount = 0
+            __lastWordCount = 0
+            __sentenceIndex = 0
+            __corpus.shuffleSentences(seed: 1)
+        }
+        return __corpus.getSentence(index: __sentenceIndex)
+    }
+}
diff --git a/Sources/WordToVec/NeuralNetwork.swift b/Sources/WordToVec/NeuralNetwork.swift
new file mode 100644
index 0000000..eb8ff47
--- /dev/null
+++ b/Sources/WordToVec/NeuralNetwork.swift
@@ -0,0 +1,239 @@
+//
+//  NeuralNetwork.swift
+//  
+//
+//  Created by Olcay Taner YILDIZ on 14.09.2020.
+//
+
+import Foundation
+import Math
+import Corpus
+import Dictionary
+
+class NeuralNetwork{
+    
+    private var __wordVectors: Matrix // one row per vocabulary word; these rows are what train() returns
+    private var __wordVectorUpdate: Matrix // second weight matrix; rows are addressed by target word index or Huffman point
+    private var __vocabulary: Vocabulary
+    private var __parameter: WordToVecParameter
+    private var __corpus: Corpus
+    private var __expTable: [Double] = [] // precomputed sigmoid values, filled by __prepareExpTable()
+
+    private static var EXP_TABLE_SIZE = 1000 // number of sigmoid samples computed for __expTable
+    private static var MAX_EXP = 6 // the table covers inputs in [-MAX_EXP, MAX_EXP)
+
+    /**
+    Constructor for the NeuralNetwork class. Builds the vocabulary of the corpus, stores the corpus and the
+    parameters, creates the word vector matrix (vocabulary size x layer size, constructed with min -0.5 and
+    max 0.5) and an update matrix of the same dimensions, and finally prepares the exp table.
+
+    - Parameters:
+        - corpus : Corpus used to train word vectors using Word2Vec algorithm.
+        - parameter : Parameters of the Word2Vec algorithm.
+    */
+    public init(corpus: Corpus, parameter: WordToVecParameter){
+        self.__vocabulary = Vocabulary(corpus: corpus)
+        self.__parameter = parameter
+        self.__corpus = corpus
+        self.__wordVectors = Matrix(row: self.__vocabulary.size(), col: self.__parameter.getLayerSize(), min: -0.5, max: 0.5)
+        self.__wordVectorUpdate = Matrix(row: self.__vocabulary.size(), col: self.__parameter.getLayerSize())
+        self.__prepareExpTable()
+    }
+
+    /**
+    Constructs the fast exponentiation table: entry i holds the sigmoid e^x / (e^x + 1) of
+    x = (2i / EXP_TABLE_SIZE - 1) * MAX_EXP, so that training can look the value up instead of calling exp.
+    */
+    public func __prepareExpTable(){
+        self.__expTable = Array(repeating: 0.0, count: NeuralNetwork.EXP_TABLE_SIZE + 1) // one spare slot; the loop leaves the last entry at 0.0
+        for i in 0..<NeuralNetwork.EXP_TABLE_SIZE{
+            self.__expTable[i] = exp((Double(i) / Double(NeuralNetwork.EXP_TABLE_SIZE) * 2.0 - 1.0) * Double(NeuralNetwork.MAX_EXP))
+            self.__expTable[i] = self.__expTable[i] / (self.__expTable[i] + 1)
+        }
+    }
+
+    /**
+    Main method for training the Word2Vec algorithm. Depending on the training parameter, CBow or SkipGram algorithm
+    is applied; afterwards every vocabulary word is paired with its row of the word vector matrix.
+
+    - Returns: Dictionary of word vectors.
+    */
+    public func train() -> VectorizedDictionary{
+        let result : VectorizedDictionary = VectorizedDictionary()
+        if self.__parameter.isCbow(){
+            self.__trainCbow()
+        } else {
+            self.__trainSkipGram()
+        }
+        for i in 0..<self.__vocabulary.size(){
+            result.addWord(word: VectorizedWord(name: self.__vocabulary.getWord(index: i).getName(), vector: self.__wordVectors.getRowVector(row: i)))
+        }
+        return result
+    }
+
+    /**
+    Calculates G value in the Word2Vec algorithm, i.e. (label - sigmoid(f)) * alpha with the sigmoid read from the exp table.
+
+    - Parameters:
+        - f : F value.
+        - alpha : Learning rate alpha.
+        - label : Label of the instance.
+
+    - Returns: Calculated G value.
+    */
+    public func __calculateG(f: Double, alpha: Double, label: Double) -> Double{
+        if f > Double(NeuralNetwork.MAX_EXP){
+            return (label - 1) * alpha // sigmoid taken as 1 above the table range
+        } else if f < -Double(NeuralNetwork.MAX_EXP){
+            return label * alpha // sigmoid taken as 0 below the table range
+        } else {
+            return (label - self.__expTable[Int((f + Double(NeuralNetwork.MAX_EXP)) *
+                Double(NeuralNetwork.EXP_TABLE_SIZE / NeuralNetwork.MAX_EXP / 2))]) * alpha // integer division: 1000 / 6 / 2 = 83 slots per unit of f
+        }
+    }
+
+    /**
+    Main method for training the CBow version of Word2Vec algorithm.
+    */
+    public func __trainCbow(){
+        let iteration = Iteration(corpus: self.__corpus, wordToVecParameter: self.__parameter)
+        var currentSentence : Sentence = self.__corpus.getSentence(index: iteration.getSentenceIndex())
+        let outputs = Vector(size: self.__parameter.getLayerSize(), x: 0.0)
+        let outputUpdate = Vector(size: self.__parameter.getLayerSize(), x: 0)
+        self.__corpus.shuffleSentences(seed: 1) // NOTE(review): the first sentence above is fetched before this shuffle
+        while iteration.getIterationCount() < self.__parameter.getNumberOfIterations(){
+            iteration.alphaUpdate()
+            let wordIndex = self.__vocabulary.getPosition(word: currentSentence.getWord(index: iteration.getSentencePosition()))
+            let currentWord = self.__vocabulary.getWord(index: wordIndex)
+            outputs.clear()
+            outputUpdate.clear()
+            let b = Int.random(in: 0..<self.__parameter.getWindow()) // random shrink: window offsets b ..< 2 * window + 1 - b are used
+            var cw : Int = 0 // number of context words found
+            for a in b..<self.__parameter.getWindow() * 2 + 1 - b{
+                let c = iteration.getSentencePosition() - self.__parameter.getWindow() + a
+                if a != self.__parameter.getWindow() && currentSentence.safeIndex(index: c){ // skip the centre word; safeIndex presumably rejects positions outside the sentence
+                    let lastWordIndex = self.__vocabulary.getPosition(word: currentSentence.getWord(index: c))
+                    outputs.addVector(v: self.__wordVectors.getRowVector(row: lastWordIndex))
+                    cw = cw + 1
+                }
+            }
+            if cw > 0{
+                outputs.divide(value: Double(cw)) // average of the context word vectors
+                if self.__parameter.isHierarchicalSoftMax(){
+                    for d in 0..<currentWord.getCodeLength(){
+                        let l2 = (currentWord as VocabularyWord).getPoint(index: d)
+                        var f : Double = outputs.dotProduct(v: self.__wordVectorUpdate.getRowVector(row: l2))
+                        if f <= -Double(NeuralNetwork.MAX_EXP) || f >= Double(NeuralNetwork.MAX_EXP){
+                            continue // outside the table range: no update for this code position
+                        } else {
+                            f = self.__expTable[Int((f + Double(NeuralNetwork.MAX_EXP)) *
+                                                    Double(NeuralNetwork.EXP_TABLE_SIZE / NeuralNetwork.MAX_EXP / 2))]
+                        }
+                        let g = (1.0 - Double(currentWord.getCode(index: d)) - f) * iteration.getAlpha()
+                        outputUpdate.addVector(v: self.__wordVectorUpdate.getRowVector(row: l2).product(value: g))
+                        self.__wordVectorUpdate.addRowVector(rowNo: l2, v: outputs.product(value: g))
+                    }
+                } else {
+                    var target : Int
+                    var label : Int
+                    for d in 0..<self.__parameter.getNegativeSamplingSize() + 1{
+                        if d == 0{
+                            target = wordIndex // first round: the current word itself, label 1
+                            label = 1
+                        } else {
+                            target = self.__vocabulary.getTableValue(index: Int.random(in: 0..<self.__vocabulary.getTableSize()))
+                            if target == 0{
+                                target = Int.random(in: 0..<self.__vocabulary.size() - 1) + 1 // index 0 is replaced by a uniform draw from 1 ..< vocabulary size
+                            }
+                            if target == wordIndex{
+                                continue // a negative sample equal to the current word is skipped
+                            }
+                            label = 0
+                        }
+                        let l2 = target
+                        let f = outputs.dotProduct(v: self.__wordVectorUpdate.getRowVector(row: l2))
+                        let g = self.__calculateG(f: f, alpha: iteration.getAlpha(), label: Double(label))
+                        outputUpdate.addVector(v: self.__wordVectorUpdate.getRowVector(row: l2).product(value: g))
+                        self.__wordVectorUpdate.addRowVector(rowNo: l2, v: outputs.product(value: g))
+                    }
+                }
+                for a in b..<self.__parameter.getWindow() * 2 + 1 - b{
+                    let c = iteration.getSentencePosition() - self.__parameter.getWindow() + a
+                    if a != self.__parameter.getWindow() && currentSentence.safeIndex(index: c){
+                        let lastWordIndex = self.__vocabulary.getPosition(word: currentSentence.getWord(index: c))
+                        self.__wordVectors.addRowVector(rowNo: lastWordIndex, v: outputUpdate) // the same accumulated update goes to every context word
+                    }
+                }
+            }
+            currentSentence = iteration.sentenceUpdate(currentSentence: currentSentence)
+        }
+    }
+
+    /**
+    Main method for training the SkipGram version of Word2Vec algorithm.
+    */
+    public func __trainSkipGram(){
+        let iteration = Iteration(corpus: self.__corpus, wordToVecParameter: self.__parameter)
+        var currentSentence : Sentence = self.__corpus.getSentence(index: iteration.getSentenceIndex())
+        let outputs = Vector(size: self.__parameter.getLayerSize(), x: 0.0)
+        let outputUpdate = Vector(size: self.__parameter.getLayerSize(), x: 0)
+        self.__corpus.shuffleSentences(seed: 1) // NOTE(review): the first sentence above is fetched before this shuffle
+        while iteration.getIterationCount() < self.__parameter.getNumberOfIterations(){
+            iteration.alphaUpdate()
+            let wordIndex = self.__vocabulary.getPosition(word: currentSentence.getWord(index: iteration.getSentencePosition()))
+            let currentWord = self.__vocabulary.getWord(index: wordIndex)
+            outputs.clear()
+            outputUpdate.clear()
+            let b = Int.random(in: 0..<self.__parameter.getWindow()) // random shrink: window offsets b ..< 2 * window + 1 - b are used
+            for a in b..<self.__parameter.getWindow() * 2 + 1 - b{
+                let c = iteration.getSentencePosition() - self.__parameter.getWindow() + a
+                if a != self.__parameter.getWindow() && currentSentence.safeIndex(index: c){ // skip the centre word; safeIndex presumably rejects positions outside the sentence
+                    let lastWordIndex = self.__vocabulary.getPosition(word: currentSentence.getWord(index: c))
+                    let l1 = lastWordIndex // row of the context word in __wordVectors
+                    outputUpdate.clear() // a fresh update is accumulated for every context word
+                    if self.__parameter.isHierarchicalSoftMax(){
+                        for d in 0..<currentWord.getCodeLength(){
+                            let l2 = currentWord.getPoint(index: d)
+                            var f : Double = self.__wordVectors.getRowVector(row: l1).dotProduct(v: self.__wordVectorUpdate.getRowVector(row: l2))
+                            if f <= -Double(NeuralNetwork.MAX_EXP) || f >= Double(NeuralNetwork.MAX_EXP){
+                                continue // outside the table range: no update for this code position
+                            } else {
+                                f = self.__expTable[Int((f + Double(NeuralNetwork.MAX_EXP)) *
+                                                        Double(NeuralNetwork.EXP_TABLE_SIZE / NeuralNetwork.MAX_EXP / 2))]
+                            }
+                            let g = (1.0 - Double(currentWord.getCode(index: d)) - f) * iteration.getAlpha()
+                            outputUpdate.addVector(v: self.__wordVectorUpdate.getRowVector(row: l2).product(value: g))
+                            self.__wordVectorUpdate.addRowVector(rowNo: l2, v: self.__wordVectors.getRowVector(row: l1).product(value: g))
+                        }
+                    } else {
+                        var target : Int
+                        var label : Int
+                        for d in 0..<self.__parameter.getNegativeSamplingSize() + 1{
+                            if d == 0{
+                                target = wordIndex // first round: the current word itself, label 1
+                                label = 1
+                            } else {
+                                target = self.__vocabulary.getTableValue(index: Int.random(in: 0..<self.__vocabulary.getTableSize()))
+                                if target == 0{
+                                    target = Int.random(in: 0..<self.__vocabulary.size() - 1) + 1 // index 0 is replaced by a uniform draw from 1 ..< vocabulary size
+                                }
+                                if target == wordIndex{
+                                    continue // a negative sample equal to the current word is skipped
+                                }
+                                label = 0
+                            }
+                            let l2 = target
+                            let f = self.__wordVectors.getRowVector(row: l1).dotProduct(v: self.__wordVectorUpdate.getRowVector(row: l2))
+                            let g = self.__calculateG(f: Double(f), alpha: iteration.getAlpha(), label: Double(label))
+                            outputUpdate.addVector(v: self.__wordVectorUpdate.getRowVector(row: l2).product(value: g))
+                            self.__wordVectorUpdate.addRowVector(rowNo: l2, v: self.__wordVectors.getRowVector(row: l1).product(value: g))
+                        }
+                    }
+                    self.__wordVectors.addRowVector(rowNo: l1, v: outputUpdate) // apply the accumulated update to the context word's vector
+                }
+            }
+            currentSentence = iteration.sentenceUpdate(currentSentence: currentSentence)
+        }
+    }
+
+}
diff --git a/Sources/WordToVec/Vocabulary.swift b/Sources/WordToVec/Vocabulary.swift
new file mode 100644
index 0000000..420d81d
--- /dev/null
+++ b/Sources/WordToVec/Vocabulary.swift
@@ -0,0 +1,186 @@
+//
+//  Vocabulary.swift
+//  
+//
+//  Created by Olcay Taner YILDIZ on 14.09.2020.
+//
+
+import Foundation
+import Corpus
+import Dictionary
+
+public class Vocabulary{
+    
+    private var __vocabulary: [VocabularyWord] = []
+    private var __positions: [String: Int] = [:]
+    private var __table: [Int] = []
+
+    /**
+    Constructor for the Vocabulary class. For each distinct word in the corpus, a VocabularyWord
+    instance is created. After that, words are sorted in descending order of their occurrences, the
+    position of each word is indexed by name, and the unigram table and the Huffman tree are built.
+
+    - Parameter corpus : Corpus used to train word vectors using Word2Vec algorithm.
+    */
+    public init(corpus: Corpus){
+        for word in corpus.getWordList(){
+            self.__vocabulary.append(VocabularyWord(name: word.getName(), count: corpus.getCount(word: word)))
+        }
+        // Huffman construction expects descending counts; ties are ordered by name to stay reproducible.
+        self.__vocabulary.sort{ first, second in
+            if first.getCount() != second.getCount(){
+                return first.getCount() > second.getCount()
+            }
+            return first.getName() < second.getName()
+        }
+        for (position, vocabularyWord) in self.__vocabulary.enumerated(){
+            self.__positions[vocabularyWord.getName()] = position
+        }
+        self.__createUniGramTable()
+        self.__constructHuffmanTree()
+    }
+
+    /**
+    Returns number of words in the vocabulary.
+
+    - Returns: Number of words in the vocabulary.
+    */
+    public func size() -> Int{
+        return self.__vocabulary.count
+    }
+
+    /**
+    Looks a word up by its name and returns the position of that word in the vocabulary.
+
+    - Parameter word : Word to be searched.
+
+    - Returns: Position of the word searched; -1 if the vocabulary does not contain the word.
+    */
+    public func getPosition(word: Word) -> Int{
+        return self.__positions[word.getName()] ?? -1
+    }
+
+    /**
+    Returns the word at a given index.
+
+    - Parameter index : Index of the word.
+
+    - Returns: The word at a given index.
+    */
+    public func getWord(index: Int) -> VocabularyWord{
+        return self.__vocabulary[index]
+    }
+
+    /**
+    Constructs Huffman Tree based on the number of occurrences of the words (expects descending counts).
+    */
+    public func __constructHuffmanTree(){
+        guard self.__vocabulary.count > 1 else { return } // fewer than two words: no internal node, codes stay empty
+        var count : [Int] = Array(repeating: 0, count: self.__vocabulary.count * 2 + 1)
+        var code : [Int] = Array(repeating: 0, count: VocabularyWord.MAX_CODE_LENGTH)
+        var point : [Int] = Array(repeating: 0, count: VocabularyWord.MAX_CODE_LENGTH)
+        var binary : [Int] = Array(repeating: 0, count: self.__vocabulary.count * 2 + 1)
+        var parentNode : [Int] = Array(repeating: 0, count: self.__vocabulary.count * 2 + 1)
+        for a in 0..<self.__vocabulary.count{
+            count[a] = self.__vocabulary[a].getCount()
+        }
+        for a in self.__vocabulary.count..<self.__vocabulary.count * 2{
+            count[a] = 1000000000 // internal nodes start with a large sentinel until they are created
+        }
+        var pos1 : Int = self.__vocabulary.count - 1 // walks the leaves from the rarest word upwards
+        var pos2 : Int = self.__vocabulary.count // walks the internal nodes in creation order
+        var min1i : Int
+        var min2i : Int
+        for a in 0..<self.__vocabulary.count - 1{
+            if pos1 >= 0{
+                if count[pos1] < count[pos2] {
+                    min1i = pos1
+                    pos1 = pos1 - 1
+                } else {
+                    min1i = pos2
+                    pos2 = pos2 + 1
+                }
+            } else {
+                min1i = pos2
+                pos2 = pos2 + 1
+            }
+            if pos1 >= 0{
+                if count[pos1] < count[pos2]{
+                    min2i = pos1
+                    pos1 = pos1 - 1
+                } else {
+                    min2i = pos2
+                    pos2 = pos2 + 1
+                }
+            } else {
+                min2i = pos2
+                pos2 = pos2 + 1
+            }
+            count[self.__vocabulary.count + a] = count[min1i] + count[min2i]
+            parentNode[min1i] = self.__vocabulary.count + a
+            parentNode[min2i] = self.__vocabulary.count + a
+            binary[min2i] = 1 // the second of the two merged nodes gets code bit 1
+        }
+        for a in 0..<self.__vocabulary.count{
+            var b : Int = a
+            var i : Int = 0
+            while true{
+                code[i] = binary[b]
+                point[i] = b
+                i = i + 1
+                b = parentNode[b]
+                if b == self.__vocabulary.count * 2 - 2{ // reached the root, which is the last node created
+                    break
+                }
+            }
+            self.__vocabulary[a].setCodeLength(codeLength: i)
+            self.__vocabulary[a].setPoint(index: 0, value: self.__vocabulary.count - 2)
+            for b in 0..<i{
+                self.__vocabulary[a].setCode(index: i - b - 1, value: code[b]) // stored from the root end towards the leaf
+                self.__vocabulary[a].setPoint(index: i - b, value: point[b] - self.__vocabulary.count)
+            }
+        }
+    }
+
+    /**
+    Constructs the unigram table based on the number of occurrences of the words.
+    */
+    public func __createUniGramTable(){
+        guard !self.__vocabulary.isEmpty else { return } // nothing to sample from; the table stays empty
+        var total : Double = 0
+        self.__table = Array(repeating: 0, count: 2 * self.__vocabulary.count)
+        for vocabularyWord in self.__vocabulary{
+            total += pow(Double(vocabularyWord.getCount()), 0.75)
+        }
+        var i : Int = 0
+        var d1 : Double = pow(Double(self.__vocabulary[i].getCount()), 0.75) / total
+        for a in 0..<2 * self.__vocabulary.count{
+            self.__table[a] = i
+            // Advance to the next word once the current share is used up, but never past the last word.
+            if i < self.__vocabulary.count - 1 && Double(a) / (2 * Double(self.__vocabulary.count) + 0.0) > d1{
+                i = i + 1
+                d1 += pow(Double(self.__vocabulary[i].getCount()), 0.75) / total
+            }
+        }
+    }
+
+    /**
+    Accessor for the unigram table.
+
+    - Parameter index : Index into the unigram table.
+
+    - Returns: Unigram table value (a vocabulary position) at a given index.
+    */
+    public func getTableValue(index: Int) -> Int{
+        return self.__table[index]
+    }
+
+    /**
+    Returns size of the unigram table.
+
+    - Returns: Size of the unigram table.
+    */
+    public func getTableSize() -> Int{
+        return self.__table.count
+    }
+}
diff --git a/Sources/WordToVec/VocabularyWord.swift b/Sources/WordToVec/VocabularyWord.swift
new file mode 100644
index 0000000..0793678
--- /dev/null
+++ b/Sources/WordToVec/VocabularyWord.swift
@@ -0,0 +1,113 @@
+//
+//  VocabularyWord.swift
+//  
+//
+//  Created by Olcay Taner YILDIZ on 14.09.2020.
+//
+
+import Foundation
+import Dictionary
+
+public class VocabularyWord : Word{
+    
+    private var __count: Int
+    private var __code: [Int]
+    private var __point: [Int]
+    private var __codeLength: Int
+    public static var MAX_CODE_LENGTH = 40
+
+    /**
+    Creates a vocabulary word from its name and its number of occurrences. The code and point arrays
+    are allocated with MAX_CODE_LENGTH zero entries each and the code length starts at zero.
+
+    - Parameters:
+        - name : Lemma of the word
+        - count : Number of occurrences of this word in the corpus
+    */
+    public init(name: String, count: Int){
+        __count = count
+        __code = [Int](repeating: 0, count: VocabularyWord.MAX_CODE_LENGTH)
+        __point = [Int](repeating: 0, count: VocabularyWord.MAX_CODE_LENGTH)
+        __codeLength = 0
+        super.init(name: name)
+    }
+
+    /// Orders vocabulary words by their number of occurrences; the names are not compared.
+    /// - Returns: True if lhs occurs fewer times than rhs.
+    public static func < (lhs: VocabularyWord, rhs: VocabularyWord) -> Bool { return lhs.__count < rhs.__count }
+
+    /// Treats two vocabulary words as equal when they occur the same number of times.
+    /// - Returns: True if both words have the same count.
+    public static func == (lhs: VocabularyWord, rhs: VocabularyWord) -> Bool { return lhs.__count == rhs.__count }
+
+    /**
+    Accessor for the count attribute.
+
+    - Returns: How many times this word occurs.
+    */
+    public func getCount() -> Int{
+        return __count
+    }
+
+    /**
+    Mutator for codeLength attribute.
+
+    - Parameter codeLength : Value to store as the code length.
+    */
+    public func setCodeLength(codeLength: Int){
+        __codeLength = codeLength
+    }
+
+    /**
+    Mutator for code attribute.
+
+    - Parameters:
+        - index : Position inside the code array
+        - value : Value to store at that position of code.
+    */
+    public func setCode(index: Int, value: Int){
+        __code[index] = value
+    }
+
+    /**
+    Mutator for point attribute.
+
+    - Parameters:
+        - index : Position inside the point array
+        - value : Value to store at that position of point.
+    */
+    public func setPoint(index: Int, value: Int){
+        __point[index] = value
+    }
+
+    /**
+    Accessor for the codeLength attribute.
+
+    - Returns: Stored length of the Huffman code of this word.
+    */
+    public func getCodeLength() -> Int{
+        return __codeLength
+    }
+
+    /**
+    Accessor for point attribute.
+
+    - Parameter index : Position inside the point array.
+
+    - Returns: Value stored at that position of point.
+    */
+    public func getPoint(index: Int) -> Int{
+        return __point[index]
+    }
+
+    /**
+    Accessor for code attribute.
+
+    - Parameter index : Position inside the code array.
+
+    - Returns: Value stored at that position of code.
+    */
+    public func getCode(index: Int) -> Int{
+        return __code[index]
+    }
+}
diff --git a/Sources/WordToVec/WordToVec.swift b/Sources/WordToVec/WordToVec.swift
deleted file mode 100644
index 2e1b385..0000000
--- a/Sources/WordToVec/WordToVec.swift
+++ /dev/null
@@ -1,3 +0,0 @@
-struct WordToVec {
-    var text = "Hello, World!"
-}
diff --git a/Sources/WordToVec/WordToVecParameter.swift b/Sources/WordToVec/WordToVecParameter.swift
new file mode 100644
index 0000000..b3e248a
--- /dev/null
+++ b/Sources/WordToVec/WordToVecParameter.swift
@@ -0,0 +1,154 @@
+//
+//  WordToVecParameter.swift
+//  
+//
+//  Created by Olcay Taner YILDIZ on 14.09.2020.
+//
+
+import Foundation
+
+public class WordToVecParameter{
+    
+    private var __layerSize: Int = 100
+    private var __cbow: Bool = true
+    private var __alpha: Double = 0.025
+    private var __window: Int = 5
+    private var __hierarchicalSoftMax: Bool = false
+    private var __negativeSamplingSize: Int = 5
+    private var __numberOfIterations: Int = 3
+
+    /**
+    Empty constructor for Word2Vec parameter; every attribute keeps its default value.
+    */
+    public init(){
+    }
+
+    /**
+    Accessor for layerSize attribute.
+
+    - Returns: Size of the word vectors.
+    */
+    public func getLayerSize() -> Int{
+        return __layerSize
+    }
+
+    /**
+    Accessor for CBow attribute.
+
+    - Returns: True if CBow will be applied, false otherwise.
+    */
+    public func isCbow() -> Bool{
+        return __cbow
+    }
+
+    /**
+    Accessor for the alpha attribute.
+
+    - Returns: Current learning rate alpha.
+    */
+    public func getAlpha() -> Double{
+        return __alpha
+    }
+
+    /**
+    Accessor for the window size attribute.
+
+    - Returns: Current window size.
+    */
+    public func getWindow() -> Int{
+        return __window
+    }
+
+    /**
+    Accessor for the hierarchicalSoftMax attribute.
+
+    - Returns: If hierarchical softmax will be applied, returns true; false otherwise.
+    */
+    public func isHierarchicalSoftMax() -> Bool{
+        return __hierarchicalSoftMax
+    }
+
+    /**
+    Accessor for the negativeSamplingSize attribute.
+
+    - Returns: Number of negative samples that will be withdrawn.
+    */
+    public func getNegativeSamplingSize() -> Int{
+        return __negativeSamplingSize
+    }
+
+    /**
+    Accessor for the numberOfIterations attribute.
+
+    - Returns: Number of epochs to train the network.
+    */
+    public func getNumberOfIterations() -> Int{
+        return __numberOfIterations
+    }
+
+    /**
+    Mutator for the layerSize attribute.
+
+    - Parameter layerSize : New size of the word vectors.
+    */
+    public func setLayerSize(layerSize: Int){
+        __layerSize = layerSize
+    }
+
+    /**
+    Mutator for cBow attribute
+
+    - Parameter cbow : True if CBow applied; false if SkipGram applied.
+    */
+    public func setCbow(cbow: Bool){
+        __cbow = cbow
+    }
+
+    /**
+    Mutator for alpha attribute
+
+    - Parameter alpha : New learning rate.
+    */
+    public func setAlpha(alpha: Double){
+        __alpha = alpha
+    }
+
+    /**
+    Mutator for the window size attribute.
+
+    - Parameter window : New window size.
+    */
+    public func setWindow(window: Int){
+        __window = window
+    }
+
+    /**
+    Mutator for the hierarchicalSoftMax attribute; pass true if hierarchical softMax is applied, false otherwise.
+    */
+    public func setHierarchicalSoftMax(hierarchicalSoftMax: Bool){
+        __hierarchicalSoftMax = hierarchicalSoftMax
+    }
+
+    /** Misspelled name of setHierarchicalSoftMax(hierarchicalSoftMax:), kept so that existing callers still compile. */
+    public func setHierarchialSoftMax(hierarchicalSoftMax: Bool){
+        setHierarchicalSoftMax(hierarchicalSoftMax: hierarchicalSoftMax)
+    }
+
+    /**
+    Mutator for the negativeSamplingSize attribute.
+
+    - Parameter negativeSamplingSize : New number of negative instances that will be withdrawn.
+    */
+    public func setNegativeSamplingSize(negativeSamplingSize: Int){
+        __negativeSamplingSize = negativeSamplingSize
+    }
+
+    /**
+    Mutator for the numberOfIterations attribute.
+
+    - Parameter numberOfIterations : New number of iterations.
+    */
+    public func setNumberOfIterations(numberOfIterations: Int){
+        __numberOfIterations = numberOfIterations
+    }
+}
diff --git a/Tests/WordToVecTests/NeuralNetworkTest.swift b/Tests/WordToVecTests/NeuralNetworkTest.swift
new file mode 100644
index 0000000..51b65b1
--- /dev/null
+++ b/Tests/WordToVecTests/NeuralNetworkTest.swift
@@ -0,0 +1,44 @@
+import XCTest
+import Corpus
+import Dictionary
+@testable import WordToVec
+
+final class NeuralNetworkTest: XCTestCase {
+    private var turkish: Corpus = Corpus()
+    private var english: Corpus = Corpus()
+    /// Loads both similarity data sets before each test.
+    override func setUp() {
+        self.english = Corpus(fileName: "english-similarity-dataset.txt")
+        self.turkish = Corpus(fileName: "turkish-similarity-dataset.txt")
+    }
+    /// Trains word vectors on the given corpus with the default parameters, using CBow or SkipGram.
+    private func train(corpus: Corpus, cBow: Bool) -> VectorizedDictionary{
+        let parameter = WordToVecParameter()
+        parameter.setCbow(cbow: cBow)
+        let neuralNetwork = NeuralNetwork(corpus: corpus, parameter: parameter)
+        return neuralNetwork.train()
+    }
+    
+    public func testTrainEnglishCBow(){
+        _ = self.train(corpus: self.english, cBow: true) // smoke test: training must run to completion
+    }
+
+    public func testTrainEnglishSkipGram(){
+        _ = self.train(corpus: self.english, cBow: false) // smoke test: training must run to completion
+    }
+
+    public func testTrainTurkishCBow(){
+        _ = self.train(corpus: self.turkish, cBow: true) // smoke test: training must run to completion
+    }
+
+    public func testTrainTurkishSkipGram(){
+        _ = self.train(corpus: self.turkish, cBow: false) // smoke test: training must run to completion
+    }
+
+    static var allTests = [
+        ("testTrainEnglishCBow", testTrainEnglishCBow),
+        ("testTrainEnglishSkipGram", testTrainEnglishSkipGram),
+        ("testTrainTurkishCBow", testTrainTurkishCBow),
+        ("testTrainTurkishSkipGram", testTrainTurkishSkipGram),
+    ]
+}