Skip to content

Commit

Permalink
Added wordCount method and tests for TreeBank.
Browse files Browse the repository at this point in the history
  • Loading branch information
Olcay Taner YILDIZ committed Apr 21, 2022
1 parent 6024fd9 commit 1cd3678
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 4 deletions.
42 changes: 42 additions & 0 deletions Sources/ParseTree/ParseNode.swift
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,48 @@ open class ParseNode : Equatable{
self.data = data
}

/**
 * Lowercased surface forms that wordCount(excludeStopWords:) skips when stop words are excluded.
 * Kept in a set so each leaf costs one lowercase conversion and one lookup.
 */
private static let wordCountStopWords: Set<String> = [
    "at", "the", "to", "a", "an", "not", "is", "was", "were", "have", "had", "has", "by",
    "on", "off", "'s", "n't", "can", "could", "may", "might", "will", "would", "as", "with",
    "for", "than", "$", "and", "or", "of", "are", "be", "been", "do", "few", "there", "up",
    "down", "in", "'re"
]

/**
 * Recursive function to count the number of words in the subtree rooted at this node. Only leaf nodes
 * (nodes without children) contribute to the count. When stop words are excluded, a leaf is skipped if
 * Word.isPunctuationSymbol accepts its name, if its name contains "*", or if its lowercased name is in
 * the stop word set.
 - Parameters:
    - excludeStopWords: If true, stop words are not counted.
 - Returns: Number of words in the subtree rooted at this node.
 */
public func wordCount(excludeStopWords: Bool) -> Int{
    // A missing child list is treated like an empty one, so such a node is handled as a leaf
    // instead of trapping on a forced unwrap.
    let childNodes = children ?? []
    guard childNodes.isEmpty else {
        // Inner nodes are never words themselves; they only aggregate their children.
        return childNodes.reduce(0) { $0 + $1.wordCount(excludeStopWords: excludeStopWords) }
    }
    // Without filtering every leaf counts. A leaf that has no symbol cannot be matched against
    // the stop word rules, so it is counted as well rather than crashing.
    guard excludeStopWords, let name = data?.getName() else {
        return 1
    }
    let isStopWord = Word.isPunctuationSymbol(surfaceForm: name)
        || name.contains("*")
        || ParseNode.wordCountStopWords.contains(name.lowercased())
    return isStopWord ? 0 : 1
}

/**
* Construct recursively the constituent span list of a subtree rooted at this node.
- Parameters:
Expand Down
10 changes: 10 additions & 0 deletions Sources/ParseTree/ParseTree.swift
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,14 @@ open class ParseTree{
public func toSentence() -> String{
    // A tree without a root has no words; return an empty sentence instead of
    // trapping on a forced unwrap of the optional chain.
    guard let rootNode = root else {
        return ""
    }
    return rootNode.toSentence().trimmingCharacters(in: .whitespacesAndNewlines)
}

/**
 * Counts the number of words in the tree by delegating to the recursive wordCount of the root node.
 - Parameters:
    - excludeStopWords: If true, stop words are not counted.
 - Returns: Number of words in the tree; 0 if the tree has no root node.
 */
public func wordCount(excludeStopWords: Bool) -> Int{
    // An empty tree contributes no words; avoid force-unwrapping a nil root.
    return root?.wordCount(excludeStopWords: excludeStopWords) ?? 0
}
}
20 changes: 16 additions & 4 deletions Sources/ParseTree/TreeBank.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ open class TreeBank {
do {
let listOfFiles = try fileManager.contentsOfDirectory(atPath: folder)
for file in listOfFiles {
let thisSourceFile = URL(fileURLWithPath: #file)
let thisDirectory = thisSourceFile.deletingLastPathComponent()
let thisDirectory = URL(fileURLWithPath: folder)
let url = thisDirectory.appendingPathComponent(file)
let parseTree = ParseTree(url: url)
if parseTree.getRoot() != nil{
Expand All @@ -52,8 +51,7 @@ open class TreeBank {
let listOfFiles = try fileManager.contentsOfDirectory(atPath: folder)
for file in listOfFiles {
if file.contains(pattern){
let thisSourceFile = URL(fileURLWithPath: #file)
let thisDirectory = thisSourceFile.deletingLastPathComponent()
let thisDirectory = URL(fileURLWithPath: folder)
let url = thisDirectory.appendingPathComponent(file)
let parseTree = ParseTree(url: url)
if parseTree.getRoot() != nil{
Expand Down Expand Up @@ -82,6 +80,20 @@ open class TreeBank {
return parseTrees.count
}

/**
 * Sums the word counts of all parse trees stored in the treeBank.
 - Parameters:
    - excludeStopWords: If true, stop words are not counted.
 - Returns: Number of words in the treeBank.
 */
public func wordCount(excludeStopWords: Bool) -> Int{
    return parseTrees.reduce(0) { runningTotal, tree in
        runningTotal + tree.wordCount(excludeStopWords: excludeStopWords)
    }
}

/**
* Accessor for a single ParseTree.
- Parameters:
Expand Down
27 changes: 27 additions & 0 deletions Tests/ParseTreeTests/TreeBankTest.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import XCTest
@testable import ParseTree

/// Tests for TreeBank, run against the parse trees in the "trees" folder located next to this test file.
final class TreeBankTest: XCTestCase {

    var treeBank1 : TreeBank = TreeBank()

    /// Reloads the tree bank from the "trees" folder before every test.
    override func setUp() {
        super.setUp()
        // Resolve the folder relative to this source file so the test does not depend on
        // the current working directory.
        let thisSourceFile = URL(fileURLWithPath: #file)
        let thisDirectory = thisSourceFile.deletingLastPathComponent()
        treeBank1 = TreeBank(folder: thisDirectory.appendingPathComponent("trees").path)
    }

    /// The fixture folder is expected to yield five parse trees.
    func testSize() {
        XCTAssertEqual(5, treeBank1.size())
    }

    /// Checks the total word count of the fixture trees with and without stop words.
    func testWordCount() {
        XCTAssertEqual(30, treeBank1.wordCount(excludeStopWords: true))
        XCTAssertEqual(52, treeBank1.wordCount(excludeStopWords: false))
    }

    // Labels match the method names so manifest-based runs report the tests
    // under the names they have in this file.
    static var allTests = [
        ("testSize", testSize),
        ("testWordCount", testWordCount),
    ]
}

0 comments on commit 1cd3678

Please sign in to comment.