Skip to content

Commit

Permalink
Merge pull request #2803 from AK-ayush/feat-edit-mat
Browse files Browse the repository at this point in the history
[REVIEW] Feature: pairwise edit distance for each string on a given nvstrings object
  • Loading branch information
Keith Kraus authored Sep 28, 2019
2 parents 989190f + ff9eced commit 7f32395
Show file tree
Hide file tree
Showing 24 changed files with 349 additions and 42 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
- PR #2844 Add Series/DataFrame notnull
- PR #2858 Add GTest type list utilities
- PR #2655 CuPy-based Series and Dataframe .values property
- PR #2803 Add `edit_distance_matrix()` function to calculate the pairwise edit distance between all strings in a given nvstrings object

## Improvements

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/category/NVCategory.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <thrust/copy.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVCategory.h"
#include "nvstrings/NVStrings.h"
Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/NVStrings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <thrust/unique.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"
#include "nvstrings/ipc_transfer.h"
Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/NVStringsImpl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <thrust/sort.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/array.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <thrust/count.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/attrs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <thrust/transform_scan.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/combine.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <thrust/for_each.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/convert.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <thrust/count.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <thrust/count.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/datetime.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <thrust/count.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/find.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <thrust/count.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/modify.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <thrust/sort.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <thrust/extrema.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/strings/strip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <thrust/for_each.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"

Expand Down
21 changes: 21 additions & 0 deletions cpp/custrings/tests/test_text.cu
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,27 @@ TEST(TestText, EditDistance)
NVStrings::destroy(strs);
}

// Checks NVText::edit_distance_matrix with the levenshtein algorithm: for N
// input strings the call is expected to fill a flat device buffer of N*N
// distances, which is then compared element by element with a hand-computed table.
TEST(TestText, EditDistanceMatrix)
{
    // Inputs include a null entry and an empty string; in the expected table
    // below both produce identical rows and columns.
    std::vector<const char*> hstrs{ "dog", nullptr, "cat", "mouse",
                                    "pup", "", "puppy" };
    const size_t count = hstrs.size();
    const size_t total = count * count;

    NVStrings* strs = NVStrings::create_from_array(hstrs.data(), count);

    // Device-side output buffer, zero-initialised, one slot per string pair.
    thrust::device_vector<unsigned int> results(total, 0);
    NVText::edit_distance_matrix(NVText::levenshtein, *strs, results.data().get());

    // One row of 7 values per input string, in input order (diagonal is 0).
    unsigned int expected[] = {
        0, 3, 3, 4, 3, 3, 5,
        3, 0, 3, 5, 3, 0, 5,
        3, 3, 0, 5, 3, 3, 5,
        4, 5, 5, 0, 4, 5, 5,
        3, 3, 3, 4, 0, 3, 2,
        3, 0, 3, 5, 3, 0, 5,
        5, 5, 5, 5, 2, 5, 0
    };

    for( size_t idx = 0; idx < total; ++idx )
    {
        EXPECT_EQ(results[idx],expected[idx]);
    }

    NVStrings::destroy(strs);
}

TEST(TestText, NGrams)
{
NVStrings* strs = NVStrings::create_from_array(tstrs.data(),tstrs.size());
Expand Down
1 change: 1 addition & 0 deletions cpp/custrings/text/NVText.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <thrust/reduce.h>
#include <rmm/rmm.h>
#include <rmm/thrust_rmm_allocator.h>
#include <utilities/error_utils.hpp>

#include "nvstrings/NVStrings.h"
#include "nvstrings/NVText.h"
Expand Down
Loading

0 comments on commit 7f32395

Please sign in to comment.