diff --git a/Makefile b/Makefile index c8ca98f..5025e77 100644 --- a/Makefile +++ b/Makefile @@ -20,13 +20,13 @@ ifeq ($(UNAME_S),x86_64) endif EXTENSION = stringtheory -DATA = stringtheory--1.0.1.sql +DATA = stringtheory--1.0.2.sql PGFILEDESC = "stringtheory - tools for testing equality" PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) -REGRESS = equality strstr +REGRESS = equality strstr memory COMPILE.cxx.bc = $(CLANG) -xc++ $(BITCODE_CXXFLAGS) $(PG_CXXFLAGS) $(CPPFLAGS) -emit-llvm -c diff --git a/expected/memory.out b/expected/memory.out new file mode 100644 index 0000000..5fa5360 --- /dev/null +++ b/expected/memory.out @@ -0,0 +1,52 @@ +CREATE TABLE memory_test (i BIGINT, t TEXT); +INSERT INTO memory_test SELECT 1, repeat('a', (2^19)::INTEGER); +INSERT INTO memory_test SELECT 2, repeat('b', (2^20)::INTEGER); +INSERT INTO memory_test SELECT 3, repeat('c', (2^21)::INTEGER); +SELECT i, stringtheory.strstr(t, 'aa') FROM memory_test; + i | strstr +---+-------- + 1 | 0 + 2 | -1 + 3 | -1 +(3 rows) + +SELECT i, stringtheory.strstr(t, 'bb') FROM memory_test; + i | strstr +---+-------- + 1 | -1 + 2 | 0 + 3 | -1 +(3 rows) + +SELECT i, stringtheory.strstr(t, 'cc') FROM memory_test; + i | strstr +---+-------- + 1 | -1 + 2 | -1 + 3 | 0 +(3 rows) + +SELECT i, stringtheory.equals(t, repeat('a', (2^19)::INTEGER)) FROM memory_test; + i | equals +---+-------- + 1 | t + 2 | f + 3 | f +(3 rows) + +SELECT i, stringtheory.equals(t, repeat('b', (2^20)::INTEGER)) FROM memory_test; + i | equals +---+-------- + 1 | f + 2 | t + 3 | f +(3 rows) + +SELECT i, stringtheory.equals(t, repeat('c', (2^21)::INTEGER)) FROM memory_test; + i | equals +---+-------- + 1 | f + 2 | f + 3 | t +(3 rows) + diff --git a/sql/memory.sql b/sql/memory.sql new file mode 100644 index 0000000..6e7be6a --- /dev/null +++ b/sql/memory.sql @@ -0,0 +1,13 @@ +CREATE TABLE memory_test (i BIGINT, t TEXT); + +INSERT INTO memory_test SELECT 1, repeat('a', (2^19)::INTEGER); +INSERT INTO memory_test SELECT 2, 
repeat('b', (2^20)::INTEGER); +INSERT INTO memory_test SELECT 3, repeat('c', (2^21)::INTEGER); + +SELECT i, stringtheory.strstr(t, 'aa') FROM memory_test; +SELECT i, stringtheory.strstr(t, 'bb') FROM memory_test; +SELECT i, stringtheory.strstr(t, 'cc') FROM memory_test; + +SELECT i, stringtheory.equals(t, repeat('a', (2^19)::INTEGER)) FROM memory_test; +SELECT i, stringtheory.equals(t, repeat('b', (2^20)::INTEGER)) FROM memory_test; +SELECT i, stringtheory.equals(t, repeat('c', (2^21)::INTEGER)) FROM memory_test; diff --git a/src/text.cpp b/src/text.cpp index 7841b2f..98b7220 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -74,16 +74,41 @@ Datum pg_strstr(PG_FUNCTION_ARGS) { PG_RETURN_INT32(-1); } - /* Make a copy of the data to null terminate. */ - char left_term[ len_left + 1 ]; - memcpy(left_term, VARDATA_ANY(left), len_left); - left_term[ len_left ] = '\0'; - char right_term[ len_right + 1 ]; - memcpy(right_term, VARDATA_ANY(right), len_right); - right_term[ len_right ] = '\0'; - - /* Get the results from the simd functions. */ - size_t ret = fast_strstr(left_term, len_left, right_term, len_right); + /* + * Make a copy of the data to null terminate. + * + * If either side is 2^20 bytes or more, don't allocate + * on the stack, but use the heap instead. + */ + bool use_heap = (len_left >= 1048576 || len_right >= 1048576); + size_t ret; + + if (!use_heap) { + char left_term[ len_left + 1 ]; + memcpy(left_term, VARDATA_ANY(left), len_left); + left_term[ len_left ] = '\0'; + + char right_term[ len_right + 1 ]; + memcpy(right_term, VARDATA_ANY(right), len_right); + right_term[ len_right ] = '\0'; + + /* Get the results from the simd functions. 
*/ + ret = fast_strstr(left_term, len_left, right_term, len_right); + } else { + char *left_term = (char *)palloc(len_left + 1); + memcpy(left_term, VARDATA_ANY(left), len_left); + left_term[ len_left ] = '\0'; + + char *right_term = (char *)palloc(len_right + 1); + memcpy(right_term, VARDATA_ANY(right), len_right); + right_term[ len_right ] = '\0'; + + /* Get the results from the simd functions. */ + ret = fast_strstr(left_term, len_left, right_term, len_right); + + pfree(left_term); + pfree(right_term); + } PG_RETURN_INT32(ret); } @@ -107,9 +132,41 @@ Datum pg_equals(PG_FUNCTION_ARGS) { PG_RETURN_BOOL(false); } - /* Get the results from the simd functions. */ - size_t ret = - fast_strstr(VARDATA_ANY(left), len_left, VARDATA_ANY(right), len_right); + /* + * Make a copy of the data to null terminate. + * + * If either side is 2^20 bytes or more, don't allocate + * on the stack, but use the heap instead. + */ + bool use_heap = (len_left >= 1048576 || len_right >= 1048576); + size_t ret; + + if (!use_heap) { + char left_term[ len_left + 1 ]; + memcpy(left_term, VARDATA_ANY(left), len_left); + left_term[ len_left ] = '\0'; + + char right_term[ len_right + 1 ]; + memcpy(right_term, VARDATA_ANY(right), len_right); + right_term[ len_right ] = '\0'; + + /* Get the results from the simd functions. */ + ret = fast_strstr(left_term, len_left, right_term, len_right); + } else { + char *left_term = (char *)palloc(len_left + 1); + memcpy(left_term, VARDATA_ANY(left), len_left); + left_term[ len_left ] = '\0'; + + char *right_term = (char *)palloc(len_right + 1); + memcpy(right_term, VARDATA_ANY(right), len_right); + right_term[ len_right ] = '\0'; + + /* Get the results from the simd functions. */ + ret = fast_strstr(left_term, len_left, right_term, len_right); + + pfree(left_term); + pfree(right_term); + } /* If the result is 0, strings are equal. 
*/ PG_RETURN_BOOL(ret == 0); diff --git a/stringtheory--1.0.1.sql b/stringtheory--1.0.2.sql similarity index 100% rename from stringtheory--1.0.1.sql rename to stringtheory--1.0.2.sql diff --git a/stringtheory.control b/stringtheory.control index 2571ef7..b9ae28e 100644 --- a/stringtheory.control +++ b/stringtheory.control @@ -1,5 +1,5 @@ # compare extension comment = 'tools for comparing strings' -default_version = '1.0.1' +default_version = '1.0.2' module_pathname = '$libdir/stringtheory' relocatable = true