[BUG] rlike with an empty pattern fails with NoSuchElementException when regex rewrite is enabled
__________________________________________ test_rlike_rewrite_optimization_empty_pattern __________________________________________
[gw0] linux -- Python 3.8.3 /home/haoyangl/.pyenv/versions/3.8.3/bin/python
def test_rlike_rewrite_optimization_empty_pattern():
gen = mk_str_gen('[ab\n]{3,6}')
> assert_gpu_and_cpu_are_equal_collect(
lambda spark: unary_op_df(spark, gen).selectExpr(
'a',
'rlike(a, "")'),
conf=_regexp_conf)
../../src/main/python/regexp_test.py:449:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../src/main/python/asserts.py:595: in assert_gpu_and_cpu_are_equal_collect
_assert_gpu_and_cpu_are_equal(func, 'COLLECT', conf=conf, is_cpu_first=is_cpu_first, result_canonicalize_func_before_compare=result_canonicalize_func_before_compare)
../../src/main/python/asserts.py:503: in _assert_gpu_and_cpu_are_equal
from_gpu = run_on_gpu()
../../src/main/python/asserts.py:496: in run_on_gpu
from_gpu = with_gpu_session(bring_back, conf=conf)
../../src/main/python/spark_session.py:164: in with_gpu_session
return with_spark_session(func, conf=copy)
../../../../.pyenv/versions/3.8.3/lib/python3.8/contextlib.py:75: in inner
return func(*args, **kwds)
../../src/main/python/spark_session.py:131: in with_spark_session
ret = func(_spark)
../../src/main/python/asserts.py:205: in <lambda>
bring_back = lambda spark: limit_func(spark).collect()
../../../../spark-3.4.1-bin-hadoop3/python/pyspark/sql/dataframe.py:1216: in collect
sock_info = self._jdf.collectToPython()
/home/haoyangl/spark-3.4.1-bin-hadoop3/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322: in __call__
return_value = get_return_value(
../../../../spark-3.4.1-bin-hadoop3/python/pyspark/errors/exceptions/captured.py:169: in deco
return f(*a, **kw)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
answer = 'xro447', gateway_client = <py4j.clientserver.JavaClient object at 0x7fa87eb26040>, target_id = 'o446'
name = 'collectToPython'
def get_return_value(answer, gateway_client, target_id=None, name=None):
"""Converts an answer received from the Java gateway into a Python object.
For example, string representation of integers are converted to Python
integer, string representation of objects are converted to JavaObject
instances, etc.
:param answer: the string returned by the Java gateway
:param gateway_client: the gateway client used to communicate with the Java
Gateway. Only necessary if the answer is a reference (e.g., object,
list, map)
:param target_id: the name of the object from which the answer comes from
(e.g., *object1* in `object1.hello()`). Optional.
:param name: the name of the member from which the answer comes from
(e.g., *hello* in `object1.hello()`). Optional.
"""
if is_error(answer)[0]:
if len(answer) > 1:
type = answer[1]
value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
if answer[1] == REFERENCE_TYPE:
> raise Py4JJavaError(
"An error occurred while calling {0}{1}{2}.\n".
format(target_id, ".", name), value)
E py4j.protocol.Py4JJavaError: An error occurred while calling o446.collectToPython.
E : java.util.NoSuchElementException
E at scala.collection.LinearSeqOptimized.last(LinearSeqOptimized.scala:150)
E at scala.collection.LinearSeqOptimized.last$(LinearSeqOptimized.scala:149)
E at scala.collection.immutable.List.last(List.scala:91)
E at com.nvidia.spark.rapids.RegexRewrite$.getPrefixRangePattern(RegexParser.scala:2048)
E at com.nvidia.spark.rapids.RegexRewrite$.matchSimplePattern(RegexParser.scala:2135)
E at org.apache.spark.sql.rapids.GpuRLikeMeta.tagExprForGpu(stringFunctions.scala:1076)
E at com.nvidia.spark.rapids.BaseExprMeta.tagSelfForGpu(RapidsMeta.scala:1220)
E at com.nvidia.spark.rapids.RapidsMeta.tagForGpu(RapidsMeta.scala:318)
E at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$3(RapidsMeta.scala:294)
E at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$3$adapted(RapidsMeta.scala:294)
E at scala.collection.Iterator.foreach(Iterator.scala:943)
E at scala.collection.Iterator.foreach$(Iterator.scala:943)
E at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
E at scala.collection.IterableLike.foreach(IterableLike.scala:74)
E at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
E at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
E at com.nvidia.spark.rapids.RapidsMeta.tagForGpu(RapidsMeta.scala:294)
E at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$3(RapidsMeta.scala:294)
E at com.nvidia.spark.rapids.RapidsMeta.$anonfun$tagForGpu$3$adapted(RapidsMeta.scala:294)
E at scala.collection.immutable.Stream.foreach(Stream.scala:533)
E at com.nvidia.spark.rapids.RapidsMeta.tagForGpu(RapidsMeta.scala:294)
E at com.nvidia.spark.rapids.GpuOverrides$.wrapAndTagPlan(GpuOverrides.scala:4366)
E at com.nvidia.spark.rapids.GpuOverrides.applyOverrides(GpuOverrides.scala:4692)
E at com.nvidia.spark.rapids.GpuOverrides.$anonfun$applyWithContext$3(GpuOverrides.scala:4577)
E at com.nvidia.spark.rapids.GpuOverrides$.logDuration(GpuOverrides.scala:454)
E at com.nvidia.spark.rapids.GpuOverrides.$anonfun$applyWithContext$1(GpuOverrides.scala:4574)
E at com.nvidia.spark.rapids.GpuOverrideUtil$.$anonfun$tryOverride$1(GpuOverrides.scala:4540)
E at com.nvidia.spark.rapids.GpuOverrides.applyWithContext(GpuOverrides.scala:4594)
E at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:4567)
E at com.nvidia.spark.rapids.GpuOverrides.apply(GpuOverrides.scala:4563)
E at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1(Columnar.scala:564)
E at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.$anonfun$apply$1$adapted(Columnar.scala:564)
E at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
E at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
E at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
E at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:564)
E at org.apache.spark.sql.execution.ApplyColumnarRulesAndInsertTransitions.apply(Columnar.scala:516)
E at org.apache.spark.sql.execution.QueryExecution$.$anonfun$prepareForExecution$1(QueryExecution.scala:457)
E at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
E at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
E at scala.collection.immutable.List.foldLeft(List.scala:91)
E at org.apache.spark.sql.execution.QueryExecution$.prepareForExecution(QueryExecution.scala:456)
E at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:175)
E at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
E at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
E at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
E at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
E at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
E at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
E at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:175)
E at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:168)
E at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:221)
E at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:266)
E at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:235)
E at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:112)
E at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
E at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
E at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
E at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
E at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4165)
E at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3994)
E at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
E at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
E at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E at java.lang.reflect.Method.invoke(Method.java:498)
E at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
E at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
E at py4j.Gateway.invoke(Gateway.java:282)
E at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
E at py4j.commands.CallCommand.execute(CallCommand.java:79)
E at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
E at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
E at java.lang.Thread.run(Thread.java:750)
/home/haoyangl/spark-3.4.1-bin-hadoop3/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326: Py4JJavaError
Expected behavior
Match the CPU's behavior: the query should succeed instead of throwing `NoSuchElementException`.
The text was updated successfully, but these errors were encountered:
Describe the bug
`rlike` with an empty pattern fails with a `NoSuchElementException` when the regex rewrite optimization is enabled.
Steps/Code to reproduce bug
Run the integration test `test_rlike_rewrite_optimization_empty_pattern` in `integration_tests/src/main/python/regexp_test.py`; it fails on the GPU run with the traceback above (`RegexRewrite$.getPrefixRangePattern` calls `List.last` on an empty list at `RegexParser.scala:2048`).
Expected behavior
Match the CPU's behavior: the query should succeed instead of throwing `NoSuchElementException`.
The text was updated successfully, but these errors were encountered: