Skip to content

Commit

Permalink
fix issue 4642, fix DBFS file path problem on Databricks (#5679)
Browse files Browse the repository at this point in the history
* fix issue 4642

* parse model_dir

Co-authored-by: Zhou <[email protected]>
  • Loading branch information
PatrickkZ and PatrickkZ authored Sep 8, 2022
1 parent 8a478a1 commit d31aabb
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
2 changes: 2 additions & 0 deletions python/dllib/src/bigdl/dllib/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def mkdirs(path):


def is_local_path(path):
    """
    Tell whether ``path`` refers to the local filesystem.

    A path counts as local when it carries no URL scheme or uses the ``file``
    scheme. The ``/dbfs`` directory and everything below it are reported as
    non-local even though such paths have no scheme.

    :param path: path or URI string to classify.
    :return: True when the path is local, False otherwise.
    """
    # Match the /dbfs directory itself or anything beneath it, but not sibling
    # directories that merely share the textual prefix (e.g. "/dbfs_backup").
    if path == "/dbfs" or path.startswith("/dbfs/"):
        return False
    scheme = urlparse(path).scheme.lower()
    return scheme in ("", "file")

Expand Down
8 changes: 7 additions & 1 deletion python/orca/src/bigdl/orca/learn/tf2/pyspark_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
logger = logging.getLogger(__name__)


def parse_model_dir(model_dir):
    """
    Rewrite a ``dbfs:/``-prefixed model directory to its ``/dbfs/`` path form.

    :param model_dir: model directory string; may be None or empty.
    :return: ``"/dbfs/"`` followed by the text after the ``dbfs:/`` prefix when
             that prefix is present, otherwise ``model_dir`` unchanged.
    """
    dbfs_prefix = "dbfs:/"
    # Falsy values (None, "") and paths without the prefix pass through as-is.
    if not model_dir or not model_dir.startswith(dbfs_prefix):
        return model_dir
    remainder = model_dir[len(dbfs_prefix):]
    return "/dbfs/" + remainder


class SparkTFEstimator():
def __init__(self,
model_creator,
Expand Down Expand Up @@ -83,7 +89,7 @@ def __init__(self,
invalidInputError(False,
"Please do not specify batch_size in config. Input batch_size in the"
" fit/evaluate function of the estimator instead.")
self.model_dir = model_dir
self.model_dir = parse_model_dir(model_dir)
master = sc.getConf().get("spark.master")
if not master.startswith("local"):
logger.info("For cluster mode, make sure to use shared filesystem path "
Expand Down

0 comments on commit d31aabb

Please sign in to comment.