Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Support saving R code including .qmd and .Rmd #95

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 55 additions & 13 deletions lamin_cli/_save.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,32 @@ def parse_uid_from_code(
version_pattern = re.compile(
r'\.transform\.version\s*=\s*\\["\']([^"\']+)\\["\']'
)
elif suffix in {".R", ".qmd", ".Rmd"}:
track_pattern = re.compile(r'track\(\s*[\'"]([a-zA-Z0-9]{16})[\'"]')
uid_pattern = None
stem_uid_pattern = None
version_pattern = None
else:
raise ValueError("Only .py and .ipynb files are supported.")
raise SystemExit(
"Only .py, .ipynb, .R, .qmd, .Rmd files are supported for saving"
" transforms."
)

# Search for matches in the entire file content
uid_match = track_pattern.search(content)
uid = uid_match.group(1) if uid_match else None
if uid is None:
uid_match = uid_pattern.search(content)
stem_uid_match = stem_uid_pattern.search(content)
version_match = version_pattern.search(content)
stem_uid = None
version = None

# Extract values if matches are found
uid = uid_match.group(1) if uid_match else None
stem_uid = stem_uid_match.group(1) if stem_uid_match else None
version = version_match.group(1) if version_match else None
if uid_pattern is not None and uid is None:
uid_match = uid_pattern.search(content)
uid = uid_match.group(1) if uid_match else None
if stem_uid_pattern is not None:
stem_uid_match = stem_uid_pattern.search(content)
stem_uid = stem_uid_match.group(1) if stem_uid_match else None
if version_pattern is not None:
version_match = version_pattern.search(content)
version = version_match.group(1) if version_match else None

if uid is None and (stem_uid is None or version is None):
target = "script" if suffix == ".py" else "notebook"
Expand Down Expand Up @@ -72,8 +83,27 @@ def save_from_filepath_cli(

ln_setup.settings.auto_connect = auto_connect_state

suffixes_transform = {
"py": set([".py", ".ipynb"]),
"R": set([".R", ".qmd", ".Rmd"]),
}

if (
filepath.suffix in {".qmd", ".Rmd"}
and not filepath.with_suffix(".html").exists()
):
raise SystemExit(
f"Please export your {filepath.suffix} file as an html file here"
f" {filepath.with_suffix('.html')}"
)

if registry is None:
registry = "transform" if filepath.suffix in {".py", ".ipynb"} else "artifact"
registry = (
"transform"
if filepath.suffix
in suffixes_transform["py"].union(suffixes_transform["R"])
else "artifact"
)

if registry == "artifact":
ln.settings.creation.artifact_silence_missing_run_warning = True
Expand Down Expand Up @@ -108,16 +138,28 @@ def save_from_filepath_cli(
run = ln.Run.filter(transform=transform).order_by("-started_at").first()
if run.created_by.id != ln_setup.settings.user.id:
response = input(
"You are trying to save a transform created by another user: Source and"
" report files will be tagged with *your* user id. Proceed? (y/n)"
"You are trying to save a transform created by another user: Source"
" and report files will be tagged with *your* user id. Proceed?"
" (y/n)"
)
if response != "y":
return "aborted-save-notebook-created-by-different-user"
return save_context_core(
return_code = save_context_core(
run=run,
transform=transform,
filepath=filepath,
from_cli=True,
)
if filepath.suffix in {".qmd", ".Rmd"}:
report_file = ln.Artifact(
filepath.with_suffix(".html"), # validated at the top that this exists
description=f"Report of run {run.uid}",
visibility=0, # hidden file
run=False,
)
report_file.save(upload=True, print_progress=False)
run.report = report_file
run.save()
return return_code
else:
raise SystemExit("Allowed values for '--registry' are: 'artifact', 'transform'")
20 changes: 20 additions & 0 deletions tests/scripts/run-track.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Example R script whose run is tracked by lamindb (accessed through reticulate).
library(reticulate)

# Bind the Python `lamindb` module so its API can be called from R
ln <- import("lamindb")

# Start tracking: first argument is the unique id of this script, `path` is the script path
ln$track("EPnfDtJz8qbE0000", path = "run-track.R")

# Build a small example data frame
sample_df <- data.frame(
  id = seq_len(5),
  value = c(10.5, 20.3, 15.7, 25.1, 30.2),
  category = c("A", "B", "A", "C", "B")
)

# Serialize the data frame to an RDS file
rds_path <- "example_data.rds"
saveRDS(sample_df, file = rds_path)

# Register the RDS file as an artifact and save it
artifact <- ln$Artifact(rds_path, description = "Example dataframe")
artifact$save()

# Mark the script run as finished
ln$finish()
3 changes: 3 additions & 0 deletions tests/scripts/run-track.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# dummy qmd file

ln$track("HPnfDtJz8qbE0000", path="run-track.qmd") # <-- unique id for the script, script path
73 changes: 73 additions & 0 deletions tests/test_save_r_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from pathlib import Path
import subprocess
import os
import lamindb as ln

scripts_dir = Path(__file__).parent.resolve() / "scripts"


def _run_lamin_save(filepath: Path) -> subprocess.CompletedProcess:
    """Run ``lamin save <filepath>`` and return the completed process.

    stdout and stderr are captured and decoded as UTF-8 text.
    """
    # An argument list (no shell) keeps paths that contain spaces or shell
    # metacharacters intact.
    return subprocess.run(
        ["lamin", "save", str(filepath)],
        capture_output=True,
        encoding="utf-8",
    )


def test_run_save_cache():
    """Check that ``lamin save`` stores source code for .R and .qmd files.

    For the .R script, saving must succeed and populate ``source_code`` of the
    pre-registered transform. For the .qmd file, saving must first fail because
    no exported html exists next to it, and then succeed once the html is
    present, populating both ``source_code`` and the run report.
    """
    os.environ["LAMIN_TESTING"] = "true"

    # --- plain R script ---
    filepath = scripts_dir / "run-track.R"

    transform = ln.Transform(
        uid="EPnfDtJz8qbE0000", name="run-track.R", key="run-track.R", type="script"
    ).save()
    ln.Run(transform=transform).save()

    assert transform.source_code is None

    result = _run_lamin_save(filepath)
    assert result.returncode == 0, result.stderr
    assert "on uid 'EPnfDtJz8qbE0000'" in result.stdout

    transform = ln.Transform.get("EPnfDtJz8qbE0000")
    assert transform.source_code is not None

    # --- .qmd file (.Rmd adheres to same principles) ---
    filepath = scripts_dir / "run-track.qmd"
    html_path = filepath.with_suffix(".html")
    # A previous, aborted test run may have left the html behind; the first
    # save below must run without it.
    html_path.unlink(missing_ok=True)

    transform = ln.Transform(
        uid="HPnfDtJz8qbE0000",
        name="run-track.qmd",
        key="run-track.qmd",
        type="notebook",
    ).save()
    ln.Run(transform=transform).save()

    assert transform.source_code is None
    assert transform.latest_run.report is None

    # Without an exported html file the CLI must refuse to save
    result = _run_lamin_save(filepath)
    assert result.returncode == 1, result.stdout
    assert "Please export your" in result.stderr

    html_path.write_text("dummy html")
    try:
        result = _run_lamin_save(filepath)
        assert result.returncode == 0, result.stdout + result.stderr

        transform = ln.Transform.get("HPnfDtJz8qbE0000")
        assert transform.source_code is not None
        assert transform.latest_run.report is not None
    finally:
        # Always remove the dummy html so a failing assertion cannot poison
        # later runs of this test.
        html_path.unlink(missing_ok=True)