Skip to content

Commit

Permalink
Merge pull request #78 from yibeichan/fix-strip
Browse files Browse the repository at this point in the history
fix strip() issue for choices
  • Loading branch information
satra authored Nov 21, 2024
2 parents aecab71 + d10601a commit 808ed12
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 6 deletions.
23 changes: 17 additions & 6 deletions reproschema/redcap2reproschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,10 @@ def process_choices(choices_str, field_name):
choices = []
choices_value_type = []
for ii, choice in enumerate(choices_str.split("|")):
parts = choice.split(", ")
choice = (
choice.strip()
) # Strip leading/trailing whitespace for each choice
parts = [p.strip() for p in choice.split(",")]

# Handle the case where the choice is something like "1,"
if len(parts) == 1:
Expand All @@ -213,14 +216,22 @@ def process_choices(choices_str, field_name):
)
parts = [ii, parts[0]]

# Try to convert the first part to an integer, if it fails, keep it as a string
try:
value = int(parts[0])
# Determine if value should be treated as an integer or string
if parts[0] == "0":
# Special case for "0", treat it as an integer
value = 0
choices_value_type.append("xsd:integer")
except ValueError:
elif parts[0].isdigit() and parts[0][0] == "0":
# If it has leading zeros, treat it as a string
value = parts[0]
choices_value_type.append("xsd:string")

else:
try:
value = int(parts[0])
choices_value_type.append("xsd:integer")
except ValueError:
value = parts[0]
choices_value_type.append("xsd:string")
choice_obj = {
"name": {"en": " ".join(parts[1:]).strip()},
"value": value,
Expand Down
121 changes: 121 additions & 0 deletions reproschema/tests/test_process_choices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import os
import shutil

import pytest
import yaml
from click.testing import CliRunner

from ..cli import main
from ..redcap2reproschema import process_choices


def test_process_choices_numeric_codes():
# Test standard numeric codes with descriptions
choices_str = "1, Male | 2, Female | 3, Other"
choices, value_types = process_choices(choices_str, "gender")
assert choices == [
{"name": {"en": "Male"}, "value": 1},
{"name": {"en": "Female"}, "value": 2},
{"name": {"en": "Other"}, "value": 3},
]
assert value_types == ["xsd:integer"]


def test_process_choices_boolean():
# Test boolean choices (Yes/No)
choices_str = "1, Yes | 0, No"
choices, value_types = process_choices(choices_str, "boolean_field")
assert choices == [
{"name": {"en": "Yes"}, "value": 1},
{"name": {"en": "No"}, "value": 0},
]
assert value_types == ["xsd:integer"]


def test_process_choices_special_characters():
# Test choices with special characters
choices_str = "1, Option A | 2, \"Option B\" | 3, Option C with 'quotes'"
choices, value_types = process_choices(choices_str, "special_chars")
assert choices == [
{"name": {"en": "Option A"}, "value": 1},
{"name": {"en": '"Option B"'}, "value": 2},
{"name": {"en": "Option C with 'quotes'"}, "value": 3},
]
assert value_types == ["xsd:integer"]


def test_process_choices_with_missing_values():
# Test choices with a missing value (commonly used for "Not applicable" or "Prefer not to say")
choices_str = "1, Yes | 2, No | 99, Not applicable"
choices, value_types = process_choices(choices_str, "missing_values")
assert choices == [
{"name": {"en": "Yes"}, "value": 1},
{"name": {"en": "No"}, "value": 2},
{"name": {"en": "Not applicable"}, "value": 99},
]
assert value_types == ["xsd:integer"]


def test_process_choices_with_unicode():
# Test choices with Unicode characters (e.g., accents, symbols)
choices_str = "1, Café | 2, Niño | 3, Résumé | 4, ☺"
choices, value_types = process_choices(choices_str, "unicode_field")
assert choices == [
{"name": {"en": "Café"}, "value": 1},
{"name": {"en": "Niño"}, "value": 2},
{"name": {"en": "Résumé"}, "value": 3},
{"name": {"en": "☺"}, "value": 4},
]
assert value_types == ["xsd:integer"]


def test_process_choices_alpha_codes():
# Test alpha codes (e.g., categorical text codes)
choices_str = "A, Apple | B, Banana | C, Cherry"
choices, value_types = process_choices(choices_str, "alpha_codes")
assert choices == [
{"name": {"en": "Apple"}, "value": "A"},
{"name": {"en": "Banana"}, "value": "B"},
{"name": {"en": "Cherry"}, "value": "C"},
]
assert sorted(value_types) == ["xsd:string"]


def test_process_choices_incomplete_values():
# Test choices with missing descriptions
choices_str = "1, Yes | 2, | 3, No"
choices, value_types = process_choices(choices_str, "incomplete_values")
assert choices == [
{"name": {"en": "Yes"}, "value": 1},
{"name": {"en": ""}, "value": 2},
{"name": {"en": "No"}, "value": 3},
]
assert value_types == ["xsd:integer"]


def test_process_choices_numeric_strings():
# Test numeric strings as values (e.g., not converted to integers)
choices_str = "001, Option 001 | 002, Option 002 | 003, Option 003"
choices, value_types = process_choices(choices_str, "numeric_strings")
assert choices == [
{"name": {"en": "Option 001"}, "value": "001"},
{"name": {"en": "Option 002"}, "value": "002"},
{"name": {"en": "Option 003"}, "value": "003"},
]
assert sorted(value_types) == ["xsd:string"]


def test_process_choices_spaces_in_values():
# Test choices with spaces in values and names
choices_str = "A B, Choice AB | C D, Choice CD"
choices, value_types = process_choices(choices_str, "spaces_in_values")
assert choices == [
{"name": {"en": "Choice AB"}, "value": "A B"},
{"name": {"en": "Choice CD"}, "value": "C D"},
]
assert sorted(value_types) == ["xsd:string"]


# Run pytest if script is called directly
if __name__ == "__main__":
pytest.main()

0 comments on commit 808ed12

Please sign in to comment.