Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deterministic ID handling with unicode #300

Merged
merged 2 commits into from
Oct 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions stix2/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,11 +394,14 @@ def _generate_id(self, kwargs):
if streamlined_obj_vals:
data = canonicalize(streamlined_obj_vals, utf8=False)

# try/except here to enable python 2 compatibility
try:
# The situation is complicated w.r.t. python 2/3 behavior, so
# I'd rather not rely on particular exceptions being raised to
# determine what to do. Better to just check the python version
# directly.
if six.PY3:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data))
except UnicodeDecodeError:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, six.binary_type(data)))
else:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data.encode("utf-8")))

# We return None if there are no values specified for any of the id-contributing-properties
return None
Expand Down
13 changes: 13 additions & 0 deletions stix2/test/v21/test_base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import datetime as dt
import json
import uuid

import pytest
import pytz

import stix2
from stix2.base import STIXJSONEncoder


Expand All @@ -23,3 +25,14 @@ def test_encode_json_object():
json.dumps(test_dict, cls=STIXJSONEncoder)

assert " is not JSON serializable" in str(excinfo.value)


def test_deterministic_id_unicode():
    """Parse a mutex whose name contains non-ASCII characters and check
    that the UUID portion of the resulting ID is an RFC 4122, version 5 UUID.
    """
    # The name mixes ASCII characters with U+00A3 and U+00A8.
    observable_dict = {'name': u'D*Fl#Ed*\u00a3\u00a8', 'type': 'mutex'}
    parsed = stix2.parse_observable(observable_dict, version="2.1")

    # Everything after the first "--" in the ID is expected to be the UUID.
    _, _, uuid_text = parsed.id.partition("--")
    parsed_uuid = uuid.UUID(uuid_text)

    assert parsed_uuid.variant == uuid.RFC_4122
    assert parsed_uuid.version == 5