Skip to content

Commit

Permalink
NIAD-2822: Fix issue where incoming messages containing multibyte UTF-8 characters are mangled (#137)
Browse files Browse the repository at this point in the history

* Create failing unit tests to reproduce NIAD-2822
* Use message_from_bytes method
  • Loading branch information
adrianclay authored Sep 6, 2023
1 parent 1e2e04b commit 474f302
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 1 deletion.
2 changes: 1 addition & 1 deletion mhs/common/mhs_common/messages/ebxml_request_envelope.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def _parse_mime_message(headers: Dict[str, str], message: str) -> email.message.
"""
content_type_header = f'{HttpHeaders.CONTENT_TYPE}: {headers[HttpHeaders.CONTENT_TYPE]}\r\n\r\n'

msg = email.message_from_string(content_type_header + message, policy=email.policy.HTTP)
msg = email.message_from_bytes(bytes(content_type_header + message, 'utf-8'), policy=email.policy.HTTP)

if msg.defects:
logger.warning('Found defects in MIME message during parsing. {Defects}',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,12 @@ def test_from_string_parses_valid_requests(self):

self.assertEqual(expected_values_with_payload, parsed_message.message_dictionary)

with self.subTest("A valid request containing multibyte UTF8 characters within HL7 XML"):
# Regression test for NIAD-2822
message, _ = message_utilities.load_test_data(self.message_dir, 'ebxml_request_multibyte_character')
parsed_message = ebxml_request_envelope.EbxmlRequestEnvelope.from_string(MULTIPART_MIME_HEADERS, message)
self.assertEquals(parsed_message.message_dictionary['hl7_message'], "<xml>¬ ❤️ 🧸</xml>")

with self.subTest("A valid request containing one textual attachment with no description provided"):
message, ebxml = message_utilities.load_test_data(self.message_dir, 'ebxml_request_one_attachment_application_xml_content_type_no_description')
attachments = [{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<soap:Envelope xmlns:eb="http://www.oasis-open.org/committees/ebxml-msg/schema/msg-header-2_0.xsd" xmlns:hl7ebxml="urn:hl7-org:transport/ebxml/DSTUv1.0" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Header><eb:MessageHeader eb:version="2.0" soap:mustUnderstand="1"><eb:From><eb:PartyId eb:type="urn:nhs:names:partyType:ocs+serviceInstance">5EP-807264</eb:PartyId></eb:From><eb:To><eb:PartyId eb:type="urn:nhs:names:partyType:ocs+serviceInstance">YGMYW-822993</eb:PartyId></eb:To><eb:CPAId>5f0ba4c0967d2a9ca9b3</eb:CPAId><eb:ConversationId>018A2D28-A043-73C9-BAE9-5CB491815950</eb:ConversationId><eb:Service>urn:nhs:names:services:gp2gp</eb:Service><eb:Action>RCMR_IN030000UK06</eb:Action><eb:MessageData><eb:MessageId>483E788A-3464-46E3-ACE3-E15DB68D8D36</eb:MessageId><eb:Timestamp>2023-08-25T14:45:57.571Z</eb:Timestamp><eb:TimeToLive>2023-08-25T21:00:57.571Z</eb:TimeToLive></eb:MessageData><eb:DuplicateElimination /></eb:MessageHeader><eb:AckRequested eb:version="2.0" soap:mustUnderstand="1" soap:actor="urn:oasis:names:tc:ebxml-msg:actor:nextMSH" eb:signed="false" /></soap:Header><soap:Body><eb:Manifest eb:version="2.0" soap:mustUnderstand="1"><eb:Reference xlink:href="cid:[email protected]/EMISWeb/GP2GP2.2A" xmlns:xlink="http://www.w3.org/1999/xlink"><eb:Description xml:lang="en">RCMR_IN030000UK06</eb:Description><hl7ebxml:Payload style="HL7" encoding="XML" version="3.0" /></eb:Reference><eb:Reference xlink:href="cid:[email protected]/EMISWeb/GP2GP2.2A" eb:id="_8231C804-9E4B-451E-8323-5FC86FEE6C2F" xmlns:xlink="http://www.w3.org/1999/xlink"><eb:Description xml:lang="en">8231C804-9E4B-451E-8323-5FC86FEE6C2F_MARBLES.png</eb:Description></eb:Reference></eb:Manifest></soap:Body></soap:Envelope>
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
----=_MIME-Boundary
Content-Id: <ContentRoot>
Content-Type: text/xml; charset=UTF-8

{{ebxml}}
----=_MIME-Boundary
Content-Id: <[email protected]/EMISWeb/GP2GP2.2A>
Content-Transfer-Encoding: 8bit
Content-Type: application/xml; charset=UTF-8

<xml>¬ ❤️ 🧸</xml>
----=_MIME-Boundary--

0 comments on commit 474f302

Please sign in to comment.