Skip to content

Commit

Permalink
Merge pull request #1878 from kevinbackhouse/XMLValidator
Browse files Browse the repository at this point in the history
Add some XML validation to avoid xmpsdk bugs
  • Loading branch information
kevinbackhouse authored Aug 27, 2021
2 parents c9f253f + 5703dbc commit f9248f9
Show file tree
Hide file tree
Showing 13 changed files with 212 additions and 35 deletions.
3 changes: 3 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ target_include_directories(exiv2lib SYSTEM PRIVATE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/xmpsdk/include>
)

target_include_directories(exiv2lib PRIVATE ${EXPAT_INCLUDE_DIR})
target_link_libraries(exiv2lib PRIVATE EXPAT::EXPAT)

if (EXIV2_ENABLE_XMP)
target_link_libraries(exiv2lib PRIVATE exiv2-xmp)
elseif(EXIV2_ENABLE_EXTERNAL_XMP)
Expand Down
164 changes: 164 additions & 0 deletions src/xmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <algorithm>
#include <cassert>
#include <string>
#include <expat.h>

// Adobe XMP Toolkit
#ifdef EXV_HAVE_XMP_TOOLKIT
Expand All @@ -42,6 +43,168 @@
# include <XMP.incl_cpp>
#endif // EXV_HAVE_XMP_TOOLKIT

// This anonymous namespace contains a class named XMLValidator, which uses
// libexpat to do a basic validation check on an XML document. This is to
// reduce the chance of hitting a bug in the (third-party) xmpsdk
// library. For example, it is easy to a trigger a stack overflow in xmpsdk
// with a deeply nested tree.
namespace {
using namespace Exiv2;

class XMLValidator {
size_t element_depth_ = 0;
size_t namespace_depth_ = 0;

// These fields are used to record whether an error occurred during
// parsing. Why do we need to store the error for later, rather
// than throw an exception immediately? Because expat is a C
// library, so it isn't designed to be able to handle exceptions
// thrown by the callback functions. Throwing exceptions during
// parsing is an example of one of the things that xmpsdk does
// wrong, leading to problems like https://github.com/Exiv2/exiv2/issues/1821.
bool haserror_ = false;
std::string errmsg_;
XML_Size errlinenum_ = 0;
XML_Size errcolnum_ = 0;

// Very deeply nested XML trees can cause a stack overflow in
// xmpsdk. They are also very unlikely to be valid XMP, so we
// error out if the depth exceeds this limit.
static const size_t max_recursion_limit_ = 1000;

const XML_Parser parser_;

public:
// Runs an XML parser on `buf`. Throws an exception if the XML is invalid.
static void check(const char* buf, size_t buflen) {
XMLValidator validator;
validator.check_internal(buf, buflen);
}

private:
// Private constructor, because this class is only constructed by
// the (static) check method.
XMLValidator() : parser_(XML_ParserCreateNS(0, '@')) {
if (!parser_) {
throw Error(kerXMPToolkitError, "Could not create expat parser");
}
}

~XMLValidator() {
XML_ParserFree(parser_);
}

void setError(const char* msg) {
const XML_Size errlinenum = XML_GetCurrentLineNumber(parser_);
const XML_Size errcolnum = XML_GetCurrentColumnNumber(parser_);
#ifndef SUPPRESS_WARNINGS
EXV_INFO << "Invalid XML at line " << errlinenum
<< ", column " << errcolnum
<< ": " << msg << "\n";
#endif
// If this is the first error, then save it.
if (!haserror_) {
haserror_ = true;
errmsg_ = msg;
errlinenum_ = errlinenum;
errcolnum_ = errcolnum;
}
}

void check_internal(const char* buf, size_t buflen) {
if (buflen > static_cast<size_t>(std::numeric_limits<int>::max())) {
throw Error(kerXMPToolkitError, "Buffer length is greater than INT_MAX");
}

XML_SetUserData(parser_, this);
XML_SetElementHandler(parser_, startElement_cb, endElement_cb);
XML_SetNamespaceDeclHandler(parser_, startNamespace_cb, endNamespace_cb);
XML_SetStartDoctypeDeclHandler(parser_, startDTD_cb);

const XML_Status result = XML_Parse(parser_, buf, static_cast<int>(buflen), true);
if (result == XML_STATUS_ERROR) {
setError(XML_ErrorString(XML_GetErrorCode(parser_)));
}

if (haserror_) {
throw XMP_Error(kXMPErr_BadXML, "Error in XMLValidator");
}
}

void startElement(const XML_Char*, const XML_Char**) noexcept {
if (element_depth_ > max_recursion_limit_) {
setError("Too deeply nested");
}
++element_depth_;
}

void endElement(const XML_Char*) noexcept {
if (element_depth_ > 0) {
--element_depth_;
} else {
setError("Negative depth");
}
}

void startNamespace(const XML_Char*, const XML_Char*) noexcept {
if (namespace_depth_ > max_recursion_limit_) {
setError("Too deeply nested");
}
++namespace_depth_;
}

void endNamespace(const XML_Char*) noexcept {
if (namespace_depth_ > 0) {
--namespace_depth_;
} else {
setError("Negative depth");
}
}

void startDTD(const XML_Char*, const XML_Char*, const XML_Char*, int) noexcept {
// DOCTYPE is used for XXE attacks.
setError("DOCTYPE not supported");
}

// This callback function is called by libexpat. It's a static wrapper
// around startElement().
static void XMLCALL startElement_cb(
void* userData, const XML_Char* name, const XML_Char* *attrs
) noexcept {
static_cast<XMLValidator*>(userData)->startElement(name, attrs);
}

// This callback function is called by libexpat. It's a static wrapper
// around endElement().
static void XMLCALL endElement_cb(void* userData, const XML_Char* name) noexcept {
static_cast<XMLValidator*>(userData)->endElement(name);
}

// This callback function is called by libexpat. It's a static wrapper
// around startNamespace().
static void XMLCALL startNamespace_cb(
void* userData, const XML_Char* prefix, const XML_Char* uri
) noexcept {
static_cast<XMLValidator*>(userData)->startNamespace(prefix, uri);
}

// This callback function is called by libexpat. It's a static wrapper
// around endNamespace().
static void XMLCALL endNamespace_cb(void* userData, const XML_Char* prefix) noexcept {
static_cast<XMLValidator*>(userData)->endNamespace(prefix);
}

static void XMLCALL startDTD_cb(
void *userData, const XML_Char *doctypeName, const XML_Char *sysid,
const XML_Char *pubid, int has_internal_subset
) noexcept {
static_cast<XMLValidator*>(userData)->startDTD(
doctypeName, sysid, pubid, has_internal_subset);
}
};
} // namespace


// *****************************************************************************
// local declarations
namespace {
Expand Down Expand Up @@ -601,6 +764,7 @@ namespace Exiv2 {
return 2;
}

XMLValidator::check(xmpPacket.data(), xmpPacket.size());
SXMPMeta meta(xmpPacket.data(), static_cast<XMP_StringLen>(xmpPacket.size()));
SXMPIterator iter(meta);
std::string schemaNs, propPath, propValue;
Expand Down
9 changes: 9 additions & 0 deletions test/data/coverage_xmp_doctype.exv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<!DOCTYPE foo [
<!ELEMENT foo ANY >
<!ENTITY xxe SYSTEM "file:///dev/random" >]>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 4.4.0-Exiv2">
<foo>
</foo>
</x:xmpmeta>
<?xpacket end="w"?>
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_CVE_2017_14857.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class TestCvePoC(metaclass=system_tests.CaseMeta):
Error: Offset of directory Image, entry 0x0132 is out of bounds: Offset = 0x30003030; truncating the entry
Error: Directory Image, entry 0x8649 has invalid size 4294967295*1; skipping entry.
Error: Directory Image, entry 0x8769 Sub-IFD pointer 0 is out of bounds; ignoring it.
Error: XMP Toolkit error 201: XML parsing failure
Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""
]
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_CVE_2017_14858.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class TestCvePoC(metaclass=system_tests.CaseMeta):
Warning: Directory Image, entry 0x0111: Strip 17 is outside of the data area; ignored.
Error: Directory Photo with 8224 entries considered invalid; not read.
Warning: Removing 913 characters from the beginning of the XMP packet
Error: XMP Toolkit error 201: XML parsing failure
Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""
]
Expand Down
20 changes: 20 additions & 0 deletions tests/bugfixes/github/test_coverage_xmp_doctype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-

from system_tests import CaseMeta, path, check_no_ASAN_UBSAN_errors

class coverage_xmp_doctype(metaclass=CaseMeta):
"""
Test added to improve code coverage in xmpsidecar.cpp after
Codecov complained about a lack of code coverage in this PR:
https://github.com/Exiv2/exiv2/pull/1878
"""

filename = path("$data_path/coverage_xmp_doctype.exv")
commands = ["$exiv2 $filename"]
stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
$filename: No Exif data found in the file
"""]
retval = [253]

compare_stdout = check_no_ASAN_UBSAN_errors
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class coverage_xmpsidecar_isXmpType(metaclass=CaseMeta):

filename = path("$data_path/coverage_xmpsidecar_isXmpType.xmp")
commands = ["$exiv2 $filename"]
stderr = ["""Error: XMP Toolkit error 201: XML parsing failure
stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
$filename: No Exif data found in the file
"""]
Expand Down
7 changes: 3 additions & 4 deletions tests/bugfixes/github/test_issue_1713.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@ class InvalidDateXMP(metaclass=CaseMeta):
commands = ["$exiv2 -Ph $filename"]

stderr = [
"""Warning: Failed to convert Xmp.xmp.CreateDate to Exif.Photo.DateTimeDigitized (Day is out of range)
Exiv2 exception in print action for file $filename:
Xmpdatum::copy: Not supported
"""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""
]
retval = [1]
retval = [0]

def compare_stdout(self, i, command, got_stdout, expected_stdout):
""" We don't care about the stdout, just don't crash """
Expand Down
26 changes: 3 additions & 23 deletions tests/bugfixes/github/test_issue_1819.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,7 @@ class EmptyStringXmpTextValueRead(metaclass=CaseMeta):
File size : 1088 Bytes
MIME type : application/rdf+xml
Image size : 0 x 0
Thumbnail : None
Camera make :
Camera model :
Image timestamp :
File number :
Exposure time :
Aperture :
Exposure bias :
Flash :
Flash bias :
Focal length :
Subject distance:
ISO speed :
Exposure mode :
Metering mode :
Macro mode :
Image quality :
White balance :
Copyright :
Exif comment :
"""]
stderr = [""]
retval = [0]
stderr = ["""$filename: No Exif data found in the file
"""]
retval = [253]
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_issue_428.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class PngReadRawProfile(metaclass=system_tests.CaseMeta):
stderr.append("""$exiv2_exception_message """ + filenames[5] + """:
$kerInputDataReadFailed
""")
stderr.append("""Error: XMP Toolkit error 201: XML parsing failure
stderr.append("""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
""" + stderr_exception(filenames[6]))
stderr.append("""Warning: Failed to decode Exif metadata.
Expand Down
4 changes: 2 additions & 2 deletions tests/bugfixes/github/test_issue_851.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class DenialOfServiceInAdjustTimeOverflow(metaclass=CaseMeta):
Image size : 0 x 0
"""
]
stderr = [
"""Warning: Failed to convert Xmp.xmp.CreateDate to Exif.Photo.DateTimeDigitized (Day is out of range)
stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
$filename: No Exif data found in the file
"""]
retval = [253]
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_issue_ghsa_8949_hhfh_j7rj.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Jp2ImageEncodeJp2HeaderOutOfBoundsRead(metaclass=CaseMeta):
commands = ["$exiv2 in $filename1"]
stdout = [""]
stderr = [
"""Error: XMP Toolkit error 201: XML parsing failure
"""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""]
retval = [0]
4 changes: 3 additions & 1 deletion tests/bugfixes/github/test_issue_ghsa_v5g7_46xf_h728.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ class Jp2ImageEncodeJp2HeaderOutOfBoundsRead2(metaclass=CaseMeta):
MIME type : application/rdf+xml
Image size : 0 x 0
"""]
stderr = ["""$filename: No Exif data found in the file
stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
$filename: No Exif data found in the file
"""]
retval = [253]

0 comments on commit f9248f9

Please sign in to comment.