Skip to content

Commit

Permalink
Feat: added parse_options for more control over parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
kares committed Jan 22, 2020
1 parent 75bae57 commit 1aedffb
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 1 deletion.
34 changes: 33 additions & 1 deletion lib/logstash/filters/xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
#
config :xpath, :validate => :hash, :default => {}

# Supported XML parsing options are 'strict', 'no_error' and 'no_warning'.
# - strict mode turns on strict parsing rules (non-compliant xml will fail)
# - no_error and no_warning can be used to suppress errors/warnings
# Multiple options can be combined, separated by a comma or a pipe (e.g. "no_error,no_warning").
config :parse_options, :validate => :string
# NOTE: technically we support more but we purposefully do not document those.
# e.g. setting "strict|recover" will not turn on strict as they're conflicting

# By default the filter will store the whole parsed XML in the destination
# field as described above. Setting this to false will prevent that.
config :store_xml, :validate => :boolean, :default => true
Expand Down Expand Up @@ -110,6 +117,7 @@ def register
:error => "When the 'store_xml' configuration option is true, 'target' must also be set"
)
end
xml_parse_options # validates parse_options => ...
end

def filter(event)
Expand Down Expand Up @@ -141,11 +149,13 @@ def filter(event)

if @xpath
begin
doc = Nokogiri::XML(value, nil, value.encoding.to_s)
doc = Nokogiri::XML::Document.parse(value, nil, value.encoding.to_s, xml_parse_options)
rescue => e
event.tag(XMLPARSEFAILURE_TAG)
@logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
return
else
doc.errors.any? && @logger.debug? && @logger.debug("Parsed xml with #{doc.errors.size} errors")
end
doc.remove_namespaces! if @remove_namespaces

Expand Down Expand Up @@ -194,4 +204,26 @@ def filter(event)
filter_matched(event) if matched
@logger.debug? && @logger.debug("Event after xml filter", :event => event)
end

private

# Translates the `parse_options` setting into a Nokogiri parse-options bit mask.
#
# The setting is split on ',' or '|'; each token is stripped, has its
# underscores removed and is upcased (so 'no_error' becomes NOERROR) before
# being looked up as a constant of Nokogiri::XML::ParseOptions. Blank tokens
# are ignored. The computed mask is memoized in @xml_parse_options.
#
# @return [Integer] DEFAULT_XML when `parse_options` is not set, otherwise
#   the bitwise OR of the requested flags
# @raise [LogStash::ConfigurationError] if a token does not name a parse option
def xml_parse_options
  return Nokogiri::XML::ParseOptions::DEFAULT_XML unless @parse_options # (RECOVER | NONET)

  @xml_parse_options ||= @parse_options.split(/,|\|/).inject(0) do |mask, opt|
    name = opt.strip.delete('_').upcase
    next mask if name.empty?

    flag = begin
      # inherit=false: only constants declared on ParseOptions itself count;
      # otherwise top-level constants such as ARGV or ENV would resolve too.
      Nokogiri::XML::ParseOptions.const_get(name, false)
    rescue NameError
      nil
    end
    unless flag.is_a?(Integer)
      raise LogStash::ConfigurationError, "unsupported parse option: #{opt.inspect}"
    end

    mask | flag # e.g. NOERROR | NOWARNING
  end
end

end
49 changes: 49 additions & 0 deletions spec/filters/xml_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,53 @@
end
end
end

# The sample document is malformed (the 'attr' attribute is repeated and the
# <invalid> element is never closed). The filter is expected to leave the event
# untagged by default and to tag it with '_xmlparsefailure' when the 'strict'
# parse option is set.
describe "parsing invalid xml" do
  subject { described_class.new(options) }
  let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false }) }
  let(:xmldata) { "<xml> <sample attr='foo' attr=\"bar\"> <invalid> </sample> </xml>" }
  let(:event) { LogStash::Event.new(data) }
  let(:data) { { "xmldata" => xmldata } }

  before { subject.register }
  after { subject.close }

  it 'does not fail (by default)' do
    subject.filter(event)
    expect( event.get("tags") ).to be nil
  end

  context 'strict option' do
    # `let` bodies are define_method blocks, where implicit-argument `super`
    # is not supported; call super() explicitly to reach the outer definition.
    let(:options) { super().merge({ 'parse_options' => 'strict' }) }

    it 'does fail parsing' do
      subject.filter(event)
      expect( event.get("tags") ).to_not be nil
      expect( event.get("tags") ).to include '_xmlparsefailure'
    end
  end
end

# Checks how the 'parse_options' setting is resolved when the filter registers:
# a valid list yields the OR-ed Nokogiri flags, an unknown name is rejected.
describe "parse_options" do
  let(:options) do
    { 'source' => 'xmldata', 'store_xml' => false, 'parse_options' => parse_options }
  end

  subject { described_class.new(options) }

  context 'valid' do
    let(:parse_options) { 'no_error,NOWARNING' }

    it 'registers filter' do
      subject.register

      expected_flags = Nokogiri::XML::ParseOptions::NOERROR | Nokogiri::XML::ParseOptions::NOWARNING
      expect(subject.send(:xml_parse_options)).to eql(expected_flags)
    end
  end

  context 'invalid' do
    let(:parse_options) { 'strict,invalid0' }

    it 'fails to register' do
      expect { subject.register }
        .to raise_error(LogStash::ConfigurationError, 'unsupported parse option: "invalid0"')
    end
  end
end
end

0 comments on commit 1aedffb

Please sign in to comment.