Skip to content

Commit

Permalink
Feat: added parser_options for more control over XML parsing (#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
kares authored Jan 28, 2020
1 parent 75bae57 commit 14863ea
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 4.1.0
- Feat: added parser_options for more control over XML parsing [#68](https://github.com/logstash-plugins/logstash-filter-xml/pull/68)

## 4.0.7
- Fixed creation of empty arrays when xpath failed [#59](https://github.com/logstash-plugins/logstash-filter-xml/pull/59)

Expand Down
12 changes: 12 additions & 0 deletions docs/index.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
| <<plugins-{type}s-{plugin}-force_array>> |<<boolean,boolean>>|No
| <<plugins-{type}s-{plugin}-force_content>> |<<boolean,boolean>>|No
| <<plugins-{type}s-{plugin}-namespaces>> |<<hash,hash>>|No
| <<plugins-{type}s-{plugin}-parser_options>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-remove_namespaces>> |<<boolean,boolean>>|No
| <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
| <<plugins-{type}s-{plugin}-store_xml>> |<<boolean,boolean>>|No
Expand Down Expand Up @@ -87,6 +88,17 @@ filter {
}
}

[id="plugins-{type}s-{plugin}-parser_options"]
===== `parser_options`

* Value type is <<string,string>>
* There is no default value for this setting.

Setting XML parser options allows for more control of the parsing process.
By default the parser is not strict and thus accepts some invalid content.
Currently supported options are:

- `strict` - forces the parser to fail early instead of accumulating errors when content is not valid xml.

[id="plugins-{type}s-{plugin}-remove_namespaces"]
===== `remove_namespaces`
Expand Down
34 changes: 33 additions & 1 deletion lib/logstash/filters/xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
#
config :xpath, :validate => :hash, :default => {}

# Supported XML parsing options are 'strict', 'no_error' and 'no_warning'.
# - strict mode turns on strict parsing rules (non-compliant xml will fail)
# - no_error and no_warning can be used to suppress errors/warnings
config :parse_options, :validate => :string
# NOTE: technically we support more but we purposefully do not document those.
# e.g. setting "strict|recover" will not turn on strict as they're conflicting

# By default the filter will store the whole parsed XML in the destination
# field as described above. Setting this to false will prevent that.
config :store_xml, :validate => :boolean, :default => true
Expand Down Expand Up @@ -110,6 +117,7 @@ def register
:error => "When the 'store_xml' configuration option is true, 'target' must also be set"
)
end
xml_parse_options # validates parse_options => ...
end

def filter(event)
Expand Down Expand Up @@ -141,11 +149,13 @@ def filter(event)

if @xpath
begin
doc = Nokogiri::XML(value, nil, value.encoding.to_s)
doc = Nokogiri::XML::Document.parse(value, nil, value.encoding.to_s, xml_parse_options)
rescue => e
event.tag(XMLPARSEFAILURE_TAG)
@logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
return
else
doc.errors.any? && @logger.debug? && @logger.debug("Parsed xml with #{doc.errors.size} errors")
end
doc.remove_namespaces! if @remove_namespaces

Expand Down Expand Up @@ -194,4 +204,26 @@ def filter(event)
filter_matched(event) if matched
@logger.debug? && @logger.debug("Event after xml filter", :event => event)
end

private

def xml_parse_options
return Nokogiri::XML::ParseOptions::DEFAULT_XML unless @parse_options # (RECOVER | NONET)
@xml_parse_options ||= begin
parse_options = @parse_options.split(/,|\|/).map do |opt|
name = opt.strip.tr('_', '').upcase
if name.empty?
nil
else
begin
Nokogiri::XML::ParseOptions.const_get(name)
rescue NameError
raise LogStash::ConfigurationError, "unsupported parse option: #{opt.inspect}"
end
end
end
parse_options.compact.inject(0, :|) # e.g. NOERROR | NOWARNING
end
end

end
2 changes: 1 addition & 1 deletion logstash-filter-xml.gemspec
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|

s.name = 'logstash-filter-xml'
s.version = '4.0.7'
s.version = '4.1.0'
s.licenses = ['Apache License (2.0)']
s.summary = "Parses XML into fields"
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
Expand Down
59 changes: 59 additions & 0 deletions spec/filters/xml_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,63 @@
end
end
end

describe "parsing invalid xml" do
subject { described_class.new(options) }
let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false }) }
let(:xmldata) { "<xml> <sample attr='foo' attr=\"bar\"> <invalid> </sample> </xml>" }
let(:event) { LogStash::Event.new(data) }
let(:data) { { "xmldata" => xmldata } }

before { subject.register }
after { subject.close }

it 'does not fail (by default)' do
subject.filter(event)
expect( event.get("tags") ).to be nil
end

context 'strict option' do
let(:options) { super.merge({ 'parse_options' => 'strict' }) }

it 'does fail parsing' do
subject.filter(event)
expect( event.get("tags") ).to_not be nil
expect( event.get("tags") ).to include '_xmlparsefailure'
end
end
end

describe "parse_options" do
subject { described_class.new(options) }
let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false, 'parse_options' => parse_options }) }

context 'strict (supported option)' do
let(:parse_options) { 'strict' }

it 'registers filter' do
subject.register
expect( subject.send(:xml_parse_options) ).
to eql Nokogiri::XML::ParseOptions::STRICT
end
end

context 'valid' do
let(:parse_options) { 'no_error,NOWARNING' }

it 'registers filter' do
subject.register
expect( subject.send(:xml_parse_options) ).
to eql Nokogiri::XML::ParseOptions::NOERROR | Nokogiri::XML::ParseOptions::NOWARNING
end
end

context 'invalid' do
let(:parse_options) { 'strict,invalid0' }

it 'fails to register' do
expect { subject.register }.to raise_error(LogStash::ConfigurationError, 'unsupported parse option: "invalid0"')
end
end
end
end

0 comments on commit 14863ea

Please sign in to comment.