#
# Logstash configuration to parse logs from a distributed EIF Receiver deployment in a SCALA environment.
# Typical log source: /opt/scala/LogAnalysis/DataForwarders/EIFReceivers/logs/UnityEifReceiver_eif_inst_1.log
#
# Doug McClure
# v1.0 8/13/14
#
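# Usage sketch (assumes a Logstash 1.4.x-era install, matching the plugins used below):
#   bin/logstash -f scala-unity-eif-receiver-logstash.conf
# Check syntax first with: bin/logstash -f scala-unity-eif-receiver-logstash.conf --configtest
#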
input {
lumberjack {
port => 5043
#local server cert and key - the certificate (but not the private key) must also be copied to each logstash-forwarder endpoint and referenced in its config (see the forwarder sketch after this input block)
ssl_certificate => "/etc/pki/tls/certs/logstash-forwarder.crt"
ssl_key => "/etc/pki/tls/private/logstash-forwarder.key"
type => "scala-eifr-logs"
#Ideally the multiline codec would be used here to reassemble multiline events
#-- bug in multiline codec for logstash-forwarder/lumberjack per Jordan - use the multiline filter below for now
#codec => multiline {
# #stack trace message for connection timeout - rows beginning with tab (\t at)
# #fix the wrapping of message lines for the batch size wrapping to new line
# pattern => "(^\d+)|(^%{NONNEGINT})"
# what => "previous"
# multiline_tag => "fixed-multiline"
#} #end codec
} #end lumberjack
} #end input
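#For reference, a minimal logstash-forwarder config sketch for each shipping endpoint
#(hypothetical hostname - adjust paths to your environment); the forwarder references
#the server certificate via "ssl ca" and needs no copy of the private key:
#
#{
#  "network": {
#    "servers": [ "logstash.example.com:5043" ],
#    "ssl ca": "/etc/pki/tls/certs/logstash-forwarder.crt"
#  },
#  "files": [
#    {
#      "paths": [ "/opt/scala/LogAnalysis/DataForwarders/EIFReceivers/logs/UnityEifReceiver_eif_inst_1.log" ],
#      "fields": { "type": "scala-eifr-logs" }
#    }
#  ]
#}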
filter {
# reassemble multiline events and lines that were wrapped onto new lines:
# - stack trace messages for connection timeouts (continuation rows begin with a tab: "\t at")
# - batch status messages wrapped to a new line by the batch size
# use the multiline filter (not the codec) because it supports 'stream_identity' (host:path:type), so interleaved streams from multiple forwarders are each reassembled correctly
multiline {
pattern => "(^\d{1,20}$)|(^Service.*)|(^updated.*)|(^\t)"
what => "previous"
} #end multiline
#tag batch status events (grep with drop => false only tags matching events, it drops nothing)
if "multiline" not in [tags] {
grep {
drop => false
match => { "message" => "BATCH_STATUS" }
add_tag => "batchstatus"
} #end grep
} #end conditional
if "multiline" or "batchstatus" in [tags] {
grok {
match => [ "message", "(?m)%{DATESTAMP:timestamp} %{TZ} \[%{DATA:Pool-Thread}\] %{DATA:LogLevel} - %{DATA:Function} : %{GREEDYDATA:OrigMsg}" ]
add_tag => "grok-1"
} #end grok
} #end conditional
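#illustrative (hypothetical) line in the shape the grok above expects:
#  08/13/14 10:15:30.123 EDT [pool-1-thread-1] INFO - EventPoster : {"BATCH_STATUS":{...}}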
#whack LF (\n) from the message fields
if "grok-1" in [tags] {
mutate {
#one gsub pass over both fields (a single gsub declaration avoids duplicate-key surprises)
gsub => [ "OrigMsg", "\n", " ", "message", "\n", " " ]
add_tag => "whack LF"
} #end mutate
} #end conditional
#explode JSON batch message
if "whack LF" in [tags] and [OrigMsg] =~ /^\{/
{
json {
source => "OrigMsg"
add_tag => "json exploded"
#add fields to send to scala (one declaration; scalaFields collects the list of field names)
add_field => [ "scalaFields", "EIFR-batchWriteTime",
"scalaFields", "EIFR-indexedSourceVolume",
"scalaFields", "EIFR-indexNumSuccessful",
"scalaFields", "EIFR-indexNumFailures",
"scalaFields", "EIFR-batchSize",
"scalaFields", "EIFR-numSuccessful",
"scalaFields", "EIFR-numFailures" ]
} #end json
} #end conditional
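#assumed (hypothetical) shape of the exploded OrigMsg payload that the mutate below reads:
#  {"BATCH_STATUS": {"writeTime": "...", "indexedSourceVolume": 0, "indexNumSuccessful": 0,
#                    "indexNumFailures": 0, "batchSize": 0, "numSuccessful": 0, "numFailures": 0}}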
if "json exploded" in [tags] {
mutate {
#set fields with values (single replace declaration)
replace => [ "EIFR-batchWriteTime", "%{[BATCH_STATUS][writeTime]}",
"EIFR-indexedSourceVolume", "%{[BATCH_STATUS][indexedSourceVolume]}",
"EIFR-indexNumSuccessful", "%{[BATCH_STATUS][indexNumSuccessful]}",
"EIFR-indexNumFailures", "%{[BATCH_STATUS][indexNumFailures]}",
"EIFR-batchSize", "%{[BATCH_STATUS][batchSize]}",
"EIFR-numSuccessful", "%{[BATCH_STATUS][numSuccessful]}",
"EIFR-numFailures", "%{[BATCH_STATUS][numFailures]}" ]
#convert to integers for es/kibana - still have to set them for DSV
convert => [ "EIFR-indexedSourceVolume", "integer",
"EIFR-indexNumSuccessful", "integer",
"EIFR-indexNumFailures", "integer",
"EIFR-batchSize", "integer",
"EIFR-numSuccessful", "integer",
"EIFR-numFailures", "integer" ]
add_tag => "scala fields done"
} #end mutate
} #end conditional
#grok out metrics - each grok below that does not match simply tags the event _grokparsefailure
if "whack LF" in [tags] and "batchstatus" not in [tags] {
grok {
match => [ "OrigMsg", "PostDataJson of size: %{INT:PostQueuePostDataJsonSize}" ]
} #end grok
grok {
match => [ "OrigMsg", "updated Service Queue -- size:%{INT:ServiceQueueSize}" ]
} #end grok
grok {
match => [ "OrigMsg", "Posting Post Data Json of size: %{INT:PostDataSize}" ]
} #end grok
grok {
match => [ "OrigMsg", "Service EIF remove:%{INT:ServiceEIFRemove}" ]
} #end grok
} #end conditional
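#optional sketch: the four groks above could be folded into one, so a non-matching event
#gets tagged _grokparsefailure at most once (assumes grok's default break_on_match behavior):
#grok {
#  match => { "OrigMsg" => [ "PostDataJson of size: %{INT:PostQueuePostDataJsonSize}",
#                            "updated Service Queue -- size:%{INT:ServiceQueueSize}",
#                            "Posting Post Data Json of size: %{INT:PostDataSize}",
#                            "Service EIF remove:%{INT:ServiceEIFRemove}" ] }
#} #end grok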
} #end filter
output {
#send to elasticsearch so we can visualize in kibana
elasticsearch {
embedded => true
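#embedded => true starts an in-process Elasticsearch - handy for testing; point at a
#standalone cluster (host/cluster options) for anything beyond a demo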
} #end es output
#create a CSV file so we can visualize in a spreadsheet
# csv {
# fields => [ "[BATCH_STATUS][writeTime]","[BATCH_STATUS][indexedSourceVolume]","[BATCH_STATUS][indexNumSuccessful]","[BATCH_STATUS][indexNumFailures]","[BATCH_STATUS][batchSize]","[BATCH_STATUS][numSuccessful]","[BATCH_STATUS][numFailures]" ]
# csv_options => { "force_quotes" => "true" }
# path => "/var/log/EIFR-Perf-csv-output.log"
# } #end CSV
} #end output