Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory usage of http parser #6680

Merged
merged 5 commits into from
Apr 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ https://github.com/elastic/beats/compare/v6.0.0-beta2...master[Check the HEAD di
- Add support for condition on bool type {issue}5659[5659] {pull}5954[5954]
- HTTP parses successfully on empty status phrase. {issue}6176[6176]
- HTTP parser supports broken status line. {pull}6631[6631]
- Fix high memory usage on HTTP body if body is not published. {pull}6680[6680]

*Winlogbeat*

Expand Down
117 changes: 39 additions & 78 deletions packetbeat/protos/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ type parserState uint8

const (
stateStart parserState = iota
stateFLine
stateHeaders
stateBody
stateBodyChunkedStart
Expand Down Expand Up @@ -67,7 +66,6 @@ type httpPlugin struct {
splitCookie bool
hideKeywords []string
redactAuthorization bool
includeBodyFor []string
maxMessageSize int

parserConfig parserConfig
Expand Down Expand Up @@ -124,7 +122,7 @@ func (http *httpPlugin) setFromConfig(config *httpConfig) {
http.splitCookie = config.SplitCookie
http.parserConfig.realIPHeader = strings.ToLower(config.RealIPHeader)
http.transactionTimeout = config.TransactionTimeout
http.includeBodyFor = config.IncludeBodyFor
http.parserConfig.includeBodyFor = config.IncludeBodyFor
http.maxMessageSize = config.MaxMessageSize

if config.SendAllHeaders {
Expand Down Expand Up @@ -168,13 +166,11 @@ func (http *httpPlugin) messageGap(s *stream, nbytes int) (ok bool, complete boo
}
if !m.hasContentLength && (bytes.Equal(m.connection, constClose) ||
(isVersion(m.version, 1, 0) && !bytes.Equal(m.connection, constKeepAlive))) {

s.bodyReceived += nbytes
m.contentLength += nbytes
return true, false
} else if len(s.data[s.parseOffset:])+nbytes >= m.contentLength-s.bodyReceived {
} else if len(s.data)+nbytes >= m.contentLength-s.bodyReceived {
// we're done, but the last portion of the data is gone
m.end = s.parseOffset
return true, true
} else {
s.bodyReceived += nbytes
Expand All @@ -186,7 +182,6 @@ func (http *httpPlugin) messageGap(s *stream, nbytes int) (ok bool, complete boo
}

func (st *stream) PrepareForNewMessage() {
st.data = st.data[st.message.end:]
st.parseState = stateStart
st.parseOffset = 0
st.bodyReceived = 0
Expand All @@ -201,8 +196,6 @@ func (http *httpPlugin) messageComplete(
dir uint8,
st *stream,
) {
st.message.raw = st.data[st.message.start:st.message.end]

http.handleHTTP(conn, st.message, tcptuple, dir)
}

Expand Down Expand Up @@ -274,7 +267,12 @@ func (http *httpPlugin) doParse(
conn.streams[dir] = st
} else {
// concatenate bytes
if len(st.data)+len(pkt.Payload) > http.maxMessageSize {
totalLength := len(st.data) + len(pkt.Payload)
msg := st.message
if msg != nil {
totalLength += len(msg.body)
}
if totalLength > http.maxMessageSize {
if isDebug {
debugf("Stream data too large, ignoring message")
}
Expand All @@ -284,13 +282,14 @@ func (http *httpPlugin) doParse(
}
}

for len(st.data) > 0 {
for len(st.data) > 0 || extraMsgSize > 0 {
if st.message == nil {
st.message = &message{ts: pkt.Ts}
}

parser := newParser(&http.parserConfig)
ok, complete := parser.parse(st, extraMsgSize)
extraMsgSize = 0
if !ok {
// drop this tcp stream. Will retry parsing with the next
// segment in it
Expand Down Expand Up @@ -338,8 +337,7 @@ func (http *httpPlugin) ReceivedFin(tcptuple *common.TCPTuple, dir uint8,

// send whatever data we got so far as complete. This
// is needed for the HTTP/1.0 without Content-Length situation.
if stream.message != nil && len(stream.data[stream.message.start:]) > 0 {
stream.message.raw = stream.data[stream.message.start:]
if stream.message != nil {
http.handleHTTP(conn, stream.message, tcptuple, dir)

// and reset message. Probably not needed, just to be sure.
Expand Down Expand Up @@ -445,7 +443,7 @@ func (http *httpPlugin) newTransaction(requ, resp *message) beat.Event {
// resp_time in milliseconds
responseTime := int32(resp.ts.Sub(requ.ts).Nanoseconds() / 1e6)

path, params, err := http.extractParameters(requ, requ.raw)
path, params, err := http.extractParameters(requ)
if err != nil {
logp.Warn("Fail to parse HTTP parameters: %v", err)
}
Expand Down Expand Up @@ -495,10 +493,10 @@ func (http *httpPlugin) newTransaction(requ, resp *message) beat.Event {
}

if http.sendRequest {
fields["request"] = string(http.cutMessageBody(requ))
fields["request"] = http.makeRawMessage(requ)
}
if http.sendResponse {
fields["response"] = string(http.cutMessageBody(resp))
fields["response"] = http.makeRawMessage(resp)
}

if len(requ.notes)+len(resp.notes) > 0 {
Expand All @@ -514,6 +512,16 @@ func (http *httpPlugin) newTransaction(requ, resp *message) beat.Event {
}
}

func (http *httpPlugin) makeRawMessage(m *message) string {
var result []byte
result = append(result, m.rawHeaders...)
if m.sendBody {
result = append(result, m.body...)
}
// TODO: (go1.10) Use strings.Builder to avoid allocation/copying
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a followup github issue?

return string(result)
}

func (http *httpPlugin) publishTransaction(event beat.Event) {
if http.results == nil {
return
Expand Down Expand Up @@ -554,9 +562,8 @@ func (http *httpPlugin) collectHeaders(m *message) interface{} {
}

func (http *httpPlugin) setBody(result common.MapStr, m *message) {
body := string(http.extractBody(m))
if len(body) > 0 {
result["body"] = body
if m.sendBody && len(m.body) > 0 {
result["body"] = string(m.body)
}
}

Expand All @@ -583,82 +590,38 @@ func parseCookieValue(raw string) string {
return raw
}

func (http *httpPlugin) extractBody(m *message) []byte {
body := []byte{}

if len(m.contentType) > 0 && http.shouldIncludeInBody(m.contentType) {
if len(m.chunkedBody) > 0 {
body = append(body, m.chunkedBody...)
} else {
if isDebug {
debugf("Body to include: [%s]", m.raw[m.bodyOffset:])
}
body = append(body, m.raw[m.bodyOffset:]...)
}
}

return body
}

func (http *httpPlugin) cutMessageBody(m *message) []byte {
cutMsg := []byte{}

// add headers always
cutMsg = m.raw[:m.bodyOffset]

// add body
return append(cutMsg, http.extractBody(m)...)
}

func (http *httpPlugin) shouldIncludeInBody(contenttype []byte) bool {
includedBodies := http.includeBodyFor
for _, include := range includedBodies {
if bytes.Contains(contenttype, []byte(include)) {
if isDebug {
debugf("Should Include Body = true Content-Type %s include_body %s",
contenttype, include)
}
return true
}
if isDebug {
debugf("Should Include Body = false Content-Type %s include_body %s",
contenttype, include)
}
}
return false
}

func (http *httpPlugin) hideHeaders(m *message) {
if !m.isRequest || !http.redactAuthorization {
return
}

msg := m.raw
msg := m.rawHeaders
limit := len(msg)

// byte64 != encryption, so obscure it in headers in case of Basic Authentication

redactHeaders := []string{"authorization", "proxy-authorization"}
authText := []byte("uthorization:") // [aA] case insensitive, also catches Proxy-Authorization:

authHeaderStartX := m.headerOffset
authHeaderEndX := m.bodyOffset
authHeaderEndX := limit

for authHeaderStartX < m.bodyOffset {
for authHeaderStartX < limit {
if isDebug {
debugf("looking for authorization from %d to %d",
authHeaderStartX, authHeaderEndX)
}

startOfHeader := bytes.Index(msg[authHeaderStartX:m.bodyOffset], authText)
startOfHeader := bytes.Index(msg[authHeaderStartX:], authText)
if startOfHeader >= 0 {
authHeaderStartX = authHeaderStartX + startOfHeader

endOfHeader := bytes.Index(msg[authHeaderStartX:m.bodyOffset], []byte("\r\n"))
endOfHeader := bytes.Index(msg[authHeaderStartX:], constCRLF)
if endOfHeader >= 0 {
authHeaderEndX = authHeaderStartX + endOfHeader

if authHeaderEndX > m.bodyOffset {
authHeaderEndX = m.bodyOffset
if authHeaderEndX > limit {
authHeaderEndX = limit
}

if isDebug {
Expand All @@ -670,17 +633,15 @@ func (http *httpPlugin) hideHeaders(m *message) {
}
}
}
authHeaderStartX = authHeaderEndX + len("\r\n")
authHeaderEndX = m.bodyOffset
authHeaderStartX = authHeaderEndX + len(constCRLF)
authHeaderEndX = len(m.rawHeaders)
}

for _, header := range redactHeaders {
if len(m.headers[header]) > 0 {
m.headers[header] = []byte("*")
}
}

m.raw = msg
}

func (http *httpPlugin) hideSecrets(values url.Values) url.Values {
Expand All @@ -700,7 +661,7 @@ func (http *httpPlugin) hideSecrets(values url.Values) url.Values {
// extractParameters parses the URL and the form parameters and replaces the secrets
// with the string xxxxx. The parameters containing secrets are defined in http.Hide_secrets.
// Returns the Request URI path and the (adjusted) parameters.
func (http *httpPlugin) extractParameters(m *message, msg []byte) (path string, params string, err error) {
func (http *httpPlugin) extractParameters(m *message) (path string, params string, err error) {
var values url.Values

u, err := url.Parse(string(m.requestURI))
Expand All @@ -712,9 +673,9 @@ func (http *httpPlugin) extractParameters(m *message, msg []byte) (path string,

paramsMap := http.hideSecrets(values)

if m.contentLength > 0 && bytes.Contains(m.contentType, []byte("urlencoded")) {
if m.contentLength > 0 && m.saveBody && bytes.Contains(m.contentType, []byte("urlencoded")) {

values, err = url.ParseQuery(string(msg[m.bodyOffset:]))
values, err = url.ParseQuery(string(m.body))
if err != nil {
return
}
Expand Down
Loading