This repository has been archived by the owner on May 25, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement operator for parsing absolute uri, relative uri, and query …
…string (#12)
- Loading branch information
Joseph Sirianni
authored
Feb 5, 2021
1 parent
bbb4f97
commit 4622fc6
Showing
3 changed files
with
956 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
## `uri_parser` operator | ||
|
||
The `uri_parser` operator parses the string-type field selected by `parse_from` as [URI](https://tools.ietf.org/html/rfc3986). | ||
|
||
`uri_parser` can handle: | ||
- Absolute URI | ||
- `https://google.com/v1/app?user_id=2&uuid=57b4dad2-063c-4965-941c-adfd4098face` | ||
- Relative URI | ||
- `/app?user=admin` | ||
- Query string | ||
- `?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev` | ||
- Query string must start with a question mark | ||
|
||
### Configuration Fields | ||
|
||
| Field | Default | Description | | ||
| --- | --- | --- | | ||
| `id` | `uri_parser` | A unique identifier for the operator | | ||
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries | | ||
| `parse_from` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed as a URI | | ||
| `parse_to` | $ | A [field](/docs/types/field.md) that indicates where the parsed URI fields will be written | | ||
| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md) | | ||
| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](/docs/types/on_error.md) | | ||
| `if` | | An [expression](/docs/types/expression.md) that, when set, will be evaluated to determine whether this operator should be used for the given entry. This allows you to do easy conditional parsing without branching logic with routers. | | ||
|
||
|
||
### Output Fields | ||
|
||
The following fields are returned. Empty fields are not returned. | ||
|
||
| Field | Type | Example | Description | | ||
| --- | --- | --- | --- | | ||
| scheme | `string` | `"http"` | [URI Scheme](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml). HTTP, HTTPS, FTP, etc. | | ||
| user | `string` | `"dev"` | [Userinfo](https://tools.ietf.org/html/rfc3986#section-3.2) username. Password is always ignored. | | ||
| host | `string` | `"golang.org"` | The [hostname](https://tools.ietf.org/html/rfc3986#section-3.2.2) such as `www.example.com`, `example.com`, `example`. A scheme is required in order to parse the `host` field. | | ||
| port | `string` | `"8443"` | The [port](https://tools.ietf.org/html/rfc3986#section-3.2.3) the request is sent to. A scheme is required in order to parse the `port` field. | | ||
| path | `string` | `"/v1/app"` | URI request [path](https://tools.ietf.org/html/rfc3986#section-3.3). | | ||
| query | `map[string][]string` | `"query":{"user":["admin"]}` | Parsed URI [query string](https://tools.ietf.org/html/rfc3986#section-3.4). | | ||
|
||
|
||
### Example Configurations | ||
|
||
|
||
#### Parse the field `message` as absolute URI | ||
|
||
Configuration: | ||
```yaml | ||
- type: uri_parser | ||
parse_from: message | ||
``` | ||
<table> | ||
<tr><td> Input record </td> <td> Output record </td></tr> | ||
<tr> | ||
<td> | ||
```json | ||
{ | ||
"timestamp": "", | ||
"record": { | ||
"message": "https://dev:pass@google.com/app?user_id=2&token=001" | ||
} | ||
} | ||
``` | ||
|
||
</td> | ||
<td> | ||
|
||
```json | ||
{ | ||
"timestamp": "", | ||
"record": { | ||
"host": "google.com", | ||
"path": "/app", | ||
"query": { | ||
"user_id": [ | ||
"2" | ||
], | ||
"token": [ | ||
"001" | ||
] | ||
}, | ||
"scheme": "https", | ||
"user": "dev" | ||
} | ||
} | ||
``` | ||
|
||
</td> | ||
</tr> | ||
</table> | ||
|
||
#### Parse the field `message` as relative URI | ||
|
||
Configuration: | ||
```yaml | ||
- type: uri_parser | ||
parse_from: message | ||
``` | ||
<table> | ||
<tr><td> Input record </td> <td> Output record </td></tr> | ||
<tr> | ||
<td> | ||
```json | ||
{ | ||
"timestamp": "", | ||
"record": { | ||
"message": "/app?user=admin" | ||
} | ||
} | ||
``` | ||
|
||
</td> | ||
<td> | ||
|
||
```json | ||
{ | ||
"timestamp": "", | ||
"record": { | ||
"path": "/app", | ||
"query": { | ||
"user": [ | ||
"admin" | ||
] | ||
} | ||
} | ||
} | ||
``` | ||
|
||
</td> | ||
</tr> | ||
</table> | ||
|
||
#### Parse the field `query` as URI query string | ||
|
||
Configuration: | ||
```yaml | ||
- type: uri_parser | ||
parse_from: query | ||
``` | ||
<table> | ||
<tr><td> Input record </td> <td> Output record </td></tr> | ||
<tr> | ||
<td> | ||
```json | ||
{ | ||
"timestamp": "", | ||
"record": { | ||
"query": "?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev" | ||
} | ||
} | ||
``` | ||
|
||
</td> | ||
<td> | ||
|
||
```json | ||
{ | ||
"timestamp": "", | ||
"record": { | ||
"query": { | ||
"env": [ | ||
"stage", | ||
"dev" | ||
], | ||
"request": [ | ||
"681e6fc4-3314-4ccc-933e-4f9c9f0efd24" | ||
] | ||
} | ||
} | ||
} | ||
``` | ||
|
||
</td> | ||
</tr> | ||
</table> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
// Copyright The OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package uri | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"net/url" | ||
"strings" | ||
|
||
"github.com/open-telemetry/opentelemetry-log-collection/entry" | ||
"github.com/open-telemetry/opentelemetry-log-collection/operator" | ||
"github.com/open-telemetry/opentelemetry-log-collection/operator/helper" | ||
) | ||
|
||
func init() { | ||
operator.Register("uri_parser", func() operator.Builder { return NewURIParserConfig("") }) | ||
} | ||
|
||
// NewURIParserConfig creates a new uri parser config with default values. | ||
func NewURIParserConfig(operatorID string) *URIParserConfig { | ||
return &URIParserConfig{ | ||
ParserConfig: helper.NewParserConfig(operatorID, "uri_parser"), | ||
} | ||
} | ||
|
||
// URIParserConfig is the configuration of a uri parser operator. | ||
type URIParserConfig struct { | ||
helper.ParserConfig `yaml:",inline"` | ||
} | ||
|
||
// Build will build a uri parser operator. | ||
func (c URIParserConfig) Build(context operator.BuildContext) ([]operator.Operator, error) { | ||
parserOperator, err := c.ParserConfig.Build(context) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
uriParser := &URIParser{ | ||
ParserOperator: parserOperator, | ||
} | ||
|
||
return []operator.Operator{uriParser}, nil | ||
} | ||
|
||
// URIParser is an operator that parses a uri. | ||
type URIParser struct { | ||
helper.ParserOperator | ||
} | ||
|
||
// Process will parse an entry. | ||
func (u *URIParser) Process(ctx context.Context, entry *entry.Entry) error { | ||
return u.ParserOperator.ProcessWith(ctx, entry, u.parse) | ||
} | ||
|
||
// parse will parse a uri from a field and attach it to an entry. | ||
func (u *URIParser) parse(value interface{}) (interface{}, error) { | ||
switch m := value.(type) { | ||
case string: | ||
return parseURI(m) | ||
case []byte: | ||
return parseURI(string(m)) | ||
default: | ||
return nil, fmt.Errorf("type '%T' cannot be parsed as URI", value) | ||
} | ||
} | ||
|
||
// parseURI takes an absolute or relative uri and returns the parsed values. | ||
func parseURI(value string) (map[string]interface{}, error) { | ||
m := make(map[string]interface{}) | ||
|
||
if strings.HasPrefix(value, "?") { | ||
// remove the query string '?' prefix before parsing | ||
v, err := url.ParseQuery(value[1:]) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return queryToMap(v, m), nil | ||
} | ||
|
||
x, err := url.ParseRequestURI(value) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return urlToMap(x, m), nil | ||
} | ||
|
||
// urlToMap converts a url.URL to a map, excludes any values that are not set. | ||
func urlToMap(p *url.URL, m map[string]interface{}) map[string]interface{} { | ||
scheme := p.Scheme | ||
if scheme != "" { | ||
m["scheme"] = scheme | ||
} | ||
|
||
user := p.User.Username() | ||
if user != "" { | ||
m["user"] = user | ||
} | ||
|
||
host := p.Hostname() | ||
if host != "" { | ||
m["host"] = host | ||
} | ||
|
||
port := p.Port() | ||
if port != "" { | ||
m["port"] = port | ||
} | ||
|
||
path := p.EscapedPath() | ||
if path != "" { | ||
m["path"] = path | ||
} | ||
|
||
return queryToMap(p.Query(), m) | ||
} | ||
|
||
// queryToMap converts a query string url.Values to a map. | ||
func queryToMap(query url.Values, m map[string]interface{}) map[string]interface{} { | ||
// no-op if query is empty, do not create the key m["query"] | ||
if len(query) <= 0 { | ||
return m | ||
} | ||
|
||
/* 'parameter' will represent url.Values | ||
map[string]interface{}{ | ||
"parameter-a": []interface{}{ | ||
"a", | ||
"b", | ||
}, | ||
"parameter-b": []interface{}{ | ||
"x", | ||
"y", | ||
}, | ||
} | ||
*/ | ||
parameters := map[string]interface{}{} | ||
for param, values := range query { | ||
parameters[param] = queryParamValuesToMap(values) | ||
} | ||
m["query"] = parameters | ||
return m | ||
} | ||
|
||
|
||
// queryParamValuesToMap converts the values of one query string parameter
// into an []interface{} of the same length and order. Despite its name, the
// result is a slice, not a map.
func queryParamValuesToMap(values []string) []interface{} {
	converted := make([]interface{}, 0, len(values))
	for _, s := range values {
		converted = append(converted, s)
	}
	return converted
}
Oops, something went wrong.