Skip to content
This repository has been archived by the owner on May 25, 2022. It is now read-only.

URI parser operator #12

Merged
merged 2 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 180 additions & 0 deletions docs/operators/uri_parser.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
## `uri_parser` operator

The `uri_parser` operator parses the string-type field selected by `parse_from` as [URI](https://tools.ietf.org/html/rfc3986).

`uri_parser` can handle:
- Absolute URI
- `https://google.com/v1/app?user_id=2&uuid=57b4dad2-063c-4965-941c-adfd4098face`
- Relative URI
- `/app?user=admin`
- Query string
- `?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev`
- Query string must start with a question mark

### Configuration Fields

| Field | Default | Description |
| --- | --- | --- |
| `id` | `uri_parser` | A unique identifier for the operator |
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries |
| `parse_from` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed as JSON |
| `parse_to` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed as JSON |
| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md) |
| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](/docs/types/on_error.md) |
| `if` | | An [expression](/docs/types/expression.md) that, when set, will be evaluated to determine whether this operator should be used for the given entry. This allows you to do easy conditional parsing without branching logic with routers. |


### Output Fields

The following fields are returned. Empty fields are not returned.

| Field | Type | Example | Description |
| --- | --- | --- | --- |
| scheme | `string` | `"http"` | [URI Scheme](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml). HTTP, HTTPS, FTP, etc. |
| user | `string` | `"dev"` | [Userinfo](https://tools.ietf.org/html/rfc3986#section-3.2) username. Password is always ignored. |
| host | `string` | `"golang.org"` | The [hostname](https://tools.ietf.org/html/rfc3986#section-3.2.2) such as `www.example.com`, `example.com`, `example`. A scheme is required in order to parse the `host` field. |
| port | `string` | `"8443"` | The [port](https://tools.ietf.org/html/rfc3986#section-3.2.3) the request is sent to. A scheme is required in order to parse the `port` field. |
| path | `string` | `"/v1/app"` | URI request [path](https://tools.ietf.org/html/rfc3986#section-3.3). |
| query | `map[string][]string` | `"query":{"user":["admin"]}` | Parsed URI [query string](https://tools.ietf.org/html/rfc3986#section-3.4). |


### Example Configurations


#### Parse the field `message` as absolute URI

Configuration:
```yaml
- type: uri_parser
parse_from: message
```

<table>
<tr><td> Input record </td> <td> Output record </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"record": {
"message": "https://dev:[email protected]/app?user_id=2&token=001"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"record": {
"host": "google.com",
"path": "/app",
"query": {
"user_id": [
"2"
],
"token": [
"001"
]
},
"scheme": "https",
"user": "dev"
}
}
```

</td>
</tr>
</table>

#### Parse the field `message` as relative URI

Configuration:
```yaml
- type: uri_parser
parse_from: message
```

<table>
<tr><td> Input record </td> <td> Output record </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"record": {
"message": "/app?user=admin"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"record": {
"path": "/app",
"query": {
"user": [
"admin"
]
}
}
}
```

</td>
</tr>
</table>

#### Parse the field `query` as URI query string

Configuration:
```yaml
- type: uri_parser
parse_from: query
```

<table>
<tr><td> Input record </td> <td> Output record </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"record": {
"query": "?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"record": {
"query": {
"env": [
"stage",
"dev"
],
"request": [
"681e6fc4-3314-4ccc-933e-4f9c9f0efd24"
]
}
}
}
```

</td>
</tr>
</table>
166 changes: 166 additions & 0 deletions operator/builtin/parser/uri/uri.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package uri

import (
"context"
"fmt"
"net/url"
"strings"

"github.com/open-telemetry/opentelemetry-log-collection/entry"
"github.com/open-telemetry/opentelemetry-log-collection/operator"
"github.com/open-telemetry/opentelemetry-log-collection/operator/helper"
)

func init() {
operator.Register("uri_parser", func() operator.Builder { return NewURIParserConfig("") })
}

// NewURIParserConfig creates a new uri parser config with default values.
func NewURIParserConfig(operatorID string) *URIParserConfig {
return &URIParserConfig{
ParserConfig: helper.NewParserConfig(operatorID, "uri_parser"),
}
}

// URIParserConfig is the configuration of a uri parser operator.
type URIParserConfig struct {
helper.ParserConfig `yaml:",inline"`
}

// Build will build a uri parser operator.
func (c URIParserConfig) Build(context operator.BuildContext) ([]operator.Operator, error) {
parserOperator, err := c.ParserConfig.Build(context)
if err != nil {
return nil, err
}

uriParser := &URIParser{
ParserOperator: parserOperator,
}

return []operator.Operator{uriParser}, nil
}

// URIParser is an operator that parses a uri.
type URIParser struct {
helper.ParserOperator
}

// Process will parse an entry.
func (u *URIParser) Process(ctx context.Context, entry *entry.Entry) error {
return u.ParserOperator.ProcessWith(ctx, entry, u.parse)
}

// parse will parse a uri from a field and attach it to an entry.
func (u *URIParser) parse(value interface{}) (interface{}, error) {
switch m := value.(type) {
case string:
return parseURI(m)
case []byte:
return parseURI(string(m))
default:
return nil, fmt.Errorf("type '%T' cannot be parsed as URI", value)
}
}

// parseURI takes an absolute or relative uri and returns the parsed values.
func parseURI(value string) (map[string]interface{}, error) {
m := make(map[string]interface{})

if strings.HasPrefix(value, "?") {
// remove the query string '?' prefix before parsing
v, err := url.ParseQuery(value[1:])
if err != nil {
return nil, err
}
return queryToMap(v, m), nil
}

x, err := url.ParseRequestURI(value)
if err != nil {
return nil, err
}
return urlToMap(x, m), nil
}

// urlToMap converts a url.URL to a map, excludes any values that are not set.
func urlToMap(p *url.URL, m map[string]interface{}) map[string]interface{} {
scheme := p.Scheme
if scheme != "" {
m["scheme"] = scheme
}

user := p.User.Username()
if user != "" {
m["user"] = user
}

host := p.Hostname()
if host != "" {
m["host"] = host
}

port := p.Port()
if port != "" {
m["port"] = port
}

path := p.EscapedPath()
if path != "" {
m["path"] = path
}

return queryToMap(p.Query(), m)
}

// queryToMap converts a query string url.Values to a map.
func queryToMap(query url.Values, m map[string]interface{}) map[string]interface{} {
// no-op if query is empty, do not create the key m["query"]
if len(query) <= 0 {
return m
}

/* 'parameter' will represent url.Values
map[string]interface{}{
"parameter-a": []interface{}{
"a",
"b",
},
"parameter-b": []interface{}{
"x",
"y",
},
}
*/
parameters := map[string]interface{}{}
for param, values := range query {
parameters[param] = queryParamValuesToMap(values)
}
m["query"] = parameters
return m
}


// queryParamValuesToMap takes query string parameter values and
// returns an []interface populated with the values
func queryParamValuesToMap(values []string) []interface{} {
v := make([]interface{}, len(values))
for i, value := range values {
v[i] = value
}
return v
}
Loading