Skip to content

Commit

Permalink
uri parser implementation. parse absolute and relative uri, as well a…
Browse files Browse the repository at this point in the history
…s query strings.
  • Loading branch information
jsirianni committed Jan 31, 2021
1 parent f03c848 commit 445271a
Show file tree
Hide file tree
Showing 4 changed files with 929 additions and 0 deletions.
1 change: 1 addition & 0 deletions cmd/stanza/init_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
_ "github.com/observiq/stanza/operator/builtin/parser/severity"
_ "github.com/observiq/stanza/operator/builtin/parser/syslog"
_ "github.com/observiq/stanza/operator/builtin/parser/time"
_ "github.com/observiq/stanza/operator/builtin/parser/uri"

_ "github.com/observiq/stanza/operator/builtin/transformer/filter"
_ "github.com/observiq/stanza/operator/builtin/transformer/recombine"
Expand Down
180 changes: 180 additions & 0 deletions docs/operators/uri_parser.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
## `uri_parser` operator

The `uri_parser` operator parses the string-type field selected by `parse_from` as [URI](https://tools.ietf.org/html/rfc3986).

`uri_parser` can handle:
- Absolute URI
- `https://google.com/v1/app?user_id=2&uuid=57b4dad2-063c-4965-941c-adfd4098face`
- Relative URI
- `/app?user=admin`
- Query string
- `?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev`
- Query string must start with a question mark

### Configuration Fields

| Field | Default | Description |
| --- | --- | --- |
| `id` | `uri_parser` | A unique identifier for the operator |
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries |
| `parse_from` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed as a URI |
| `parse_to` | $ | A [field](/docs/types/field.md) that indicates where the parsed URI fields will be written |
| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md) |
| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](/docs/types/on_error.md) |
| `if` | | An [expression](/docs/types/expression.md) that, when set, will be evaluated to determine whether this operator should be used for the given entry. This allows you to do easy conditional parsing without branching logic with routers. |


### Output Fields

The following fields are returned. Empty fields are not returned.

| Field | Type | Example | Description |
| --- | --- | --- | --- |
| scheme | `string` | `"http"` | [URI Scheme](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml). HTTP, HTTPS, FTP, etc. |
| user | `string` | `"dev"` | [Userinfo](https://tools.ietf.org/html/rfc3986#section-3.2) username. Password is always ignored. |
| host | `string` | `"golang.org"` | The [hostname](https://tools.ietf.org/html/rfc3986#section-3.2.2) such as `www.example.com`, `example.com`, `example`. A scheme is required in order to parse the `host` field. |
| port | `string` | `"8443"` | The [port](https://tools.ietf.org/html/rfc3986#section-3.2.3) the request is sent to. A scheme is required in order to parse the `port` field. |
| path | `string` | `"/v1/app"` | URI request [path](https://tools.ietf.org/html/rfc3986#section-3.3). |
| query | `map[string][]string` | `"query":{"user":["admin"]}` | Parsed URI [query string](https://tools.ietf.org/html/rfc3986#section-3.4). |


### Example Configurations


#### Parse the field `message` as absolute URI

Configuration:
```yaml
- type: uri_parser
parse_from: message
```
<table>
<tr><td> Input record </td> <td> Output record </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"record": {
"message": "https://dev:[email protected]/v1/app?user_id=2&uuid=57b4dad2-063c-4965-941c-adfd4098face"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"record": {
"host": "google.com",
"path": "/v1/app",
"query": {
"user_id": [
"2"
],
"uuid": [
"57b4dad2-063c-4965-941c-adfd4098face"
]
},
"scheme": "https",
"user": "dev"
}
}
```

</td>
</tr>
</table>

#### Parse the field `message` as relative URI

Configuration:
```yaml
- type: uri_parser
parse_from: message
```
<table>
<tr><td> Input record </td> <td> Output record </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"record": {
"message": "/app?user=admin"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"record": {
"path": "/app",
"query": {
"user": [
"admin"
]
}
}
}
```

</td>
</tr>
</table>

#### Parse the field `query` as URI query string

Configuration:
```yaml
- type: uri_parser
parse_from: query
```
<table>
<tr><td> Input record </td> <td> Output record </td></tr>
<tr>
<td>

```json
{
"timestamp": "",
"record": {
"query": "?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev"
}
}
```

</td>
<td>

```json
{
"timestamp": "",
"record": {
"query": {
"env": [
"stage",
"dev"
],
"request": [
"681e6fc4-3314-4ccc-933e-4f9c9f0efd24"
]
}
}
}
```

</td>
</tr>
</table>
152 changes: 152 additions & 0 deletions operator/builtin/parser/uri/uri.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package uri

import (
"context"
"fmt"
"net/url"
"strings"

"github.com/observiq/stanza/entry"
"github.com/observiq/stanza/operator"
"github.com/observiq/stanza/operator/helper"
)

// init registers a builder factory under the name "uri_parser" via
// operator.Register. Each invocation of the factory yields a fresh
// URIParserConfig created with an empty operator ID.
func init() {
	newBuilder := func() operator.Builder {
		return NewURIParserConfig("")
	}
	operator.Register("uri_parser", newBuilder)
}

// NewURIParserConfig returns a pointer to a URIParserConfig whose embedded
// ParserConfig comes from helper.NewParserConfig, called with the supplied
// operator ID and the name "uri_parser".
func NewURIParserConfig(operatorID string) *URIParserConfig {
	parserConfig := helper.NewParserConfig(operatorID, "uri_parser")
	return &URIParserConfig{ParserConfig: parserConfig}
}

// URIParserConfig is the configuration of a uri parser operator.
// It declares no fields of its own: it only embeds helper.ParserConfig,
// tagged so that the embedded fields are inlined in the YAML representation.
type URIParserConfig struct {
helper.ParserConfig `yaml:",inline"`
}

// Build constructs the uri parser operator described by this configuration.
//
// The embedded ParserConfig is built first; if that fails, its error is
// returned unchanged together with a nil slice. On success the result is a
// single-element slice holding a *URIParser that wraps the built parser
// operator.
func (c URIParserConfig) Build(context operator.BuildContext) ([]operator.Operator, error) {
	base, err := c.ParserConfig.Build(context)
	if err != nil {
		return nil, err
	}

	return []operator.Operator{
		&URIParser{ParserOperator: base},
	}, nil
}

// URIParser is an operator that parses a uri.
// It embeds helper.ParserOperator and adds no fields of its own; Process
// hands entries to the embedded operator's ProcessWith together with the
// URI-specific parse function.
type URIParser struct {
helper.ParserOperator
}

// Process handles a single entry by delegating to the embedded
// ParserOperator's ProcessWith, passing this operator's parse method as the
// parsing function. Whatever error ProcessWith reports is returned as-is.
func (u *URIParser) Process(ctx context.Context, entry *entry.Entry) error {
	parseFn := u.parse
	return u.ParserOperator.ProcessWith(ctx, entry, parseFn)
}

// parse converts a field value into its parsed URI representation.
//
// Only string and []byte values are accepted; a []byte is converted to a
// string before parsing. Any other type yields an error naming the offending
// type. The accepted value is handed to parseURI and its result is returned.
func (u *URIParser) parse(value interface{}) (interface{}, error) {
	var raw string

	switch typed := value.(type) {
	case string:
		raw = typed
	case []byte:
		raw = string(typed)
	default:
		return nil, fmt.Errorf("type '%T' cannot be parsed as URI", value)
	}

	return parseURI(raw)
}

// parseURI takes an absolute or relative uri and returns the parsed values.
func parseURI(value string) (map[string]interface{}, error) {
m := make(map[string]interface{})

if strings.HasPrefix(value, "?") {
// remove the query string '?' prefix before parsing
v, err := url.ParseQuery(value[1:])
if err != nil {
return nil, err
}
return queryToMap(v, m), nil
}

x, err := url.ParseRequestURI(value)
if err != nil {
return nil, err
}
return urlToMap(x, m), nil
}

// urlToMap copies the components of p into m and returns m.
//
// The keys "scheme", "user", "host", "port" and "path" are written only when
// the corresponding component is a non-empty string. Only the username part
// of the userinfo is read, and the path is stored in its escaped form. Query
// parameters are added last through queryToMap.
func urlToMap(p *url.URL, m map[string]interface{}) map[string]interface{} {
	components := []struct {
		key   string
		value string
	}{
		{"scheme", p.Scheme},
		{"user", p.User.Username()},
		{"host", p.Hostname()},
		{"port", p.Port()},
		{"path", p.EscapedPath()},
	}

	for _, c := range components {
		// Unset components are left out of the map entirely.
		if c.value != "" {
			m[c.key] = c.value
		}
	}

	return queryToMap(p.Query(), m)
}

// queryToMap stores the parsed query parameters in m under the key "query"
// and returns m.
//
// When query holds no parameters, m is returned untouched and the "query" key
// is not created. Otherwise the stored value has the shape
//
//	map[string]interface{}{
//		"parameter-a": []interface{}{"a", "b"},
//		"parameter-b": []interface{}{"x", "y"},
//	}
//
// with one entry per parameter name, each holding that parameter's values.
func queryToMap(query url.Values, m map[string]interface{}) map[string]interface{} {
	if len(query) == 0 {
		return m
	}

	params := make(map[string]interface{}, len(query))
	for name := range query {
		params[name] = queryParamValuesToMap(query[name])
	}

	m["query"] = params
	return m
}


// queryParamValuesToMap converts the values of one query string parameter
// into an []interface{} holding the same strings in the same order. The
// returned slice is always non-nil, even when values is nil or empty.
func queryParamValuesToMap(values []string) []interface{} {
	converted := make([]interface{}, 0, len(values))
	for _, s := range values {
		converted = append(converted, s)
	}
	return converted
}
Loading

0 comments on commit 445271a

Please sign in to comment.