forked from linkedin/goavro
-
Notifications
You must be signed in to change notification settings - Fork 0
/
canonical.go
188 lines (163 loc) · 5.54 KB
/
canonical.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
// Copyright [2019] LinkedIn Corp. Licensed under the Apache License, Version
// 2.0 (the "License"); you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
package goavro
import (
"fmt"
"sort"
"strconv"
"strings"
)
// pcfProcessor is a function type that given a parsed JSON object, returns its
// Parsing Canonical Form according to the Avro specification.
type pcfProcessor func(s interface{}) (string, error)
// parsingCanonialForm returns the "Parsing Canonical Form" (pcf) for a parsed
// JSON structure of a valid Avro schema, or an error describing the schema
// error.
func parsingCanonicalForm(schema interface{}, parentNamespace string, typeLookup map[string]string) (string, error) {
switch val := schema.(type) {
case map[string]interface{}:
// JSON objects are decoded as a map of strings to empty interfaces
return pcfObject(val, parentNamespace, typeLookup)
case []interface{}:
// JSON arrays are decoded as a slice of empty interfaces
return pcfArray(val, parentNamespace, typeLookup)
case string:
// JSON string values are decoded as a Go string
return pcfString(val, typeLookup)
case float64:
// JSON numerical values are decoded as Go float64
return pcfNumber(val)
default:
return "", fmt.Errorf("cannot parse schema with invalid schema type; ought to be map[string]interface{}, []interface{}, string, or float64; received: %T: %v", schema, schema)
}
}
// pcfNumber returns the parsing canonical form for a numerical value.
func pcfNumber(val float64) (string, error) {
return strconv.FormatFloat(val, 'g', -1, 64), nil
}
// pcfString returns the parsing canonical form for a string value.
func pcfString(val string, typeLookup map[string]string) (string, error) {
if canonicalName, ok := typeLookup[val]; ok {
return `"` + canonicalName + `"`, nil
}
return `"` + val + `"`, nil
}
// pcfArray returns the parsing canonical form for a JSON array.
func pcfArray(val []interface{}, parentNamespace string, typeLookup map[string]string) (string, error) {
items := make([]string, len(val))
for i, el := range val {
p, err := parsingCanonicalForm(el, parentNamespace, typeLookup)
if err != nil {
return "", err
}
items[i] = p
}
return "[" + strings.Join(items, ",") + "]", nil
}
// pcfObject returns the parsing canonical form for a JSON object.
func pcfObject(jsonMap map[string]interface{}, parentNamespace string, typeLookup map[string]string) (string, error) {
pairs := make(stringPairs, 0, len(jsonMap))
// Remember the namespace to fully qualify names later
var namespace string
if namespaceJSON, ok := jsonMap["namespace"]; ok {
if namespaceStr, ok := namespaceJSON.(string); ok {
// and it's value is string (otherwise invalid schema)
if parentNamespace == "" {
namespace = namespaceStr
} else {
namespace = parentNamespace + "." + namespaceStr
}
parentNamespace = namespace
}
} else if objectType, ok := jsonMap["type"]; ok && objectType == "record" {
namespace = parentNamespace
}
for k, v := range jsonMap {
// Reduce primitive schemas to their simple form.
if len(jsonMap) == 1 && k == "type" {
if t, ok := v.(string); ok {
return "\"" + t + "\"", nil
}
}
// Only keep relevant attributes (strip 'doc', 'alias', 'namespace')
if _, ok := fieldOrder[k]; !ok {
continue
}
// Add namespace to a non-qualified name.
if k == "name" && namespace != "" {
// Check if the name isn't already qualified.
if t, ok := v.(string); ok && !strings.ContainsRune(t, '.') {
v = namespace + "." + t
typeLookup[t] = v.(string)
}
}
// Only fixed type allows size, and we must convert a string size to a
// float.
if k == "size" {
if s, ok := v.(string); ok {
s, err := strconv.ParseUint(s, 10, 0)
if err != nil {
// should never get here because already validated schema
return "", fmt.Errorf("Fixed size ought to be number greater than zero: %v", s)
}
v = float64(s)
}
}
pk, err := parsingCanonicalForm(k, parentNamespace, typeLookup)
if err != nil {
return "", err
}
pv, err := parsingCanonicalForm(v, parentNamespace, typeLookup)
if err != nil {
return "", err
}
pairs = append(pairs, stringPair{k, pk + ":" + pv})
}
// Sort keys by their order in specification.
sort.Sort(byAvroFieldOrder(pairs))
return "{" + strings.Join(pairs.Bs(), ",") + "}", nil
}
// stringPair represents a pair of string values.
type stringPair struct {
A string
B string
}
// stringPairs is a sortable slice of pairs of strings.
type stringPairs []stringPair
// Bs returns an array of second values of an array of pairs.
func (sp *stringPairs) Bs() []string {
items := make([]string, len(*sp))
for i, el := range *sp {
items[i] = el.B
}
return items
}
// fieldOrder defines fields that show up in canonical schema and specifies
// their precedence.
var fieldOrder = map[string]int{
"name": 1,
"type": 2,
"fields": 3,
"symbols": 4,
"items": 5,
"values": 6,
"size": 7,
}
// byAvroFieldOrder is equipped with a sort order of fields according to the
// specification.
type byAvroFieldOrder []stringPair
func (s byAvroFieldOrder) Len() int {
return len(s)
}
func (s byAvroFieldOrder) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s byAvroFieldOrder) Less(i, j int) bool {
return fieldOrder[s[i].A] < fieldOrder[s[j].A]
}