Skip to content

Commit

Permalink
[SOAR-0001] Implement Encoding for Property Names (#89)
Browse files Browse the repository at this point in the history
## Motivation

Encoding helps distinguish names properly. For example, previously both `a b` and
`a_b` would be rendered as `a_b`. With this change, they are rendered as
`a_space_b` and `a_b` respectively.

## Modifications

Added a hexadecimal encoding for unsupported characters rather than
having just an underscore for everything.
Removed redundant keywords.

## Result
Names are now properly differentiated, as explained in the motivation.
In addition, the reserved keywords set is reduced by removing
symbols and any keywords that begin with such symbols.

## Test Plan
Tested and verified for several combinations of special characters in
various positions in-between alphanumeric characters.

---------

Co-authored-by: Honza Dvorsky <[email protected]>
  • Loading branch information
denil-ct and czechboy0 authored Aug 2, 2023
1 parent 81f8743 commit bf49be1
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,62 @@ fileprivate extension String {
///
/// See the proposal SOAR-0001 for details.
///
/// Illegal characters are not simply replaced with an underscore.
/// For example, the string `$nake…` would be returned as `_dollar_nake_x2026_`, because
/// neither the dollar sign nor the ellipsis is a valid character in a Swift identifier.
/// Instead, such characters are replaced with their HTML entity name or, when the character
/// is not present in `specialCharsMap`, with their Unicode hex representation prefixed with `x`.
/// Each replacement is delimited by `_` on both sides.
///
/// In addition to replacing illegal characters, it also
/// ensures that the identifier starts with a letter and not a number.
var proposedSafeForSwiftCode: String {
    // An empty string cannot be sanitized scalar by scalar; use a fixed placeholder name.
    guard !isEmpty else {
        return "_empty"
    }

    // The first scalar must be a letter or underscore; later scalars may also be digits.
    let firstCharSet: CharacterSet = .letters.union(.init(charactersIn: "_"))
    let numbers: CharacterSet = .decimalDigits
    let otherCharSet: CharacterSet = .alphanumerics.union(.init(charactersIn: "_"))

    var sanitizedScalars: [Unicode.Scalar] = []
    for (index, scalar) in unicodeScalars.enumerated() {
        let allowedSet = index == 0 ? firstCharSet : otherCharSet
        if allowedSet.contains(scalar) {
            // Already valid at this position: keep it unchanged.
            sanitizedScalars.append(scalar)
        } else if index == 0 && numbers.contains(scalar) {
            // A leading digit is kept, but prefixed with an underscore.
            sanitizedScalars.append("_")
            sanitizedScalars.append(scalar)
        } else {
            // Replace the scalar with `_<name>_`, where the name comes from
            // `specialCharsMap`, or `_x<HEX>_` when the scalar has no entry there.
            sanitizedScalars.append("_")
            if let entityName = Self.specialCharsMap[scalar] {
                sanitizedScalars.append(contentsOf: entityName.unicodeScalars)
            } else {
                sanitizedScalars.append("x")
                let hexString = String(scalar.value, radix: 16, uppercase: true)
                sanitizedScalars.append(contentsOf: hexString.unicodeScalars)
            }
            sanitizedScalars.append("_")
        }
    }

    let validString = String(UnicodeScalarView(sanitizedScalars))

    // Special case for a single underscore.
    // We can't add it to the map as it's a valid Swift identifier character in other cases.
    if validString == "_" {
        return "_underscore_"
    }

    // Names that collide with an entry in `keywords` get an underscore prefix.
    guard Self.keywords.contains(validString) else {
        return validString
    }
    return "_\(validString)"
}

/// A list of Swift keywords.
Expand Down Expand Up @@ -138,62 +189,6 @@ fileprivate extension String {
"true",
"try",
"throws",
"__FILE__",
"__LINE__",
"__COLUMN__",
"__FUNCTION__",
"__DSO_HANDLE__",
"_",
"(",
")",
"{",
"}",
"[",
"]",
"<",
">",
".",
".",
",",
"...",
":",
";",
"=",
"@",
"#",
"&",
"->",
"`",
"\\",
"!",
"?",
"?",
"\"",
"\'",
"\"\"\"",
"#keyPath",
"#line",
"#selector",
"#file",
"#fileID",
"#filePath",
"#column",
"#function",
"#dsohandle",
"#assert",
"#sourceLocation",
"#warning",
"#error",
"#if",
"#else",
"#elseif",
"#endif",
"#available",
"#unavailable",
"#fileLiteral",
"#imageLiteral",
"#colorLiteral",
")",
"yield",
"String",
"Error",
Expand All @@ -205,4 +200,40 @@ fileprivate extension String {
"Protocol",
"await",
]

/// Names used when replacing printable ASCII punctuation (and the space character)
/// in generated identifiers. Using a distinct name per character reduces collisions
/// between generated names.
private static let specialCharsMap: [Unicode.Scalar: String] = [
    // Space and punctuation preceding the digits.
    " ": "space", "!": "excl", "\"": "quot", "#": "num",
    "$": "dollar", "%": "percnt", "&": "amp", "'": "apos",
    "(": "lpar", ")": "rpar", "*": "ast", "+": "plus",
    ",": "comma", "-": "hyphen", ".": "period", "/": "sol",

    // Punctuation between the digits and the uppercase letters.
    ":": "colon", ";": "semi", "<": "lt", "=": "equals",
    ">": "gt", "?": "quest", "@": "commat",

    // Punctuation between the uppercase and lowercase letters.
    "[": "lbrack", "\\": "bsol", "]": "rbrack", "^": "hat",
    "`": "grave",

    // Punctuation following the lowercase letters.
    "{": "lcub", "|": "verbar", "}": "rcub", "~": "tilde",
]
}
51 changes: 51 additions & 0 deletions Tests/OpenAPIGeneratorCoreTests/Extensions/Test_String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,55 @@ final class Test_String: Test_Core {
XCTAssertEqual(asSwiftSafeName(input), sanitized)
}
}

/// Checks the names produced by `swiftSafeName` on a translator created with
/// the `.proposal0001` feature flag.
func testAsProposedSwiftName() {
    let translator = makeTranslator(featureFlags: [.proposal0001])
    let sanitize: (String) -> String = translator.swiftSafeName

    // Each pair is (raw input, expected sanitized name).
    let expectations: [(String, String)] = [
        // Simple
        ("foo", "foo"),

        // Starts with a number
        ("3foo", "_3foo"),

        // Keyword
        ("default", "_default"),

        // Reserved name
        ("Type", "_Type"),

        // Empty string
        ("", "_empty"),

        // Special Char in middle
        ("inv@lidName", "inv_commat_lidName"),

        // Special Char in first position
        ("!nvalidName", "_excl_nvalidName"),

        // Special Char in last position
        ("invalidNam?", "invalidNam_quest_"),

        // Valid underscore case
        ("__user", "__user"),

        // Invalid underscore case
        ("_", "_underscore_"),

        // Special character mixed with character not in map
        ("$nake…", "_dollar_nake_x2026_"),

        // Only special character
        ("$", "_dollar_"),

        // Only special character not in map
        ("……", "_x2026__x2026_"),

        // Non Latin Characters
        ("$مرحبا", "_dollar_مرحبا"),
    ]

    for pair in expectations {
        XCTAssertEqual(sanitize(pair.0), pair.1)
    }
}
}

0 comments on commit bf49be1

Please sign in to comment.