From 87bd2e784ced074338b7f7be3fd44ce46adaf6e6 Mon Sep 17 00:00:00 2001 From: Denil Chungath Date: Thu, 22 Jun 2023 01:35:49 +0530 Subject: [PATCH 1/8] Experimental encoding support --- .../Extensions/String.swift | 69 +++---------------- 1 file changed, 10 insertions(+), 59 deletions(-) diff --git a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift index 7eeeb141..a77f33b0 100644 --- a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift +++ b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift @@ -67,8 +67,6 @@ fileprivate extension String { } // Only allow [a-zA-Z][a-zA-Z0-9_]* - // This is bad, is there something like percent encoding functionality but for general "allowed chars only"? - let firstCharSet: CharacterSet = .letters let numbers: CharacterSet = .decimalDigits let otherCharSet: CharacterSet = .alphanumerics.union(.init(charactersIn: "_")) @@ -83,7 +81,16 @@ fileprivate extension String { sanitizedScalars.append("_") outScalar = scalar } else { - outScalar = "_" + var hexString = String(scalar.value, radix: 16, uppercase: true) + if index == 0, + let firstChar = hexString.unicodeScalars.first, + !firstCharSet.contains(firstChar) { + hexString = "_\(hexString)" + } + for char in hexString.unicodeScalars { + sanitizedScalars.append(char) + } + continue } sanitizedScalars.append(outScalar) } @@ -153,62 +160,6 @@ fileprivate extension String { "true", "try", "throws", - "__FILE__", - "__LINE__", - "__COLUMN__", - "__FUNCTION__", - "__DSO_HANDLE__", - "_", - "(", - ")", - "{", - "}", - "[", - "]", - "<", - ">", - ".", - ".", - ",", - "...", - ":", - ";", - "=", - "@", - "#", - "&", - "->", - "`", - "\\", - "!", - "?", - "?", - "\"", - "\'", - "\"\"\"", - "#keyPath", - "#line", - "#selector", - "#file", - "#fileID", - "#filePath", - "#column", - "#function", - "#dsohandle", - "#assert", - "#sourceLocation", - "#warning", - "#error", - "#if", - "#else", - "#elseif", - "#endif", - "#available", - "#unavailable", - "#fileLiteral", - "#imageLiteral", - "#colorLiteral", - ")", "yield", "String", "Error", From a48a686178ddf11fad97e8006ac1d872899bd528 Mon Sep 17 00:00:00 2001 From: Denil Chungath <95201442+denil-ct@users.noreply.github.com> Date: Mon, 26 Jun 2023 11:27:58 +0530 Subject: [PATCH 2/8] Create SOAR-0001.md --- .../Documentation.docc/Proposals/SOAR-0001.md | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md diff --git a/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md b/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md new file mode 100644 index 00000000..26ec1d51 --- /dev/null +++ b/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md @@ -0,0 +1,52 @@ +# SOAR-0001 + +Encoding for Property Names + +## Overview + +- Proposal: SOAR-0001 +- Author(s): [Denil](https://github.com/denil-ct) +- Status: **Awaiting Review** +- Issue: https://github.com/apple/swift-openapi-generator/issues/21 +- Implementation: + - https://github.com/apple/swift-openapi-generator/pull/89 +- Affected components: + - generator + +### Introduction + +The goal of this proposal is to improve the way we handle unsupported characters in property names when generating code from specs. Currently, we use a block list approach, replacing offending characters with `_` which can cause name conflicts. By encoding the offending character we create unique and valid property names. This will avoid name collisions and ensure consistent code generation. + +### Motivation + +The current approach for handling unsupported characters in property names is not robust and can lead to unexpected and undesirable outcomes. For example, if there are two properties, `a_b` and `a b`, with the current implementation, this will result in the same generated property `a_b` for both, which would create a conflict. It can also result in loss of information or meaning from the original specification. Therefore, we need a better solution that can handle any unsupported character in a consistent and reliable way, without compromising the quality and functionality of the code. + +### Proposed solution + +The proposed solution to the problem is to use hex encoding for any unsupported character in property names. Hex encoding is a simple and standard way of representing any character as a sequence of hexadecimal digits. For example, the asterisk (*) character is encoded as 2A, the space ( ) character is encoded as 20, and the slash (/) character is encoded as 2F. Hex encoding also has the added benefit of not introducing any additional special characters. + +Some examples, + +yaml | swift +-- | -- +a b | a20b +a*b | a2Ab +ab_ | ab_ +ab* | ab2A +/ab | _2Fab +Hu&J_?kin | Hu26J_3Fkin +message | message + +This would mean, that for the users of the generator, a future version of the generator might produce different names that what it currently produces right now and should be ready to make those changes before upgrading to this version. + +### Detailed design + +The implementation for this is quite simple as you can see in https://github.com/apple/swift-openapi-generator/pull/89, we just made changes to the substitution logic where it used to substitute with `_`. We now add an additional encoding to the special character before substituting it. Contributors should be aware of this change and should review the places where they use this extension and evaluate if its suitable for them with this change. + +### API stability + +This is an API breaking change, as it will produce different symbol names than before. Other components such as the runtime and transports should not have any impacts. + +### Future directions + +The encoding strategy is open for further discussion. As a starting point, we have chosen the most simplest encoding format of hex. One of the reasons for this the hex encoding adds quite arbitrary symbols to the property name, which is not ideal. We could go towards a middle of the road approach where we have wordified versions of the special characters which we can map to. For example `a+b` can be `aplusb` or `a_plus_b` to add some kind of delimiter to specify the replaced portion. From 51a9890edf6022b4cbc94f9061685d963e130885 Mon Sep 17 00:00:00 2001 From: Denil Chungath Date: Wed, 28 Jun 2023 00:29:53 +0530 Subject: [PATCH 3/8] Add special characters map --- .../Extensions/String.swift | 73 +++++++++++++++---- .../Documentation.docc/Proposals/SOAR-0001.md | 20 +++-- 2 files changed, 69 insertions(+), 24 deletions(-) diff --git a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift index cb52b5e8..4f63694d 100644 --- a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift +++ b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift @@ -56,18 +56,19 @@ fileprivate extension String { /// Returns a string sanitized to be usable as a Swift identifier. /// - /// For example, the string `$nake` would be returned as `_nake`, because - /// the dollar sign is not a valid character in a Swift identifier. + /// For example, the string `$nake…` would be returned as `_dollar_nake_x2026_`, because + /// both the dollar and ellipsis sign are not valid characters in a Swift identifier. + /// So, it replaces such characters with their html enity equivalents or unicode hex representation, + /// in case its not present in the `specialCharsMap`. It marks this replacement with `_` as a delimiter. /// - /// In addition to replacing illegal characters with an underscores, also + /// In addition to replacing illegal characters, it also /// ensures that the identifier starts with a letter and not a number. var sanitizedForSwiftCode: String { guard !isEmpty else { return "_empty" } - // Only allow [a-zA-Z][a-zA-Z0-9_]* - let firstCharSet: CharacterSet = .letters + let firstCharSet: CharacterSet = .letters.union(.init(charactersIn: "_")) let numbers: CharacterSet = .decimalDigits let otherCharSet: CharacterSet = .alphanumerics.union(.init(charactersIn: "_")) @@ -81,15 +82,19 @@ fileprivate extension String { sanitizedScalars.append("_") outScalar = scalar } else { - var hexString = String(scalar.value, radix: 16, uppercase: true) - if index == 0, - let firstChar = hexString.unicodeScalars.first, - !firstCharSet.contains(firstChar) { - hexString = "_\(hexString)" - } - for char in hexString.unicodeScalars { - sanitizedScalars.append(char) + sanitizedScalars.append("_") + if let entityName = Self.specialCharsMap[scalar] { + for char in entityName.unicodeScalars { + sanitizedScalars.append(char) + } + } else { + sanitizedScalars.append("x") + let hexString = String(scalar.value, radix: 16, uppercase: true) + for char in hexString.unicodeScalars { + sanitizedScalars.append(char) + } } + sanitizedScalars.append("_") continue } sanitizedScalars.append(outScalar) @@ -97,6 +102,12 @@ fileprivate extension String { let validString = String(UnicodeScalarView(sanitizedScalars)) + //Special case for a single underscore. + //We can't add it to the map as its a valid swift identifier in other cases. + if validString == "_" { + return "_underscore_" + } + guard Self.keywords.contains(validString) else { return validString } @@ -171,4 +182,40 @@ fileprivate extension String { "Protocol", "await", ] + + /// A map of ASCII printable characters to their HTML entity names. + private static let specialCharsMap: [Unicode.Scalar: String] = [ + " ": "space", + "!": "excl", + "\"": "quot", + "#": "num", + "$": "dollar", + "%": "percnt", + "&": "amp", + "'": "apos", + "(": "lpar", + ")": "rpar", + "*": "ast", + "+": "plus", + ",": "comma", + "-": "hyphen", + ".": "period", + "/": "sol", + ":": "colon", + ";": "semi", + "<": "lt", + "=": "equals", + ">": "gt", + "?": "quest", + "@": "commat", + "[": "lbrack", + "\\": "bsol", + "]": "rbrack", + "^": "hat", + "`": "grave", + "{": "lcub", + "|": "verbar", + "}": "rcub", + "~": "tilde", + ] } diff --git a/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md b/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md index 26ec1d51..a7386d76 100644 --- a/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md +++ b/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md @@ -23,30 +23,28 @@ The current approach for handling unsupported characters in property names is no ### Proposed solution -The proposed solution to the problem is to use hex encoding for any unsupported character in property names. Hex encoding is a simple and standard way of representing any character as a sequence of hexadecimal digits. For example, the asterisk (*) character is encoded as 2A, the space ( ) character is encoded as 20, and the slash (/) character is encoded as 2F. Hex encoding also has the added benefit of not introducing any additional special characters. +The proposed solution to the problem is to use a mix of replacement words and hex encoding for any unsupported character in property names. We replace characters in the printable ASCII range (20-7E) with a wordified representation inspired by the HTML entity names [here](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references). Hex encoding is a simple and standard way of representing any character as a sequence of hexadecimal digits. For example, the asterisk (*) character is encoded as 2A, the space ( ) character is encoded as 20, and the slash (/) character is encoded as 2F. Hex encoding also has the added benefit of not introducing any additional special characters. +In addition to this, we will be prefixing the hex codes with an `x` to indicate they are hex values. There are also delimiters added in the form of the underscore character to indicate a possible replacement. Some examples, yaml | swift -- | -- -a b | a20b -a*b | a2Ab +a b | a_space_b +a*b | a_ast_b ab_ | ab_ -ab* | ab2A -/ab | _2Fab -Hu&J_?kin | Hu26J_3Fkin +ab* | ab_ast_ +/ab | _sol_ab +Hu&J_?kin | Hu_amp_J__quest_kin +$nake… | \_dollar_nake_x2026\_ message | message This would mean, that for the users of the generator, a future version of the generator might produce different names that what it currently produces right now and should be ready to make those changes before upgrading to this version. ### Detailed design -The implementation for this is quite simple as you can see in https://github.com/apple/swift-openapi-generator/pull/89, we just made changes to the substitution logic where it used to substitute with `_`. We now add an additional encoding to the special character before substituting it. Contributors should be aware of this change and should review the places where they use this extension and evaluate if its suitable for them with this change. +The implementation for this is quite simple as you can see in https://github.com/apple/swift-openapi-generator/pull/89, we just made changes to the substitution logic where it used to substitute with `_`. We have added an additional encoding step to the special character before substituting it. Contributors should be aware of this change and should review the places where they use this extension and evaluate if its suitable for them with this change. ### API stability This is an API breaking change, as it will produce different symbol names than before. Other components such as the runtime and transports should not have any impacts. - -### Future directions - -The encoding strategy is open for further discussion. As a starting point, we have chosen the most simplest encoding format of hex. One of the reasons for this the hex encoding adds quite arbitrary symbols to the property name, which is not ideal. We could go towards a middle of the road approach where we have wordified versions of the special characters which we can map to. For example `a+b` can be `aplusb` or `a_plus_b` to add some kind of delimiter to specify the replaced portion. From f099857b7a7877b5420301a20b00a1bb957e6e88 Mon Sep 17 00:00:00 2001 From: Denil Chungath <95201442+denil-ct@users.noreply.github.com> Date: Wed, 28 Jun 2023 15:05:09 +0530 Subject: [PATCH 4/8] Fix typos --- Sources/_OpenAPIGeneratorCore/Extensions/String.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift index 4f63694d..a90f452f 100644 --- a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift +++ b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift @@ -58,8 +58,8 @@ fileprivate extension String { /// /// For example, the string `$nake…` would be returned as `_dollar_nake_x2026_`, because /// both the dollar and ellipsis sign are not valid characters in a Swift identifier. - /// So, it replaces such characters with their html enity equivalents or unicode hex representation, - /// in case its not present in the `specialCharsMap`. It marks this replacement with `_` as a delimiter. + /// So, it replaces such characters with their html entity equivalents or unicode hex representation, + /// in case it's not present in the `specialCharsMap`. It marks this replacement with `_` as a delimiter. /// /// In addition to replacing illegal characters, it also /// ensures that the identifier starts with a letter and not a number. From b03e4bbf98850114ac249236c17bc4fb03dbdc6b Mon Sep 17 00:00:00 2001 From: Denil Chungath Date: Wed, 28 Jun 2023 23:04:19 +0530 Subject: [PATCH 5/8] Split out proposal --- .../Documentation.docc/Proposals/SOAR-0001.md | 50 ------------------- 1 file changed, 50 deletions(-) delete mode 100644 Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md diff --git a/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md b/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md deleted file mode 100644 index a7386d76..00000000 --- a/Sources/swift-openapi-generator/Documentation.docc/Proposals/SOAR-0001.md +++ /dev/null @@ -1,50 +0,0 @@ -# SOAR-0001 - -Encoding for Property Names - -## Overview - -- Proposal: SOAR-0001 -- Author(s): [Denil](https://github.com/denil-ct) -- Status: **Awaiting Review** -- Issue: https://github.com/apple/swift-openapi-generator/issues/21 -- Implementation: - - https://github.com/apple/swift-openapi-generator/pull/89 -- Affected components: - - generator - -### Introduction - -The goal of this proposal is to improve the way we handle unsupported characters in property names when generating code from specs. Currently, we use a block list approach, replacing offending characters with `_` which can cause name conflicts. By encoding the offending character we create unique and valid property names. This will avoid name collisions and ensure consistent code generation. - -### Motivation - -The current approach for handling unsupported characters in property names is not robust and can lead to unexpected and undesirable outcomes. For example, if there are two properties, `a_b` and `a b`, with the current implementation, this will result in the same generated property `a_b` for both, which would create a conflict. It can also result in loss of information or meaning from the original specification. Therefore, we need a better solution that can handle any unsupported character in a consistent and reliable way, without compromising the quality and functionality of the code. - -### Proposed solution - -The proposed solution to the problem is to use a mix of replacement words and hex encoding for any unsupported character in property names. We replace characters in the printable ASCII range (20-7E) with a wordified representation inspired by the HTML entity names [here](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references). Hex encoding is a simple and standard way of representing any character as a sequence of hexadecimal digits. For example, the asterisk (*) character is encoded as 2A, the space ( ) character is encoded as 20, and the slash (/) character is encoded as 2F. Hex encoding also has the added benefit of not introducing any additional special characters. -In addition to this, we will be prefixing the hex codes with an `x` to indicate they are hex values. There are also delimiters added in the form of the underscore character to indicate a possible replacement. - -Some examples, - -yaml | swift --- | -- -a b | a_space_b -a*b | a_ast_b -ab_ | ab_ -ab* | ab_ast_ -/ab | _sol_ab -Hu&J_?kin | Hu_amp_J__quest_kin -$nake… | \_dollar_nake_x2026\_ -message | message - -This would mean, that for the users of the generator, a future version of the generator might produce different names that what it currently produces right now and should be ready to make those changes before upgrading to this version. - -### Detailed design - -The implementation for this is quite simple as you can see in https://github.com/apple/swift-openapi-generator/pull/89, we just made changes to the substitution logic where it used to substitute with `_`. We have added an additional encoding step to the special character before substituting it. Contributors should be aware of this change and should review the places where they use this extension and evaluate if its suitable for them with this change. - -### API stability - -This is an API breaking change, as it will produce different symbol names than before. Other components such as the runtime and transports should not have any impacts. From 05031b15236ea0bef50a5153170b13c3e70e7b8d Mon Sep 17 00:00:00 2001 From: Denil Chungath Date: Tue, 1 Aug 2023 00:04:05 +0530 Subject: [PATCH 6/8] Move to new location --- .../Extensions/String.swift | 165 ------------------ .../CommonTranslations/SwiftSafeNames.swift | 149 +++++++++------- 2 files changed, 90 insertions(+), 224 deletions(-) diff --git a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift index debae1b5..14c4235b 100644 --- a/Sources/_OpenAPIGeneratorCore/Extensions/String.swift +++ b/Sources/_OpenAPIGeneratorCore/Extensions/String.swift @@ -40,169 +40,4 @@ fileprivate extension String { with: transformation(self[firstLetterIndex]) ) } - - /// Returns a string sanitized to be usable as a Swift identifier. - /// - /// For example, the string `$nake…` would be returned as `_dollar_nake_x2026_`, because - /// both the dollar and ellipsis sign are not valid characters in a Swift identifier. - /// So, it replaces such characters with their html entity equivalents or unicode hex representation, - /// in case it's not present in the `specialCharsMap`. It marks this replacement with `_` as a delimiter. - /// - /// In addition to replacing illegal characters, it also - /// ensures that the identifier starts with a letter and not a number. - var sanitizedForSwiftCode: String { - guard !isEmpty else { - return "_empty" - } - - let firstCharSet: CharacterSet = .letters.union(.init(charactersIn: "_")) - let numbers: CharacterSet = .decimalDigits - let otherCharSet: CharacterSet = .alphanumerics.union(.init(charactersIn: "_")) - - var sanitizedScalars: [Unicode.Scalar] = [] - for (index, scalar) in unicodeScalars.enumerated() { - let allowedSet = index == 0 ? firstCharSet : otherCharSet - let outScalar: Unicode.Scalar - if allowedSet.contains(scalar) { - outScalar = scalar - } else if index == 0 && numbers.contains(scalar) { - sanitizedScalars.append("_") - outScalar = scalar - } else { - sanitizedScalars.append("_") - if let entityName = Self.specialCharsMap[scalar] { - for char in entityName.unicodeScalars { - sanitizedScalars.append(char) - } - } else { - sanitizedScalars.append("x") - let hexString = String(scalar.value, radix: 16, uppercase: true) - for char in hexString.unicodeScalars { - sanitizedScalars.append(char) - } - } - sanitizedScalars.append("_") - continue - } - sanitizedScalars.append(outScalar) - } - - let validString = String(UnicodeScalarView(sanitizedScalars)) - - //Special case for a single underscore. - //We can't add it to the map as its a valid swift identifier in other cases. - if validString == "_" { - return "_underscore_" - } - - guard Self.keywords.contains(validString) else { - return validString - } - return "_\(validString)" - } - - /// A list of Swift keywords. - /// - /// Copied from SwiftSyntax/TokenKind.swift - private static let keywords: Set = [ - "associatedtype", - "class", - "deinit", - "enum", - "extension", - "func", - "import", - "init", - "inout", - "let", - "operator", - "precedencegroup", - "protocol", - "struct", - "subscript", - "typealias", - "var", - "fileprivate", - "internal", - "private", - "public", - "static", - "defer", - "if", - "guard", - "do", - "repeat", - "else", - "for", - "in", - "while", - "return", - "break", - "continue", - "fallthrough", - "switch", - "case", - "default", - "where", - "catch", - "throw", - "as", - "Any", - "false", - "is", - "nil", - "rethrows", - "super", - "self", - "Self", - "true", - "try", - "throws", - "yield", - "String", - "Error", - "Int", - "Bool", - "Array", - "Type", - "type", - "Protocol", - "await", - ] - - /// A map of ASCII printable characters to their HTML entity names. - private static let specialCharsMap: [Unicode.Scalar: String] = [ - " ": "space", - "!": "excl", - "\"": "quot", - "#": "num", - "$": "dollar", - "%": "percnt", - "&": "amp", - "'": "apos", - "(": "lpar", - ")": "rpar", - "*": "ast", - "+": "plus", - ",": "comma", - "-": "hyphen", - ".": "period", - "/": "sol", - ":": "colon", - ";": "semi", - "<": "lt", - "=": "equals", - ">": "gt", - "?": "quest", - "@": "commat", - "[": "lbrack", - "\\": "bsol", - "]": "rbrack", - "^": "hat", - "`": "grave", - "{": "lcub", - "|": "verbar", - "}": "rcub", - "~": "tilde", - ] } diff --git a/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift b/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift index 3b89f7f6..ab85445a 100644 --- a/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift +++ b/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift @@ -74,11 +74,62 @@ fileprivate extension String { /// /// See the proposal SOAR-0001 for details. /// - /// In addition to replacing illegal characters with an underscores, also + /// For example, the string `$nake…` would be returned as `_dollar_nake_x2026_`, because + /// both the dollar and ellipsis sign are not valid characters in a Swift identifier. + /// So, it replaces such characters with their html entity equivalents or unicode hex representation, + /// in case it's not present in the `specialCharsMap`. It marks this replacement with `_` as a delimiter. + /// + /// In addition to replacing illegal characters, it also /// ensures that the identifier starts with a letter and not a number. var proposedSafeForSwiftCode: String { - // TODO: New logic proposed in SOAR-0001 goes here. - return "" + guard !isEmpty else { + return "_empty" + } + + let firstCharSet: CharacterSet = .letters.union(.init(charactersIn: "_")) + let numbers: CharacterSet = .decimalDigits + let otherCharSet: CharacterSet = .alphanumerics.union(.init(charactersIn: "_")) + + var sanitizedScalars: [Unicode.Scalar] = [] + for (index, scalar) in unicodeScalars.enumerated() { + let allowedSet = index == 0 ? firstCharSet : otherCharSet + let outScalar: Unicode.Scalar + if allowedSet.contains(scalar) { + outScalar = scalar + } else if index == 0 && numbers.contains(scalar) { + sanitizedScalars.append("_") + outScalar = scalar + } else { + sanitizedScalars.append("_") + if let entityName = Self.specialCharsMap[scalar] { + for char in entityName.unicodeScalars { + sanitizedScalars.append(char) + } + } else { + sanitizedScalars.append("x") + let hexString = String(scalar.value, radix: 16, uppercase: true) + for char in hexString.unicodeScalars { + sanitizedScalars.append(char) + } + } + sanitizedScalars.append("_") + continue + } + sanitizedScalars.append(outScalar) + } + + let validString = String(UnicodeScalarView(sanitizedScalars)) + + //Special case for a single underscore. + //We can't add it to the map as its a valid swift identifier in other cases. + if validString == "_" { + return "_underscore_" + } + + guard Self.keywords.contains(validString) else { + return validString + } + return "_\(validString)" } /// A list of Swift keywords. @@ -138,62 +189,6 @@ fileprivate extension String { "true", "try", "throws", - "__FILE__", - "__LINE__", - "__COLUMN__", - "__FUNCTION__", - "__DSO_HANDLE__", - "_", - "(", - ")", - "{", - "}", - "[", - "]", - "<", - ">", - ".", - ".", - ",", - "...", - ":", - ";", - "=", - "@", - "#", - "&", - "->", - "`", - "\\", - "!", - "?", - "?", - "\"", - "\'", - "\"\"\"", - "#keyPath", - "#line", - "#selector", - "#file", - "#fileID", - "#filePath", - "#column", - "#function", - "#dsohandle", - "#assert", - "#sourceLocation", - "#warning", - "#error", - "#if", - "#else", - "#elseif", - "#endif", - "#available", - "#unavailable", - "#fileLiteral", - "#imageLiteral", - "#colorLiteral", - ")", "yield", "String", "Error", @@ -205,4 +200,40 @@ fileprivate extension String { "Protocol", "await", ] + + /// A map of ASCII printable characters to their HTML entity names. Used to reduce collisions in generated names. + private static let specialCharsMap: [Unicode.Scalar: String] = [ + " ": "space", + "!": "excl", + "\"": "quot", + "#": "num", + "$": "dollar", + "%": "percnt", + "&": "amp", + "'": "apos", + "(": "lpar", + ")": "rpar", + "*": "ast", + "+": "plus", + ",": "comma", + "-": "hyphen", + ".": "period", + "/": "sol", + ":": "colon", + ";": "semi", + "<": "lt", + "=": "equals", + ">": "gt", + "?": "quest", + "@": "commat", + "[": "lbrack", + "\\": "bsol", + "]": "rbrack", + "^": "hat", + "`": "grave", + "{": "lcub", + "|": "verbar", + "}": "rcub", + "~": "tilde", + ] } From d8d9f8b5bf8322cc437a32469c04f4afd65ef928 Mon Sep 17 00:00:00 2001 From: Denil Chungath Date: Wed, 2 Aug 2023 00:08:59 +0530 Subject: [PATCH 7/8] fix soundness --- .../CommonTranslations/SwiftSafeNames.swift | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift b/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift index ab85445a..22769cb9 100644 --- a/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift +++ b/Sources/_OpenAPIGeneratorCore/Translator/CommonTranslations/SwiftSafeNames.swift @@ -85,11 +85,11 @@ fileprivate extension String { guard !isEmpty else { return "_empty" } - + let firstCharSet: CharacterSet = .letters.union(.init(charactersIn: "_")) let numbers: CharacterSet = .decimalDigits let otherCharSet: CharacterSet = .alphanumerics.union(.init(charactersIn: "_")) - + var sanitizedScalars: [Unicode.Scalar] = [] for (index, scalar) in unicodeScalars.enumerated() { let allowedSet = index == 0 ? firstCharSet : otherCharSet @@ -117,15 +117,15 @@ fileprivate extension String { } sanitizedScalars.append(outScalar) } - + let validString = String(UnicodeScalarView(sanitizedScalars)) - + //Special case for a single underscore. //We can't add it to the map as its a valid swift identifier in other cases. if validString == "_" { return "_underscore_" } - + guard Self.keywords.contains(validString) else { return validString } @@ -200,7 +200,7 @@ fileprivate extension String { "Protocol", "await", ] - + /// A map of ASCII printable characters to their HTML entity names. Used to reduce collisions in generated names. private static let specialCharsMap: [Unicode.Scalar: String] = [ " ": "space", From 33973c34b12bfdf02c81bbf0362584f5b142024d Mon Sep 17 00:00:00 2001 From: Denil Chungath Date: Wed, 2 Aug 2023 01:34:55 +0530 Subject: [PATCH 8/8] Add tests --- .../Extensions/Test_String.swift | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/Tests/OpenAPIGeneratorCoreTests/Extensions/Test_String.swift b/Tests/OpenAPIGeneratorCoreTests/Extensions/Test_String.swift index c680ea19..376a8681 100644 --- a/Tests/OpenAPIGeneratorCoreTests/Extensions/Test_String.swift +++ b/Tests/OpenAPIGeneratorCoreTests/Extensions/Test_String.swift @@ -39,4 +39,55 @@ final class Test_String: Test_Core { XCTAssertEqual(asSwiftSafeName(input), sanitized) } } + + func testAsProposedSwiftName() { + let cases: [(String, String)] = [ + // Simple + ("foo", "foo"), + + // Starts with a number + ("3foo", "_3foo"), + + // Keyword + ("default", "_default"), + + // Reserved name + ("Type", "_Type"), + + // Empty string + ("", "_empty"), + + // Special Char in middle + ("inv@lidName", "inv_commat_lidName"), + + // Special Char in first position + ("!nvalidName", "_excl_nvalidName"), + + // Special Char in last position + ("invalidNam?", "invalidNam_quest_"), + + // Valid underscore case + ("__user", "__user"), + + // Invalid underscore case + ("_", "_underscore_"), + + // Special character mixed with character not in map + ("$nake…", "_dollar_nake_x2026_"), + + // Only special character + ("$", "_dollar_"), + + // Only special character not in map + ("……", "_x2026__x2026_"), + + // Non Latin Characters + ("$مرحبا", "_dollar_مرحبا"), + ] + let translator = makeTranslator(featureFlags: [.proposal0001]) + let asSwiftSafeName: (String) -> String = translator.swiftSafeName + for (input, sanitized) in cases { + XCTAssertEqual(asSwiftSafeName(input), sanitized) + } + } }