Skip to content

Commit

Permalink
Handle keyword Self after stripping enum type prefix (#998)
Browse files Browse the repository at this point in the history
* Address #997: handle keyword `Self` after stripping enum type prefix

* create sanitize_identifier

* add to_snake back (i did it in two commits to not mess up the git blame)

* small fixes

* add test_sanitize_identifier

* remove unnecessary comment

* combine test_strip_enum_prefix and test_strip_enum_prefix_resulting_in_keyword

* update enum_keyword_variant.proto

* maybe improve docs of strip_enum_prefix?
  • Loading branch information
MixusMinimax authored Apr 9, 2024
1 parent 2e64d6f commit 963e942
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 52 deletions.
36 changes: 1 addition & 35 deletions prost-build/src/code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use prost_types::{

use crate::ast::{Comments, Method, Service};
use crate::extern_paths::ExternPaths;
use crate::ident::{to_snake, to_upper_camel};
use crate::ident::{strip_enum_prefix, to_snake, to_upper_camel};
use crate::message_graph::MessageGraph;
use crate::{BytesType, Config, MapType};

Expand Down Expand Up @@ -1187,31 +1187,6 @@ fn unescape_c_escape_string(s: &str) -> Vec<u8> {
dst
}

/// Strip an enum's type name from the prefix of an enum value.
///
/// This function assumes that both have been formatted to Rust's
/// upper camel case naming conventions.
///
/// It also tries to handle cases where the stripped name would be
/// invalid - for example, if it were to begin with a number.
fn strip_enum_prefix(prefix: &str, name: &str) -> String {
let stripped = name.strip_prefix(prefix).unwrap_or(name);

// If the next character after the stripped prefix is not
// uppercase, then it means that we didn't have a true prefix -
// for example, "Foo" should not be stripped from "Foobar".
if stripped
.chars()
.next()
.map(char::is_uppercase)
.unwrap_or(false)
{
stripped.to_owned()
} else {
name.to_owned()
}
}

struct EnumVariantMapping<'a> {
path_idx: usize,
proto_name: &'a str,
Expand Down Expand Up @@ -1323,13 +1298,4 @@ mod tests {
fn test_unescape_c_escape_string_incomplete_hex_value() {
unescape_c_escape_string(r#"\x1"#);
}

#[test]
fn test_strip_enum_prefix() {
assert_eq!(strip_enum_prefix("Foo", "FooBar"), "Bar");
assert_eq!(strip_enum_prefix("Foo", "Foobar"), "Foobar");
assert_eq!(strip_enum_prefix("Foo", "Foo"), "Foo");
assert_eq!(strip_enum_prefix("Foo", "Bar"), "Bar");
assert_eq!(strip_enum_prefix("Foo", "Foo1"), "Foo1");
}
}
132 changes: 115 additions & 17 deletions prost-build/src/ident.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@

use heck::{ToSnakeCase, ToUpperCamelCase};

/// Converts a `camelCase` or `SCREAMING_SNAKE_CASE` identifier to a `lower_snake` case Rust field
/// identifier.
pub fn to_snake(s: &str) -> String {
let mut ident = s.to_snake_case();

pub fn sanitize_identifier(s: impl AsRef<str>) -> String {
let ident = s.as_ref();
// Use a raw identifier if the identifier matches a Rust keyword:
// https://doc.rust-lang.org/reference/keywords.html.
match ident.as_str() {
match ident {
// 2015 strict keywords.
| "as" | "break" | "const" | "continue" | "else" | "enum" | "false"
| "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | "match" | "mod" | "move" | "mut"
Expand All @@ -21,23 +18,52 @@ pub fn to_snake(s: &str) -> String {
| "abstract" | "become" | "box" | "do" | "final" | "macro" | "override" | "priv" | "typeof"
| "unsized" | "virtual" | "yield"
// 2018 reserved keywords.
| "async" | "await" | "try" => ident.insert_str(0, "r#"),
| "async" | "await" | "try" => format!("r#{}", ident),
// the following keywords are not supported as raw identifiers and are therefore suffixed with an underscore.
"self" | "super" | "extern" | "crate" => ident += "_",
_ => (),
"_" | "super" | "self" | "Self" | "extern" | "crate" => format!("{}_", ident),
// the following keywords begin with a number and are therefore prefixed with an underscore.
s if s.starts_with(|c: char| c.is_numeric()) => format!("_{}", ident),
_ => ident.to_string(),
}
ident
}

/// Converts a `camelCase` or `SCREAMING_SNAKE_CASE` identifier to a `lower_snake` case Rust field
/// identifier.
pub fn to_snake(s: impl AsRef<str>) -> String {
sanitize_identifier(s.as_ref().to_snake_case())
}

/// Converts a `snake_case` identifier to an `UpperCamel` case Rust type identifier.
pub fn to_upper_camel(s: &str) -> String {
let mut ident = s.to_upper_camel_case();
pub fn to_upper_camel(s: impl AsRef<str>) -> String {
sanitize_identifier(s.as_ref().to_upper_camel_case())
}

// Suffix an underscore for the `Self` Rust keyword as it is not allowed as raw identifier.
if ident == "Self" {
ident += "_";
}
ident
/// Strip an enum's type name from the prefix of an enum value.
///
/// This function assumes that both have been formatted to Rust's
/// upper camel case naming conventions.
///
/// It also tries to handle cases where the stripped name would be
/// invalid - for example, if it were to begin with a number.
///
/// If the stripped name is `"Self"`, it will be replaced with `"Self_"`
pub fn strip_enum_prefix(prefix: &str, name: &str) -> String {
let stripped = name.strip_prefix(prefix).unwrap_or(name);

// If the next character after the stripped prefix is not
// uppercase, then it means that we didn't have a true prefix -
// for example, "Foo" should not be stripped from "Foobar".
let stripped = if stripped
.chars()
.next()
.map(char::is_uppercase)
.unwrap_or(false)
{
stripped
} else {
name
};
sanitize_identifier(stripped)
}

#[cfg(test)]
Expand All @@ -47,6 +73,67 @@ mod tests {

use super::*;

#[test]
fn test_sanitize_identifier() {
assert_eq!(sanitize_identifier("as"), "r#as");
assert_eq!(sanitize_identifier("break"), "r#break");
assert_eq!(sanitize_identifier("const"), "r#const");
assert_eq!(sanitize_identifier("continue"), "r#continue");
assert_eq!(sanitize_identifier("else"), "r#else");
assert_eq!(sanitize_identifier("enum"), "r#enum");
assert_eq!(sanitize_identifier("false"), "r#false");
assert_eq!(sanitize_identifier("fn"), "r#fn");
assert_eq!(sanitize_identifier("for"), "r#for");
assert_eq!(sanitize_identifier("if"), "r#if");
assert_eq!(sanitize_identifier("impl"), "r#impl");
assert_eq!(sanitize_identifier("in"), "r#in");
assert_eq!(sanitize_identifier("let"), "r#let");
assert_eq!(sanitize_identifier("loop"), "r#loop");
assert_eq!(sanitize_identifier("match"), "r#match");
assert_eq!(sanitize_identifier("mod"), "r#mod");
assert_eq!(sanitize_identifier("move"), "r#move");
assert_eq!(sanitize_identifier("mut"), "r#mut");
assert_eq!(sanitize_identifier("pub"), "r#pub");
assert_eq!(sanitize_identifier("ref"), "r#ref");
assert_eq!(sanitize_identifier("return"), "r#return");
assert_eq!(sanitize_identifier("static"), "r#static");
assert_eq!(sanitize_identifier("struct"), "r#struct");
assert_eq!(sanitize_identifier("trait"), "r#trait");
assert_eq!(sanitize_identifier("true"), "r#true");
assert_eq!(sanitize_identifier("type"), "r#type");
assert_eq!(sanitize_identifier("unsafe"), "r#unsafe");
assert_eq!(sanitize_identifier("use"), "r#use");
assert_eq!(sanitize_identifier("where"), "r#where");
assert_eq!(sanitize_identifier("while"), "r#while");
assert_eq!(sanitize_identifier("dyn"), "r#dyn");
assert_eq!(sanitize_identifier("abstract"), "r#abstract");
assert_eq!(sanitize_identifier("become"), "r#become");
assert_eq!(sanitize_identifier("box"), "r#box");
assert_eq!(sanitize_identifier("do"), "r#do");
assert_eq!(sanitize_identifier("final"), "r#final");
assert_eq!(sanitize_identifier("macro"), "r#macro");
assert_eq!(sanitize_identifier("override"), "r#override");
assert_eq!(sanitize_identifier("priv"), "r#priv");
assert_eq!(sanitize_identifier("typeof"), "r#typeof");
assert_eq!(sanitize_identifier("unsized"), "r#unsized");
assert_eq!(sanitize_identifier("virtual"), "r#virtual");
assert_eq!(sanitize_identifier("yield"), "r#yield");
assert_eq!(sanitize_identifier("async"), "r#async");
assert_eq!(sanitize_identifier("await"), "r#await");
assert_eq!(sanitize_identifier("try"), "r#try");
assert_eq!(sanitize_identifier("self"), "self_");
assert_eq!(sanitize_identifier("super"), "super_");
assert_eq!(sanitize_identifier("extern"), "extern_");
assert_eq!(sanitize_identifier("crate"), "crate_");
assert_eq!(sanitize_identifier("foo"), "foo");
assert_eq!(sanitize_identifier("bar"), "bar");
assert_eq!(sanitize_identifier("baz"), "baz");
assert_eq!(sanitize_identifier("0foo"), "_0foo");
assert_eq!(sanitize_identifier("foo0"), "foo0");
assert_eq!(sanitize_identifier("foo_"), "foo_");
assert_eq!(sanitize_identifier("_foo"), "_foo");
}

#[test]
fn test_to_snake() {
assert_eq!("foo_bar", &to_snake("FooBar"));
Expand Down Expand Up @@ -158,4 +245,15 @@ mod tests {
assert_eq!("FuzzBuster", &to_upper_camel("FuzzBuster"));
assert_eq!("Self_", &to_upper_camel("self"));
}

#[test]
fn test_strip_enum_prefix() {
assert_eq!(strip_enum_prefix("Foo", "FooBar"), "Bar");
assert_eq!(strip_enum_prefix("Foo", "Foobar"), "Foobar");
assert_eq!(strip_enum_prefix("Foo", "Foo"), "Foo");
assert_eq!(strip_enum_prefix("Foo", "Bar"), "Bar");
assert_eq!(strip_enum_prefix("Foo", "Foo1"), "Foo1");
assert_eq!(strip_enum_prefix("Foo", "FooBar"), "Bar");
assert_eq!(strip_enum_prefix("Foo", "FooSelf"), "Self_");
}
}
4 changes: 4 additions & 0 deletions tests/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ fn main() {
.compile_protos(&[src.join("default_enum_value.proto")], includes)
.unwrap();

config
.compile_protos(&[src.join("enum_keyword_variant.proto")], includes)
.unwrap();

config
.compile_protos(&[src.join("groups.proto")], includes)
.unwrap();
Expand Down
23 changes: 23 additions & 0 deletions tests/src/enum_keyword_variant.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
syntax = "proto3";

package enum_keyword_variant;

enum Feeding {
FEEDING_UNSPECIFIED = 0;
FEEDING_ASSISTED = 1;
// Careful: code generation resulted in "Self". Now it is "Self_".
FEEDING_SELF = 2;
FEEDING_ELSE = 3;
}

enum Grooming {
UNSPECIFIED = 0;
ASSISTED = 1;
SELF = 2;
ELSE = 3;
}

enum Number {
NUMBER_UNSPECIFIED = 0;
NUMBER_1 = 1;
}
6 changes: 6 additions & 0 deletions tests/src/enum_keyword_variant.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
mod enum_keyword_variant {
include!(concat!(env!("OUT_DIR"), "/enum_keyword_variant.rs"));
}

#[test]
fn dummy() {}
2 changes: 2 additions & 0 deletions tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ mod debug;
#[cfg(test)]
mod deprecated_field;
#[cfg(test)]
mod enum_keyword_variant;
#[cfg(test)]
mod generic_derive;
#[cfg(test)]
mod message_encoding;
Expand Down

0 comments on commit 963e942

Please sign in to comment.