Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use a consistent prefix for auto-generated names #2783

Merged
merged 29 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
448d135
- only process auto generated type names
laststylebender14 Sep 2, 2024
b2543c9
- add unit tests
laststylebender14 Sep 2, 2024
72ec479
- lint changes
laststylebender14 Sep 2, 2024
3e2abc1
- handle grpc case in is_auto_generated
laststylebender14 Sep 2, 2024
e5df438
- lint changes
laststylebender14 Sep 2, 2024
717c017
- clean up of prompt
laststylebender14 Sep 2, 2024
c841a51
- since we are already storing used type names, we can store the mapp…
laststylebender14 Sep 2, 2024
35aafcd
- use GEN__ as prefix for auto generated names.
laststylebender14 Sep 4, 2024
011fbe7
- update snaps: prefix auto generated names with GEN__
laststylebender14 Sep 4, 2024
c76ea2a
- add GEN__ as prefix
laststylebender14 Sep 4, 2024
0a34ef3
Merge branch 'main' into feat/process-only-generated-type-names
laststylebender14 Sep 4, 2024
6dc30ed
- lint fixes
laststylebender14 Sep 4, 2024
c4368dc
- make func inline
laststylebender14 Sep 4, 2024
c6084d1
- add GEN__ prefix to auto generated types and fields names in root t…
laststylebender14 Sep 6, 2024
ca80147
- add comment explaining the change.
laststylebender14 Sep 9, 2024
bacd64c
- if output type is scalar then skip the `GEN__` prefix.
laststylebender14 Sep 9, 2024
cff282d
- skip field names having `GEN__` as prefix
laststylebender14 Sep 9, 2024
8e999ca
- update snaps
laststylebender14 Sep 9, 2024
e439d5c
- lint fixes.
laststylebender14 Sep 9, 2024
0530142
Merge branch 'main' into feat/process-only-generated-type-names
laststylebender14 Sep 16, 2024
1f8a06a
- moved GEN__ to assert_type_names
laststylebender14 Sep 17, 2024
cb19a9f
- made PREFIX as static const
laststylebender14 Sep 17, 2024
77df7ba
- fix prompt
laststylebender14 Sep 17, 2024
5e7a1ac
- remove logs
laststylebender14 Sep 17, 2024
de354ff
- update snap
laststylebender14 Sep 17, 2024
622586b
update prompts
tusharmath Sep 21, 2024
4b16d10
Merge branch 'main' into feat/process-only-generated-type-names
tusharmath Sep 21, 2024
2682b05
add a sample generate.yml
tusharmath Sep 21, 2024
7987ec5
refactor: add prefix to arg names
tusharmath Sep 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 53 additions & 11 deletions src/cli/llm/infer_type_name.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
use std::collections::HashMap;

use genai::chat::{ChatMessage, ChatRequest, ChatResponse};
use indexmap::IndexSet;
use serde::{Deserialize, Serialize};
use serde_json::json;

use super::{Error, Result, Wizard};
use crate::core::config::Config;
use crate::core::Mustache;

const BASE_TEMPLATE: &str = include_str!("prompts/infer_type_name.md");

pub struct InferTypeName {
wizard: Wizard<Question, Answer>,
}
Expand All @@ -29,6 +32,8 @@

#[derive(Clone, Serialize)]
struct Question {
#[serde(skip_serializing_if = "IndexSet::is_empty")]
used_types: IndexSet<String>,
laststylebender14 marked this conversation as resolved.
Show resolved Hide resolved
fields: Vec<(String, String)>,
}

Expand All @@ -37,6 +42,7 @@

fn try_into(self) -> Result<ChatRequest> {
let input = serde_json::to_string_pretty(&Question {
used_types: IndexSet::default(),
fields: vec![
("id".to_string(), "String".to_string()),
("name".to_string(), "String".to_string()),
Expand All @@ -54,10 +60,10 @@
],
})?;

let template_str = include_str!("prompts/infer_type_name.md");
let template = Mustache::parse(template_str);
let template = Mustache::parse(BASE_TEMPLATE);

let context = json!({
"used_types": self.used_types,
"input": input,
"output": output,
});
Expand All @@ -66,7 +72,9 @@

Ok(ChatRequest::new(vec![
ChatMessage::system(rendered_prompt),
ChatMessage::user(serde_json::to_string(&self)?),
ChatMessage::user(serde_json::to_string(&json!({
"fields": &self.fields,
}))?),
]))
}
}
Expand All @@ -76,20 +84,35 @@
Self { wizard: Wizard::new(model, secret) }
}

/// All generated type names starts with "GEN__"
#[inline]
fn is_auto_generated(type_name: &str) -> bool {
laststylebender14 marked this conversation as resolved.
Show resolved Hide resolved
type_name.starts_with("GEN__")
laststylebender14 marked this conversation as resolved.
Show resolved Hide resolved
}

pub async fn generate(&mut self, config: &Config) -> Result<HashMap<String, String>> {
let mut new_name_mappings: HashMap<String, String> = HashMap::new();

// removed root type from types.
// Filter out root operation types and types with non-auto-generated names

Check warning on line 95 in src/cli/llm/infer_type_name.rs

View check run for this annotation

Codecov / codecov/patch

src/cli/llm/infer_type_name.rs#L95

Added line #L95 was not covered by tests
let types_to_be_processed = config
.types
.iter()
.filter(|(type_name, _)| !config.is_root_operation_type(type_name))
.filter(|(type_name, _)| {
!config.is_root_operation_type(type_name) && Self::is_auto_generated(type_name)
})

Check warning on line 101 in src/cli/llm/infer_type_name.rs

View check run for this annotation

Codecov / codecov/patch

src/cli/llm/infer_type_name.rs#L99-L101

Added lines #L99 - L101 were not covered by tests
.collect::<Vec<_>>();

let mut used_type_names = config
.types
.iter()
.filter(|(ty_name, _)| !Self::is_auto_generated(ty_name))
.map(|(ty_name, _)| ty_name.to_owned())
.collect::<IndexSet<_>>();

Check warning on line 110 in src/cli/llm/infer_type_name.rs

View check run for this annotation

Codecov / codecov/patch

src/cli/llm/infer_type_name.rs#L104-L110

Added lines #L104 - L110 were not covered by tests
let total = types_to_be_processed.len();
for (i, (type_name, type_)) in types_to_be_processed.into_iter().enumerate() {
// convert type to sdl format.
let question = Question {
used_types: used_type_names.clone(),

Check warning on line 115 in src/cli/llm/infer_type_name.rs

View check run for this annotation

Codecov / codecov/patch

src/cli/llm/infer_type_name.rs#L115

Added line #L115 was not covered by tests
fields: type_
.fields
.iter()
Expand All @@ -104,12 +127,11 @@
Ok(answer) => {
let name = &answer.suggestions.join(", ");
for name in answer.suggestions {
if config.types.contains_key(&name)
|| new_name_mappings.contains_key(&name)
{
if config.types.contains_key(&name) || used_type_names.contains(&name) {

Check warning on line 130 in src/cli/llm/infer_type_name.rs

View check run for this annotation

Codecov / codecov/patch

src/cli/llm/infer_type_name.rs#L130

Added line #L130 was not covered by tests
continue;
}
new_name_mappings.insert(name, type_name.to_owned());
used_type_names.insert(name.clone());
new_name_mappings.insert(type_name.to_owned(), name);

Check warning on line 134 in src/cli/llm/infer_type_name.rs

View check run for this annotation

Codecov / codecov/patch

src/cli/llm/infer_type_name.rs#L133-L134

Added lines #L133 - L134 were not covered by tests
break;
}
tracing::info!(
Expand Down Expand Up @@ -142,19 +164,22 @@
}
}

Ok(new_name_mappings.into_iter().map(|(k, v)| (v, k)).collect())
Ok(new_name_mappings)

Check warning on line 167 in src/cli/llm/infer_type_name.rs

View check run for this annotation

Codecov / codecov/patch

src/cli/llm/infer_type_name.rs#L167

Added line #L167 was not covered by tests
}
}

#[cfg(test)]
mod test {
use genai::chat::{ChatRequest, ChatResponse, MessageContent};
use indexmap::indexset;

use super::{Answer, Question};
use crate::cli::llm::InferTypeName;

#[test]
fn test_to_chat_request_conversion() {
let question = Question {
used_types: indexset! {"Profile".to_owned(), "Person".to_owned()},
fields: vec![
("id".to_string(), "String".to_string()),
("name".to_string(), "String".to_string()),
Expand All @@ -176,4 +201,21 @@
let answer = Answer::try_from(resp).unwrap();
insta::assert_debug_snapshot!(answer);
}

#[test]
fn test_is_auto_generated() {
assert!(InferTypeName::is_auto_generated("GEN__T1"));
assert!(InferTypeName::is_auto_generated("GEN__T1234"));
assert!(InferTypeName::is_auto_generated("GEN__M1"));
assert!(InferTypeName::is_auto_generated("GEN__M5678"));
assert!(InferTypeName::is_auto_generated("GEN__Some__Type"));

assert!(!InferTypeName::is_auto_generated("Some__Type"));
assert!(!InferTypeName::is_auto_generated("User"));
assert!(!InferTypeName::is_auto_generated("T123"));
assert!(!InferTypeName::is_auto_generated("M1"));
assert!(!InferTypeName::is_auto_generated(""));
assert!(!InferTypeName::is_auto_generated("123T"));
assert!(!InferTypeName::is_auto_generated("A1234"));
}
}
6 changes: 3 additions & 3 deletions src/cli/llm/prompts/infer_type_name.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
Given the sample schema of a GraphQL type, suggest 5 meaningful names for it.
The name should be concise and preferably a single word.
The name should be concise, preferably a single word and must not be represent in used_types list.
laststylebender14 marked this conversation as resolved.
Show resolved Hide resolved

used_types: {{used_types}}
laststylebender14 marked this conversation as resolved.
Show resolved Hide resolved

Example Input:
{{input}}
Expand All @@ -8,5 +10,3 @@ Example Output:
{{output}}

Ensure the output is in valid JSON format.

Do not add any additional text before or after the JSON.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ChatRequest {
ChatMessage {
role: System,
content: Text(
"Given the sample schema of a GraphQL type, suggest 5 meaningful names for it.\nThe name should be concise and preferably a single word.\n\nExample Input:\n{\n \"fields\": [\n [\n \"id\",\n \"String\"\n ],\n [\n \"name\",\n \"String\"\n ],\n [\n \"age\",\n \"Int\"\n ]\n ]\n}\n\nExample Output:\n{\n \"suggestions\": [\n \"Person\",\n \"Profile\",\n \"Member\",\n \"Individual\",\n \"Contact\"\n ]\n}\n\nEnsure the output is in valid JSON format.\n\nDo not add any additional text before or after the JSON.\n",
"Given the sample schema of a GraphQL type, suggest 5 meaningful names for it.\nThe name should be concise, preferably a single word and must not be represent in used_types list.\n\nused_types: [\"Profile\",\"Person\"]\n\nExample Input:\n{\n \"fields\": [\n [\n \"id\",\n \"String\"\n ],\n [\n \"name\",\n \"String\"\n ],\n [\n \"age\",\n \"Int\"\n ]\n ]\n}\n\nExample Output:\n{\n \"suggestions\": [\n \"Person\",\n \"Profile\",\n \"Member\",\n \"Individual\",\n \"Contact\"\n ]\n}\n\nEnsure the output is in valid JSON format.\n",
),
extra: None,
},
Expand Down
6 changes: 4 additions & 2 deletions src/core/config/transformer/improve_type_names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ impl<'a> CandidateGeneration<'a> {
fn generate(mut self) -> CandidateConvergence<'a> {
for (type_name, type_info) in self.config.types.iter() {
for (field_name, field_info) in type_info.fields.iter() {
if self.config.is_scalar(field_info.type_of.name()) {
// If field type is scalar then ignore type name inference.
if self.config.is_scalar(field_info.type_of.name())
|| field_name.starts_with("GEN__")
{
// If field type is scalar or auto generated then ignore type name inference.
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ schema @server @upstream {
query: Query
}

type M1 {
type GEN__M1 {
body: String
id: Int
is_verified: Boolean
t1: M1
t1: GEN__M1
userId: Int
}

type Query {
q1: M1
q2: M1
q1: GEN__M1
q2: GEN__M1
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ input Far {
tar: String
}

input M1 {
input GEN__M1 {
tar: String
}

type Query {
bar(input: M1): String @http(path: "/bar")
bar(input: GEN__M1): String @http(path: "/bar")
far(input: Far): String @http(path: "/far")
foo(input: M1): String @http(path: "/foo")
foo(input: GEN__M1): String @http(path: "/foo")
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
source: src/core/config/transformer/merge_types/type_merger.rs
expression: config.to_sdl()
---
interface M1 {
interface GEN__M1 {
a: Int
}

type C implements M1 {
type C implements GEN__M1 {
a: Int
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ schema @server @upstream {
query: Query
}

type M1 {
type GEN__M1 {
id: Int
name: JSON
}

type Query {
bar: M1
foo: M1
bar: GEN__M1
foo: GEN__M1
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ schema @server @upstream {
query: Query
}

type M1 {
type GEN__M1 {
f1: String
f2: Int
f3: Boolean
Expand All @@ -15,8 +15,8 @@ type M1 {
}

type Query {
q1: M1
q2: M1
q3: M1
q4: M1
q1: GEN__M1
q2: GEN__M1
q3: GEN__M1
q4: GEN__M1
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ schema @server @upstream(baseURL: "http://jsonplacheholder.typicode.com") {
query: Query
}

union FooBar = Foo | M1
union FooBar = Foo | GEN__M1

type Foo {
a: String
bar: String
foo: String
}

type M1 {
type GEN__M1 {
bar: String
}

Expand Down
26 changes: 17 additions & 9 deletions src/core/config/transformer/merge_types/type_merger.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::collections::HashSet;

use indexmap::{IndexMap, IndexSet};

use super::mergeable_types::MergeableTypes;
use super::similarity::Similarity;
Expand Down Expand Up @@ -31,25 +33,31 @@ impl Default for TypeMerger {

impl TypeMerger {
fn merger(&self, mut merge_counter: u32, mut config: Config) -> Config {
let mut type_to_merge_type_mapping = BTreeMap::new();
let mut similar_type_group_list: Vec<BTreeSet<String>> = vec![];
let mut type_to_merge_type_mapping = IndexMap::new();
let mut similar_type_group_list: Vec<IndexSet<String>> = vec![];
let mut visited_types = HashSet::new();
let mut i = 0;
let mut stat_gen = Similarity::new(&config);
let mergeable_types = MergeableTypes::new(&config, self.threshold);

// fixes the flaky tests.
let mut types = mergeable_types.iter().collect::<Vec<_>>();
types.sort();

// step 1: identify all the types that satisfies the thresh criteria and group
// them.
for type_name_1 in mergeable_types.iter() {
for type_name_1 in types.iter() {
let type_name_1 = type_name_1.as_str();
if let Some(type_info_1) = config.types.get(type_name_1) {
if visited_types.contains(type_name_1) {
continue;
}

let mut similar_type_set = BTreeSet::new();
let mut similar_type_set = IndexSet::new();
similar_type_set.insert(type_name_1.to_string());

for type_name_2 in mergeable_types.iter().skip(i + 1) {
for type_name_2 in types.iter().skip(i + 1) {
let type_name_2 = type_name_2.as_str();
if visited_types.contains(type_name_2)
|| !mergeable_types.mergeable(type_name_1, type_name_2)
{
Expand All @@ -58,7 +66,7 @@ impl TypeMerger {

if let Some(type_info_2) = config.types.get(type_name_2) {
let threshold = mergeable_types.get_threshold(type_name_1, type_name_2);
visited_types.insert(type_name_1.clone());
visited_types.insert(type_name_1.to_owned());
let is_similar = stat_gen
.similarity(
(type_name_1, type_info_1),
Expand All @@ -69,7 +77,7 @@ impl TypeMerger {

if let Ok(similar) = is_similar {
if similar {
visited_types.insert(type_name_2.clone());
visited_types.insert(type_name_2.to_owned());
similar_type_set.insert(type_name_2.to_owned());
}
}
Expand All @@ -89,7 +97,7 @@ impl TypeMerger {
// step 2: merge similar types into single merged type.
for same_types in similar_type_group_list {
let mut merged_into = Type::default();
let merged_type_name = format!("M{}", merge_counter);
let merged_type_name = format!("GEN__M{}", merge_counter);
let mut did_we_merge = false;
for type_name in same_types {
if let Some(type_) = config.types.get(type_name.as_str()) {
Expand Down
Loading
Loading