From 0a06ab618b808d3f5454ce2b15921fa1f72349d7 Mon Sep 17 00:00:00 2001 From: Yann Rouillard Date: Mon, 5 Feb 2024 11:25:39 +0100 Subject: [PATCH 1/2] fix: improve avro library detection So far, we checked for the presence of the library in the target schema class file, but this library could be loaded indirectly and not explicitly listed. As a fallback, we now: - look for the library string without the import keyword, to allow specifying it using the schemas static annotation if needed (for instance '@schema(library="com.sksamuel.avro4s")') - search for the library in the build.sbt as well. --- kafka/check-local-schemas.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kafka/check-local-schemas.sh b/kafka/check-local-schemas.sh index 752e155..293605e 100755 --- a/kafka/check-local-schemas.sh +++ b/kafka/check-local-schemas.sh @@ -61,12 +61,27 @@ find_schema_class() { echo "${schema_package}.${schema_class_name}" } +is_library_used() { + local library="$1" candidate_class_file="$2" + + # if the library is not directly found in the candidate class file + # we fallback on checking the build.sbt file itself + # This doesn't fully protect against from indirect library loading + # but it's a good enough heuristic for now + for candidate in "${candidate_class_file}" build.sbt; do + if grep -q -E "[^#]*${library}" "${candidate}"; then + return 0 + fi + done + return 1 +} + find_avro_library() { local schema_class_file="$1" - if grep -q "import com.sksamuel.avro4s" "${schema_class_file}"; then + if is_library_used "com.sksamuel.avro4s" "${schema_class_file}"; then echo "avro4s" - elif grep -q "import vulcan" "${schema_class_file}"; then + elif is_library_used "vulcan" "${schema_class_file}"; then echo "vulcan" else error "Could not find any avro library import in ${schema_class_file}" From 60071b476ea126fe24d4e8882bce36ce9347f1b5 Mon Sep 17 00:00:00 2001 From: Yann Rouillard Date: Mon, 5 Feb 2024 11:48:25 +0100 Subject: [PATCH 2/2] 
fix: stop requiring com.lihaoyi libraries for schema generation Instead of requiring the project to load the com.lihaoyi.upickle and com.lihaoyi.os-lib libraries in the project, we now dynamically add them to the dependencies when running the schema generation command. It is still not bulletproof as it could lead to version conflicts with existing libraries, but it will be good enough for now :-) --- kafka/check-local-schemas.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kafka/check-local-schemas.sh b/kafka/check-local-schemas.sh index 293605e..f9c9eb0 100755 --- a/kafka/check-local-schemas.sh +++ b/kafka/check-local-schemas.sh @@ -116,6 +116,10 @@ run_schema_generator_code() { # to the existing source code, so we can run our generator code alongside the existing code # We need that as the generator code import the schema class sbt_command+="set Compile / unmanagedSourceDirectories += file(\"${generator_source_folder}\");" + # Dynamically add the required dependencies to the build.sbt file + sbt_command+="set libraryDependencies += \"com.lihaoyi\" %% \"upickle\" % \"3.1.3\";" + sbt_command+="set libraryDependencies += \"com.lihaoyi\" %% \"os-lib\" % \"0.9.1\";" + sbt_command+="runMain kp_pre_commit_hooks.generateSchemaFile ${target_schema_file}" sbt -batch -error "${sbt_command}"