Skip to content

Commit

Permalink
Fix oppia#3290: Add support for generic regex pattern matching
Browse files Browse the repository at this point in the history
  • Loading branch information
Sparsh1212 committed Jun 14, 2021
1 parent 82d8ee4 commit 225a4fc
Show file tree
Hide file tree
Showing 8 changed files with 336 additions and 0 deletions.
24 changes: 24 additions & 0 deletions model/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,30 @@ java_lite_proto_library(
deps = [":voiceover_proto"],
)

# Proto definitions for the prohibited filename pattern checks, compiled from
# filename_pattern_validation_checks.proto.
proto_library(
name = "filename_pattern_validation_structure_proto",
srcs = ["src/main/proto/filename_pattern_validation_checks.proto"],
visibility = ["//visibility:public"],
)

# Java lite bindings generated from :filename_pattern_validation_structure_proto.
java_lite_proto_library(
name = "filename_pattern_validation_structure_java_proto_lite",
visibility = ["//visibility:public"],
deps = [":filename_pattern_validation_structure_proto"],
)

# Proto definitions for the prohibited file content checks, compiled from
# file_content_validation_checks.proto.
proto_library(
name = "file_content_validation_structure_proto",
srcs = ["src/main/proto/file_content_validation_checks.proto"],
visibility = ["//visibility:public"],
)

# Java lite bindings generated from :file_content_validation_structure_proto.
java_lite_proto_library(
name = "file_content_validation_structure_java_proto_lite",
visibility = ["//visibility:public"],
deps = [":file_content_validation_structure_proto"],
)

format_import_proto_library(
name = "feedback_reporting",
src = "src/main/proto/feedback_reporting.proto",
Expand Down
21 changes: 21 additions & 0 deletions model/src/main/proto/file_content_validation_checks.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
syntax = "proto3";

package model;

option java_package = "org.oppia.android.app.model";
option java_multiple_files = true;

// Top-level container holding every file content check.
message FileContentChecks {
// The individual content checks.
repeated FileContentCheck file_content_checks = 1;
}

// A single check that prohibits certain content within files whose names match a pattern.
message FileContentCheck {
// Regex matching the names of the files whose contents should be checked.
string filename_regex = 1;

// Regex that must not match anywhere in the contents of a matching file.
string prohibited_content_regex = 2;

// Error to show if any file matching the filename regex has contents matching the prohibited
// content regex.
string failure_message = 3;
}
18 changes: 18 additions & 0 deletions model/src/main/proto/filename_pattern_validation_checks.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
syntax = "proto3";

package model;

option java_package = "org.oppia.android.app.model";
option java_multiple_files = true;

// Top-level container holding every filename check.
message FilenameChecks {
// The individual filename checks.
repeated FilenameCheck filename_checks = 1;
}

// A single check that prohibits filenames matching a pattern.
message FilenameCheck {
// Regex that no filename should match.
string prohibited_filename_regex = 1;

// Error to show if any filename matches the regex.
string failure_message = 2;
}
23 changes: 23 additions & 0 deletions scripts/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
load("@io_bazel_rules_kotlin//kotlin:kotlin.bzl", "kt_jvm_binary")
load("//scripts:script_assets.bzl", "generate_assets_list_from_text_protos")

# Binary proto assets generated at build time from the text protos listed below. Each entry is a
# file name without its extension.
SCRIPT_ASSETS = generate_assets_list_from_text_protos(
name = "script_assets",
file_content_validation_file_names = [
"file_content_validation_checks",
],
filename_validation_file_names = [
"filename_pattern_validation_checks",
],
)

# Binary that loads the generated regex check protos (passed as runtime data) and prints them.
kt_jvm_binary(
    name = "pattern_validation_check",
    srcs = glob(["src/main/java/org/oppia/android/scripts/*.kt"]),
    data = SCRIPT_ASSETS,
    # main() is declared with @JvmStatic inside the companion object, so the static entry point
    # required by the JVM launcher is generated on the enclosing class. The $Companion class only
    # carries an instance method named main, which the launcher cannot invoke.
    main_class = "org.oppia.android.scripts.RegexPatternValidationCheck",
    deps = [
        "//model:file_content_validation_structure_java_proto_lite",
        "//model:filename_pattern_validation_structure_java_proto_lite",
    ],
)
186 changes: 186 additions & 0 deletions scripts/script_assets.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
"""
Macros for preparing & creating the binary proto assets used by the scripts under //scripts.
"""

load("@rules_proto//proto:defs.bzl", "ProtoInfo")

def _extract_proto_sources(deps):
    """
    Collects the proto source files behind the specified list of proto dependencies.

    Args:
        deps: list of targets. Each target is expected to provide ProtoInfo.

    Returns:
        list of File. The sources of every dependency, including transitive ones, concatenated in
        the order the dependencies were given.
    """

    # See https://github.com/bazelbuild/rules_proto/pull/77/files &
    # https://github.com/bazelbuild/rules_proto/issues/57 &
    # https://docs.bazel.build/versions/master/skylark/lib/ProtoInfo.html for references.
    return [
        source
        for dep in deps
        for source in dep[ProtoInfo].transitive_sources.to_list()
    ]

def _gen_binary_proto_from_text_impl(ctx):
    """
    Implementation of the _gen_binary_proto_from_text rule.

    Registers a shell action that runs protoc in encode mode, reading the text proto from stdin
    and writing the binary proto to the declared output file.

    Args:
        ctx: The rule context.

    Returns:
        DefaultInfo. Exposes the generated binary proto both as the default output and as a
        runfile.
    """

    # See: https://docs.bazel.build/versions/master/skylark/lib/actions.html#declare_file.
    output_file = ctx.outputs.output_file

    # Use the execution path (not short_path, which is the runfiles-relative path) since the path
    # is consumed by an action command. The two only coincide for source files in the main
    # repository.
    input_file_path = ctx.files.input_file[0].path
    input_proto_files = _extract_proto_sources(ctx.attr.proto_deps)

    # See 'protoc --help' for specifics on the arguments passed to the tool for converting text
    # proto to binary, and expected stdin/stdout configurations. Note that the actual proto files
    # are passed to the compiler since it requires them in order to transcode the text proto file.
    command_path = ctx.executable._protoc_tool.path
    arguments = [command_path] + [
        "--encode %s" % ctx.attr.proto_type_name,
    ] + [file.path for file in input_proto_files] + [
        "< %s" % input_file_path,
        "> %s" % output_file.path,
    ]

    # Reference: https://docs.bazel.build/versions/master/skylark/lib/actions.html#run. This
    # actually runs the proto compiler to perform the conversion. Note that this needs to use
    # run_shell() instead of run() because it requires input redirection.
    ctx.actions.run_shell(
        outputs = [output_file],
        inputs = ctx.files.input_file + input_proto_files,
        tools = [ctx.executable._protoc_tool],
        command = " ".join(arguments),
        mnemonic = "GenerateBinaryProtoFromText",
    )
    return DefaultInfo(
        files = depset([output_file]),
        runfiles = ctx.runfiles(files = [output_file]),
    )

# Custom Starlark rule for running the proto compiler in encode mode to convert a text proto to
# binary. The custom rule allows this to be done as part of the build graph so that binary files
# never need to be checked into the repository.
_gen_binary_proto_from_text = rule(
attrs = {
# The text proto file to convert.
"input_file": attr.label(
allow_files = True,
mandatory = True,
),
# The binary proto file to generate.
"output_file": attr.output(
mandatory = True,
),
# The proto_library targets whose sources are passed to protoc for the conversion.
"proto_deps": attr.label_list(
allow_empty = False,
mandatory = True,
),
# The message type name passed to protoc's --encode flag.
"proto_type_name": attr.string(mandatory = True),
"_protoc_tool": attr.label(
# This was partly inspired by https://stackoverflow.com/a/39138074.
executable = True,
cfg = "host",
default = "@protobuf_tools//:protoc",
),
},
implementation = _gen_binary_proto_from_text_impl,
)

def gen_binary_proto_from_text(name, proto_type_name, input_file, output_file, proto_deps):
    """
    Declares a target that converts a text proto file into a binary proto file.

    Args:
        name: str. A unique name for the generation target. The target can be built directly with
            Bazel like any other build rule.
        proto_type_name: str. The qualified type name of the proto being converted (e.g.
            'model.Exploration').
        input_file: file. The path to the text proto file being converted.
        output_file: file. The output path for the generated binary proto file.
        proto_deps: list of targets. The proto_library dependencies needed to perform the
            conversion. Generally, only the proto file corresponding to the proto type is needed
            since proto_library automatically pulls in transitive dependencies.

    Returns:
        str. The path to the newly generated binary file (same as output_file).
    """
    _gen_binary_proto_from_text(
        name = name,
        input_file = input_file,
        output_file = output_file,
        proto_deps = proto_deps,
        proto_type_name = proto_type_name,
    )
    return output_file

def _generate_single_asset_proto_binary(name, proto_file_name, proto_dep_name, proto_type_name):
    """
    Declares the conversion of one text proto asset into a binary proto asset.

    Args:
        name: str. The name of this target.
        proto_file_name: str. The file name (without extension) of the text proto under the assets
            directory. It is assumed to correspond to 'src/main/assets/<name>.textproto' and leads
            to a new generated file called 'src/main/assets/<name>.pb'.
        proto_dep_name: str. The name of the proto library under //model that contains the proto
            definition being converted to binary.
        proto_type_name: str. The name of the proto type being converted in the text proto. This is
            assumed to be part of the shared 'model' package.

    Returns:
        str. The path to the newly generated binary file.
    """
    asset_dir = "src/main/assets"
    text_proto_path = "%s/%s.textproto" % (asset_dir, proto_file_name)
    binary_proto_path = "%s/%s.pb" % (asset_dir, proto_file_name)
    proto_library_label = "//model:%s_proto" % proto_dep_name

    return gen_binary_proto_from_text(
        name = "generate_binary_proto_for_text_proto_%s" % name,
        proto_type_name = "model.%s" % proto_type_name,
        input_file = text_proto_path,
        output_file = binary_proto_path,
        proto_deps = [proto_library_label],
    )

def _generate_proto_binary_assets(names, proto_dep_name, proto_type_name, name_prefix):
    """
    Declares the conversion of several text proto assets of the same type into binary protos.

    Args:
        names: list of str. The text proto file names under the assets directory that should be
            converted.
        proto_dep_name: str. See _generate_single_asset_proto_binary.
        proto_type_name: str. See _generate_single_asset_proto_binary.
        name_prefix: str. A prefix to attach to the name of each generated target.

    Returns:
        list of str. The new proto binary asset files, in the same order as names.
    """
    generated_assets = []
    for asset_name in names:
        generated_assets.append(
            _generate_single_asset_proto_binary(
                name = "%s_%s" % (name_prefix, asset_name),
                proto_file_name = asset_name,
                proto_dep_name = proto_dep_name,
                proto_type_name = proto_type_name,
            ),
        )
    return generated_assets

def generate_assets_list_from_text_protos(
        name,
        filename_validation_file_names,
        file_content_validation_file_names):
    """
    Declares the conversion of every script text proto asset into a binary proto.

    Args:
        name: str. The name of this generation instance. This will be a prefix for derived targets.
        filename_validation_file_names: list of str. The names of the text proto files that define
            prohibited filename patterns.
        file_content_validation_file_names: list of str. The names of the text proto files that
            define prohibited file contents.

    Returns:
        list of str. The new proto binary asset files: the filename check assets first, followed
        by the file content check assets.
    """
    filename_check_assets = _generate_proto_binary_assets(
        names = filename_validation_file_names,
        proto_dep_name = "filename_pattern_validation_structure",
        proto_type_name = "FilenameChecks",
        name_prefix = name,
    )
    file_content_check_assets = _generate_proto_binary_assets(
        names = file_content_validation_file_names,
        proto_dep_name = "file_content_validation_structure",
        proto_type_name = "FileContentChecks",
        name_prefix = name,
    )
    return filename_check_assets + file_content_check_assets
10 changes: 10 additions & 0 deletions scripts/src/main/assets/file_content_validation_checks.textproto
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Prohibits Kotlin files from importing the Android support library. The dot before the 'kt'
# extension is escaped so that it only matches a literal '.'.
file_content_checks {
  filename_regex: ".+?\\.kt"
  prohibited_content_regex: "^import .+?support.+?$"
  failure_message: "AndroidX should be used instead of the support library"
}
# Prohibits Kotlin files from importing compiler packages directly.
file_content_checks {
  filename_regex: ".+?\\.kt"
  prohibited_content_regex: "^import .+?compiler.+?$"
  failure_message: "Compiler should not be used directly"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Each pattern below flags a file under '<module>/src/main/' unless the module directory is named
# exactly 'app'. The negative lookbehind is used because a character class such as [^app] only
# excludes the single characters 'a' and 'p', which would wrongly skip modules like 'data'.
filename_checks {
  prohibited_filename_regex: "(?<!(^|/)app)/src/main/.+?Activity\\.kt"
  failure_message: "Activities can only be placed in the app module"
}
filename_checks {
  prohibited_filename_regex: "(?<!(^|/)app)/src/main/.+?View\\.kt"
  failure_message: "Views can only be placed in the app module"
}
filename_checks {
  prohibited_filename_regex: "(?<!(^|/)app)/src/main/.+?Fragment\\.kt"
  failure_message: "Fragments can only be placed in the app module"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package org.oppia.android.scripts

import java.io.File
import java.io.FileInputStream
import org.oppia.android.app.model.FilenameChecks
import org.oppia.android.app.model.FileContentChecks

/**
 * Script that loads the regex validation checks from their binary proto files and prints every
 * check to standard output.
 *
 * The binary protos are read from `scripts/src/main/assets` relative to the JVM's working
 * directory (the `user.dir` system property).
 */
class RegexPatternValidationCheck {
  companion object {
    /** Directory, relative to the working directory, containing the binary proto files. */
    private const val ASSETS_DIRECTORY = "scripts/src/main/assets"

    /** Line printed after each check to visually separate it from the next one. */
    private const val CHECK_SEPARATOR = "------------------------------------"

    /**
     * Entry point of the script.
     *
     * Both proto files are fully loaded before anything is printed, so a missing or unreadable
     * file fails the script without producing partial output.
     *
     * @param args command-line arguments (currently unused)
     */
    @JvmStatic
    fun main(vararg args: String) {
      val workingDirectory = System.getProperty("user.dir")
      val filenameChecks = openAsset(
        workingDirectory, "filename_pattern_validation_checks.pb"
      ).use { FilenameChecks.parseFrom(it) }
      val fileContentChecks = openAsset(
        workingDirectory, "file_content_validation_checks.pb"
      ).use { FileContentChecks.parseFrom(it) }

      filenameChecks.filenameChecksList.forEach { check ->
        println(check.prohibitedFilenameRegex)
        println(check.failureMessage)
        println(CHECK_SEPARATOR)
      }

      fileContentChecks.fileContentChecksList.forEach { check ->
        println(check.filenameRegex)
        println(check.failureMessage)
        println(check.prohibitedContentRegex)
        println(CHECK_SEPARATOR)
      }
    }

    /** Opens the asset called [fileName] located in the assets directory of [workingDirectory]. */
    private fun openAsset(workingDirectory: String, fileName: String): FileInputStream =
      FileInputStream(File("$workingDirectory/$ASSETS_DIRECTORY/$fileName"))
  }
}

0 comments on commit 225a4fc

Please sign in to comment.