Skip to content

Commit

Permalink
✨ Identify interesting Jars to decomp and improve decomp performance (k…
Browse files Browse the repository at this point in the history
…onveyor#352)

Fixes konveyor#317 
Fixes konveyor#319 

Summary of changes:
* When decompiling binaries, for every JAR we find, we attempt to look
at its metadata to get the artifact and group. If we don't find metadata, we
look it up on Maven Central using its SHA. For all such JARs that we
find accurate information about, we add them to the Java project's POM as
dependencies. All other JARs are sent to decompile.

* In decompile, we are running concurrently. Also, instead of
decompiling each class file individually, we are now decompiling whole
JAR using fernflower and then exploding it (this is faster than
individual .class file decompile)

* Prior to initializing the Java provider, we now download sources for all
dependencies. If we find any that don't have sources, we pass
them to decompile.

* When getting dependencies for a binary, we now use the same logic
from step 1 to get finer-grained info for a JAR.

---------

Signed-off-by: David Zager <[email protected]>
Signed-off-by: Pranav Gaikwad <[email protected]>
Co-authored-by: David Zager <[email protected]>
  • Loading branch information
pranavgaikwad and djzager authored Oct 6, 2023
1 parent 3b536c0 commit 199aed0
Show file tree
Hide file tree
Showing 6 changed files with 640 additions and 66 deletions.
36 changes: 27 additions & 9 deletions provider/internal/java/dependency.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ func (p *javaServiceClient) GetDependencies(ctx context.Context) (map[uri.URI][]
return m, nil
}

func (p *javaServiceClient) getLocalRepoPath() string {
func getMavenLocalRepoPath(mvnSettingsFile string) string {
args := []string{
"help:evaluate", "-Dexpression=settings.localRepository", "-q", "-DforceStdout",
}
if p.mvnSettingsFile != "" {
args = append(args, "-s", p.mvnSettingsFile)
if mvnSettingsFile != "" {
args = append(args, "-s", mvnSettingsFile)
}
cmd := exec.Command("mvn", args...)
var outb bytes.Buffer
Expand Down Expand Up @@ -147,7 +147,7 @@ func (p *javaServiceClient) GetDependencyFallback(ctx context.Context) (map[uri.
}

func (p *javaServiceClient) GetDependenciesDAG(ctx context.Context) (map[uri.URI][]provider.DepDAGItem, error) {
localRepoPath := p.getLocalRepoPath()
localRepoPath := getMavenLocalRepoPath(p.mvnSettingsFile)

path := p.findPom()
file := uri.File(path)
Expand Down Expand Up @@ -209,13 +209,17 @@ func (p *javaServiceClient) GetDependenciesDAG(ctx context.Context) (map[uri.URI
func (p *javaServiceClient) discoverDepsFromJars(path string, ll map[uri.URI][]konveyor.DepDAGItem) {
// for binaries we only find JARs embedded in archive
w := walker{
deps: ll,
deps: ll,
depToLabels: p.depToLabels,
m2RepoPath: getMavenLocalRepoPath(p.mvnSettingsFile),
}
filepath.WalkDir(path, w.walkDirForJar)
}

type walker struct {
deps map[uri.URI][]provider.DepDAGItem
deps map[uri.URI][]provider.DepDAGItem
depToLabels map[string]*depLabelItem
m2RepoPath string
}

func (w *walker) walkDirForJar(path string, info fs.DirEntry, err error) error {
Expand All @@ -229,6 +233,20 @@ func (w *walker) walkDirForJar(path string, info fs.DirEntry, err error) error {
d := provider.Dep{
Name: info.Name(),
}
artifact, _ := toDependency(context.TODO(), path)
if (artifact != javaArtifact{}) {
d.Name = fmt.Sprintf("%s.%s", artifact.GroupId, artifact.ArtifactId)
d.Version = artifact.Version
d.Labels = addDepLabels(w.depToLabels, d.Name)
d.ResolvedIdentifier = artifact.sha1
// when we can successfully get javaArtifact from a jar
// we added it to the pom and it should be in m2Repo path
if w.m2RepoPath != "" {
d.FileURIPrefix = filepath.Join(w.m2RepoPath,
strings.Replace(artifact.GroupId, ".", "/", -1), artifact.ArtifactId, artifact.Version)
}
}

w.deps[uri.URI(filepath.Join(path, info.Name()))] = []provider.DepDAGItem{
{
Dep: d,
Expand Down Expand Up @@ -271,15 +289,15 @@ func (p *javaServiceClient) parseDepString(dep, localRepoPath string) (provider.
d.ResolvedIdentifier = string(b)
}

d.Labels = p.addDepLabels(d.Name)
d.Labels = addDepLabels(p.depToLabels, d.Name)
d.FileURIPrefix = fmt.Sprintf("file://%v", filepath.Dir(fp))

return d, nil
}

func (p *javaServiceClient) addDepLabels(depName string) []string {
func addDepLabels(depToLabels map[string]*depLabelItem, depName string) []string {
m := map[string]interface{}{}
for _, d := range p.depToLabels {
for _, d := range depToLabels {
if d.r.Match([]byte(depName)) {
for label, _ := range d.labels {
m[label] = nil
Expand Down
138 changes: 117 additions & 21 deletions provider/internal/java/provider.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package java

import (
"bufio"
"context"
"fmt"
"io"
"os"
"os/exec"
"path"
"path/filepath"
"strings"

"github.com/getkin/kin-openapi/openapi3"
Expand All @@ -17,6 +20,7 @@ import (
)

const (
JavaFile = ".java"
JavaArchive = ".jar"
WebArchive = ".war"
EnterpriseArchive = ".ear"
Expand Down Expand Up @@ -186,26 +190,7 @@ func (p *javaProvider) Init(ctx context.Context, log logr.Logger, config provide
}
log = log.WithValues("provider", "java")

isBinary := false
var returnErr error
// each service client should have their own context
ctx, cancelFunc := context.WithCancel(ctx)
extension := strings.ToLower(path.Ext(config.Location))
switch extension {
case JavaArchive, WebArchive, EnterpriseArchive:
depLocation, sourceLocation, err := decompileJava(ctx, log, config.Location)
if err != nil {
cancelFunc()
return nil, err
}
config.Location = sourceLocation
// for binaries, we fallback to looking at .jar files only for deps
config.DependencyPath = depLocation
// for binaries, always run in source-only mode as we don't know how to correctly resolve deps
config.AnalysisMode = provider.SourceOnlyAnalysisMode
isBinary = true
}

// read provider settings
bundlesString, ok := config.ProviderSpecificConfig[BUNDLES_INIT_OPTION].(string)
if !ok {
bundlesString = ""
Expand All @@ -224,10 +209,35 @@ func (p *javaProvider) Init(ctx context.Context, log logr.Logger, config provide

lspServerPath, ok := config.ProviderSpecificConfig[provider.LspServerPathConfigKey].(string)
if !ok || lspServerPath == "" {
cancelFunc()
return nil, fmt.Errorf("invalid lspServerPath provided, unable to init java provider")
}

isBinary := false
var returnErr error
// each service client should have their own context
ctx, cancelFunc := context.WithCancel(ctx)
extension := strings.ToLower(path.Ext(config.Location))
switch extension {
case JavaArchive, WebArchive, EnterpriseArchive:
depLocation, sourceLocation, err := decompileJava(ctx, log, config.Location)
if err != nil {
cancelFunc()
return nil, err
}
config.Location = sourceLocation
// for binaries, we fallback to looking at .jar files only for deps
config.DependencyPath = depLocation
isBinary = true
}

// we attempt to decompile JARs of dependencies that don't have a sources JAR attached
// we need to do this for jdtls to correctly recognize source attachment for dep
err := resolveSourcesJars(ctx, log, config.Location, mavenSettingsFile)
if err != nil {
// TODO (pgaikwad): should we ignore this failure?
log.Error(err, "failed to resolve sources jar for location", "location", config.Location)
}

// handle proxy settings
for k, v := range config.Proxy.ToEnvVars() {
os.Setenv(k, v)
Expand Down Expand Up @@ -304,3 +314,89 @@ func (p *javaProvider) GetDependencies(ctx context.Context) (map[uri.URI][]*prov
// GetDependenciesDAG delegates to provider.FullDepDAGResponse, passing the
// provider's clients, and returns that call's result unchanged.
func (p *javaProvider) GetDependenciesDAG(ctx context.Context) (map[uri.URI][]provider.DepDAGItem, error) {
return provider.FullDepDAGResponse(ctx, p.clients)
}

// resolveSourcesJars runs `mvn dependency:sources` in location, collects the
// dependencies that maven reports as having no resolvable sources, and
// decompiles the binary JAR of each one from the local maven repository.
//
// For every such dependency the decompiled output is handed to moveFile with
// the destination <jar dir>/<artifactId>-<version>-sources.jar, i.e. the
// location where a sources JAR attached to that dependency would live.
//
// mavenSettings, when non-empty, is passed to mvn with -s. If the local
// repository path cannot be determined (getMavenLocalRepoPath returns ""),
// the function returns nil without decompiling anything.
func resolveSourcesJars(ctx context.Context, log logr.Logger, location, mavenSettings string) error {
	mvnOutput, err := os.CreateTemp("", "mvn-sources-")
	if err != nil {
		return fmt.Errorf("creating temp file for mvn output: %w", err)
	}
	// The report is only needed for the duration of this call; remove it so
	// repeated provider inits do not accumulate temp files. Deferred calls run
	// in reverse order, so the file is closed before it is removed.
	defer os.Remove(mvnOutput.Name())
	defer mvnOutput.Close()

	args := []string{
		"dependency:sources",
		"-Djava.net.useSystemProxies=true",
		fmt.Sprintf("-DoutputFile=%s", mvnOutput.Name()),
	}
	if mavenSettings != "" {
		args = append(args, "-s", mavenSettings)
	}
	cmd := exec.CommandContext(ctx, "mvn", args...)
	cmd.Dir = location
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("running mvn dependency:sources in %q: %w", location, err)
	}

	artifacts, err := parseUnresolvedSources(mvnOutput)
	if err != nil {
		return fmt.Errorf("parsing mvn dependency:sources output: %w", err)
	}
	m2Repo := getMavenLocalRepoPath(mavenSettings)
	if m2Repo == "" {
		return nil
	}

	decompileJobs := make([]decompileJob, 0, len(artifacts))
	for _, artifact := range artifacts {
		// Maven repository layout: only the groupId is turned into nested
		// directories. The artifactId is a single directory name even when it
		// contains dots (e.g. "org.eclipse.jdt.core"), so it must not be split.
		groupDirs := filepath.Join(strings.Split(artifact.GroupId, ".")...)
		artifactDir := filepath.Join(m2Repo, groupDirs, artifact.ArtifactId, artifact.Version)
		jarName := fmt.Sprintf("%s-%s.jar", artifact.ArtifactId, artifact.Version)
		decompileJobs = append(decompileJobs, decompileJob{
			artifact:   artifact,
			inputPath:  filepath.Join(artifactDir, jarName),
			outputPath: filepath.Join(artifactDir, "decompiled", jarName),
		})
	}

	// NOTE(review): 10 is presumably the number of concurrent workers and ""
	// an unused project path — confirm against decompile's signature.
	if err := decompile(ctx, log, alwaysDecompileFilter(true), 10, decompileJobs, ""); err != nil {
		return fmt.Errorf("decompiling dependencies without sources: %w", err)
	}

	// move decompiled files to base location of the jar
	for _, job := range decompileJobs {
		jarName := strings.TrimSuffix(filepath.Base(job.inputPath), ".jar")
		moveFile(job.outputPath,
			filepath.Join(filepath.Dir(job.inputPath),
				fmt.Sprintf("%s-sources.jar", jarName)))
	}
	return nil
}

// parseUnresolvedSources reads the report written by `mvn dependency:sources`
// and returns one javaArtifact for every dependency listed under a
// "The following files have NOT been resolved:" header.
//
// Dependency lines are expected to have six colon-separated fields, of which
// the first is used as the groupId, the second as the artifactId and the
// fifth as the version (e.g. "group:artifact:jar:sources:1.0.0:compile").
// Lines with a different number of fields are skipped. A subsequent
// "The following files have been resolved:" header ends the unresolved
// section, so reports containing several sections are handled correctly.
//
// The returned slice is empty (never nil) when nothing is unresolved. The
// error is whatever the underlying scanner reports.
func parseUnresolvedSources(output io.Reader) ([]javaArtifact, error) {
	const (
		resolvedHeader   = "The following files have been resolved:"
		unresolvedHeader = "The following files have NOT been resolved:"
	)

	artifacts := []javaArtifact{}
	scanner := bufio.NewScanner(output)
	inUnresolved := false
	for scanner.Scan() {
		// TrimSpace also drops tabs and a trailing '\r' from CRLF output.
		line := strings.TrimSpace(scanner.Text())
		switch {
		case strings.HasPrefix(line, unresolvedHeader):
			inUnresolved = true
		case strings.HasPrefix(line, resolvedHeader):
			// Artifacts after this header do have sources; treating them as
			// unresolved would cause them to be decompiled needlessly.
			inUnresolved = false
		case inUnresolved:
			parts := strings.Split(line, ":")
			if len(parts) != 6 {
				continue
			}
			artifacts = append(artifacts, javaArtifact{
				packaging:  JavaArchive,
				GroupId:    parts[0],
				ArtifactId: parts[1],
				Version:    parts[4],
			})
		}
	}
	return artifacts, scanner.Err()
}
48 changes: 48 additions & 0 deletions provider/internal/java/provider_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package java

import (
"reflect"
"strings"
"testing"
)

// Test_parseUnresolvedSources feeds sample `mvn dependency:sources` reports to
// parseUnresolvedSources and checks that only well-formed entries listed under
// the "NOT been resolved" header are returned.
func Test_parseUnresolvedSources(t *testing.T) {
	tests := []struct {
		name      string
		mvnOutput string
		wantErr   bool
		wantList  []javaArtifact
	}{
		{
			name: "valid sources output",
			mvnOutput: `
The following files have been resolved:
org.springframework.boot:spring-boot:jar:sources:2.5.0:compile
The following files have NOT been resolved:
io.konveyor.demo:config-utils:jar:sources:1.0.0:compile
`,
			wantErr: false,
			wantList: []javaArtifact{
				{
					packaging:  JavaArchive,
					GroupId:    "io.konveyor.demo",
					ArtifactId: "config-utils",
					Version:    "1.0.0",
				},
			},
		},
		{
			name: "no unresolved section",
			mvnOutput: `
The following files have been resolved:
   org.springframework.boot:spring-boot:jar:sources:2.5.0:compile
`,
			wantErr:  false,
			wantList: []javaArtifact{},
		},
		{
			name: "malformed lines are skipped",
			mvnOutput: `
The following files have NOT been resolved:
   none
   io.konveyor.demo:config-utils
   io.konveyor.demo:config-utils:jar:sources:1.0.0:compile
`,
			wantErr: false,
			wantList: []javaArtifact{
				{
					packaging:  JavaArchive,
					GroupId:    "io.konveyor.demo",
					ArtifactId: "config-utils",
					Version:    "1.0.0",
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			outputReader := strings.NewReader(tt.mvnOutput)
			gotList, gotErr := parseUnresolvedSources(outputReader)
			if (gotErr != nil) != tt.wantErr {
				t.Errorf("parseUnresolvedSources() gotErr = %v, wantErr %v", gotErr, tt.wantErr)
			}
			// An empty result may be nil or an empty slice; only compare
			// contents when at least one side has elements.
			bothEmpty := len(gotList) == 0 && len(tt.wantList) == 0
			if !bothEmpty && !reflect.DeepEqual(gotList, tt.wantList) {
				t.Errorf("parseUnresolvedSources() gotList = %v, wantList %v", gotList, tt.wantList)
			}
		})
	}
}
1 change: 0 additions & 1 deletion provider/internal/java/service_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

type javaServiceClient struct {
rpc *jsonrpc2.Conn
ctx context.Context
cancelFunc context.CancelFunc
config provider.InitConfig
log logr.Logger
Expand Down
Loading

0 comments on commit 199aed0

Please sign in to comment.