Skip to content

Commit

Permalink
- major refactoring of GraknMigrator
Browse files Browse the repository at this point in the history
 - data warnings now include exact row number of input file
  • Loading branch information
hkuich committed Apr 8, 2021
1 parent 8478839 commit 2be3ce5
Show file tree
Hide file tree
Showing 21 changed files with 359 additions and 240 deletions.
19 changes: 2 additions & 17 deletions src/main/java/cli/GramiCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import java.io.IOException;

@CommandLine.Command(description="Welcome to the CLI of GraMi - your grakn data migration tool", name = "grami", version = "0.1.0-alpha-12", mixinStandardHelpOptions = true)
@CommandLine.Command(description="Welcome to the CLI of GraMi - your grakn data migration tool", name = "grami", version = "0.1.0", mixinStandardHelpOptions = true)
public class GramiCLI {

public static void main(String[] args) {
Expand Down Expand Up @@ -46,9 +46,6 @@ class MigrateCommand implements Runnable {
@CommandLine.Option(names = {"-cm", "--cleanMigration"}, description = "optional - delete old schema and data and restart migration from scratch - default: continue previous migration, if exists")
private boolean cleanMigration;

@CommandLine.Option(names = {"-sc", "--scope"}, description = "optional - set migration scope: 0 - apply schema only (Note: this has no effect unless you also set the cleanMigration flag to true.); 1 - migrate entities; 2 - migrate entities & relations; 3 - migrate entites, relations, & relation-with-relations; everything else defaults to 4 - migrate all (entities, relations, relation-with-relations, append-attributes")
private int scope = 4;

@Override
public void run() {
spec.commandLine().getOut().println("############## GraMi migration ###############");
Expand All @@ -60,24 +57,12 @@ public void run() {
spec.commandLine().getOut().println("\tdatabase: " + databaseName);
spec.commandLine().getOut().println("\tgrakn server: " + graknURI);
spec.commandLine().getOut().println("\tdelete database and all data in it for a clean new migration?: " + cleanMigration);
spec.commandLine().getOut().println("\tmigration scope: " + scope);

final MigrationConfig migrationConfig = new MigrationConfig(graknURI, databaseName, schemaFilePath, dataConfigFilePath, processorConfigFilePath);

try {
GraknMigrator mig = new GraknMigrator(migrationConfig, migrationStatusFilePath, cleanMigration);

if (scope != 0 && scope != 1 && scope != 2 && scope != 3) {
scope = 4;
}

switch (scope) {
case 0: mig.migrate(false, false, false, false); break;
case 1: mig.migrate(true, false, false, false); break;
case 2: mig.migrate(true, true, false, false); break;
case 3: mig.migrate(true, true, true, false); break;
case 4: mig.migrate(true, true, true, true); break;
}
mig.migrate();
} catch (IOException e) {
e.printStackTrace();
}
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/configuration/DataConfigEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class DataConfigEntry {
private DataConfigGeneratorMapping[] relationPlayers;
private int batchSize;
private int threads;
private Integer order;

public String getDataPath() {
return dataPath;
Expand Down Expand Up @@ -46,6 +47,10 @@ public int getThreads() {
return threads;
}

public Integer getOrder() {
return order;
}

public ArrayList<DataConfigEntry.DataConfigGeneratorMapping> getMatchAttributes() {
ArrayList<DataConfigEntry.DataConfigGeneratorMapping> matchAttributes = new ArrayList<>();
for (DataConfigEntry.DataConfigGeneratorMapping attributeMapping: getAttributes()) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/configuration/ProcessorConfigEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public ConceptGenerator getRelationPlayerGenerator(String key) {

public HashMap<String,ConceptGenerator> getRelationRequiredPlayers() {
HashMap<String,ConceptGenerator> relationPlayers = new HashMap<>();
if (processorType.equals("relation") || processorType.equals("relation-with-relation")) {
if (processorType.equals("relation") || processorType.equals("nested-relation") || processorType.equals("attribute-relation")) {
HashMap<String, ConceptGenerator> playerGenerators = getConceptGenerators().get("players");
for (Map.Entry<String, ConceptGenerator> pg: playerGenerators.entrySet()) {
if (pg.getValue().isRequired()) {
Expand Down
27 changes: 15 additions & 12 deletions src/main/java/generator/AppendAttributeGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
import java.util.HashMap;
import java.util.Map;

import static generator.GeneratorUtil.*;
import static generator.GeneratorUtil.addAttribute;
import static generator.GeneratorUtil.malformedRow;

public class AppendAttributeGenerator extends InsertGenerator {

private final DataConfigEntry dce;
private final ProcessorConfigEntry pce;
private static final Logger appLogger = LogManager.getLogger("com.bayer.dt.grami");
private static final Logger dataLogger = LogManager.getLogger("com.bayer.dt.grami.data");
private final DataConfigEntry dce;
private final ProcessorConfigEntry pce;

public AppendAttributeGenerator(DataConfigEntry dataConfigEntry,
ProcessorConfigEntry processorConfigEntry) {
Expand All @@ -33,24 +34,25 @@ public AppendAttributeGenerator(DataConfigEntry dataConfigEntry,
}

public HashMap<String, ArrayList<ArrayList<ThingVariable<?>>>> graknAppendAttributeInsert(ArrayList<String> rows,
String header) throws Exception {
String header, int rowCounter) throws Exception {
HashMap<String, ArrayList<ArrayList<ThingVariable<?>>>> matchInsertPatterns = new HashMap<>();

ArrayList<ArrayList<ThingVariable<?>>> matchPatterns = new ArrayList<>();
ArrayList<ArrayList<ThingVariable<?>>> insertPatterns = new ArrayList<>();

int insertCounter = 0;

int batchCounter = 1;
for (String row : rows) {
ArrayList<ArrayList<ThingVariable<?>>> tmp = graknAppendAttributeQueryFromRow(row, header, insertCounter);

ArrayList<ArrayList<ThingVariable<?>>> tmp = graknAppendAttributeQueryFromRow(row, header, insertCounter, rowCounter + batchCounter);
if (tmp != null) {
if (tmp.get(0) != null && tmp.get(1) != null) {
matchPatterns.add(tmp.get(0));
insertPatterns.add(tmp.get(1));
insertCounter++;
}
}

batchCounter = batchCounter + 1;
}
matchInsertPatterns.put("match", matchPatterns);
matchInsertPatterns.put("insert", insertPatterns);
Expand All @@ -59,7 +61,8 @@ public HashMap<String, ArrayList<ArrayList<ThingVariable<?>>>> graknAppendAttrib

public ArrayList<ArrayList<ThingVariable<?>>> graknAppendAttributeQueryFromRow(String row,
String header,
int insertCounter) throws Exception {
int insertCounter,
int rowCounter) throws Exception {
String fileSeparator = dce.getSeparator();
String[] rowTokens = row.split(fileSeparator);
String[] columnNames = header.split(fileSeparator);
Expand All @@ -77,7 +80,7 @@ public ArrayList<ArrayList<ThingVariable<?>>> graknAppendAttributeQueryFromRow(S
Thing appendAttributeMatchPattern = addEntityToMatchPattern(insertCounter);
for (DataConfigEntry.DataConfigGeneratorMapping generatorMappingForMatchAttribute : dce.getAttributes()) {
if (generatorMappingForMatchAttribute.isMatch()) {
appendAttributeMatchPattern = addAttribute(rowTokens, appendAttributeMatchPattern, columnNames, generatorMappingForMatchAttribute, pce, generatorMappingForMatchAttribute.getPreprocessor());
appendAttributeMatchPattern = addAttribute(rowTokens, appendAttributeMatchPattern, columnNames, rowCounter, generatorMappingForMatchAttribute, pce, generatorMappingForMatchAttribute.getPreprocessor());
}
}
matchPatterns.add(appendAttributeMatchPattern);
Expand All @@ -87,7 +90,7 @@ public ArrayList<ArrayList<ThingVariable<?>>> graknAppendAttributeQueryFromRow(S
Thing appendAttributeInsertPattern = null;
for (DataConfigEntry.DataConfigGeneratorMapping generatorMappingForAppendAttribute : dce.getAttributes()) {
if (!generatorMappingForAppendAttribute.isMatch()) {
appendAttributeInsertPattern = addAttribute(rowTokens, thingVar, columnNames, generatorMappingForAppendAttribute, pce, generatorMappingForAppendAttribute.getPreprocessor());
appendAttributeInsertPattern = addAttribute(rowTokens, thingVar, rowCounter, columnNames, generatorMappingForAppendAttribute, pce, generatorMappingForAppendAttribute.getPreprocessor());
}
}
if (appendAttributeInsertPattern != null) {
Expand All @@ -100,10 +103,10 @@ public ArrayList<ArrayList<ThingVariable<?>>> graknAppendAttributeQueryFromRow(S


if (isValid(assembledPatterns)) {
appLogger.debug("valid query: <" + assembleQuery(assembledPatterns).toString() + ">");
appLogger.debug("valid query: <" + assembleQuery(assembledPatterns) + ">");
return assembledPatterns;
} else {
dataLogger.warn("in datapath <" + dce.getDataPath() + ">: skipped row b/c does not contain at least one match attribute and one insert attribute. Faulty tokenized row: " + Arrays.toString(rowTokens));
dataLogger.warn("in datapath <" + dce.getDataPath() + ">: skipped row " + rowCounter + " b/c does not contain at least one match attribute and one insert attribute. Faulty tokenized row: " + Arrays.toString(rowTokens));
return null;
}
}
Expand Down
12 changes: 7 additions & 5 deletions src/main/java/generator/AttributeInsertGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,25 @@ public AttributeInsertGenerator(DataConfigEntry dataConfigEntry, ProcessorConfig
}

public ArrayList<ThingVariable<?>> graknAttributeInsert(ArrayList<String> rows,
String header) throws IllegalArgumentException {
String header, int rowCounter) throws IllegalArgumentException {
ArrayList<ThingVariable<?>> patterns = new ArrayList<>();
int batchCount = 1;
for (String row : rows) {
try {
ThingVariable<?> temp = graknAttributeQueryFromRow(row, header);
ThingVariable<?> temp = graknAttributeQueryFromRow(row, header, rowCounter + batchCount);
if (temp != null) {
patterns.add(temp);
}
} catch (Exception e) {
e.printStackTrace();
}
batchCount = batchCount + 1;
}
return patterns;
}

public ThingVariable<Attribute> graknAttributeQueryFromRow(String row,
String header) throws Exception {
String header, int rowCounter) throws Exception {
String fileSeparator = dce.getSeparator();
String[] rowTokens = row.split(fileSeparator);
String[] columnNames = header.split(fileSeparator);
Expand All @@ -58,7 +60,7 @@ public ThingVariable<Attribute> graknAttributeQueryFromRow(String row,
Attribute attributeInsertStatement = null;

for (DataConfigEntry.DataConfigGeneratorMapping generatorMappingForAttribute : dce.getAttributes()) {
attributeInsertStatement = addValue(rowTokens, attributeInitialStatement, columnNames, generatorMappingForAttribute, pce, generatorMappingForAttribute.getPreprocessor());
attributeInsertStatement = addValue(rowTokens, attributeInitialStatement, rowCounter, columnNames, generatorMappingForAttribute, pce, generatorMappingForAttribute.getPreprocessor());
}

if (attributeInsertStatement != null) {
Expand All @@ -68,7 +70,7 @@ public ThingVariable<Attribute> graknAttributeQueryFromRow(String row,
appLogger.debug("valid query: <insert " + attributeInsertStatement.toString() + ";>");
return attributeInsertStatement;
} else {
dataLogger.warn("in datapath <" + dce.getDataPath() + ">: skipped row b/c does not have a proper <isa> statement or is missing required attributes. Faulty tokenized row: " + Arrays.toString(rowTokens));
dataLogger.warn("in datapath <" + dce.getDataPath() + ">: skipped row " + rowCounter + " b/c does not have a proper <isa> statement or is missing required attributes. Faulty tokenized row: " + Arrays.toString(rowTokens));
return null;
}
} else {
Expand Down
23 changes: 13 additions & 10 deletions src/main/java/generator/EntityInsertGenerator.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
package generator;

import static generator.GeneratorUtil.malformedRow;
import static generator.GeneratorUtil.addAttribute;

import configuration.DataConfigEntry;
import configuration.ProcessorConfigEntry;
import graql.lang.Graql;
Expand All @@ -16,12 +13,15 @@
import java.util.Arrays;
import java.util.Map;

import static generator.GeneratorUtil.addAttribute;
import static generator.GeneratorUtil.malformedRow;

public class EntityInsertGenerator extends InsertGenerator {

private final DataConfigEntry dce;
private final ProcessorConfigEntry pce;
private static final Logger appLogger = LogManager.getLogger("com.bayer.dt.grami");
private static final Logger dataLogger = LogManager.getLogger("com.bayer.dt.grami.data");
private final DataConfigEntry dce;
private final ProcessorConfigEntry pce;

public EntityInsertGenerator(DataConfigEntry dataConfigEntry, ProcessorConfigEntry processorConfigEntry) {
super();
Expand All @@ -31,23 +31,26 @@ public EntityInsertGenerator(DataConfigEntry dataConfigEntry, ProcessorConfigEnt
}

public ArrayList<ThingVariable<?>> graknEntityInsert(ArrayList<String> rows,
String header) throws IllegalArgumentException {
String header, int rowCounter) throws IllegalArgumentException {
ArrayList<ThingVariable<?>> patterns = new ArrayList<>();
int batchCount = 1;
for (String row : rows) {
try {
ThingVariable temp = graknEntityQueryFromRow(row, header);
ThingVariable temp = graknEntityQueryFromRow(row, header, rowCounter + batchCount);
if (temp != null) {
patterns.add(temp);
}
} catch (Exception e) {
e.printStackTrace();
}
batchCount = batchCount + 1;
}
return patterns;
}

public ThingVariable graknEntityQueryFromRow(String row,
String header) throws Exception {
String header,
int rowCounter) throws Exception {
String fileSeparator = dce.getSeparator();
String[] rowTokens = row.split(fileSeparator);
String[] columnNames = header.split(fileSeparator);
Expand All @@ -57,14 +60,14 @@ public ThingVariable graknEntityQueryFromRow(String row,
Thing entityInsertStatement = addEntityToStatement();

for (DataConfigEntry.DataConfigGeneratorMapping generatorMappingForAttribute : dce.getAttributes()) {
entityInsertStatement = addAttribute(rowTokens, entityInsertStatement, columnNames, generatorMappingForAttribute, pce, generatorMappingForAttribute.getPreprocessor());
entityInsertStatement = addAttribute(rowTokens, entityInsertStatement, columnNames, rowCounter, generatorMappingForAttribute, pce, generatorMappingForAttribute.getPreprocessor());
}

if (isValid(entityInsertStatement)) {
appLogger.debug("valid query: <insert " + entityInsertStatement.toString() + ";>");
return entityInsertStatement;
} else {
dataLogger.warn("in datapath <" + dce.getDataPath() + ">: skipped row b/c does not have a proper <isa> statement or is missing required attributes. Faulty tokenized row: " + Arrays.toString(rowTokens));
dataLogger.warn("in datapath <" + dce.getDataPath() + ">: skipped row " + rowCounter + " b/c does not have a proper <isa> statement or is missing required attributes. Faulty tokenized row: " + Arrays.toString(rowTokens));
return null;
}
}
Expand Down
Loading

0 comments on commit 2be3ce5

Please sign in to comment.