From e245495cc3bebb1872a5fc9fc84d0d81374057a0 Mon Sep 17 00:00:00 2001 From: hkuich Date: Thu, 8 Apr 2021 15:43:26 +0200 Subject: [PATCH] - finished order of migration entries - extended migration sequence management... there is a default order, can always put something in the end using a "orderAfter" entry in dataConfig --- .../java/configuration/DataConfigEntry.java | 11 +- src/main/java/migrator/GraknMigrator.java | 50 +++++++-- src/test/java/migrator/MigrationTest.java | 2 +- .../genericTests/migrationStatus-test.json | 1 + .../resources/phone-calls/dataConfig.json | 102 ++++++++++++++++++ .../phone-calls/migrationStatus.json | 2 +- src/test/resources/phone-calls/person1.csv | 2 + src/test/resources/phone-calls/person2.csv | 2 + src/test/resources/phone-calls/person3.csv | 2 + 9 files changed, 161 insertions(+), 13 deletions(-) create mode 100644 src/test/resources/phone-calls/person1.csv create mode 100644 src/test/resources/phone-calls/person2.csv create mode 100644 src/test/resources/phone-calls/person3.csv diff --git a/src/main/java/configuration/DataConfigEntry.java b/src/main/java/configuration/DataConfigEntry.java index 8ba6fc6..0cfe78a 100644 --- a/src/main/java/configuration/DataConfigEntry.java +++ b/src/main/java/configuration/DataConfigEntry.java @@ -13,7 +13,8 @@ public class DataConfigEntry { private DataConfigGeneratorMapping[] relationPlayers; private int batchSize; private int threads; - private Integer order; + private Integer orderBefore; + private Integer orderAfter; public String getDataPath() { return dataPath; @@ -47,8 +48,12 @@ public int getThreads() { return threads; } - public Integer getOrder() { - return order; + public Integer getOrderBefore() { + return orderBefore; + } + + public Integer getOrderAfter() { + return orderAfter; } public ArrayList getMatchAttributes() { diff --git a/src/main/java/migrator/GraknMigrator.java b/src/main/java/migrator/GraknMigrator.java index 16fdf89..9876d36 100644 --- a/src/main/java/migrator/GraknMigrator.java +++ b/src/main/java/migrator/GraknMigrator.java @@ -19,9 +19,7 @@ import java.lang.reflect.Type; import java.math.RoundingMode; import java.text.DecimalFormat; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; +import java.util.*; public class GraknMigrator { @@ -146,10 +144,18 @@ private void migrateThingsInOrder(GraknSession session) throws IOException { } appLogger.info("migration of attribute-relations completed"); + // dependency migrations (go by number, low to high) -// appLogger.info("migrating dependency-migrations..."); -// getStatusAndMigrateByDependency(session); -// appLogger.info("migration of attribute-relations completed"); + appLogger.info("migrating order-after entries..."); + migrationBatch = getOrderedAfterEntryMigrationConfigs(); + for (EntryMigrationConfig conf : migrationBatch) { + appLogger.info("starting migration for: [" + conf.getMigrationStatusKey() + "] of order-after " + conf.getDce().getOrderAfter()); + batchDataBuildQueriesAndInsert(conf, session); + updateMigrationStatusIsCompleted(conf.getMigrationStatusKey()); + System.out.println(conf.getDce().getOrderAfter()); + appLogger.info("migration for: [" + conf.getMigrationStatusKey() + "] of order-after " + conf.getDce().getOrderAfter() + " is completed"); + } + appLogger.info("migration of order-after entries completed"); } @@ -160,7 +166,7 @@ private List getEntryMigrationConfigsByProcessorType(Strin ProcessorConfigEntry pce = getProcessorConfigEntry(dce.getProcessor()); String migrationStatusKey = dcEntryKey + "-" + dce.getDataPath(); - if (isOfProcessorType(dce.getProcessor(), processorType) && dce.getOrder() == null) { + if (isOfProcessorType(dce.getProcessor(), processorType) && dce.getOrderAfter() == null && dce.getOrderBefore() == null) { if (migrationStatus != null && migrationStatus.get(migrationStatusKey) != null) { // previous migration present appLogger.info("previous migration status found for: [" + migrationStatusKey + "]"); if (!migrationStatus.get(migrationStatusKey).isCompleted()) { @@ -180,6 +186,34 @@ private List getEntryMigrationConfigsByProcessorType(Strin return entries; } + private List getOrderedAfterEntryMigrationConfigs() { + List entries = new ArrayList<>(); + for (String dcEntryKey : dataConfig.keySet()) { + DataConfigEntry dce = dataConfig.get(dcEntryKey); + ProcessorConfigEntry pce = getProcessorConfigEntry(dce.getProcessor()); + String migrationStatusKey = dcEntryKey + "-" + dce.getDataPath(); + + if (dce.getOrderAfter() != null) { + if (migrationStatus != null && migrationStatus.get(migrationStatusKey) != null) { // previous migration present + appLogger.info("previous migration status found for ordered entry: [" + migrationStatusKey + "]"); + if (!migrationStatus.get(migrationStatusKey).isCompleted()) { + appLogger.info(migrationStatusKey + " not completely migrated yet, rows already migrated: " + migrationStatus.get(migrationStatusKey).getMigratedRows()); + EntryMigrationConfig entry = new EntryMigrationConfig(dce, pce, migrationStatusKey, migrationStatus.get(migrationStatusKey).getMigratedRows(), getProcessor(dce)); + entries.add(entry); + } else { // migration already completed + appLogger.info(migrationStatusKey + " is already completely migrated - moving on..."); + } + } else { // no previous migration + appLogger.info("nothing previously migrated for ordered entry [" + migrationStatusKey + "] - starting with row 0"); + EntryMigrationConfig entry = new EntryMigrationConfig(dce, pce, migrationStatusKey, 0, getProcessor(dce)); + entries.add(entry); + } + } + } + entries.sort(Comparator.comparing(entry -> entry.getDce().getOrderAfter())); + return entries; + } + private boolean isOfProcessorType(String key, String conceptType) { for (ProcessorConfigEntry gce : migrationConfig.getProcessorConfig().get("processors")) { if (gce.getProcessor().equals(key)) { @@ -270,7 +304,7 @@ private void buildQueriesAndInsert(EntryMigrationConfig conf, GraknSession sessi } else if (isOfProcessorType(conf.getDce().getProcessor(), "relation") || isOfProcessorType(conf.getDce().getProcessor(), "nested-relation") || - isOfProcessorType(conf.getDce().getProcessor(), "attribute-relation") ) { + isOfProcessorType(conf.getDce().getProcessor(), "attribute-relation")) { HashMap>>> statements = conf.getInsertGenerator().graknRelationInsert(rows, header, rowCounter - lineCounter); appLogger.trace("number of generated insert Statements: " + statements.get("match").size()); graknInserter.matchInsertThreadedInserting(statements, session, threads, conf.getDce().getBatchSize()); diff --git a/src/test/java/migrator/MigrationTest.java b/src/test/java/migrator/MigrationTest.java index ce6a17d..d598357 100644 --- a/src/test/java/migrator/MigrationTest.java +++ b/src/test/java/migrator/MigrationTest.java @@ -79,7 +79,7 @@ public void testEntities(GraknSession session) { // query all entities of type person read = session.transaction(GraknTransaction.Type.READ); getQuery = Graql.match(var("c").isa("person")).get("c").limit(1000); - Assert.assertEquals(32, read.query().match(getQuery).count()); + Assert.assertEquals(35, read.query().match(getQuery).count()); // query all entites of type company read = session.transaction(GraknTransaction.Type.READ); diff --git a/src/test/resources/genericTests/migrationStatus-test.json b/src/test/resources/genericTests/migrationStatus-test.json index e69de29..21f8d94 100644 --- a/src/test/resources/genericTests/migrationStatus-test.json +++ b/src/test/resources/genericTests/migrationStatus-test.json @@ -0,0 +1 @@ +{"entity3-src/test/resources/genericTests/entity3-test-data.tsv":{"conceptName":"entity3","isCompleted":true,"migratedRows":12},"entity2-src/test/resources/genericTests/entity2-test-data.tsv":{"conceptName":"entity2","isCompleted":true,"migratedRows":12},"rel1-src/test/resources/genericTests/rel1-test-data.tsv":{"conceptName":"rel1","isCompleted":true,"migratedRows":44},"entity1-src/test/resources/genericTests/entity1-test-data.tsv":{"conceptName":"entity1","isCompleted":true,"migratedRows":32}} \ No newline at end of file diff --git a/src/test/resources/phone-calls/dataConfig.json b/src/test/resources/phone-calls/dataConfig.json index 777ac10..13cb535 100644 --- a/src/test/resources/phone-calls/dataConfig.json +++ b/src/test/resources/phone-calls/dataConfig.json @@ -230,5 +230,107 @@ "generator": "status" } ] + }, + "person-order3": { + "orderAfter": 3, + "dataPath": "src/test/resources/phone-calls/person3.csv", + "separator": ",", + "processor": "person", + "batchSize": 50, + "threads": 4, + "attributes": [ + { + "columnName": "first_name", + "generator": "first-name" + }, + { + "columnName": "last_name", + "generator" : "last-name" + }, + { + "columnName": "phone_number", + "generator": "phone-number" + }, + { + "columnName": "city", + "generator": "city" + }, + { + "columnName": "age", + "generator": "age" + }, + { + "columnName": "nick_name", + "generator": "nick-name", + "listSeparator": ";" + }] + }, + "person-order1": { + "orderAfter": 1, + "dataPath": "src/test/resources/phone-calls/person1.csv", + "separator": ",", + "processor": "person", + "batchSize": 50, + "threads": 4, + "attributes": [ + { + "columnName": "first_name", + "generator": "first-name" + }, + { + "columnName": "last_name", + "generator" : "last-name" + }, + { + "columnName": "phone_number", + "generator": "phone-number" + }, + { + "columnName": "city", + "generator": "city" + }, + { + "columnName": "age", + "generator": "age" + }, + { + "columnName": "nick_name", + "generator": "nick-name", + "listSeparator": ";" + }] + }, + "person-order2": { + "orderAfter": 2, + "dataPath": "src/test/resources/phone-calls/person2.csv", + "separator": ",", + "processor": "person", + "batchSize": 50, + "threads": 4, + "attributes": [ + { + "columnName": "first_name", + "generator": "first-name" + }, + { + "columnName": "last_name", + "generator" : "last-name" + }, + { + "columnName": "phone_number", + "generator": "phone-number" + }, + { + "columnName": "city", + "generator": "city" + }, + { + "columnName": "age", + "generator": "age" + }, + { + "columnName": "nick_name", + "generator": "nick-name", + "listSeparator": ";" + }] } } \ No newline at end of file diff --git a/src/test/resources/phone-calls/migrationStatus.json b/src/test/resources/phone-calls/migrationStatus.json index c05578e..a0a350d 100644 --- a/src/test/resources/phone-calls/migrationStatus.json +++ b/src/test/resources/phone-calls/migrationStatus.json @@ -1 +1 @@ -{"append-pp-fakebook-src/test/resources/phone-calls/append-fb-preprocessed.csv":{"conceptName":"person","isCompleted":true,"migratedRows":5},"person-src/test/resources/phone-calls/person.csv":{"conceptName":"person","isCompleted":true,"migratedRows":35},"in-use-src/test/resources/phone-calls/in-use.csv":{"conceptName":"in-use","isCompleted":true,"migratedRows":8},"company-src/test/resources/phone-calls/company.csv":{"conceptName":"company","isCompleted":true,"migratedRows":2},"calls-src/test/resources/phone-calls/call.csv":{"conceptName":"call","isCompleted":true,"migratedRows":218},"contract-src/test/resources/phone-calls/contract.csv":{"conceptName":"contract","isCompleted":true,"migratedRows":12},"is-in-use-src/test/resources/phone-calls/is-in-use.csv":{"conceptName":"is-in-use","isCompleted":true,"migratedRows":2},"communication-channel-pm-src/test/resources/phone-calls/communication-channel-pm.csv":{"conceptName":"communication-channel","isCompleted":true,"migratedRows":6},"append-twitter-src/test/resources/phone-calls/append-twitter.csv":{"conceptName":"person","isCompleted":true,"migratedRows":10},"communication-channel-src/test/resources/phone-calls/communication-channel.csv":{"conceptName":"communication-channel","isCompleted":true,"migratedRows":7},"append-call-rating-src/test/resources/phone-calls/append-call-rating.csv":{"conceptName":"call","isCompleted":true,"migratedRows":7}} \ No newline at end of file +{"in-use-src/test/resources/phone-calls/in-use.csv":{"conceptName":"in-use","isCompleted":true,"migratedRows":8},"contract-src/test/resources/phone-calls/contract.csv":{"conceptName":"contract","isCompleted":true,"migratedRows":12},"is-in-use-src/test/resources/phone-calls/is-in-use.csv":{"conceptName":"is-in-use","isCompleted":true,"migratedRows":2},"communication-channel-pm-src/test/resources/phone-calls/communication-channel-pm.csv":{"conceptName":"communication-channel","isCompleted":true,"migratedRows":6},"append-call-rating-src/test/resources/phone-calls/append-call-rating.csv":{"conceptName":"call","isCompleted":true,"migratedRows":7},"append-pp-fakebook-src/test/resources/phone-calls/append-fb-preprocessed.csv":{"conceptName":"person","isCompleted":true,"migratedRows":5},"person-src/test/resources/phone-calls/person.csv":{"conceptName":"person","isCompleted":true,"migratedRows":35},"person-order3-src/test/resources/phone-calls/person3.csv":{"conceptName":"person","isCompleted":true,"migratedRows":1},"company-src/test/resources/phone-calls/company.csv":{"conceptName":"company","isCompleted":true,"migratedRows":2},"calls-src/test/resources/phone-calls/call.csv":{"conceptName":"call","isCompleted":true,"migratedRows":218},"person-order2-src/test/resources/phone-calls/person2.csv":{"conceptName":"person","isCompleted":true,"migratedRows":1},"append-twitter-src/test/resources/phone-calls/append-twitter.csv":{"conceptName":"person","isCompleted":true,"migratedRows":10},"communication-channel-src/test/resources/phone-calls/communication-channel.csv":{"conceptName":"communication-channel","isCompleted":true,"migratedRows":7},"person-order1-src/test/resources/phone-calls/person1.csv":{"conceptName":"person","isCompleted":true,"migratedRows":1}} \ No newline at end of file diff --git a/src/test/resources/phone-calls/person1.csv b/src/test/resources/phone-calls/person1.csv new file mode 100644 index 0000000..40c7c3b --- /dev/null +++ b/src/test/resources/phone-calls/person1.csv @@ -0,0 +1,2 @@ +first_name,last_name,phone_number,city,age,nick_name +fn_order_1,ln_order_1,+7 171 898 0801,London,1 diff --git a/src/test/resources/phone-calls/person2.csv b/src/test/resources/phone-calls/person2.csv new file mode 100644 index 0000000..76c6041 --- /dev/null +++ b/src/test/resources/phone-calls/person2.csv @@ -0,0 +1,2 @@ +first_name,last_name,phone_number,city,age,nick_name +fn_order_2,ln_order_2,+7 171 898 0802,London,2 diff --git a/src/test/resources/phone-calls/person3.csv b/src/test/resources/phone-calls/person3.csv new file mode 100644 index 0000000..5e01aee --- /dev/null +++ b/src/test/resources/phone-calls/person3.csv @@ -0,0 +1,2 @@ +first_name,last_name,phone_number,city,age,nick_name +fn_order_3,ln_order_3,+7 171 898 0803,London,3