diff --git a/README.md b/README.md index 1c38455..7375f7b 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ Do above for all entities and their attributes in the schema. GraMi will ensure For each relation in your schema, define a processor object that specifies - each relation attribute, its value type, and whether it is required - - each relation player entity type, role, identifying attribute in the data file and its value type, as well as whether the player is required + - each relation player of type entity, its role, identifying attribute in the data file and value type, as well as whether the player is required For example, given the following relation in your schema: @@ -126,7 +126,8 @@ call sub relation, relates caller, relates callee, has started-at, - has duration; + has duration, + plays past-call; ``` Add the following processor object: @@ -143,8 +144,7 @@ Add the following processor object: "uniquePlayerId": "phone-number", // using attribute phone-number as unique identifier for type person "idValueType": "string", // of value type string "roleType": "caller", // inserts person as player the role caller - "required": true // which is a required role for each data record - + "required": true // which is a required role for each data record }, "callee": { // ID of player generator "playerType": "person", // matches entity of type person @@ -172,6 +172,64 @@ Add the following processor object: Do above for all relations and their players and attributes in the schema. GraMi will ensure that all values in your data files adhere to the value type specified or try to cast them. GraMi will also ensure that no data records enter grakn that are incomplete (missing required attributes/players). +##### Relation-Of-Relation Processors + +Grakn comes with the powerful feature of using relations as players in other relations. 
+ +For each relation-of-relation in your schema, define a processor object that specifies + - each relation attribute, its value type, and whether it is required + - each relation player of type entity, its role, identifying attribute in the data file and its value type, as well as whether the player is required + - each relation player of type relation, its role, identifying attribute in the data file and its value type, as well as whether the player is required + +For example, given the following relation in your schema: + +```GraphQL +person sub entity, + ..., + plays peer; + +call sub relation, + relates caller, + relates callee, + has started-at, + has duration, + plays past-call; + +communication-channel sub relation, + relates peer, + relates past-call; +``` + +Add the following processor object: + +``` +{ + "processor": "communication-channel", // the ID of your processor + "processorType": "relation-of-relation", // creates a relation-of-relation + "schemaType": "communication-channel", // of type communication-channel + "conceptGenerators": { + "players": { // with the following players according to schema + "peer": { // ID of player generator + "playerType": "person", // matches entity of type person + "uniquePlayerId": "phone-number", // using attribute phone-number as unique identifier for type person + "idValueType": "string", // of value type string + "roleType": "peer", // inserts person as player of the role peer + "required": true // which is a required role for each data record + }, + "past-call": { // ID of player generator + "playerType": "call", // matches relation of type call + "uniquePlayerId": "started-at", // using attribute started-at as unique identifier for type call + "idValueType": "date", // of value type date + "roleType": "past-call", // inserts call as player of the role past-call + "required": true // which is a required role for each data record + } + } + } +} +``` + +Just remember that these relation-of-relations must be added AFTER the
relations that will act as players in the relation have been migrated. GraMi will migrate all relation-of-relations after having migrated entities and relations - but keep this in mind as you are building your graph - a relation is only inserted as expected when all of its players are already present. + See the [full configuration file for phone-calls here](https://github.com/bayer-science-for-a-better-life/grami/tree/master/src/test/resources/phone-calls/processorConfig.json). #### Data Configuration @@ -257,7 +315,7 @@ The data config entry would be: "players": [ // player columns present in the data file { "columnName": "caller_id", // column name in data file - "generator": "caller" // player generator in processor call to be used for the column + "generator": "caller" // player generator in processor call to be used for the column }, { "columnName": "callee_id", // column name in data file @@ -267,7 +325,7 @@ The data config entry would be: "attributes": [ // attribute columns present in the data file { "columnName": "started_at", // column name in data file - "generator": "started-at" // attribute generator in processor call to be used for the column + "generator": "started-at" // attribute generator in processor call to be used for the column }, { "columnName": "duration", // column name in data file @@ -279,6 +337,83 @@ The data config entry would be: Do above for all data files that need to be migrated. +Please note that you can also specify a listSeparator when a single column contains a list of players: + +Your data might look like: + +``` +company_name,person_id +Unity,+62 999 888 7777###+62 999 888 7778 +``` + +``` +"contract": { + "dataPath": "src/test/resources/phone-calls/contract.csv", + "separator": ",", + "processor": "contract", + "players": [ + { + "columnName": "company_name", + "generator": "provider" + }, + { + "columnName": "person_id", + "generator": "customer", + "listSeparator": "###" // like this!
+ } + ], + "batchSize": 100, + "threads": 4 + } +``` + +For troubleshooting, it might be worth setting the troublesome data configuration entry to a single thread, as the log messages for error from grakn are more verbose and specific that way... + +##### Relation-of-Relation Data Config Entries + +Given the data file [communication-channel.csv](https://github.com/bayer-science-for-a-better-life/grami/tree/master/src/test/resources/phone-calls/communication-channel.csv): + +```CSV +peer_1,peer_2,call_started_at ++54 398 559 0423,+48 195 624 2025,2018-09-16T22:24:19 ++54 398 559 0423,+48 195 624 2025,2018-09-17T22:24:19 ++54 398 559 0423,+48 195 624 2025,2018-09-18T22:24:19 ++54 398 559 0423,+48 195 624 2025,2018-09-19T22:24:19 ++54 398 559 0423,+48 195 624 2025,2018-09-20T22:24:19 ++263 498 495 0617,+33 614 339 0298,2018-09-11T22:10:34###2018-09-12T22:10:34###2018-09-13T22:10:34###2018-09-14T22:10:34###2018-09-15T22:10:34###2018-09-16T22:10:34 ++54 398 559 0423,+7 552 196 4096,2018-09-25T20:24:59 +... +``` + +The data config entry would be: + +``` +"communication-channel": { + "dataPath": "/your/absolute/path/to/communication-channel.csv", // the absolute path to your data file + "separator": ",", // the separation character used in your data file (alternatives: "\t", ";", etc...) 
+ "processor": "communication-channel", // processor from processor config file + "batchSize": 100, // batchSize to be used for this data file + "threads": 4, // # of threads to be used for this data file + "players": [ // player columns present in the data file + { + "columnName": "peer_1", // column name in data file + "generator": "peer" // player generator in processor call to be used for the column + }, + { + "columnName": "peer_2", // column name in data file + "generator": "peer" // player generator in processor call to be used for the column + }, + { + "columnName": "call_started_at", // column name in data file + "generator": "past-call", // player generator in processor call to be used for the column + "listSeparator": "###" + } + ] +} +``` + +Do above for all data files that need to be migrated. + For troubleshooting, it might be worth setting the troublesome data configuration entry to a single thread, as the log messages for error from grakn are more verbose and specific that way... See the [full configuration file for phone-calls here](https://github.com/bayer-science-for-a-better-life/grami/tree/master/src/test/resources/phone-calls/dataConfig.json). 
diff --git a/build.gradle b/build.gradle index cdfde82..994e81c 100644 --- a/build.gradle +++ b/build.gradle @@ -5,7 +5,7 @@ plugins { } group 'com.github.bayer-science-for-a-better-life' -version '0.0.2' +version '0.0.2-hotfix-1' repositories { mavenCentral() diff --git a/src/main/java/generator/GeneratorUtil.java b/src/main/java/generator/GeneratorUtil.java index ce28836..f71fac2 100644 --- a/src/main/java/generator/GeneratorUtil.java +++ b/src/main/java/generator/GeneratorUtil.java @@ -68,7 +68,7 @@ public static StatementInstance cleanExplodeAdd(StatementInstance pattern, Strin } } - private static StatementInstance addAttributeOfColumnType(StatementInstance pattern, String conceptType, String valueType, String cleanedValue) { + public static StatementInstance addAttributeOfColumnType(StatementInstance pattern, String conceptType, String valueType, String cleanedValue) { if (valueType.equals("string")) { pattern = pattern.has(conceptType, cleanedValue); } else if (valueType.equals("long")) { diff --git a/src/main/java/generator/RelationInsertGenerator.java b/src/main/java/generator/RelationInsertGenerator.java index 565cd3a..4f41120 100644 --- a/src/main/java/generator/RelationInsertGenerator.java +++ b/src/main/java/generator/RelationInsertGenerator.java @@ -3,6 +3,7 @@ import static generator.GeneratorUtil.idxOf; import static generator.GeneratorUtil.cleanToken; import static generator.GeneratorUtil.addAttribute; +import static generator.GeneratorUtil.addAttributeOfColumnType; import configuration.DataConfigEntry; import configuration.ProcessorConfigEntry; @@ -12,10 +13,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; +import java.util.*; public class RelationInsertGenerator extends InsertGenerator { @@ -62,12 +60,13 @@ public ArrayList> graknRelationshipQueryFromRow(String row, appLogger.debug("processing 
tokenized row: " + Arrays.toString(tokens)); GeneratorUtil.malformedRow(row, tokens, headerTokens.length); - ArrayList matchStatements = new ArrayList<>(playersMatch(tokens, headerTokens, insertCounter)); + ArrayList miStatements = new ArrayList<>(createPlayerMatchAndInsert(tokens, headerTokens, insertCounter)); + ArrayList matchStatements = new ArrayList<>(miStatements.subList(0, miStatements.size() - 1)); ArrayList insertStatements = new ArrayList<>(); if (!matchStatements.isEmpty()) { - StatementInstance insert = playersInsert(matchStatements, insertCounter); - insert = relationInsert(insert); + StatementInstance playerInsert = (StatementInstance) miStatements.subList(miStatements.size() - 1, miStatements.size()).get(0); + StatementInstance insert = relationInsert(playerInsert); if (dce.getAttributes() != null) { for (DataConfigEntry.GeneratorSpecification attDataConfigEntry : dce.getAttributes()) { insert = addAttribute(tokens, insert, headerTokens, attDataConfigEntry, gce.getAttributeGenerator(attDataConfigEntry.getGenerator())); @@ -101,61 +100,61 @@ private String assembleQuery(ArrayList> queries) { return ret.toString(); } - private StatementInstance relationInsert(StatementInstance si) { - if (si != null) { - si = si.isa(gce.getSchemaType()); - return si; - } else { - return null; - } - } - - private StatementInstance playersInsert(ArrayList matchStatements, int insertCounter) { - Statement s = Graql.var("rel-" + insertCounter); - int playerCounter = 0; - for (DataConfigEntry.GeneratorSpecification dataPlayer : dce.getPlayers()) { - ProcessorConfigEntry.ConceptGenerator playerGenerator = gce.getPlayerGenerator(dataPlayer.getGenerator()); - boolean insert = false; - for (Statement st :matchStatements) { - //need to have player in match statement or cannot insert as player in relation - if (st.toString().contains(playerGenerator.getUniquePlayerId())) { - insert = true; - } - } - if (insert) { - s = s.rel(playerGenerator.getRoleType(), 
playerGenerator.getPlayerType() + "-" + playerCounter + "-" + insertCounter); - } - playerCounter++; - } - if (s.toString().contains("(")) { - return (StatementInstance) s; - } else { - return null; - } - } - - private Collection playersMatch(String[] tokens, String[] headerTokens, int insertCounter) { + private Collection createPlayerMatchAndInsert(String[] tokens, String[] headerTokens, int insertCounter) { ArrayList players = new ArrayList<>(); + Statement playersInsertStatement = Graql.var("rel-" + insertCounter); int playerCounter = 0; for (DataConfigEntry.GeneratorSpecification playerDataConfigEntry : dce.getPlayers()) { ProcessorConfigEntry.ConceptGenerator playerGenerator = gce.getPlayerGenerator(playerDataConfigEntry.getGenerator()); int playerDataIndex = idxOf(headerTokens, playerDataConfigEntry); + if(playerDataIndex == -1) { appLogger.error("The column header in your dataconfig mapping to the uniquePlayerId [" + playerGenerator.getUniquePlayerId() + "] cannot be found in the file you specified."); } - if (tokens.length > playerDataIndex && - !cleanToken(tokens[playerDataIndex]).isEmpty()) { - StatementInstance ms = Graql - .var(playerGenerator.getPlayerType() + "-" + playerCounter + "-" + insertCounter) - .isa(playerGenerator.getPlayerType()).has(playerGenerator.getUniquePlayerId(), - cleanToken(tokens[playerDataIndex])); - players.add(ms); + + if (tokens.length > playerDataIndex && // make sure that there are enough tokens in the row for your column of interest + !cleanToken(tokens[playerDataIndex]).isEmpty()) { // make sure that after cleaning, there is more than an empty string + String listSeparator = playerDataConfigEntry.getListSeparator(); + if(listSeparator != null) { + for (String exploded: tokens[playerDataIndex].split(listSeparator)) { + if(!cleanToken(exploded).isEmpty()) { + String playerVariable = playerGenerator.getPlayerType() + "-" + playerCounter + "-" + insertCounter; + players.add(createPlayerMatchStatement(exploded, playerGenerator, 
playerVariable)); + playersInsertStatement = playersInsertStatement.rel(playerGenerator.getRoleType(), playerVariable); + playerCounter++; + } + } + } else { // single player, no listSeparator + String playerVariable = playerGenerator.getPlayerType() + "-" + playerCounter + "-" + insertCounter; + players.add(createPlayerMatchStatement(cleanToken(tokens[playerDataIndex]), playerGenerator, playerVariable)); + playersInsertStatement = playersInsertStatement.rel(playerGenerator.getRoleType(), playerVariable); + playerCounter++; + } } - playerCounter++; } + players.add(playersInsertStatement); return players; } + private StatementInstance createPlayerMatchStatement(String token, ProcessorConfigEntry.ConceptGenerator playerGenerator, String playerVariable) { + String cleanedValue = cleanToken(token); + StatementInstance ms = Graql + .var(playerVariable) + .isa(playerGenerator.getPlayerType()); + ms = addAttributeOfColumnType(ms, playerGenerator.getUniquePlayerId(), playerGenerator.getIdValueType(), cleanedValue); + //.has(playerGenerator.getUniquePlayerId(), cleanedValue); + return ms; + } + + private StatementInstance relationInsert(StatementInstance si) { + if (si != null) { + si = si.isa(gce.getSchemaType()); + return si; + } else { + return null; + } + } + private boolean isValid(ArrayList> si) { ArrayList matchStatements = si.get(0); ArrayList insertStatements = si.get(1); @@ -169,6 +168,9 @@ private boolean isValid(ArrayList> si) { if (!matchStatement.toString().contains("isa " + generatorEntry.getValue().getPlayerType())) { return false; } + if (!insertStatement.contains(generatorEntry.getValue().getRoleType())) { + return false; + } } // missing required attribute for (Map.Entry generatorEntry: gce.getRequiredAttributes().entrySet()) { diff --git a/src/main/java/migrator/GraknMigrator.java b/src/main/java/migrator/GraknMigrator.java index a959d09..f88d3ba 100644 --- a/src/main/java/migrator/GraknMigrator.java +++ b/src/main/java/migrator/GraknMigrator.java @@ 
-74,6 +74,9 @@ public void migrate(boolean migrateEntities, boolean migrateRelations) throws IO } private void migrateThingsInOrder(GraknClient.Session session, boolean migrateEntities, boolean migrateRelations) throws IOException { + if(!migrateEntities && migrateRelations) { + migrateEntities = true; + } if (migrateEntities) { appLogger.info("migrating entities..."); getStatusAndMigrate(session, "entity"); diff --git a/src/test/java/generator/RelationInsertGeneratorTest.java b/src/test/java/generator/RelationInsertGeneratorTest.java index 6435b3f..6576651 100644 --- a/src/test/java/generator/RelationInsertGeneratorTest.java +++ b/src/test/java/generator/RelationInsertGeneratorTest.java @@ -35,18 +35,48 @@ public void graknRelationQueryFromRowTest() throws Exception { ArrayList>> result = testRelationInsertGenerator.graknRelationInsert(rows, header); + // test all there + String tc2m = "$entity1-0-2 isa entity1, has entity1-id \"entity1id1\";$entity2-1-2 isa entity2, has entity2-id \"entity2id1\";$entity3-2-2 isa entity3, has entity3-id \"entity3id1\";"; + Assert.assertEquals(tc2m, concatMatches(result.get(0).get(2))); + String tc2i = "$rel-2 (player-one: $entity1-0-2, player-two: $entity2-1-2, player-optional: $entity3-2-2) isa rel1, has relAt-1 \"att2\", has relAt-2 \"opt2\";"; + Assert.assertEquals(tc2i, result.get(1).get(2).get(0).toString()); + + // test no optional player & no optional attribute + String tc15m = "$entity1-0-15 isa entity1, has entity1-id \"entity1id1\";$entity2-1-15 isa entity2, has entity2-id \"entity2id1\";"; + Assert.assertEquals(tc15m, concatMatches(result.get(0).get(15))); + String tc15i = "$rel-15 (player-one: $entity1-0-15, player-two: $entity2-1-15) isa rel1, has relAt-1 \"att15\";"; + Assert.assertEquals(tc15i, result.get(1).get(15).get(0).toString()); + + // test attribute explosion String tc0m = "$entity1-0-0 isa entity1, has entity1-id \"entity1id1\";$entity2-1-0 isa entity2, has entity2-id \"entity2id1\";$entity3-2-0 isa 
entity3, has entity3-id \"entity3id1\";"; Assert.assertEquals(tc0m, concatMatches(result.get(0).get(0))); String tc0i = "$rel-0 (player-one: $entity1-0-0, player-two: $entity2-1-0, player-optional: $entity3-2-0) isa rel1, has relAt-1 \"att0\", has relAt-1 \"explosion0\", has relAt-2 \"opt0\";"; Assert.assertEquals(tc0i, result.get(1).get(0).get(0).toString()); + // test empty explosion String tc10m = "$entity1-0-9 isa entity1, has entity1-id \"entity1id1\";$entity2-1-9 isa entity2, has entity2-id \"entity2id1\";$entity3-2-9 isa entity3, has entity3-id \"entity3id1\";"; Assert.assertEquals(tc10m, concatMatches(result.get(0).get(9))); String tc10i = "$rel-9 (player-one: $entity1-0-9, player-two: $entity2-1-9, player-optional: $entity3-2-9) isa rel1, has relAt-1 \"att9\", has relAt-2 \"opt9\";"; Assert.assertEquals(tc10i, result.get(1).get(9).get(0).toString()); + // test exploded player complete + String tc25m = "$entity1-0-25 isa entity1, has entity1-id \"entity1id1\";$entity1-1-25 isa entity1, has entity1-id \"entity1id2\";$entity2-2-25 isa entity2, has entity2-id \"entity2id1\";$entity3-3-25 isa entity3, has entity3-id \"entity3id1\";"; + Assert.assertEquals(tc25m, concatMatches(result.get(0).get(25))); + String tc25i = "$rel-25 (player-one: $entity1-0-25, player-one: $entity1-1-25, player-two: $entity2-2-25, player-optional: $entity3-3-25) isa rel1, has relAt-1 \"att39\", has relAt-2 \"opt39\";"; + Assert.assertEquals(tc25i, result.get(1).get(25).get(0).toString()); + + // test exploded player without optional player and optional attribute + String tc26m = "$entity1-0-26 isa entity1, has entity1-id \"entity1id1\";$entity1-1-26 isa entity1, has entity1-id \"entity1id2\";$entity2-2-26 isa entity2, has entity2-id \"entity2id1\";"; + Assert.assertEquals(tc26m, concatMatches(result.get(0).get(26))); + String tc26i = "$rel-26 (player-one: $entity1-0-26, player-one: $entity1-1-26, player-two: $entity2-2-26) isa rel1, has relAt-1 \"att40\";"; + Assert.assertEquals(tc26i, 
result.get(1).get(26).get(0).toString()); + Assert.assertEquals(2, result.size()); - Assert.assertEquals(25, result.get(0).size()); + + // number of match statements = number of valid statements that would be inserted + Assert.assertEquals(27, result.get(0).size()); + Assert.assertEquals(27, result.get(1).size()); + } } \ No newline at end of file diff --git a/src/test/java/migrator/MigrationTest.java b/src/test/java/migrator/MigrationTest.java index 9d58d17..6cbe293 100644 --- a/src/test/java/migrator/MigrationTest.java +++ b/src/test/java/migrator/MigrationTest.java @@ -35,5 +35,36 @@ public void migratePhoneCallsTest() throws IOException { GraknMigrator mig = new GraknMigrator(migrationConfig, msp, true); mig.migrate(true, true); } + + @Test + public void checkCorrectInsertPhoneCallsTest() throws IOException { + + String keyspaceName = "grami_phone_call_test"; + String asp = getAbsPath("src/test/resources/phone-calls/schema.gql"); + String msp = getAbsPath("src/test/resources/phone-calls/migrationStatus.json"); + String adcp = getAbsPath("src/test/resources/phone-calls/dataConfig.json"); + String gcp = getAbsPath("src/test/resources/phone-calls/processorConfig.json"); + + MigrationConfig migrationConfig = new MigrationConfig("localhost:48555",keyspaceName, asp, adcp, gcp); + GraknMigrator mig = new GraknMigrator(migrationConfig, msp, true); + mig.migrate(true, true); + + //TODO + } + + @Test + public void issue10Test() throws IOException { + + String keyspaceName = "issue10"; + String asp = getAbsPath("src/test/resources/bugfixing/issue10/schema.gql"); + String msp = getAbsPath("src/test/resources/bugfixing/issue10/migrationStatus.json"); + String adcp = getAbsPath("src/test/resources/bugfixing/issue10/dataConfig.json"); + String gcp = getAbsPath("src/test/resources/bugfixing/issue10/processorConfig.json"); + + MigrationConfig migrationConfig = new MigrationConfig("localhost:48555",keyspaceName, asp, adcp, gcp); + GraknMigrator mig = new 
GraknMigrator(migrationConfig, msp, true); + mig.migrate(true, true); + + } } diff --git a/src/test/resources/bugfixing/issue10/dataConfig.json b/src/test/resources/bugfixing/issue10/dataConfig.json new file mode 100644 index 0000000..31e4b93 --- /dev/null +++ b/src/test/resources/bugfixing/issue10/dataConfig.json @@ -0,0 +1,43 @@ +{ + "text": { + "dataPath": "src/test/resources/bugfixing/issue10/text.csv", + "separator": ",", + "processor": "text", + "batchSize": 50, + "threads": 4, + "attributes": [ + { + "columnName": "uid", + "generator": "uid" + }] + }, + "label": { + "dataPath": "src/test/resources/bugfixing/issue10/label.csv", + "separator": ",", + "processor": "label", + "batchSize": 50, + "threads": 4, + "attributes": [ + { + "columnName": "name", + "generator": "name" + }] + }, + "tag": { + "dataPath": "src/test/resources/bugfixing/issue10/tag.csv", + "separator": ",", + "processor": "tag", + "batchSize": 50, + "threads": 4, + "players": [ + { + "columnName": "label_name", + "generator": "tagger" + }, + { + "columnName": "text_id", + "generator": "tagged" + } + ] + } +} \ No newline at end of file diff --git a/src/test/resources/bugfixing/issue10/label.csv b/src/test/resources/bugfixing/issue10/label.csv new file mode 100644 index 0000000..2b0b592 --- /dev/null +++ b/src/test/resources/bugfixing/issue10/label.csv @@ -0,0 +1,4 @@ +name +blue +flower +might \ No newline at end of file diff --git a/src/test/resources/bugfixing/issue10/processorConfig.json b/src/test/resources/bugfixing/issue10/processorConfig.json new file mode 100644 index 0000000..76d0d77 --- /dev/null +++ b/src/test/resources/bugfixing/issue10/processorConfig.json @@ -0,0 +1,56 @@ +{ + "processors": + [ + { + "processor": "text", + "processorType": "entity", + "schemaType": "text", + "conceptGenerators": { + "attributes": { + "uid": { + "attributeType": "uid", + "valueType": "long", + "required": true + } + } + } + }, + { + "processor": "label", + "processorType": "entity", + 
"schemaType": "label", + "conceptGenerators": { + "attributes": { + "name": { + "attributeType": "name", + "valueType": "string", + "required": true + } + } + } + }, + { + "processor": "tag", + "processorType": "relation", + "schemaType": "tag", + "conceptGenerators": { + "players": { + "tagger": { + "playerType": "label", + "roleType": "tagger", + "required": true, + "uniquePlayerId": "name", + "idValueType": "string" + }, + "tagged": { + "playerType": "text", + "roleType": "tagged", + "required": true, + "uniquePlayerId": "uid", + "idValueType": "long" + } + } + } + } + ] +} diff --git a/src/test/resources/bugfixing/issue10/schema.gql b/src/test/resources/bugfixing/issue10/schema.gql new file mode 100644 index 0000000..354e220 --- /dev/null +++ b/src/test/resources/bugfixing/issue10/schema.gql @@ -0,0 +1,15 @@ +define + +text sub entity, + key uid, + plays tagged; +uid sub attribute, value long; + +label sub entity, + key name, + plays tagger; +name sub attribute, value string; + +tag sub relation, + relates tagger, + relates tagged; \ No newline at end of file diff --git a/src/test/resources/bugfixing/issue10/tag.csv b/src/test/resources/bugfixing/issue10/tag.csv new file mode 100644 index 0000000..018155f --- /dev/null +++ b/src/test/resources/bugfixing/issue10/tag.csv @@ -0,0 +1,4 @@ +label_name,text_id +blue,28974 +flower,83682 +might,263684 \ No newline at end of file diff --git a/src/test/resources/bugfixing/issue10/text.csv b/src/test/resources/bugfixing/issue10/text.csv new file mode 100644 index 0000000..096ceb5 --- /dev/null +++ b/src/test/resources/bugfixing/issue10/text.csv @@ -0,0 +1,4 @@ +uid +28974 +83682 +263684 \ No newline at end of file diff --git a/src/test/resources/genericTests/dataConfig-test.json b/src/test/resources/genericTests/dataConfig-test.json index e58a7ea..c999b85 100644 --- a/src/test/resources/genericTests/dataConfig-test.json +++ b/src/test/resources/genericTests/dataConfig-test.json @@ -1,6 +1,6 @@ { "entity1": { - "dataPath": 
"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/entity1-test-data.tsv", + "dataPath": "src/test/resources/genericTests/entity1-test-data.tsv", "separator": "\t", "processor": "entity1", "attributes": [ @@ -22,7 +22,7 @@ "threads": 4 }, "entity2": { - "dataPath": "/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/entity2-test-data.tsv", + "dataPath": "src/test/resources/genericTests/entity2-test-data.tsv", "separator": "\t", "processor": "entity2", "attributes": [ @@ -43,7 +43,7 @@ "threads": 4 }, "entity3": { - "dataPath": "/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/entity3-test-data.tsv", + "dataPath": "src/test/resources/genericTests/entity3-test-data.tsv", "separator": "\t", "processor": "entity3", "attributes": [ @@ -60,13 +60,14 @@ "threads": 1 }, "rel1": { - "dataPath": "/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/rel1-test-data.tsv", + "dataPath": "src/test/resources/genericTests/rel1-test-data.tsv", "separator": "\t", "processor": "rel1", "players": [ { "columnName": "entity1-id", - "generator": "player-one" + "generator": "player-one", + "listSeparator": "###" }, { "columnName": "entity2-id", diff --git a/src/test/resources/genericTests/migrationStatus-test.json b/src/test/resources/genericTests/migrationStatus-test.json index 573a89f..421f67b 100644 --- a/src/test/resources/genericTests/migrationStatus-test.json +++ b/src/test/resources/genericTests/migrationStatus-test.json @@ -1 +1 @@ 
-{"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/entity1-test-data.tsv":{"conceptName":"entity1","isCompleted":true,"migratedRows":32},"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/entity2-test-data.tsv":{"conceptName":"entity2","isCompleted":true,"migratedRows":12},"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/entity3-test-data.tsv":{"conceptName":"entity3","isCompleted":true,"migratedRows":12},"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/genericTests/rel1-test-data.tsv":{"conceptName":"rel1","isCompleted":true,"migratedRows":39}} \ No newline at end of file +{"src/test/resources/genericTests/entity1-test-data.tsv":{"conceptName":"entity1","isCompleted":true,"migratedRows":32},"src/test/resources/genericTests/entity2-test-data.tsv":{"conceptName":"entity2","isCompleted":true,"migratedRows":12},"src/test/resources/genericTests/entity3-test-data.tsv":{"conceptName":"entity3","isCompleted":true,"migratedRows":12},"src/test/resources/genericTests/rel1-test-data.tsv":{"conceptName":"rel1","isCompleted":true,"migratedRows":44}} \ No newline at end of file diff --git a/src/test/resources/genericTests/rel1-test-data.tsv b/src/test/resources/genericTests/rel1-test-data.tsv index d3f9e67..2a25fc5 100644 --- a/src/test/resources/genericTests/rel1-test-data.tsv +++ b/src/test/resources/genericTests/rel1-test-data.tsv @@ -38,3 +38,8 @@ entity1id1 entity2id1 entity3id1 entity1id1 entity3id1 att37 opt36 entity1id1 entity3id1 att38 opt37 entity1id1 entity3id1 att39 opt38 +entity1id1###entity1id2 entity2id1 entity3id1 att39 opt39 +entity1id1###entity1id2 entity2id1 att40 +### entity2id1 entity3id1 att41 opt41 + ### entity2id1 entity3id1 att42 opt42 + ### entity2id1 entity3id1 att43 opt43 diff --git a/src/test/resources/phone-calls/call.csv b/src/test/resources/phone-calls/call.csv index 5a74e27..2608b98 100644 --- 
a/src/test/resources/phone-calls/call.csv +++ b/src/test/resources/phone-calls/call.csv @@ -1,8 +1,17 @@ caller_id,callee_id,started_at,duration +54 398 559 0423,+48 195 624 2025,2018-09-16T22:24:19,122 ++54 398 559 0423,+48 195 624 2025,2018-09-17T22:24:19,122 ++54 398 559 0423,+48 195 624 2025,2018-09-18T22:24:19,122 ++54 398 559 0423,+48 195 624 2025,2018-09-19T22:24:19,122 ++54 398 559 0423,+48 195 624 2025,2018-09-20T22:24:19,122 +263 498 495 0617,+48 195 624 2025,2018-09-18T01:34:48,514 +81 308 988 7153,+33 614 339 0298,2018-09-21T20:21:17,120 -+263 498 495 0617,+33 614 339 0298,2018-09-17T22:10:34,144 ++263 498 495 0617,+33 614 339 0298,2018-09-11T22:10:34,144 ++263 498 495 0617,+33 614 339 0298,2018-09-12T22:10:34,144 ++263 498 495 0617,+33 614 339 0298,2018-09-13T22:10:34,144 ++263 498 495 0617,+33 614 339 0298,2018-09-14T22:10:34,144 ++263 498 495 0617,+33 614 339 0298,2018-09-15T22:10:34,144 ++263 498 495 0617,+33 614 339 0298,2018-09-16T22:10:34,144 +54 398 559 0423,+7 552 196 4096,2018-09-25T20:24:59,556 +81 308 988 7153,+351 515 605 7915,2018-09-23T22:23:25,336 +261 860 539 4754,+351 272 414 6570,2018-09-26T05:34:19,405 @@ -199,3 +208,8 @@ caller_id,callee_id,started_at,duration +81 746 154 2598,+351 515 605 7915,2018-09-19T08:10:14,76 +54 398 559 0423,+81 746 154 2598,2018-09-18T22:47:52,5356 +63 815 962 6097,+7 552 196 4096,2018-09-23T01:14:56,53 ++63 815 962 6097,+7 552 196 4096,2018-09-23T01:14:56, ++63 815 962 6097,+7 552 196 4096,,53 ++63 815 962 6097,,2018-09-23T01:14:56,53 +,+7 552 196 4096,2018-09-23T01:14:56,53 + diff --git a/src/test/resources/phone-calls/communication-channel.csv b/src/test/resources/phone-calls/communication-channel.csv new file mode 100644 index 0000000..1a4b677 --- /dev/null +++ b/src/test/resources/phone-calls/communication-channel.csv @@ -0,0 +1,10 @@ +peer_1,peer_2,call_started_at ++54 398 559 0423,+48 195 624 2025,2018-09-16T22:24:19 ++54 398 559 0423,+48 195 624 2025,2018-09-17T22:24:19 ++54 398 559 0423,+48 195 
624 2025,2018-09-18T22:24:19 ++54 398 559 0423,+48 195 624 2025,2018-09-19T22:24:19 ++54 398 559 0423,+48 195 624 2025,2018-09-20T22:24:19 ++263 498 495 0617,+33 614 339 0298,2018-09-11T22:10:34### 2018-09-12T22:10:34###2018-09-13T22:10:34 ###2018-09-14T22:10:34###2018-09-15T22:10:34###2018-09-16T22:10:34 ++54 398 559 0423,+7 552 196 4096,2018-09-25T20:24:59 ++7 690 597 4443,+54 398 559 0423, ++7 690 597 4443,+54 398 559 0423, diff --git a/src/test/resources/phone-calls/company.csv b/src/test/resources/phone-calls/company.csv index f58e37a..654e636 100644 --- a/src/test/resources/phone-calls/company.csv +++ b/src/test/resources/phone-calls/company.csv @@ -1,2 +1,3 @@ name Telecom +Unity diff --git a/src/test/resources/phone-calls/contract.csv b/src/test/resources/phone-calls/contract.csv index 59583f6..cbd589b 100644 --- a/src/test/resources/phone-calls/contract.csv +++ b/src/test/resources/phone-calls/contract.csv @@ -7,5 +7,7 @@ Telecom,+7 690 597 4443 Telecom,+263 498 495 0617 Telecom,+63 815 962 6097 Telecom,+81 746 154 2598 + Telecom,+261 860 539 4754 Telecom,+62 107 530 7500 +Unity,+62 999 888 7777###+62 999 888 7778 diff --git a/src/test/resources/phone-calls/dataConfig.json b/src/test/resources/phone-calls/dataConfig.json index 97d623c..8a4b0a4 100644 --- a/src/test/resources/phone-calls/dataConfig.json +++ b/src/test/resources/phone-calls/dataConfig.json @@ -1,6 +1,6 @@ { "person": { - "dataPath": "/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/person.csv", + "dataPath": "src/test/resources/phone-calls/person.csv", "separator": ",", "processor": "person", "batchSize": 2000, @@ -33,7 +33,7 @@ }] }, "company": { - "dataPath": "/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/company.csv", + "dataPath": "src/test/resources/phone-calls/company.csv", "separator": ",", "processor": "company", "attributes": [ @@ -45,7 +45,7 @@ "threads": 4 }, "contract": { - "dataPath": 
"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/contract.csv", + "dataPath": "src/test/resources/phone-calls/contract.csv", "separator": ",", "processor": "contract", "players": [ @@ -55,14 +55,15 @@ }, { "columnName": "person_id", - "generator": "customer" + "generator": "customer", + "listSeparator": "###" } ], "batchSize": 100, "threads": 4 }, "calls": { - "dataPath": "/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/call.csv", + "dataPath": "src/test/resources/phone-calls/call.csv", "separator": ",", "processor": "call", "players": [ @@ -87,5 +88,27 @@ ], "batchSize": 100, "threads": 4 + }, + "communication-channel": { + "dataPath": "src/test/resources/phone-calls/communication-channel.csv", + "separator": ",", + "processor": "communication-channel", + "batchSize": 100, + "threads": 4, + "players": [ + { + "columnName": "peer_1", + "generator": "peer" + }, + { + "columnName": "peer_2", + "generator": "peer" + }, + { + "columnName": "call_started_at", + "generator": "past-call", + "listSeparator": "###" + } + ] } } \ No newline at end of file diff --git a/src/test/resources/phone-calls/migrationStatus.json b/src/test/resources/phone-calls/migrationStatus.json index 5f6be86..0a48da1 100644 --- a/src/test/resources/phone-calls/migrationStatus.json +++ b/src/test/resources/phone-calls/migrationStatus.json @@ -1 +1 @@ 
-{"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/contract.csv":{"conceptName":"contract","isCompleted":true,"migratedRows":10},"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/person.csv":{"conceptName":"person","isCompleted":true,"migratedRows":30},"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/call.csv":{"conceptName":"call","isCompleted":true,"migratedRows":200},"/Users/henning.kuich@bayer.com/IdeaProjects/GraMi/src/test/resources/phone-calls/company.csv":{"conceptName":"company","isCompleted":true,"migratedRows":1}} \ No newline at end of file +{"src/test/resources/phone-calls/person.csv":{"conceptName":"person","isCompleted":true,"migratedRows":32},"src/test/resources/phone-calls/call.csv":{"conceptName":"call","isCompleted":true,"migratedRows":214},"src/test/resources/phone-calls/company.csv":{"conceptName":"company","isCompleted":true,"migratedRows":2},"src/test/resources/phone-calls/contract.csv":{"conceptName":"contract","isCompleted":true,"migratedRows":12}} \ No newline at end of file diff --git a/src/test/resources/phone-calls/person.csv b/src/test/resources/phone-calls/person.csv index 07a0fb0..9393b9f 100644 --- a/src/test/resources/phone-calls/person.csv +++ b/src/test/resources/phone-calls/person.csv @@ -28,4 +28,6 @@ Elenore,Stokey,+62 107 530 7500,Oxford,35, ,,+48 195 624 2025,,, ,,+1 254 875 4647,,, ,,+7 552 196 4096,,, -,,+86 892 682 0628,,, \ No newline at end of file +,,+86 892 682 0628,,, +John,Smith,+62 999 888 7777,London,43,Jack;J +Jane,Smith,+62 999 888 7778,London,43, \ No newline at end of file diff --git a/src/test/resources/phone-calls/processorConfig.json b/src/test/resources/phone-calls/processorConfig.json index 3ab8c2b..059ee8e 100644 --- a/src/test/resources/phone-calls/processorConfig.json +++ b/src/test/resources/phone-calls/processorConfig.json @@ -110,6 +110,29 @@ } } } + }, + { + "processor": "communication-channel", + 
"processorType": "relation-of-relation", + "schemaType": "communication-channel", + "conceptGenerators": { + "players": { + "peer": { + "playerType": "person", + "uniquePlayerId": "phone-number", + "idValueType": "string", + "roleType": "peer", + "required": true + }, + "past-call": { + "playerType": "call", + "uniquePlayerId": "started-at", + "idValueType": "datetime", + "roleType": "past-call", + "required": true + } + } + } } ] } \ No newline at end of file diff --git a/src/test/resources/phone-calls/schema.gql b/src/test/resources/phone-calls/schema.gql index c0d5489..6ae83fe 100644 --- a/src/test/resources/phone-calls/schema.gql +++ b/src/test/resources/phone-calls/schema.gql @@ -27,7 +27,12 @@ define relates caller, relates callee, has started-at, - has duration; + has duration, + plays past-call; + + communication-channel sub relation, + relates peer, + relates past-call; company sub entity, plays provider, @@ -42,4 +47,5 @@ define has phone-number, has city, has age, - has nick-name; \ No newline at end of file + has nick-name, + plays peer;