-
Notifications
You must be signed in to change notification settings - Fork 380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[#3607] feat(core): Support to import the entities when loading entities #3623
Changes from 4 commits
99b1caf
3dac56c
d5ca7e7
c831685
fe731f2
1b8f375
065d229
b5d5cd2
f39720a
08cf4e2
e762b58
ad5400a
e12c766
bde6b0d
870670e
8e2a7d3
e661e56
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import com.datastrato.gravitino.Schema; | ||
import com.datastrato.gravitino.meta.AuditInfo; | ||
import com.datastrato.gravitino.meta.SchemaEntity; | ||
import java.util.Collections; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
|
@@ -23,6 +24,7 @@ public final class EntityCombinedSchema implements Schema { | |
|
||
// Sets of properties that should be hidden from the user. | ||
private Set<String> hiddenProperties; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps we can handle this not in this PR, we may need a class named 'combinedEntity' to unify the handling of entities from different sources. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mchades what is the meaning here, can you explain more? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean do we need to provide an abstract class |
||
private boolean imported; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you should add some comments about what it was used for. What should we do if it's true or false? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Further information may be required, such as why we need to import it to the Gravitino-managed store. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@qqqttt123 could you take a look at these comments and check whether they need to be resolved? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let me update the doc. |
||
|
||
private EntityCombinedSchema(Schema schema, SchemaEntity schemaEntity) { | ||
this.schema = schema; | ||
|
@@ -42,6 +44,11 @@ public EntityCombinedSchema withHiddenPropertiesSet(Set<String> hiddenProperties | |
return this; | ||
} | ||
|
||
// Fluent setter for the imported flag: stores the given value on this instance and | ||
// returns this same instance so that calls can be chained. | ||
// NOTE(review): judging from the callers visible in this diff, true appears to mean the | ||
// schema already has an entity in Gravitino's store (or is managed by it) - confirm. | ||
public EntityCombinedSchema withImported(boolean imported) { | ||
this.imported = imported; | ||
return this; | ||
} | ||
|
||
@Override | ||
public String name() { | ||
return schema.name(); | ||
|
@@ -73,4 +80,12 @@ public Audit auditInfo() { | |
? schema.auditInfo() | ||
: mergedAudit.merge(schemaEntity.auditInfo(), true /* overwrite */); | ||
} | ||
|
||
// Returns the current value of the imported flag held by this combined schema. | ||
// NOTE(review): presumably false unless withImported(true) was called - confirm that no | ||
// other code path assigns the field. | ||
public boolean imported() { | ||
return imported; | ||
} | ||
|
||
Map<String, String> schemaProperties() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you need this method, can you use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I need a get There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you should define a better method for your requirement, also is it package public or public? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Package public is enough now. |
||
return Collections.unmodifiableMap(schema.properties()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,8 @@ | |
import com.datastrato.gravitino.exceptions.NoSuchSchemaException; | ||
import com.datastrato.gravitino.exceptions.NonEmptySchemaException; | ||
import com.datastrato.gravitino.exceptions.SchemaAlreadyExistsException; | ||
import com.datastrato.gravitino.lock.LockType; | ||
import com.datastrato.gravitino.lock.TreeLockUtils; | ||
import com.datastrato.gravitino.meta.AuditInfo; | ||
import com.datastrato.gravitino.meta.SchemaEntity; | ||
import com.datastrato.gravitino.storage.IdGenerator; | ||
|
@@ -159,47 +161,17 @@ public Schema createSchema(NameIdentifier ident, String comment, Map<String, Str | |
*/ | ||
@Override | ||
public Schema loadSchema(NameIdentifier ident) throws NoSuchSchemaException { | ||
NameIdentifier catalogIdentifier = getCatalogIdentifier(ident); | ||
Schema schema = | ||
doWithCatalog( | ||
catalogIdentifier, | ||
c -> c.doWithSchemaOps(s -> s.loadSchema(ident)), | ||
NoSuchSchemaException.class); | ||
EntityCombinedSchema schema = | ||
TreeLockUtils.doWithTreeLock(ident, LockType.READ, () -> loadCombinedSchema(ident)); | ||
|
||
// If the Schema is maintained by the Gravitino's store, we don't have to load again. | ||
boolean isManagedSchema = isManagedEntity(catalogIdentifier, Capability.Scope.SCHEMA); | ||
if (isManagedSchema) { | ||
return EntityCombinedSchema.of(schema) | ||
.withHiddenPropertiesSet( | ||
getHiddenPropertyNames( | ||
catalogIdentifier, | ||
HasPropertyMetadata::schemaPropertiesMetadata, | ||
schema.properties())); | ||
if (schema == null || schema.imported()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should throw a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you really think that method will return null, can you please carefully check the code? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The method definition here will throw a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, do you really check that the code above will return null or throw an exception? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I checked the method won't return null. I will modify here. I just obey the same style as the method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ever thought that we assume that this method may return null, I add the check of null.
Do we need to modify here, too? |
||
return schema; | ||
} | ||
|
||
StringIdentifier stringId = getStringIdFromProperties(schema.properties()); | ||
// Case 1: The schema is not created by Gravitino. | ||
if (stringId == null) { | ||
return EntityCombinedSchema.of(schema) | ||
.withHiddenPropertiesSet( | ||
getHiddenPropertyNames( | ||
catalogIdentifier, | ||
HasPropertyMetadata::schemaPropertiesMetadata, | ||
schema.properties())); | ||
} | ||
TreeLockUtils.doWithTreeLock( | ||
NameIdentifier.of(ident.namespace().levels()), LockType.WRITE, () -> importSchema(ident)); | ||
|
||
SchemaEntity schemaEntity = | ||
operateOnEntity( | ||
ident, | ||
identifier -> store.get(identifier, SCHEMA, SchemaEntity.class), | ||
"GET", | ||
stringId.id()); | ||
return EntityCombinedSchema.of(schema, schemaEntity) | ||
.withHiddenPropertiesSet( | ||
getHiddenPropertyNames( | ||
catalogIdentifier, | ||
HasPropertyMetadata::schemaPropertiesMetadata, | ||
schema.properties())); | ||
return schema; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you return a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If schema is imported, we don't need to import it again, so we can return it directly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, what I mean is that you will have a new entity combined schema in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Has this comment been resolved? I have the same question, I think here should return There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After digging into the code, the difference of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Got it. If there is no change in the |
||
} | ||
|
||
/** | ||
|
@@ -280,6 +252,7 @@ public Schema alterSchema(NameIdentifier ident, SchemaChange... changes) | |
.build()), | ||
"UPDATE", | ||
stringId.id()); | ||
|
||
return EntityCombinedSchema.of(alteredSchema, updatedSchemaEntity) | ||
.withHiddenPropertiesSet( | ||
getHiddenPropertyNames( | ||
|
@@ -330,4 +303,103 @@ public boolean dropSchema(NameIdentifier ident, boolean cascade) throws NonEmpty | |
? droppedFromStore | ||
: droppedFromCatalog; | ||
} | ||
|
||
private boolean importSchema(NameIdentifier identifier) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You didn't leverage the return value of this method, so why do you need to define a return value? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, Maybe I should add some logs. |
||
EntityCombinedSchema combinedSchema = loadCombinedSchema(identifier); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you need to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The table may be imported by another thread after we release the read lock and before we acquire the write lock. |
||
if (combinedSchema.imported()) { | ||
return false; | ||
} | ||
|
||
StringIdentifier stringId = getStringIdFromProperties(combinedSchema.schemaProperties()); | ||
long uid; | ||
if (stringId != null) { | ||
// If the entity in the store doesn't match the external system, we use the data | ||
// of the external system to correct it. | ||
uid = stringId.id(); | ||
} else { | ||
// If the entity doesn't exist in the store, we sync it from the external system. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct this comment's grammar. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
uid = idGenerator.nextId(); | ||
} | ||
|
||
SchemaEntity schemaEntity = | ||
SchemaEntity.builder() | ||
.withId(uid) | ||
.withName(identifier.name()) | ||
.withNamespace(identifier.namespace()) | ||
.withAuditInfo( | ||
AuditInfo.builder() | ||
.withCreator(combinedSchema.auditInfo().creator()) | ||
.withCreateTime(combinedSchema.auditInfo().createTime()) | ||
.withLastModifier(combinedSchema.auditInfo().lastModifier()) | ||
.withLastModifiedTime(combinedSchema.auditInfo().lastModifiedTime()) | ||
.build()) | ||
.build(); | ||
try { | ||
store.put(schemaEntity, true); | ||
} catch (Exception e) { | ||
LOG.error(FormattedErrorMessages.STORE_OP_FAILURE, "put", identifier, e); | ||
throw new RuntimeException("Fail to access underlying storage"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you need to pass the exception There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I pass the
|
||
} | ||
|
||
return true; | ||
} | ||
|
||
private EntityCombinedSchema loadCombinedSchema(NameIdentifier ident) { | ||
NameIdentifier catalogIdentifier = getCatalogIdentifier(ident); | ||
Schema schema = | ||
doWithCatalog( | ||
catalogIdentifier, | ||
c -> c.doWithSchemaOps(s -> s.loadSchema(ident)), | ||
NoSuchSchemaException.class); | ||
|
||
// If the Schema is maintained by the Gravitino's store, we don't have to load again. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't understand why we do not need to load schema again as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fileset schema is only stored in the backend storage. |
||
boolean isManagedSchema = isManagedEntity(catalogIdentifier, Capability.Scope.SCHEMA); | ||
if (isManagedSchema) { | ||
return EntityCombinedSchema.of(schema) | ||
.withHiddenPropertiesSet( | ||
getHiddenPropertyNames( | ||
catalogIdentifier, | ||
HasPropertyMetadata::schemaPropertiesMetadata, | ||
schema.properties())) | ||
.withImported(true); | ||
} | ||
|
||
StringIdentifier stringId = getStringIdFromProperties(schema.properties()); | ||
// Case 1: The schema is not created by Gravitino. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The schema is not created by Gravitino or the backend storage does not support storing string identifiers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You didn't update it as @yuqi1129 suggested here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I reverted the change by mistake. I have made the change again. |
||
if (stringId == null) { | ||
return EntityCombinedSchema.of(schema) | ||
.withHiddenPropertiesSet( | ||
getHiddenPropertyNames( | ||
catalogIdentifier, | ||
HasPropertyMetadata::schemaPropertiesMetadata, | ||
schema.properties())) | ||
.withImported(isEntityExist(ident)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. First case: a PG schema doesn't have properties, so we should judge whether it is imported by checking whether the entity exists instead of by the StringIdentifier. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should document this in a code comment rather than only explaining it here when we ask. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I will. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
|
||
SchemaEntity schemaEntity = | ||
operateOnEntity( | ||
ident, | ||
identifier -> store.get(identifier, SCHEMA, SchemaEntity.class), | ||
"GET", | ||
stringId.id()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For entity store, it throws a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
boolean imported = schemaEntity != null; | ||
|
||
return EntityCombinedSchema.of(schema, schemaEntity) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should you check that two uid are different here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need. If two uids are different. The |
||
.withHiddenPropertiesSet( | ||
getHiddenPropertyNames( | ||
catalogIdentifier, | ||
HasPropertyMetadata::schemaPropertiesMetadata, | ||
schema.properties())) | ||
.withImported(imported); | ||
} | ||
|
||
// Checks whether the entity store already holds a SCHEMA entity for the given identifier. | ||
// Returns the result of store.exists(ident, SCHEMA). | ||
// Any exception raised by the store is logged and rethrown as a RuntimeException that | ||
// keeps the original exception as its cause, so callers can still see the root failure. | ||
private boolean isEntityExist(NameIdentifier ident) { | ||
try { | ||
return store.exists(ident, SCHEMA); | ||
} catch (Exception e) { | ||
LOG.error(FormattedErrorMessages.STORE_OP_FAILURE, "exists", ident, e); | ||
throw new RuntimeException("Fail to access underlying storage", e); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure whether you could use mocks or a JUnit/reflection-based solution to test these interfaces, instead of creating a bunch of methods only for tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can have a try.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Java reflection can do the same thing, but I'm not sure whether it's more elegant than the current implementation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.