Skip to content

Commit

Permalink
#1511 - Do not initialize POS mapping loader if mapping is disabled
Browse files (browse the repository at this point in the history)
- Do not initialize/configure mapper if the mapped feature is disabled
  • Loading branch information
reckart committed Dec 31, 2022
1 parent f3d62c1 commit c217616
Show file tree
Hide file tree
Showing 14 changed files with 538 additions and 624 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,8 @@
@ResourceMetaData(name = "Berkeley Parser")
@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
@OperationalProperties(multipleDeploymentAllowed = false)
@TypeCapability(
inputs = {
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" },
outputs = {
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" }, outputs = {
"de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent",
"de.tudarmstadt.ukp.dkpro.core.api.syntax.type.PennTree" })
public class BerkeleyParser
Expand All @@ -105,19 +102,20 @@ public class BerkeleyParser
protected String variant;

/**
* URI of the model artifact. This can be used to override the default model resolving
* mechanism and directly address a particular model.
* URI of the model artifact. This can be used to override the default model resolving mechanism
* and directly address a particular model.
*
* <p>The URI format is {@code mvn:${groupId}:${artifactId}:${version}}. Remember to set
* the variant parameter to match the artifact. If the artifact contains the model in
* a non-default location, you also have to specify the model location parameter, e.g.
* {@code classpath:/model/path/in/artifact/model.bin}.</p>
* <p>
* The URI format is {@code mvn:${groupId}:${artifactId}:${version}}. Remember to set the
* variant parameter to match the artifact. If the artifact contains the model in a non-default
* location, you also have to specify the model location parameter, e.g.
* {@code classpath:/model/path/in/artifact/model.bin}.
* </p>
*/
public static final String PARAM_MODEL_ARTIFACT_URI =
ComponentParameters.PARAM_MODEL_ARTIFACT_URI;
public static final String PARAM_MODEL_ARTIFACT_URI = ComponentParameters.PARAM_MODEL_ARTIFACT_URI;
@ConfigurationParameter(name = PARAM_MODEL_ARTIFACT_URI, mandatory = false)
protected String modelArtifactUri;

/**
* Load the model from this location instead of locating the model automatically.
*/
Expand All @@ -129,23 +127,20 @@ public class BerkeleyParser
* Enable/disable type mapping.
*/
public static final String PARAM_MAPPING_ENABLED = ComponentParameters.PARAM_MAPPING_ENABLED;
@ConfigurationParameter(name = PARAM_MAPPING_ENABLED, mandatory = true, defaultValue =
ComponentParameters.DEFAULT_MAPPING_ENABLED)
@ConfigurationParameter(name = PARAM_MAPPING_ENABLED, mandatory = true, defaultValue = ComponentParameters.DEFAULT_MAPPING_ENABLED)
protected boolean mappingEnabled;

/**
* Location of the mapping file for part-of-speech tags to UIMA types.
*/
public static final String PARAM_POS_MAPPING_LOCATION =
ComponentParameters.PARAM_POS_MAPPING_LOCATION;
public static final String PARAM_POS_MAPPING_LOCATION = ComponentParameters.PARAM_POS_MAPPING_LOCATION;
@ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false)
protected String posMappingLocation;

/**
* Location of the mapping file for constituent tags to UIMA types.
*/
public static final String PARAM_CONSTITUENT_MAPPING_LOCATION =
ComponentParameters.PARAM_CONSTITUENT_MAPPING_LOCATION;
public static final String PARAM_CONSTITUENT_MAPPING_LOCATION = ComponentParameters.PARAM_CONSTITUENT_MAPPING_LOCATION;
@ConfigurationParameter(name = PARAM_CONSTITUENT_MAPPING_LOCATION, mandatory = false)
protected String constituentMappingLocation;

Expand All @@ -163,7 +158,7 @@ public class BerkeleyParser
public static final String PARAM_READ_POS = ComponentParameters.PARAM_READ_POS;
@ConfigurationParameter(name = PARAM_READ_POS, mandatory = true, defaultValue = "true")
private boolean readPos;

/**
* Sets whether to create or not to create POS tags. The creation of constituent tags must be
* turned on for this to work.
Expand Down Expand Up @@ -234,35 +229,37 @@ public class BerkeleyParser
private MappingProvider constituentMappingProvider;

@Override
public void initialize(UimaContext aContext)
throws ResourceInitializationException
public void initialize(UimaContext aContext) throws ResourceInitializationException
{
super.initialize(aContext);

modelProvider = new BerkeleyParserModelProvider();

posMappingProvider = createPosMappingProvider(this, posMappingLocation, language,
modelProvider);
if (writePos) {
posMappingProvider = createPosMappingProvider(this, posMappingLocation, language,
modelProvider);
}

constituentMappingProvider = createConstituentMappingProvider(this,
constituentMappingLocation, language, modelProvider);
}

@Override
public void process(JCas aJCas)
throws AnalysisEngineProcessException
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
CAS cas = aJCas.getCas();

modelProvider.configure(cas);
posMappingProvider.configure(cas);
if (writePos) {
posMappingProvider.configure(cas);
}
constituentMappingProvider.configure(cas);

for (Sentence sentence : select(aJCas, Sentence.class)) {
List<Token> tokens = selectCovered(aJCas, Token.class, sentence);
List<String> tokenText = tokens.stream().map(t ->
t.getText()).collect(Collectors.toList());
List<String> tokenText = tokens.stream().map(t -> t.getText())
.collect(Collectors.toList());

List<String> posTags = null;
if (readPos) {
posTags = new ArrayList<String>(tokens.size());
Expand All @@ -271,15 +268,15 @@ public void process(JCas aJCas)
}
}

Tree<String> parseOutput = modelProvider.getResource().getBestConstrainedParse(
tokenText, posTags, false);
Tree<String> parseOutput = modelProvider.getResource()
.getBestConstrainedParse(tokenText, posTags, false);

// Check if the sentence could be parsed or not
if (parseOutput.getChildren().isEmpty()) {
getLogger().warn("Unable to parse sentence: [" + sentence.getCoveredText() + "]");
continue;
}

if (!binarize) {
parseOutput = TreeAnnotations.unAnnotateTree(parseOutput, keepFunctionLabels);
}
Expand Down Expand Up @@ -361,8 +358,7 @@ private Annotation createConstituentAnnotationFromTree(JCas aJCas, Tree<String>

// Now that we know how many children we have, link annotation of
// current node with its children
FSArray childArray = FSCollectionFactory.createFSArray(aJCas,
childAnnotations);
FSArray childArray = FSCollectionFactory.createFSArray(aJCas, childAnnotations);
constAnno.setChildren(childArray);

// write annotation for current node to index
Expand Down Expand Up @@ -391,8 +387,7 @@ private class BerkeleyParserModelProvider
}

@Override
protected CoarseToFineMaxRuleParser produceResource(URL aUrl)
throws IOException
protected CoarseToFineMaxRuleParser produceResource(URL aUrl) throws IOException
{
try (ObjectInputStream is = new ObjectInputStream(
new GZIPInputStream(aUrl.openStream()))) {
Expand All @@ -405,10 +400,10 @@ protected CoarseToFineMaxRuleParser produceResource(URL aUrl)
double threshold = 1.0;

Properties metadata = getResourceMetaData();
SingletonTagset posTags = new SingletonTagset(
POS.class, metadata.getProperty("pos.tagset"));
SingletonTagset constTags = new SingletonTagset(
Constituent.class, metadata.getProperty("constituent.tagset"));
SingletonTagset posTags = new SingletonTagset(POS.class,
metadata.getProperty("pos.tagset"));
SingletonTagset constTags = new SingletonTagset(Constituent.class,
metadata.getProperty("constituent.tagset"));

Numberer tagNumberer = (Numberer) pData.getNumbs().get("tags");
for (int i = 0; i < tagNumberer.size(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static java.util.Arrays.asList;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.selectCovered;
import static org.dkpro.core.api.resources.MappingProviderFactory.createPosMappingProvider;

import java.io.IOException;
import java.net.URL;
Expand Down Expand Up @@ -50,7 +51,6 @@
import org.dkpro.core.api.parameter.ComponentParameters;
import org.dkpro.core.api.resources.CasConfigurableProviderBase;
import org.dkpro.core.api.resources.MappingProvider;
import org.dkpro.core.api.resources.MappingProviderFactory;
import org.dkpro.core.api.resources.ModelProviderBase;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
Expand All @@ -67,10 +67,8 @@
@ResourceMetaData(name = "CoGrOO POS-Tagger")
@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
@LanguageCapability("pt")
@TypeCapability(
inputs = {
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" })
@TypeCapability(inputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" })
public class CogrooPosTagger
extends JCasAnnotator_ImplBase
{
Expand All @@ -85,29 +83,27 @@ public class CogrooPosTagger
* Enable/disable type mapping.
*/
public static final String PARAM_MAPPING_ENABLED = ComponentParameters.PARAM_MAPPING_ENABLED;
@ConfigurationParameter(name = PARAM_MAPPING_ENABLED, mandatory = true, defaultValue =
ComponentParameters.DEFAULT_MAPPING_ENABLED)
@ConfigurationParameter(name = PARAM_MAPPING_ENABLED, mandatory = true, defaultValue = ComponentParameters.DEFAULT_MAPPING_ENABLED)
protected boolean mappingEnabled;

/**
* Load the part-of-speech tag to UIMA type mapping from this location instead of locating
* the mapping automatically.
* Load the part-of-speech tag to UIMA type mapping from this location instead of locating the
* mapping automatically.
*/
public static final String PARAM_POS_MAPPING_LOCATION =
ComponentParameters.PARAM_POS_MAPPING_LOCATION;
public static final String PARAM_POS_MAPPING_LOCATION = ComponentParameters.PARAM_POS_MAPPING_LOCATION;
@ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false)
protected String posMappingLocation;

private CasConfigurableProviderBase<Analyzer> modelProvider;
private MappingProvider mappingProvider;

@Override
public void initialize(UimaContext aContext)
throws ResourceInitializationException
public void initialize(UimaContext aContext) throws ResourceInitializationException
{
super.initialize(aContext);

modelProvider = new ModelProviderBase<Analyzer>() {
modelProvider = new ModelProviderBase<Analyzer>()
{
{
setContextObject(CogrooPosTagger.this);

Expand All @@ -116,8 +112,7 @@ public void initialize(UimaContext aContext)
}

@Override
protected Analyzer produceResource(URL aUrl)
throws IOException
protected Analyzer produceResource(URL aUrl) throws IOException
{
Properties props = getAggregatedProperties();

Expand All @@ -133,13 +128,11 @@ protected Analyzer produceResource(URL aUrl)
}
};

mappingProvider = MappingProviderFactory.createPosMappingProvider(this, posMappingLocation,
"bosque", language);
mappingProvider = createPosMappingProvider(this, posMappingLocation, "bosque", language);
}

@Override
public void process(JCas aJCas)
throws AnalysisEngineProcessException
public void process(JCas aJCas) throws AnalysisEngineProcessException
{
CAS cas = aJCas.getCas();
modelProvider.configure(cas);
Expand Down
Loading

0 comments on commit c217616

Please sign in to comment.