diff --git a/hugegraph-core/pom.xml b/hugegraph-core/pom.xml index f8b3f8a45e..fd24d1f2e8 100644 --- a/hugegraph-core/pom.xml +++ b/hugegraph-core/pom.xml @@ -13,6 +13,21 @@ ${basedir}/.. + 1.3.11 + 0.7.4 + 1.8.0 + 1.3.1 + 1.10.0 + 2.6.2 + portable-1.8.3 + 5.1.6 + 8.11.2 + 1.0.2 + 2012_u6 + 1.21 + 11.1.0 + 8.5.9 + 0.11.5 @@ -53,7 +68,7 @@ com.alipay.sofa jraft-core - 1.3.9 + ${jraft.version} org.slf4j @@ -85,7 +100,7 @@ org.caffinitas.ohc ohc-core - 0.7.0 + ${ohc.version} com.google.guava @@ -97,7 +112,7 @@ org.apdplat word - 1.3 + ${apdplat-word.version} ch.qos.logback @@ -108,87 +123,87 @@ org.ansj ansj_seg - 5.1.6 + ${ansj-seg.version} com.hankcs hanlp - portable-1.5.0 + ${hanlp.version} org.apache.lucene lucene-analyzers-smartcn - 7.4.0 + ${lucene.version} org.apache.lucene lucene-core - 7.4.0 + ${lucene.version} com.huaban jieba-analysis - 1.0.2 + ${jieba-analysis.version} org.lionsoul jcseg-core - 2.2.0 + ${jcseg.version} com.chenlb.mmseg4j mmseg4j-core - 1.10.0 + ${mmseg4j-core.version} com.janeluo ikanalyzer - 2012_u6 + ${ikanalyzer.version} org.lz4 lz4-java - 1.7.1 + ${lz4.version} org.apache.commons commons-compress - 1.21 + ${commons-compress.version} org.eclipse.collections eclipse-collections-api - 10.4.0 + ${eclipse-collections.version} org.eclipse.collections eclipse-collections - 10.4.0 + ${eclipse-collections.version} it.unimi.dsi fastutil - 8.1.0 + ${fastutil.version} io.jsonwebtoken jjwt-api - 0.11.2 + ${jjwt.version} io.jsonwebtoken jjwt-impl - 0.11.2 + ${jjwt.version} runtime io.jsonwebtoken jjwt-jackson - 0.11.2 + ${jjwt.version} runtime diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JcsegAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JcsegAnalyzer.java index 211f384295..64a15fe40c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JcsegAnalyzer.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JcsegAnalyzer.java @@ -23,12 +23,11 @@ import java.util.List; import java.util.Set; -import org.lionsoul.jcseg.tokenizer.core.ADictionary; -import org.lionsoul.jcseg.tokenizer.core.DictionaryFactory; -import org.lionsoul.jcseg.tokenizer.core.ISegment; -import org.lionsoul.jcseg.tokenizer.core.IWord; -import org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig; -import org.lionsoul.jcseg.tokenizer.core.SegmentFactory; +import org.lionsoul.jcseg.ISegment; +import org.lionsoul.jcseg.IWord; +import org.lionsoul.jcseg.dic.ADictionary; +import org.lionsoul.jcseg.dic.DictionaryFactory; +import org.lionsoul.jcseg.segmenter.SegmenterConfig; import com.baidu.hugegraph.HugeException; import com.baidu.hugegraph.config.ConfigException; @@ -45,11 +44,11 @@ public class JcsegAnalyzer implements Analyzer { "Complex" ); - private static final JcsegTaskConfig CONFIG = new JcsegTaskConfig(); + private static final SegmenterConfig CONFIG = new SegmenterConfig(); private static final ADictionary DIC = - DictionaryFactory.createDefaultDictionary(new JcsegTaskConfig()); + DictionaryFactory.createDefaultDictionary(CONFIG); - private int segMode; + private final ISegment.Type type; public JcsegAnalyzer(String mode) { if (!SUPPORT_MODES.contains(mode)) { @@ -57,17 +56,23 @@ public JcsegAnalyzer(String mode) { "Unsupported segment mode '%s' for jcseg analyzer, " + "the available values are %s", mode, SUPPORT_MODES); } - this.segMode = SUPPORT_MODES.indexOf(mode) + 1; + + if (mode.equals("Simple")) { + this.type = ISegment.SIMPLE; + } else { + this.type = ISegment.COMPLEX; + } } @Override public Set segment(String text) { Set result = InsertionOrderUtil.newSet(); try { - Object[] args = new Object[]{new StringReader(text), CONFIG, DIC}; - ISegment seg = SegmentFactory.createJcseg(this.segMode, args); - IWord word = null; - while ((word = seg.next()) != null) { + ISegment segmentor = this.type.factory.create(CONFIG, DIC); + segmentor.reset(new StringReader(text)); + + IWord word; + while ((word = segmentor.next()) != null) { result.add(word.getValue()); } } catch (Exception e) {