Skip to content

Commit

Permalink
chore(core): update core's Maven dependency versions and fix JcsegAna…
Browse files Browse the repository at this point in the history
…lyzer

jraft 1.3.9->1.3.11
ohc-core 0.7.0->0.7.4
org.apdplat.word 1.3->1.3.1
hanlp portable-1.5.0->portable-1.8.3
lucene-analyzers-smartcn,lucene-core 7.4.0->8.11.2
jcseg-core 2.2.0->2.6.2
lz4-java 1.7.1->1.8.0
eclipse-collections 10.4.0->11.1.0
fastutil 8.1.0->8.5.9
jjwt 0.11.2->0.11.5
  • Loading branch information
jadepeng committed Oct 26, 2022
1 parent 9b5950e commit 7bc8d13
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 33 deletions.
53 changes: 34 additions & 19 deletions hugegraph-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,21 @@

<properties>
<top.level.dir>${basedir}/..</top.level.dir>
<jraft.version>1.3.11</jraft.version>
<ohc.version>0.7.4</ohc.version>
<lz4.version>1.8.0</lz4.version>
<apdplat-word.version>1.3.1</apdplat-word.version>
<mmseg4j-core.version>1.10.0</mmseg4j-core.version>
<jcseg.version>2.6.2</jcseg.version>
<hanlp.version>portable-1.8.3</hanlp.version>
<ansj-seg.version>5.1.6</ansj-seg.version>
<lucene.version>8.11.2</lucene.version>
<jieba-analysis.version>1.0.2</jieba-analysis.version>
<ikanalyzer.version>2012_u6</ikanalyzer.version>
<commons-compress.version>1.21</commons-compress.version>
<eclipse-collections.version>11.1.0</eclipse-collections.version>
<fastutil.version>8.5.9</fastutil.version>
<jjwt.version>0.11.5</jjwt.version>
</properties>

<dependencies>
Expand Down Expand Up @@ -53,7 +68,7 @@
<dependency>
<groupId>com.alipay.sofa</groupId>
<artifactId>jraft-core</artifactId>
<version>1.3.9</version>
<version>${jraft.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
Expand Down Expand Up @@ -85,7 +100,7 @@
<dependency>
<groupId>org.caffinitas.ohc</groupId>
<artifactId>ohc-core</artifactId>
<version>0.7.0</version>
<version>${ohc.version}</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
Expand All @@ -97,7 +112,7 @@
<dependency>
<groupId>org.apdplat</groupId>
<artifactId>word</artifactId>
<version>1.3</version>
<version>${apdplat-word.version}</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
Expand All @@ -108,87 +123,87 @@
<dependency>
<groupId>org.ansj</groupId>
<artifactId>ansj_seg</artifactId>
<version>5.1.6</version>
<version>${ansj-seg.version}</version>
</dependency>
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>portable-1.5.0</version>
<version>${hanlp.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>7.4.0</version>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.4.0</version>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>com.huaban</groupId>
<artifactId>jieba-analysis</artifactId>
<version>1.0.2</version>
<version>${jieba-analysis.version}</version>
</dependency>
<dependency>
<groupId>org.lionsoul</groupId>
<artifactId>jcseg-core</artifactId>
<version>2.2.0</version>
<version>${jcseg.version}</version>
</dependency>
<dependency>
<groupId>com.chenlb.mmseg4j</groupId>
<artifactId>mmseg4j-core</artifactId>
<version>1.10.0</version>
<version>${mmseg4j-core.version}</version>
</dependency>
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
<version>${ikanalyzer.version}</version>
</dependency>

<dependency>
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
<version>1.7.1</version>
<version>${lz4.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.21</version>
<version>${commons-compress.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.collections</groupId>
<artifactId>eclipse-collections-api</artifactId>
<version>10.4.0</version>
<version>${eclipse-collections.version}</version>
</dependency>
<dependency>
<groupId>org.eclipse.collections</groupId>
<artifactId>eclipse-collections</artifactId>
<version>10.4.0</version>
<version>${eclipse-collections.version}</version>
</dependency>

<!-- https://mvnrepository.com/artifact/it.unimi.dsi/fastutil -->
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>8.1.0</version>
<version>${fastutil.version}</version>
</dependency>
<!-- jwt auth token TODO: move to auth pom -->
<dependency>
<groupId>io.jsonwebtoken</groupId>
<artifactId>jjwt-api</artifactId>
<version>0.11.2</version>
<version>${jjwt.version}</version>
</dependency>
<dependency>
<groupId>io.jsonwebtoken</groupId>
<artifactId>jjwt-impl</artifactId>
<version>0.11.2</version>
<version>${jjwt.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>io.jsonwebtoken</groupId>
<artifactId>jjwt-jackson</artifactId>
<version>0.11.2</version>
<version>${jjwt.version}</version>
<scope>runtime</scope>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,11 @@
import java.util.List;
import java.util.Set;

import org.lionsoul.jcseg.tokenizer.core.ADictionary;
import org.lionsoul.jcseg.tokenizer.core.DictionaryFactory;
import org.lionsoul.jcseg.tokenizer.core.ISegment;
import org.lionsoul.jcseg.tokenizer.core.IWord;
import org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig;
import org.lionsoul.jcseg.tokenizer.core.SegmentFactory;
import org.lionsoul.jcseg.ISegment;
import org.lionsoul.jcseg.IWord;
import org.lionsoul.jcseg.dic.ADictionary;
import org.lionsoul.jcseg.dic.DictionaryFactory;
import org.lionsoul.jcseg.segmenter.SegmenterConfig;

import com.baidu.hugegraph.HugeException;
import com.baidu.hugegraph.config.ConfigException;
Expand All @@ -45,29 +44,35 @@ public class JcsegAnalyzer implements Analyzer {
"Complex"
);

private static final JcsegTaskConfig CONFIG = new JcsegTaskConfig();
private static final SegmenterConfig CONFIG = new SegmenterConfig();
private static final ADictionary DIC =
DictionaryFactory.createDefaultDictionary(new JcsegTaskConfig());
DictionaryFactory.createDefaultDictionary(CONFIG);

private int segMode;
private final ISegment.Type type;

public JcsegAnalyzer(String mode) {
if (!SUPPORT_MODES.contains(mode)) {
throw new ConfigException(
"Unsupported segment mode '%s' for jcseg analyzer, " +
"the available values are %s", mode, SUPPORT_MODES);
}
this.segMode = SUPPORT_MODES.indexOf(mode) + 1;

if (mode.equals("Simple")) {
this.type = ISegment.SIMPLE;
} else {
this.type = ISegment.COMPLEX;
}
}

@Override
public Set<String> segment(String text) {
Set<String> result = InsertionOrderUtil.newSet();
try {
Object[] args = new Object[]{new StringReader(text), CONFIG, DIC};
ISegment seg = SegmentFactory.createJcseg(this.segMode, args);
IWord word = null;
while ((word = seg.next()) != null) {
ISegment segmentor = this.type.factory.create(CONFIG, DIC);
segmentor.reset(new StringReader(text));

IWord word;
while ((word = segmentor.next()) != null) {
result.add(word.getValue());
}
} catch (Exception e) {
Expand Down

0 comments on commit 7bc8d13

Please sign in to comment.