From ba8bb85f31521f55a1c30b0d1a4edd60851227fd Mon Sep 17 00:00:00 2001 From: medcl Date: Thu, 11 Apr 2019 09:35:19 +0800 Subject: [PATCH 01/14] update to support 7.x --- README.md | 13 +++---------- pom.xml | 2 +- .../index/analysis/IkTokenizerFactory.java | 2 +- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 617e0804..1c41a0fc 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,9 @@ Versions IK version | ES version -----------|----------- -master | 6.x -> master -6.3.0| 6.3.0 -6.2.4| 6.2.4 -6.1.3| 6.1.3 -5.6.8| 5.6.8 -5.5.3| 5.5.3 -5.4.3| 5.4.3 -5.3.3| 5.3.3 -5.2.2| 5.2.2 -5.1.2| 5.1.2 +master | 7.x -> master +6.x| 6.x +5.x| 5.x 1.10.6 | 2.4.6 1.9.5 | 2.3.5 1.8.1 | 2.2.1 diff --git a/pom.xml b/pom.xml index 32fb4e14..b734e39f 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ 2011 - 6.5.0 + 7.0.0 1.8 ${project.basedir}/src/main/assemblies/plugin.xml analysis-ik diff --git a/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java index 17d09806..abe4fa1d 100644 --- a/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java @@ -11,7 +11,7 @@ public class IkTokenizerFactory extends AbstractTokenizerFactory { private Configuration configuration; public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, name, settings); + super(indexSettings, settings); configuration=new Configuration(env,settings); } From 90c9b58354c8f67a4a9725eab706d88c7d2fc476 Mon Sep 17 00:00:00 2001 From: medcl Date: Thu, 11 Apr 2019 10:07:22 +0800 Subject: [PATCH 02/14] update example --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1c41a0fc..5172478e 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ curl -XPUT http://localhost:9200/index 2.create a mapping ```bash -curl -XPOST http://localhost:9200/index/fulltext/_mapping -H 'Content-Type:application/json' -d' +curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/json' -d' { "properties": { "content": { @@ -73,25 +73,25 @@ curl -XPOST http://localhost:9200/index/fulltext/_mapping -H 'Content-Type:appli 3.index some docs ```bash -curl -XPOST http://localhost:9200/index/fulltext/1 -H 'Content-Type:application/json' -d' +curl -XPOST http://localhost:9200/index/_create/1 -H 'Content-Type:application/json' -d' {"content":"美国留给伊拉克的是个烂摊子吗"} ' ``` ```bash -curl -XPOST http://localhost:9200/index/fulltext/2 -H 'Content-Type:application/json' -d' +curl -XPOST http://localhost:9200/index/_create/2 -H 'Content-Type:application/json' -d' {"content":"公安部:各地校车将享最高路权"} ' ``` ```bash -curl -XPOST http://localhost:9200/index/fulltext/3 -H 'Content-Type:application/json' -d' +curl -XPOST http://localhost:9200/index/_create/3 -H 'Content-Type:application/json' -d' {"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"} ' ``` ```bash -curl -XPOST http://localhost:9200/index/fulltext/4 -H 'Content-Type:application/json' -d' +curl -XPOST http://localhost:9200/index/_create/4 -H 'Content-Type:application/json' -d' {"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"} ' ``` @@ -99,7 +99,7 @@ curl -XPOST http://localhost:9200/index/fulltext/4 -H 'Content-Type:application/ 4.query with highlighting ```bash -curl -XPOST http://localhost:9200/index/fulltext/_search -H 'Content-Type:application/json' -d' +curl -XPOST http://localhost:9200/index/_search -H 'Content-Type:application/json' -d' { "query" : { "match" : { "content" : "中国" }}, "highlight" : { @@ -241,13 +241,13 @@ curl -XGET "http://localhost:9200/your_index/_analyze" -H 'Content-Type: applica 4. ik_max_word 和 ik_smart 什么区别? -ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合; +ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合,适合 Term Query; -ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”。 +ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”,适合 Phrase 查询。 Changes ------ -*5.0.0* +*自 v5.0.0 起* - 移除名为 `ik` 的analyzer和tokenizer,请分别使用 `ik_smart` 和 `ik_max_word` From a1d6ba8ca206d750e68bbf3e53a25284100174bd Mon Sep 17 00:00:00 2001 From: Hongliang Wang Date: Fri, 19 Apr 2019 20:23:43 +0800 Subject: [PATCH 03/14] Correct Search Analyzer (#668) The former search analyzer `ik-max-word` will give the wrong result against described later in the README file. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5172478e..f1406443 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/js "content": { "type": "text", "analyzer": "ik_max_word", - "search_analyzer": "ik_max_word" + "search_analyzer": "ik_smart" } } From 06e8a23d1828ae993aef21b81829a7729d06f224 Mon Sep 17 00:00:00 2001 From: zhipingpan <42376954+zhipingpan@users.noreply.github.com> Date: Wed, 1 May 2019 16:57:44 +0800 Subject: [PATCH 04/14] Update AnalyzeContext.java (#673) --- src/main/java/org/wltea/analyzer/core/AnalyzeContext.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java b/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java index 5bf1ac90..890d9080 100644 --- a/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java +++ b/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java @@ -268,13 +268,13 @@ void outputToResult(){ while(l != null){ this.results.add(l); //字典中无单字,但是词元冲突了,切分出相交词元的前一个词元中的单字 - int innerIndex = index + 1; + /*int innerIndex = index + 1; for (; innerIndex < index + l.getLength(); innerIndex++) { Lexeme innerL = path.peekFirst(); if (innerL != null && innerIndex == innerL.getBegin()) { this.outputSingleCJK(innerIndex - 1); } - } + }*/ //将index移至lexeme后 index = l.getBegin() + l.getLength(); From 904a7493eae275fe636f4fde60b4e896ca9ee0dc Mon Sep 17 00:00:00 2001 From: medcl Date: Mon, 7 Oct 2019 19:01:29 +0800 Subject: [PATCH 05/14] update to 7.4.0 --- pom.xml | 2 +- .../org/elasticsearch/index/analysis/IkTokenizerFactory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index b734e39f..4737083b 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ 2011 - 7.0.0 + 7.4.0 1.8 ${project.basedir}/src/main/assemblies/plugin.xml analysis-ik diff --git a/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java index abe4fa1d..d2805618 100644 --- a/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java @@ -11,7 +11,7 @@ public class IkTokenizerFactory extends AbstractTokenizerFactory { private Configuration configuration; public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, settings); + super(indexSettings, settings,name); configuration=new Configuration(env,settings); } From 4619effa155056eb67411a87e8a96b5a78cc6d0b Mon Sep 17 00:00:00 2001 From: Howard Date: Thu, 19 Dec 2019 15:31:05 +0800 Subject: [PATCH 06/14] transfer log message from chinese to english (#746) --- src/main/java/org/wltea/analyzer/dic/DictSegment.java | 2 +- src/main/java/org/wltea/analyzer/dic/Dictionary.java | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/wltea/analyzer/dic/DictSegment.java b/src/main/java/org/wltea/analyzer/dic/DictSegment.java index bc330332..9e7b6fe4 100644 --- a/src/main/java/org/wltea/analyzer/dic/DictSegment.java +++ b/src/main/java/org/wltea/analyzer/dic/DictSegment.java @@ -57,7 +57,7 @@ class DictSegment implements Comparable{ DictSegment(Character nodeChar){ if(nodeChar == null){ - throw new IllegalArgumentException("参数为空异常,字符不能为空"); + throw new IllegalArgumentException("node char cannot be empty"); } this.nodeChar = nodeChar; } diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index b61e0381..39212002 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -294,7 +294,7 @@ private String getDictRoot() { */ public static Dictionary getSingleton() { if (singleton == null) { - throw new IllegalStateException("词典尚未初始化,请先调用initial方法"); + throw new IllegalStateException("ik dict has not been initialized yet, please call initial method first."); } return singleton; } @@ -419,7 +419,7 @@ private void loadRemoteExtDict() { List lists = getRemoteWords(location); // 如果找不到扩展的字典,则忽略 if (lists == null) { - logger.error("[Dict Loading] " + location + "加载失败"); + logger.error("[Dict Loading] " + location + " load failed"); continue; } for (String theWord : lists) { @@ -518,7 +518,7 @@ private void loadStopWordDict() { List lists = getRemoteWords(location); // 如果找不到扩展的字典,则忽略 if (lists == null) { - logger.error("[Dict Loading] " + location + "加载失败"); + logger.error("[Dict Loading] " + location + " load failed"); continue; } for (String theWord : lists) { @@ -562,7 +562,7 @@ private void loadPrepDict() { } void reLoadMainDict() { - logger.info("重新加载词典..."); + logger.info("start to reload ik dict."); // 新开一个实例加载词典,减少加载过程对当前词典使用的影响 Dictionary tmpDict = new Dictionary(configuration); tmpDict.configuration = getSingleton().configuration; @@ -570,7 +570,7 @@ void reLoadMainDict() { tmpDict.loadStopWordDict(); _MainDict = tmpDict._MainDict; _StopWords = tmpDict._StopWords; - logger.info("重新加载词典完毕..."); + logger.info("reload ik dict finished."); } } From 1375ca6d39c53c85542ee4e8c147bfff8131e420 Mon Sep 17 00:00:00 2001 From: medcl Date: Wed, 10 Jun 2020 16:05:01 +0800 Subject: [PATCH 07/14] fix #789 --- src/main/java/org/wltea/analyzer/dic/Dictionary.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 39212002..216ff17e 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -80,7 +80,7 @@ public class Dictionary { */ private Configuration configuration; - private static final Logger logger = ESPluginLoggerFactory.getLogger(Monitor.class.getName()); + private static final Logger logger = ESPluginLoggerFactory.getLogger(Dictionary.class.getName()); private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); From 5a1b8c8da629a05bd7c16245da5cbe0d3a0dad1c Mon Sep 17 00:00:00 2001 From: Jack Date: Sun, 6 Sep 2020 16:34:40 +0800 Subject: [PATCH 08/14] Read chunked remote words (#817) Fix chunked content could not be read as it will not get content length I see there is an issue #780 and this fix it --- src/main/java/org/wltea/analyzer/dic/Dictionary.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 216ff17e..2e72ed02 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -469,7 +469,7 @@ private static List getRemoteWordsUnprivileged(String location) { } } - if (entity.getContentLength() > 0) { + if (entity.getContentLength() > 0 || entity.isChunked()) { in = new BufferedReader(new InputStreamReader(entity.getContent(), charset)); String line; while ((line = in.readLine()) != null) { From 0fccc038e277be0b47bfa805fb1a9ef03c4cfb19 Mon Sep 17 00:00:00 2001 From: Medcl Date: Wed, 19 May 2021 16:50:12 +0800 Subject: [PATCH 09/14] Create FUNDING.yml --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..cac3a37e --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +patreon: medcl From e0157d5f39d74a6da01d556ade9fbe0109bce399 Mon Sep 17 00:00:00 2001 From: Medcl Date: Wed, 19 May 2021 17:27:04 +0800 Subject: [PATCH 10/14] Update FUNDING.yml --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index cac3a37e..bc0ef67b 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1,2 @@ patreon: medcl +custom: ["https://www.buymeacoffee.com/medcl", buymeacoffee.com] From 8e36b3240e00622f2523d3afb9a87b5e9445e49b Mon Sep 17 00:00:00 2001 From: Medcl Date: Wed, 19 May 2021 17:27:37 +0800 Subject: [PATCH 11/14] Update FUNDING.yml --- .github/FUNDING.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index bc0ef67b..9c6fb0d2 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1,2 @@ patreon: medcl -custom: ["https://www.buymeacoffee.com/medcl", buymeacoffee.com] +custom: ["https://www.buymeacoffee.com/medcl"] From db00bd2b528cf762110edf44d8b3b8d59b1da94e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=BB=E4=B8=B0=E7=A1=95?= <274962992@qq.com> Date: Wed, 2 Jun 2021 15:19:35 +0800 Subject: [PATCH 12/14] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=88=86=E8=AF=8D?= =?UTF-8?q?=E4=BB=8Emysql=E5=90=8C=E6=AD=A5=E6=89=A9=E5=B1=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/IKAnalyzer.cfg.xml | 8 + mysql.extend.md | 75 ++++++++ pom.xml | 7 +- src/main/assemblies/plugin.xml | 1 + .../org/wltea/analyzer/cfg/Configuration.java | 8 + .../wltea/analyzer/db/DBConfigProperties.java | 58 ++++++ .../wltea/analyzer/db/DataSourceFactory.java | 43 +++++ .../java/org/wltea/analyzer/db/JdbcUtil.java | 92 +++++++++ .../java/org/wltea/analyzer/db/Lexicon.java | 51 +++++ .../analyzer/db/core/ConnectionManager.java | 151 +++++++++++++++ .../analyzer/db/core/ListRecordMapper.java | 25 +++ .../wltea/analyzer/db/core/MapRowMapper.java | 22 +++ .../org/wltea/analyzer/db/core/Record.java | 22 +++ .../db/core/RecordAdapterForResultSet.java | 177 ++++++++++++++++++ .../wltea/analyzer/db/core/RecordMapper.java | 10 + .../java/org/wltea/analyzer/db/core/Row.java | 59 ++++++ .../org/wltea/analyzer/db/core/RowMapper.java | 11 ++ .../org/wltea/analyzer/dic/Dictionary.java | 154 ++++++++++++++- .../org/wltea/analyzer/dic/MysqlMonitor.java | 13 ++ src/main/resources/plugin-security.policy | 2 + 20 files changed, 987 insertions(+), 2 deletions(-) create mode 100644 mysql.extend.md create mode 100644 src/main/java/org/wltea/analyzer/db/DBConfigProperties.java create mode 100644 src/main/java/org/wltea/analyzer/db/DataSourceFactory.java create mode 100644 src/main/java/org/wltea/analyzer/db/JdbcUtil.java create mode 100644 src/main/java/org/wltea/analyzer/db/Lexicon.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/ConnectionManager.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/ListRecordMapper.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/MapRowMapper.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/Record.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/RecordAdapterForResultSet.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/RecordMapper.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/Row.java create mode 100644 src/main/java/org/wltea/analyzer/db/core/RowMapper.java create mode 100644 src/main/java/org/wltea/analyzer/dic/MysqlMonitor.java diff --git a/config/IKAnalyzer.cfg.xml b/config/IKAnalyzer.cfg.xml index fe69bb20..0c39dac6 100644 --- a/config/IKAnalyzer.cfg.xml +++ b/config/IKAnalyzer.cfg.xml @@ -10,4 +10,12 @@ + + + + root + + root + + 10 diff --git a/mysql.extend.md b/mysql.extend.md new file mode 100644 index 00000000..2d6b3688 --- /dev/null +++ b/mysql.extend.md @@ -0,0 +1,75 @@ +IK Analysis 扩展词新增mysql同步来源 +============================= + +- 支持启动全量加载扩展词 +- 支持热更新扩展词 + +> mysql 扩展词表结构 + + +```mysql + +CREATE TABLE `es_lexicon` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '词库id', + `create_date` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + `modify_date` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '修改时间', + `lexicon_text` varchar(40) NOT NULL COMMENT '词条关键词', + `lexicon_type` tinyint(1) NOT NULL DEFAULT '0' COMMENT '0扩展词库 1停用词库', + `lexicon_status` tinyint(1) NOT NULL DEFAULT '0' COMMENT '词条状态 0正常 1暂停使用', + `del_flag` tinyint(1) NOT NULL DEFAULT '0' COMMENT '作废标志 0正常 1作废', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='ES远程扩展词库表' +``` + + + +```IKAnalyzer.cfg.xml``` + + +```xml + + + + IK Analyzer 扩展配置 + + + + + + + + + + + + root + + 123456 + + 10 + + +``` + +> 自行打包 放入elasticsearch plugins 目录即可 + +启动日志如下: + +``` +[2021-06-02T15:16:07,593][INFO ][o.w.a.d.Dictionary ] ======start mysql to reload ik dict.====== +[2021-06-02T15:16:07,828][INFO ][o.w.a.d.Dictionary ] last update mysql ext dic time :2021-05-27T14:36:05.000+0800,fill count:4843 ,disable count:0 +[2021-06-02T15:16:07,837][INFO ][o.w.a.d.Dictionary ] the last reload stop word not found, the last update time :null +[2021-06-02T15:16:07,838][INFO ][o.w.a.d.Dictionary ] last update mysql stop word time :null,fill count:0 ,disable count:0 +[2021-06-02T15:16:07,838][INFO ][o.w.a.d.Dictionary ] ======reload mysql ik dict finished.====== +[2021-06-02T15:16:17,587][INFO ][o.w.a.d.Dictionary ] ======start mysql to reload ik dict.====== +[2021-06-02T15:16:17,615][INFO ][o.w.a.d.Dictionary ] last update mysql ext dic time :2021-06-01T09:44:50.000+0800,fill count:4842 ,disable count:0 +[2021-06-02T15:16:17,623][INFO ][o.w.a.d.Dictionary ] the last reload stop word not found, the last update time :null +[2021-06-02T15:16:17,624][INFO ][o.w.a.d.Dictionary ] last update mysql stop word time :null,fill count:0 ,disable count:0 +[2021-06-02T15:16:17,624][INFO ][o.w.a.d.Dictionary ] ======reload mysql ik dict finished.====== +[2021-06-02T15:16:27,596][INFO ][o.w.a.d.Dictionary ] ======start mysql to reload ik dict.====== +[2021-06-02T15:16:27,602][INFO ][o.w.a.d.Dictionary ] the latest update record was not found, the last update time :2021-06-01T09:44:50.000+0800 +[2021-06-02T15:16:27,602][INFO ][o.w.a.d.Dictionary ] last update mysql ext dic time :2021-06-01T09:44:50.000+0800,fill count:0 ,disable count:0 +[2021-06-02T15:16:27,608][INFO ][o.w.a.d.Dictionary ] the last reload stop word not found, the last update time :null +[2021-06-02T15:16:27,608][INFO ][o.w.a.d.Dictionary ] last update mysql stop word time :null,fill count:0 ,disable count:0 +[2021-06-02T15:16:27,608][INFO ][o.w.a.d.Dictionary ] ======reload mysql ik dict finished.====== +``` \ No newline at end of file diff --git a/pom.xml b/pom.xml index 4737083b..92bbb311 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ 2011 - 7.4.0 + 7.13.0 1.8 ${project.basedir}/src/main/assemblies/plugin.xml analysis-ik @@ -95,6 +95,11 @@ log4j-api 2.3 + + mysql + mysql-connector-java + 5.1.49 + org.hamcrest diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index 8b6fa594..9bc850c8 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -39,6 +39,7 @@ true org.apache.httpcomponents:httpclient + mysql:mysql-connector-java diff --git a/src/main/java/org/wltea/analyzer/cfg/Configuration.java b/src/main/java/org/wltea/analyzer/cfg/Configuration.java index dadd0f20..78e9f5d4 100644 --- a/src/main/java/org/wltea/analyzer/cfg/Configuration.java +++ b/src/main/java/org/wltea/analyzer/cfg/Configuration.java @@ -24,6 +24,9 @@ public class Configuration { //是否启用远程词典加载 private boolean enableRemoteDict=false; + //是否启用远程词典加载 + private boolean enableMysqlDict=false; + //是否启用小写处理 private boolean enableLowercase=true; @@ -36,6 +39,7 @@ public Configuration(Environment env,Settings settings) { this.useSmart = settings.get("use_smart", "false").equals("true"); this.enableLowercase = settings.get("enable_lowercase", "true").equals("true"); this.enableRemoteDict = settings.get("enable_remote_dict", "true").equals("true"); + this.enableMysqlDict = settings.get("enable_mysql_dict", "true").equals("true"); Dictionary.initial(this); @@ -69,6 +73,10 @@ public boolean isEnableRemoteDict() { return enableRemoteDict; } + public boolean isEnableMysqlDict() { + return enableMysqlDict; + } + public boolean isEnableLowercase() { return enableLowercase; } diff --git a/src/main/java/org/wltea/analyzer/db/DBConfigProperties.java b/src/main/java/org/wltea/analyzer/db/DBConfigProperties.java new file mode 100644 index 00000000..eabb6296 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/DBConfigProperties.java @@ -0,0 +1,58 @@ +package org.wltea.analyzer.db; + +import java.io.Serializable; + +/** + * @author fsren + * @date 2021-05-25 + */ +public class DBConfigProperties implements Serializable { + + private static final long serialVersionUID = 688310733642302993L; + private String dbUrl; + private String user; + private String password; + private Integer reloadInterval; + + public String getDbUrl() { + return dbUrl; + } + + public void setDbUrl(String dbUrl) { + this.dbUrl = dbUrl; + } + + public String getUser() { + return user; + } + + public void setUser(String user) { + this.user = user; + } + + public String getPassword() { + return password; + } + + public void setPassword(String password) { + this.password = password; + } + + public Integer getReloadInterval() { + return reloadInterval; + } + + public void setReloadInterval(Integer reloadInterval) { + this.reloadInterval = reloadInterval; + } + + @Override + public String toString() { + return "DBConfigProperties{" + + "dbUrl='" + dbUrl + '\'' + + ", user='" + user + '\'' + + ", password='" + password + '\'' + + ", reloadInterval=" + reloadInterval + + '}'; + } +} diff --git a/src/main/java/org/wltea/analyzer/db/DataSourceFactory.java b/src/main/java/org/wltea/analyzer/db/DataSourceFactory.java new file mode 100644 index 00000000..65f9d032 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/DataSourceFactory.java @@ -0,0 +1,43 @@ +package org.wltea.analyzer.db; + +import com.mysql.jdbc.jdbc2.optional.MysqlConnectionPoolDataSource; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.SpecialPermission; +import org.wltea.analyzer.help.ESPluginLoggerFactory; + +import javax.sql.DataSource; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.sql.SQLException; + +/** + * @author fsren + * @date 2021-05-25 + */ +public class DataSourceFactory { + + + private static final Logger logger = ESPluginLoggerFactory.getLogger(DataSourceFactory.class.getName()); + + + public static DataSource getDataSource(DBConfigProperties configProperties) { + + SpecialPermission.check(); + return AccessController.doPrivileged((PrivilegedAction) () -> { + logger.info("load datasource start"); + MysqlConnectionPoolDataSource dataSource = new MysqlConnectionPoolDataSource(); + dataSource.setURL(configProperties.getDbUrl()); + dataSource.setUser(configProperties.getUser()); + dataSource.setPassword(configProperties.getPassword()); + dataSource.setAllowMultiQueries(true); + try { + dataSource.setSocketTimeout(1000); + } catch (SQLException ignore) { + } + logger.info("load datasource end"); + return dataSource; + }); + } + + +} diff --git a/src/main/java/org/wltea/analyzer/db/JdbcUtil.java b/src/main/java/org/wltea/analyzer/db/JdbcUtil.java new file mode 100644 index 00000000..7cea9676 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/JdbcUtil.java @@ -0,0 +1,92 @@ +package org.wltea.analyzer.db; + +import org.wltea.analyzer.db.core.*; + +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; + +/** + * @author fsren + * @date 2021-05-26 + */ +public class JdbcUtil { + + private final ConnectionManager connManager; + + + /** + * 创建JdbcUtils + * + * @param dataSource 数据源 + */ + public JdbcUtil(DataSource dataSource) { + connManager = new ConnectionManager(dataSource); + } + + /** + * 创建语句 + * + * @param conn 连接 + * @param sql sql语句 + * @param params sql参数 + * @return 创建的PreparedStatement对象 + * @throws SQLException 来自JDBC的异常 + */ + private PreparedStatement createPreparedStatement(Connection conn, String sql, Object... params) throws SQLException { + PreparedStatement stmt = conn.prepareStatement(sql); + for (int i = 0; i < params.length; ++i) { + stmt.setObject(i + 1, params[i]); + } + return stmt; + } + + /** + * 查询数据库并转换结果集。 + * 用户可自定义结果集转换器。 + * 用户也可使用预定义的结果集转换器。 + * + * @param sql sql语句 + * @param recordMapper 结果集转换器 + * @param params sql参数 + * @param resultSetMapper返回的结果类型 + * @return 成功则返回转换结果,失败则抛出DbException,结果为空则返回空列表 + * @see RecordMapper + * @see ListRecordMapper + */ + public T query(String sql, RecordMapper recordMapper, Object... params) { + ResultSet rs = null; + PreparedStatement stmt = null; + Connection conn = null; + try { + conn = connManager.getConnection(); + stmt = createPreparedStatement(conn, sql, params); + rs = stmt.executeQuery(); + return recordMapper.map(new RecordAdapterForResultSet(rs)); + } catch (Exception e) { + throw new RuntimeException(e.getMessage(), e); + } finally { + connManager.close(conn, stmt, rs); + } + } + + /** + * 查询数据库,对结果集的每一行进行转换,然后将所有行封装成列表。 + * 用户可自定义行转换器。 + * 用户也可使用预定义的行转换器。 + * + * @param sql sql语句 + * @param rowMapper 行转换器 + * @param params sql参数 + * @param rowMapper返回的结果类型 + * @return 成功则返回结果列表,失败则抛出DbException,结果为空则返回空列表 + * @see RowMapper + * @see MapRowMapper + */ + public List queryList(String sql, RowMapper rowMapper, Object... params) { + return query(sql, new ListRecordMapper<>(rowMapper), params); + } +} diff --git a/src/main/java/org/wltea/analyzer/db/Lexicon.java b/src/main/java/org/wltea/analyzer/db/Lexicon.java new file mode 100644 index 00000000..6b0233a6 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/Lexicon.java @@ -0,0 +1,51 @@ +package org.wltea.analyzer.db; + +import java.io.Serializable; +import java.sql.Timestamp; + +/** + * @author fsren + * @date 2021-05-26 + */ +public class Lexicon implements Serializable { + + private static final long serialVersionUID = 7628519160135272308L; + /** + * 扩展词 + */ + private String lexiconText; + + /** + * 加载状态 true 代表加载, false 代表屏蔽 + */ + private Boolean isFill; + + /** + * 最后更新时间 + */ + private Timestamp modifyDate; + + public String getLexiconText() { + return lexiconText; + } + + public void setLexiconText(String lexiconText) { + this.lexiconText = lexiconText; + } + + public Boolean getFill() { + return isFill; + } + + public void setFill(Boolean fill) { + isFill = fill; + } + + public Timestamp getModifyDate() { + return modifyDate; + } + + public void setModifyDate(Timestamp modifyDate) { + this.modifyDate = modifyDate; + } +} diff --git a/src/main/java/org/wltea/analyzer/db/core/ConnectionManager.java b/src/main/java/org/wltea/analyzer/db/core/ConnectionManager.java new file mode 100644 index 00000000..0071b04b --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/ConnectionManager.java @@ -0,0 +1,151 @@ +package org.wltea.analyzer.db.core; + +import org.elasticsearch.SpecialPermission; + +import javax.sql.DataSource; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +/** + * 连接管理器 + * 封装了线程安全的数据库连接 + * + * @author fsren + * @date 2021-05-26 + */ +public class ConnectionManager { + + + private final DataSource dataSource; + private final ThreadLocal connHolder = new ThreadLocal<>(); + + public ConnectionManager(DataSource dataSource) { + this.dataSource = dataSource; + } + + public Connection getConnection() { + SpecialPermission.check(); + return AccessController.doPrivileged((PrivilegedAction) () -> { + try { + Connection conn = connHolder.get(); + if (conn == null) { + conn = dataSource.getConnection(); + connHolder.set(conn); + } + return conn; + } catch (SQLException e) { + throw new RuntimeException("An error occurred while creating a database connection.", e); + } + }); + } + + public void close(Connection conn, Statement stmt) { + SpecialPermission.check(); + AccessController.doPrivileged((PrivilegedAction) () -> { + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException ignored) { + } + } + if (conn != null) { + try { + if (conn.getAutoCommit()) { + conn.close(); + connHolder.remove(); + } + } catch (SQLException ignored) { + } + } + return null; + }); + + } + + public void close(Connection conn, Statement stmt, ResultSet rs) { + SpecialPermission.check(); + AccessController.doPrivileged((PrivilegedAction) () -> { + if (rs != null) { + try { + rs.close(); + } catch (SQLException ignored) { + } + } + return null; + }); + close(conn, stmt); + } + + public void startTransaction() { + SpecialPermission.check(); + AccessController.doPrivileged((PrivilegedAction) () -> { + try { + Connection conn = connHolder.get(); + if (conn != null) { + conn.close(); + connHolder.remove(); + } + conn = dataSource.getConnection(); + conn.setAutoCommit(false); + connHolder.set(conn); + } catch (SQLException e) { + throw new RuntimeException("An error occurred while starting transaction.", e); + } + return null; + }); + + } + + public void commit() { + SpecialPermission.check(); + AccessController.doPrivileged((PrivilegedAction) () -> { + Connection conn = connHolder.get(); + if (conn != null) { + try { + conn.commit(); + conn.close(); + connHolder.remove(); + } catch (SQLException e) { + throw new RuntimeException("An error occurred while committing transaction.", e); + } + } + return null; + }); + + } + + public void rollback() { + SpecialPermission.check(); + AccessController.doPrivileged((PrivilegedAction) () -> { + Connection conn = connHolder.get(); + if (conn != null) { + try { + conn.rollback(); + conn.close(); + connHolder.remove(); + } catch (SQLException e) { + throw new RuntimeException("An error occurred while committing transaction.", e); + } + } + return null; + }); + + } + + public boolean inTransaction() { + SpecialPermission.check(); + return AccessController.doPrivileged((PrivilegedAction) () -> { + Connection conn = connHolder.get(); + try { + return conn != null && !conn.getAutoCommit(); + } catch (SQLException e) { + throw new RuntimeException("An error occurred while getting auto commit.", e); + } + }); + + } +} diff --git a/src/main/java/org/wltea/analyzer/db/core/ListRecordMapper.java b/src/main/java/org/wltea/analyzer/db/core/ListRecordMapper.java new file mode 100644 index 00000000..e10103d0 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/ListRecordMapper.java @@ -0,0 +1,25 @@ +package org.wltea.analyzer.db.core; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author fsren + * @date 2021-05-26 + */ +public class ListRecordMapper implements RecordMapper> { + private final RowMapper rowMapper; + + public ListRecordMapper(RowMapper rowMapper) { + this.rowMapper = rowMapper; + } + + @Override + public List map(Record record) { + List result = new ArrayList<>(); + while (record.next()) { + result.add(rowMapper.map(record.getCurrentRow())); + } + return result; + } +} \ No newline at end of file diff --git a/src/main/java/org/wltea/analyzer/db/core/MapRowMapper.java b/src/main/java/org/wltea/analyzer/db/core/MapRowMapper.java new file mode 100644 index 00000000..b58b02ec --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/MapRowMapper.java @@ -0,0 +1,22 @@ +package org.wltea.analyzer.db.core; + +import java.util.Hashtable; +import java.util.Map; + +/** + * @author fsren + * @date 2021-05-26 + */ +public class MapRowMapper implements RowMapper> { + @Override + public Map map(Row row) { + Map map = new Hashtable<>(); + int count = row.getColumnCount(); + for (int i = 1; i <= count; i++) { + String key = row.getColumnLabel(i); + Object value = row.getObject(i); + map.put(key, value); + } + return map; + } +} diff --git a/src/main/java/org/wltea/analyzer/db/core/Record.java b/src/main/java/org/wltea/analyzer/db/core/Record.java new file mode 100644 index 00000000..18f3aad3 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/Record.java @@ -0,0 +1,22 @@ +package org.wltea.analyzer.db.core; + +/** + * @author fsren + * @date 2021-05-26 + */ +public interface Record { + + /** + * 获取当前行 + * + * @return 当前行 + */ + Row getCurrentRow(); + + /** + * 移动到下一行 + * + * @return 若当前已到达最后一行,则返回false,否则返回true + */ + boolean next(); +} diff --git a/src/main/java/org/wltea/analyzer/db/core/RecordAdapterForResultSet.java b/src/main/java/org/wltea/analyzer/db/core/RecordAdapterForResultSet.java new file mode 100644 index 00000000..afdbbbb6 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/RecordAdapterForResultSet.java @@ -0,0 +1,177 @@ +package org.wltea.analyzer.db.core; + +import java.sql.*; + +/** + * 将java.sql.ResultSet转换成自定义的Record,封装底层实现细节 + * + * @author fsren + * @date 2021-05-26 + */ +public class RecordAdapterForResultSet implements Record, Row { + private final ResultSet rs; + + public RecordAdapterForResultSet(ResultSet resultSet) { + this.rs = resultSet; + } + + @Override + public Object getObject(String columnLabel) { + try { + return rs.getObject(columnLabel); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Object getObject(int columnIndex) { + try { + return rs.getObject(columnIndex); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public int getInt(String columnLabel) { + try { + return rs.getInt(columnLabel); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public int getInt(int columnIndex) { + try { + return rs.getInt(columnIndex); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public String getString(String columnLabel) { + try { + return rs.getString(columnLabel); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public String getString(int columnIndex) { + try { + return rs.getString(columnIndex); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public double getDouble(String columnLabel) { + try { + return rs.getDouble(columnLabel); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public double getDouble(int columnIndex) { + try { + return rs.getDouble(columnIndex); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Date getDate(int columnIndex) { + try { + return rs.getDate(columnIndex); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Date getDate(String columnLabel) { + try { + return rs.getDate(columnLabel); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Boolean getBoolean(int columnIndex) { + try { + return rs.getBoolean(columnIndex); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Boolean getBoolean(String columnLabel) { + try { + return rs.getBoolean(columnLabel); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Timestamp getTimestamp(int columnIndex) { + try { + return rs.getTimestamp(columnIndex); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Timestamp getTimestamp(String columnLabel) { + try { + return rs.getTimestamp(columnLabel); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public int getColumnCount() { + try { + ResultSetMetaData metaData = rs.getMetaData(); + return metaData.getColumnCount(); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public String getColumnLabel(int index) { + try { + ResultSetMetaData metaData = rs.getMetaData(); + return metaData.getColumnLabel(index); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } + + @Override + public Row getCurrentRow() { + return this; + } + + @Override + public boolean next() { + try { + return rs.next(); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage(), e); + } + } +} \ No newline at end of file diff --git a/src/main/java/org/wltea/analyzer/db/core/RecordMapper.java b/src/main/java/org/wltea/analyzer/db/core/RecordMapper.java new file mode 100644 index 00000000..975af800 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/RecordMapper.java @@ -0,0 +1,10 @@ +package org.wltea.analyzer.db.core; + +/** + * @author fsren + * @date 2021-05-26 + */ +public interface RecordMapper { + + T map(Record record); +} diff --git a/src/main/java/org/wltea/analyzer/db/core/Row.java b/src/main/java/org/wltea/analyzer/db/core/Row.java new file mode 100644 index 00000000..73d06431 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/Row.java @@ -0,0 +1,59 @@ +package org.wltea.analyzer.db.core; + +import java.sql.Date; +import java.sql.Timestamp; + +/** + * * 数据行:封装了结果集的一行数据。 + * * 数据行由若干列组成,每列都是一个值。 + * * Row通过Record的getCurrentRow方法获取。 + * * Row中包含了一系列getXXX方法用于获取列中的值。 + * + * @author fsren + * @date 2021-05-26 + */ +public interface Row { + + Object getObject(String columnLabel); + + Object getObject(int columnIndex); + + int getInt(String columnLabel); + + int getInt(int columnIndex); + + String getString(String columnLabel); + + String getString(int columnIndex); + + double getDouble(String columnLabel); + + double getDouble(int columnIndex); + + Date getDate(int columnIndex); + + Date getDate(String columnLabel); + + Boolean getBoolean(int columnIndex); + + Boolean getBoolean(String columnLabel); + + Timestamp getTimestamp(int columnIndex); + + Timestamp getTimestamp(String columnLabel); + + /** + * 获取列数 + * + * @return 列数 + */ + int getColumnCount(); + + /** + * 获取列标签 + * + * @param index 列索引(从1开始) + * @return 列标签 + */ + String getColumnLabel(int index); +} diff --git a/src/main/java/org/wltea/analyzer/db/core/RowMapper.java b/src/main/java/org/wltea/analyzer/db/core/RowMapper.java new file mode 100644 index 00000000..4dc2d962 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/db/core/RowMapper.java @@ -0,0 +1,11 @@ +package org.wltea.analyzer.db.core; + +/** + * 行转换器接口 + * + * @author fsren + * @date 2021-05-26 + */ +public interface RowMapper { + T map(Row row); +} diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 2e72ed02..6620a7b6 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -38,10 +38,12 @@ import java.nio.file.SimpleFileVisitor; import java.security.AccessController; import java.security.PrivilegedAction; +import java.sql.Timestamp; import java.util.*; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.http.Header; import org.apache.http.HttpEntity; @@ -56,6 +58,10 @@ import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin; import org.wltea.analyzer.cfg.Configuration; import org.apache.logging.log4j.Logger; +import org.wltea.analyzer.db.DBConfigProperties; +import org.wltea.analyzer.db.DataSourceFactory; +import org.wltea.analyzer.db.JdbcUtil; +import org.wltea.analyzer.db.Lexicon; import org.wltea.analyzer.help.ESPluginLoggerFactory; @@ -82,7 +88,7 @@ public class Dictionary { private static final Logger logger = ESPluginLoggerFactory.getLogger(Dictionary.class.getName()); - private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); + private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(2); private static final String PATH_DIC_MAIN = "main.dic"; private static final String PATH_DIC_SURNAME = "surname.dic"; @@ -97,9 +103,19 @@ public class Dictionary { private final static String EXT_STOP = "ext_stopwords"; private final static String REMOTE_EXT_STOP = "remote_ext_stopwords"; + private final static String DB_URL = "db_url"; + private final static String DB_USER = "db_user"; + private final static String DB_PASSWORD = "db_password"; + private final static String DB_RELOAD_INTERVAL = "db_reload_interval"; + private Path conf_dir; private Properties props; + private static DBConfigProperties configProperties; + private static JdbcUtil jdbcUtil; + private Timestamp lastReloadDate; + private Timestamp lastReloadStopWordDate; + private Dictionary(Configuration cfg) { this.configuration = cfg; this.props = new Properties(); @@ -128,6 +144,7 @@ private Dictionary(Configuration cfg) { logger.error("ik-analyzer", e); } } + loadJdbcConfig(); } private String getProperty(String key){ @@ -136,6 +153,41 @@ private String getProperty(String key){ } return null; } + private void loadJdbcConfig(){ + + String dbUrl = getProperty(DB_URL); + if(dbUrl == null || dbUrl.trim().equals("")){ + logger.info("db synchronization is not turned on, ignore detection"); + return; + } + logger.info("init db config start"); + configProperties = new DBConfigProperties(); + configProperties.setDbUrl(checkDefault(getProperty(DB_URL),"jdbc:mysql://127.0.0.1:3306/post_bar")); + configProperties.setUser(checkDefault(getProperty(DB_USER),"root")); + configProperties.setPassword(checkDefault(getProperty(DB_PASSWORD),"root")); + configProperties.setReloadInterval(checkDefault(getProperty(DB_RELOAD_INTERVAL),60)); + jdbcUtil = new JdbcUtil(DataSourceFactory.getDataSource(configProperties)); + logger.info("db config {}",configProperties.toString()); + logger.info("init db config end"); + } + private String checkDefault(String value,String defaultValue){ + if(value == null || "".equals(value.trim())){ + return defaultValue; + } + return value.trim(); + } + + private Integer checkDefault(String value,int defaultValue){ + if(value == null || "".equals(value.trim())){ + return defaultValue; + } + value = value.trim(); + try { + return Integer.parseInt(value); + }catch(Exception e){ + return defaultValue; + } + } /** * 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化 * 只有当Dictionary类被实际调用时,才会开始载入词典, 这将延长首次分词操作的时间 该方法提供了一个在应用加载阶段就初始化字典的手段 @@ -155,6 +207,12 @@ public static synchronized void initial(Configuration cfg) { singleton.loadPrepDict(); singleton.loadStopWordDict(); + //启动线程 + if(cfg.isEnableMysqlDict()){ + // 10 秒是初始延迟可以修改的 ReloadInterval是间隔时间 单位秒 + pool.scheduleAtFixedRate(new MysqlMonitor(), 10, configProperties.getReloadInterval(), TimeUnit.SECONDS); + } + if(cfg.isEnableRemoteDict()){ // 建立监控线程 for (String location : singleton.getRemoteExtDictionarys()) { @@ -573,4 +631,98 @@ void reLoadMainDict() { logger.info("reload ik dict finished."); } + void reLoadMysqlMainDict() { + if(jdbcUtil == null){ + logger.info("mysql parameters are not configured, mysql synchronization is ignored"); + return; + } + logger.info("======start mysql to reload ik dict.======"); + List mainDictList = loadMySQLExtDict(); + + AtomicInteger mainDictFillSegmentCount = new AtomicInteger(); + AtomicInteger mainDictDisableSegmentCount = new AtomicInteger(); + mainDictList.forEach(lexicon -> { + if(lexicon.getFill()){ + singleton._MainDict.fillSegment(lexicon.getLexiconText().trim().toLowerCase().toCharArray()); + mainDictFillSegmentCount.getAndIncrement(); + }else { + singleton._MainDict.disableSegment(lexicon.getLexiconText().trim().toLowerCase().toCharArray()); + mainDictDisableSegmentCount.getAndIncrement(); + } + }); + logger.info("last update mysql ext dic time :{},fill count:{} ,disable count:{} " ,singleton.lastReloadDate,mainDictFillSegmentCount,mainDictDisableSegmentCount); + + List stopWordDictList = loadMySQLStopWordDict(); + AtomicInteger stopWordDictFillSegmentCount = new AtomicInteger(); + AtomicInteger stopWordDictDisableSegmentCount = new AtomicInteger(); + stopWordDictList.forEach(lexicon -> { + if(lexicon.getFill()){ + singleton._StopWords.fillSegment(lexicon.getLexiconText().trim().toLowerCase().toCharArray()); + stopWordDictFillSegmentCount.getAndIncrement(); + }else { + singleton._StopWords.disableSegment(lexicon.getLexiconText().trim().toLowerCase().toCharArray()); + stopWordDictDisableSegmentCount.getAndIncrement(); + } + }); + logger.info("last update mysql stop word time :{},fill count:{} ,disable count:{} " ,singleton.lastReloadStopWordDate,stopWordDictFillSegmentCount,stopWordDictDisableSegmentCount); + logger.info("======reload mysql ik dict finished.======"); + } + /** + * 从MySql中加载动态词库 + */ + private List loadMySQLExtDict(){ + + String commandSql = "SELECT lexicon_text ,modify_date,lexicon_status,del_flag FROM es_lexicon WHERE lexicon_type = 0 ORDER BY modify_date desc"; + LinkedList params = new LinkedList<>(); + if(singleton.lastReloadDate != null){ + commandSql = "SELECT lexicon_text ,modify_date,lexicon_status,del_flag FROM es_lexicon WHERE lexicon_type = 0 AND modify_date > ? ORDER BY modify_date ASC"; + params.add(singleton.lastReloadDate); + } + List lexiconList = loadLexicon(commandSql,params.toArray()); + if(lexiconList == null || lexiconList.size() == 0){ + logger.info("the latest update record was not found, the last update time :{} " ,singleton.lastReloadDate); + return Collections.emptyList(); + } + singleton.lastReloadDate = lexiconList.get(lexiconList.size() - 1).getModifyDate(); + return lexiconList; + + } + + + /** + * 从MySql中加载远程停用词库 + */ + private List loadMySQLStopWordDict(){ + + String commandSql = "SELECT lexicon_text ,modify_date,lexicon_status,del_flag FROM es_lexicon WHERE lexicon_type = 1 ORDER BY modify_date ASC"; + LinkedList params = new LinkedList<>(); + if(singleton.lastReloadStopWordDate != null){ + commandSql = "SELECT lexicon_text ,modify_date,lexicon_status,del_flag FROM es_lexicon WHERE lexicon_type = 1 AND modify_date > ? ORDER BY modify_date ASC"; + params.add(singleton.lastReloadStopWordDate); + } + List lexiconList = loadLexicon(commandSql,params.toArray()); + if(lexiconList == null || lexiconList.size() == 0){ + logger.info("the last reload stop word not found, the last update time :{} " ,singleton.lastReloadStopWordDate); + return Collections.emptyList(); + } + singleton.lastReloadStopWordDate = lexiconList.get(lexiconList.size() - 1).getModifyDate(); + return lexiconList; + } + + private List loadLexicon(String commandSql,Object[] param){ + return jdbcUtil.queryList(commandSql, row -> { + String lexiconText = row.getString("lexicon_text"); + Timestamp modifyDate = row.getTimestamp("modify_date"); + //词条状态 0正常 1暂停使用 + int status = row.getInt("lexicon_status"); + //删除状态 0正常 1暂停使用 + int deleteStatus = row.getInt("del_flag"); + Lexicon lexicon = new Lexicon(); + lexicon.setLexiconText(lexiconText); + lexicon.setModifyDate(modifyDate); + lexicon.setFill(status == 0 && deleteStatus == 0); + return lexicon; + },param); + } + } diff --git a/src/main/java/org/wltea/analyzer/dic/MysqlMonitor.java b/src/main/java/org/wltea/analyzer/dic/MysqlMonitor.java new file mode 100644 index 00000000..5cdbba58 --- /dev/null +++ b/src/main/java/org/wltea/analyzer/dic/MysqlMonitor.java @@ -0,0 +1,13 @@ +package org.wltea.analyzer.dic; + +/** + * @author fsren + * @date 2021-05-25 + */ +public class MysqlMonitor implements Runnable{ + + @Override + public void run() { + Dictionary.getSingleton().reLoadMysqlMainDict(); + } +} diff --git a/src/main/resources/plugin-security.policy b/src/main/resources/plugin-security.policy index 55d759a3..c354562c 100644 --- a/src/main/resources/plugin-security.policy +++ b/src/main/resources/plugin-security.policy @@ -1,4 +1,6 @@ grant { // needed because of the hot reload functionality + permission java.lang.RuntimePermission "setContextClassLoader","getClassLoader"; + permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; permission java.net.SocketPermission "*", "connect,resolve"; }; \ No newline at end of file From 7eb3fd1438cd38a39c2dae714778d53e557b4c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=BB=E4=B8=B0=E7=A1=95?= <274962992@qq.com> Date: Wed, 2 Jun 2021 15:25:49 +0800 Subject: [PATCH 13/14] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=88=86=E8=AF=8D?= =?UTF-8?q?=E4=BB=8Emysql=E5=90=8C=E6=AD=A5=E6=89=A9=E5=B1=95=206.x=20?= =?UTF-8?q?=E5=88=86=E6=94=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 92bbb311..7ab39feb 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ 2011 - 7.13.0 + 6.2.3 1.8 ${project.basedir}/src/main/assemblies/plugin.xml analysis-ik From ef1c595e2ac5668c1feff1d900206824716f3376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=BB=E4=B8=B0=E7=A1=95?= <274962992@qq.com> Date: Wed, 2 Jun 2021 15:35:03 +0800 Subject: [PATCH 14/14] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=88=86=E8=AF=8D?= =?UTF-8?q?=E4=BB=8Emysql=E5=90=8C=E6=AD=A5=E6=89=A9=E5=B1=95=206.x=20?= =?UTF-8?q?=E5=88=86=E6=94=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../org/elasticsearch/index/analysis/IkTokenizerFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java index d2805618..51773ddd 100644 --- a/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java @@ -11,7 +11,7 @@ public class IkTokenizerFactory extends AbstractTokenizerFactory { private Configuration configuration; public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { - super(indexSettings, settings,name); + super(indexSettings, name,settings); configuration=new Configuration(env,settings); }