diff --git a/.github/workflows/postgress12.yml b/.github/workflows/postgress12.yml deleted file mode 100644 index ebea2a5..0000000 --- a/.github/workflows/postgress12.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Build pg_cjk_parser for postgres 12 - -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - -jobs: - build: - - runs-on: ubuntu-20.04 - - steps: - - uses: actions/checkout@v3 - - name: uninstall postgresql-14 - run: sudo apt-get remove -y postgresql-14 libpq-dev - - name: install dependencies - run: sudo apt-get install -y --allow-downgrades postgresql-12 postgresql-server-dev-12 libpq-dev libpq5=12.13-0ubuntu0.20.04.1 gcc icu-devtools libicu-dev - - name: make - run: make - - name: make install - run: sudo make install - diff --git a/.github/workflows/postgress12_16.yml b/.github/workflows/postgress12_16.yml new file mode 100644 index 0000000..776d33e --- /dev/null +++ b/.github/workflows/postgress12_16.yml @@ -0,0 +1,65 @@ +name: Build pg_cjk_parser for postgres 12 and 16 + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build_pg12: + runs-on: ubuntu-latest + services: + registry: + image: registry:2 + ports: + - 5000:5000 + steps: + - uses: actions/checkout@v4 + - + name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: network=host + - + name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + file: Dockerfile_pg12 + tags: localhost:5000/postgres:12-dev + - + name: Run bash script to verify image postgres:12-dev + run: docker pull localhost:5000/postgres:12-dev && docker tag localhost:5000/postgres:12-dev postgres:12-dev && chmod +x ./postgres-12.sh && ./postgres-12.sh + + build_pg16: + runs-on: ubuntu-latest + services: + registry: + image: registry:2 + ports: + - 5000:5000 + steps: + - uses: actions/checkout@v4 + - + name: Set up QEMU + uses: docker/setup-qemu-action@v3 + 
- + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: network=host + - + name: Build and push + uses: docker/build-push-action@v5 + with: + push: true + file: Dockerfile_pg16 + tags: localhost:5000/postgres:16-dev + - + name: Run bash script to verify image postgres:16-dev + run: docker pull localhost:5000/postgres:16-dev && docker tag localhost:5000/postgres:16-dev postgres:16-dev && chmod +x ./postgres-16.sh && ./postgres-16.sh + diff --git a/Dockerfile b/Dockerfile_pg12 similarity index 76% rename from Dockerfile rename to Dockerfile_pg12 index b0f035d..c92835c 100644 --- a/Dockerfile +++ b/Dockerfile_pg12 @@ -1,7 +1,7 @@ -FROM postgres:11 +FROM postgres:12 RUN apt-get update -RUN apt-get install -y postgresql-server-dev-all -RUN apt-get install -y gcc +RUN apt-get install -y postgresql-server-dev-12 +RUN apt-get install -y gcc make RUN apt-get install -y icu-devtools libicu-dev RUN mkdir -p /root/parser diff --git a/Dockerfile_pg16 b/Dockerfile_pg16 new file mode 100644 index 0000000..da34da5 --- /dev/null +++ b/Dockerfile_pg16 @@ -0,0 +1,14 @@ +FROM postgres:16 +RUN apt-get update +RUN apt-get install -y postgresql-server-dev-16 +RUN apt-get install -y gcc make +RUN apt-get install -y icu-devtools libicu-dev + +RUN mkdir -p /root/parser +WORKDIR /root/parser +COPY pg_cjk_parser.c /root/parser/ +COPY pg_cjk_parser.control /root/parser/ +COPY Makefile /root/parser/ +COPY pg_cjk_parser--0.0.1.sql /root/parser/ +COPY zht2zhs.h /root/parser/ +RUN make clean && make install \ No newline at end of file diff --git a/Readme.md b/Readme.md index 293b365..56d6d33 100644 --- a/Readme.md +++ b/Readme.md @@ -1,6 +1,8 @@ # Postgres CJK Parser - pg_cjk_parser -Postgres CJK Parser pg_cjk_parser is a fts (full text search) parser derived from the default parser in PostgreSQL 11. 
When a postgres database uses utf-8 encoding, this parser supports all the features of the default parser while splitting CJK (Chinese, Japanese, Korean) characters into 2-gram tokens. If the database's encoding is not utf-8, the parser behaves just like the default parser. +Postgres CJK Parser pg_cjk_parser is a fts (full text search) parser derived from the default parser in PostgreSQL. When a postgres database uses utf-8 encoding, this parser supports all the features of the default parser while splitting CJK (Chinese, Japanese, Korean) characters into 2-gram tokens. If the database's encoding is not utf-8, the parser behaves just like the default parser. + +Now pg_cjk_parser supports PostgreSQL 12 to 16. ## Introduction @@ -59,16 +61,22 @@ You can build pg_cjk_parser in a docker container. 1. Clone this repository into your local computer, say in /home/user/pg_cjk_parser 2. Ener /home/user/pg_cjk_parser -3. Build the docker image postgres:11-dev +3. Build the docker image postgres:12-dev + +To build this extension for PostgreSQL 12 +```bash +docker build -t postgres:12-dev . -f Dockerfile_pg12 +``` +To build this extension for PostgreSQL 16 ```bash -docker build -t postgres:11-dev . +docker build -t postgres:12-dev . -f Dockerfile_pg16 ``` 4. Run the following command ```bash -docker run -it --rm -v $(PWD):/root/code postgres:11-dev /bin/bash -c "cd /root/code && make clean && make" +docker run -it --rm -v $(pwd):/root/code postgres:12-dev /bin/bash -c "cd /root/code && make clean && make" ``` Then pg_cjk_parser.bc and pg_cjk_parser.so will be available in current directory (/home/user/pg_cjk_parser). You can manually install the parser to a PostgreSQL instances or you can install it as an extension. @@ -79,11 +87,12 @@ You can manually install pg_cjk_parser or you can install it as an extension. ### Install as an extension -Let's say that you have an instance of PostgreSQL 11 running, either on a docker container on a server. 
+Let's say that you have an instance of PostgreSQL 12 running, either in a docker container or on a server. Make sure you have the following dependencies installed. ```bash -sudo apt-get install -y postgresql-server-dev-all +# replace 12 with 16 if you build this extension for pg 16 +sudo apt-get install -y postgresql-server-dev-12 sudo apt-get install -y gcc sudo apt-get install -y icu-devtools libicu-dev ``` @@ -103,6 +112,7 @@ Run the following command on the server ```bash cd /home/user/parser make clean && make install +sudo make USE_PGXS=1 install ``` Connect to your server via pgAdmin or other clients and then execute the following sql to create the pg_cjk_parser extension. @@ -132,13 +142,18 @@ Now you can execute the sql demonstrated in the introduction section to see the ### Docker image -There is a Dockerfile in this repository which helps you build a docker image based on postgres:11. +There is a Dockerfile in this repository which helps you build a docker image based on postgres:12. ```bash -docker build -t postgres:11-dev . +docker build -t postgres:12-dev . -f Dockerfile_pg12 ``` -If you use this image to start an instance of postgres:11, all you need to do is to create the extension, search parser and configuration in pgAdmin. +There is also a Dockerfile in this repository which helps you build a docker image based on postgres:16. +```bash +docker build -t postgres:16-dev . -f Dockerfile_pg16 +``` + +If you use this image to start an instance of postgres:12, all you need to do is to create the extension, search parser and configuration in pgAdmin. Connect to your server via pgAdmin or other clients and then execute the following sql to create the pg_cjk_parser extension. @@ -167,10 +182,10 @@ Now you can execute the sql demonstrated in the introduction section to see the ### Install manually -Suppose you have an docker instance of postgres name postgres_db_1 whose image is postgres:11. 
+Suppose you have a docker instance of postgres named postgres_db_1 whose image is postgres:12. ```bash -docker cp pg_cjk_parser.so postgres_db_1:/usr/lib/postgresql/11/lib/ +docker cp pg_cjk_parser.so postgres_db_1:/usr/lib/postgresql/12/lib/ ``` Connect to the postgres instance via pgAdmin or other clients and then execute the following sql @@ -335,6 +350,14 @@ to_tsvector('Doraemnon Nobita「ドラえもん のび太の牧場物語」多 |-|-|-|-|-| |'doraemnon':1 'nobita':2 'χψψωω':22 '「':3 '」':15 'えも':6 'のび':8 'の牧':11 'び太':9 'もん':7 'ドラ':4 'ラえ':5 '場物':13 '多拉':16 '大雄':21 '太の':10 '梦':18 '比大':20 '牧場':12 '物語':14 '野比':19|"'のび' & 'び太'"|"'野比' & '比大' & '大雄'"|true|true| +```sql +SELECT to_tsvector('大韩民国개인정보의 수집 및 이용 목적(「개인정보 보호법」 제15조)'), to_tsquery('「大韩民国개인정보'); +``` + +|to_tsvector|to_tsquery| +|-|-| +| '15':21 '「':13 '」':19 '国개':4 '大韩':1 '民国':3 '韩民':2 '개인':5,14 '목적':12 '및':10 '보의':8 '보호':17 '수집':9 '이용':11 '인정':6,15 '정보':7,16 '제':20 '조':22 '호법':18|'「' & '大韩' & '韩民' & '民国' & '国개' & '개인' & '인정' & '정보'| + ## License ### PG CJK Parser diff --git a/pg_cjk_parser.c b/pg_cjk_parser.c index 223769d..aa560ce 100644 --- a/pg_cjk_parser.c +++ b/pg_cjk_parser.c @@ -613,7 +613,7 @@ p_isnotCJK(TParser *prs){ else c = (pg_wchar) *(prs->wstr + prs->state->poschar); - if (c >= 0x2E80 && c <= 0x9FFF){ + if ((c >= 0x2E80 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7A3)){ return 0; } for(int i=0; i<7; i++){ @@ -647,7 +647,7 @@ p_isCJK(TParser *prs){ c = (pg_wchar) *(prs->wstr + prs->state->poschar); - if (c >= 0x2E80 && c <= 0x9FFF){ + if ((c >= 0x2E80 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7A3)){ #ifdef WPARSER_TRACE fprintf(stderr, "%x isCJK?", c); fprintf(stderr, " = true\n"); #endif @@ -682,7 +682,7 @@ p_isCJK2gram(TParser *prs){ else c = (pg_wchar) *(prs->wstr + prs->state->poschar); - if (c >= 0x3040 && c <= 0x9FFF){ + if ((c >= 0x3040 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7A3)){ //CJK Unified Ideographs //a 2-gram token return 1; @@ -714,7 +714,7 @@ utf8_cjkCodePoint(char * s){ static void 
utf8_setCjkCodePoint(char * s, unsigned int codePoint){ - if(codePoint >= 0x2E80 && codePoint <= 0x9FFF){ + if((codePoint >= 0x2E80 && codePoint <= 0x9FFF) || (codePoint >= 0xAC00 && codePoint <= 0xD7A3)){ s[0] = 0xE0 | (codePoint>>12); s[1] = 0x80 | ((codePoint>>6) & 0x3F); s[2] = 0x80 | (codePoint & 0x3F); @@ -763,7 +763,7 @@ p_isCJK2gram_twice(TParser *prs){ return 0; } - if (c >= 0x3040 && c <= 0x9FFF){ + if ((c >= 0x3040 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7A3)){ //CJK Unified Ideographs //token as if it is a 2-gram pg_wchar nc; @@ -772,7 +772,7 @@ p_isCJK2gram_twice(TParser *prs){ else nc = (pg_wchar) *(prs->wstr + prs->state->poschar); - if (nc >= 0x3040 && nc <= 0x9FFF){ + if ((nc >= 0x3040 && nc <= 0x9FFF) || (nc >= 0xAC00 && nc <= 0xD7A3)){ #ifdef WPARSER_TRACE fprintf(stderr, " %x %x is 2-gram state=", c, nc); fprintf(stderr, "%d \n", prs->state->state); @@ -873,7 +873,7 @@ p_isCJKunigram(TParser *prs){ fprintf(stderr, "p_isCJKunigram: current char = %x\n", c); #endif - if (c >= 0x3040 && c <= 0x9FFF){ + if ((c >= 0x3040 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7A3)){ //CJK Unified Ideographs //if it is surrounded by non-CJK chars or CJK unigrams, //it is also unigram @@ -886,12 +886,12 @@ p_isCJKunigram(TParser *prs){ #ifdef WPARSER_TRACE fprintf(stderr, "p_isCJKunigram: next char = %x\n", c); #endif - if(c < 0x3040|| c > 0x9FFF){ + if( !((c >= 0x3040 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7A3)) ){ c = p_prevChar(prs); #ifdef WPARSER_TRACE fprintf(stderr, "p_isCJKunigram: prev char = %x\n", c); #endif - if(c < 0x3040 || c > 0x9FFF)return 1; + if( !((c >= 0x3040 && c <= 0x9FFF) || (c >= 0xAC00 && c <= 0xD7A3)) )return 1; } return 0; } @@ -2341,19 +2341,37 @@ typedef struct int len; } hlCheck; -static bool +#ifndef PG_VERSION_NUM + #error "Cannot determine which postgresql version to build against" +#endif + +#if PG_VERSION_NUM < 130000 + #define TSTernaryValue bool + #define TS_YES true + #define TS_NO false +#endif + +/* + * TS_execute 
callback for matching a tsquery operand to headline words + * + * Note: it's tempting to report words[] indexes as pos values to save + * searching in hlCover; but that would screw up phrase matching, which + * expects to measure distances in lexemes not tokens. + */ +static TSTernaryValue checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) { - int i; hlCheck *checkval = (hlCheck *) opaque; + int i; + /* scan words array for matching items */ for (i = 0; i < checkval->len; i++) { if (checkval->words[i].item == val) { - /* don't need to find all positions */ + /* if data == NULL, don't need to report positions */ if (!data) - return true; + return TS_YES; if (!data->pos) { @@ -2370,9 +2388,9 @@ checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data) } if (data && data->npos > 0) - return true; + return TS_YES; - return false; + return TS_NO; } @@ -2869,13 +2887,13 @@ prsd2_headline(PG_FUNCTION_ARGS) char *val = defGetString(defel); if (pg_strcasecmp(defel->defname, "MaxWords") == 0) - max_words = pg_atoi(val, sizeof(int32), 0); + max_words = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "MinWords") == 0) - min_words = pg_atoi(val, sizeof(int32), 0); + min_words = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "ShortWord") == 0) - shortword = pg_atoi(val, sizeof(int32), 0); + shortword = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0) - max_fragments = pg_atoi(val, sizeof(int32), 0); + max_fragments = pg_strtoint32(val); else if (pg_strcasecmp(defel->defname, "StartSel") == 0) prs->startsel = pstrdup(val); else if (pg_strcasecmp(defel->defname, "StopSel") == 0) diff --git a/postgres-12.sh b/postgres-12.sh new file mode 100644 index 0000000..2557c50 --- /dev/null +++ b/postgres-12.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +docker run --name postgres12 -e POSTGRES_PASSWORD=password -d postgres:12-dev +sleep 5 +docker exec postgres12 psql -U postgres -c 'CREATE EXTENSION 
pg_cjk_parser;' +docker exec postgres12 psql -U postgres -c "CREATE TEXT SEARCH PARSER public.pg_cjk_parser (START = prsd2_cjk_start, GETTOKEN = prsd2_cjk_nexttoken, END = prsd2_cjk_end, LEXTYPES = prsd2_cjk_lextype, HEADLINE = prsd2_cjk_headline); CREATE TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ( PARSER = pg_cjk_parser ); SET default_text_search_config = 'public.config_2_gram_cjk';" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR asciihword WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR cjk WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR email WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR asciiword WITH english_stem;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR entity WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR file WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR float WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR host WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword_asciipart WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword_numpart WITH simple;" + +docker exec postgres12 psql 
-U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword_part WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR int WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR numhword WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR numword WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR protocol WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR sfloat WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR tag WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR uint WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR url WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR url_path WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR version WITH simple;" + +docker exec postgres12 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR word WITH simple;" + +docker exec postgres12 psql -U postgres -c "SET default_text_search_config = 'public.config_2_gram_cjk'; SELECT to_tsvector('Doraemnon Nobita「ドラえもん のび太の牧場物語」多拉A梦 野比大雄χΨψΩω'), to_tsquery('のび太'), to_tsquery('野比大雄');" + +docker exec postgres12 psql -U postgres -c "SET default_text_search_config = 'public.config_2_gram_cjk'; SELECT 
to_tsvector('大韩民国개인정보의 수집 및 이용 목적(「개인정보 보호법」 제15조)'), to_tsquery('「大韩民国개인정보');" + +docker stop postgres12 && docker rm postgres12 \ No newline at end of file diff --git a/postgres-16.sh b/postgres-16.sh new file mode 100644 index 0000000..6ab753a --- /dev/null +++ b/postgres-16.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +docker run --name postgres16 -e POSTGRES_PASSWORD=password -d postgres:16-dev +sleep 5 +docker exec postgres16 psql -U postgres -c 'CREATE EXTENSION pg_cjk_parser;' +docker exec postgres16 psql -U postgres -c "CREATE TEXT SEARCH PARSER public.pg_cjk_parser (START = prsd2_cjk_start, GETTOKEN = prsd2_cjk_nexttoken, END = prsd2_cjk_end, LEXTYPES = prsd2_cjk_lextype, HEADLINE = prsd2_cjk_headline); CREATE TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ( PARSER = pg_cjk_parser ); SET default_text_search_config = 'public.config_2_gram_cjk';" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR asciihword WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR cjk WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR email WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR asciiword WITH english_stem;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR entity WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR file WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR float WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR host WITH simple;" + 
+docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword_asciipart WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword_numpart WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR hword_part WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR int WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR numhword WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR numword WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR protocol WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR sfloat WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR tag WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR uint WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR url WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR url_path WITH simple;" + +docker exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR version WITH simple;" + +docker 
exec postgres16 psql -U postgres -c "ALTER TEXT SEARCH CONFIGURATION public.config_2_gram_cjk ADD MAPPING FOR word WITH simple;" + +docker exec postgres16 psql -U postgres -c "SET default_text_search_config = 'public.config_2_gram_cjk'; SELECT to_tsvector('Doraemnon Nobita「ドラえもん のび太の牧場物語」多拉A梦 野比大雄χΨψΩω'), to_tsquery('のび太'), to_tsquery('野比大雄');" + +docker exec postgres16 psql -U postgres -c "SET default_text_search_config = 'public.config_2_gram_cjk'; SELECT to_tsvector('大韩民国개인정보의 수집 및 이용 목적(「개인정보 보호법」 제15조)'), to_tsquery('「大韩民国개인정보');" + +docker stop postgres16 && docker rm postgres16 \ No newline at end of file