allow user defined text search config in database See merge request pleroma/pleroma!3275youtube-fix
@@ -141,3 +141,21 @@ but should only be run if necessary. **It is safe to cancel this.** | |||
```sh | |||
mix pleroma.database ensure_expiration | |||
``` | |||
## Change Text Search Configuration | |||
Changes the `default_text_search_config` of the database and (if necessary) the text search configuration used by the index, then rebuilds the index (this may take some time). | |||
=== "OTP" | |||
```sh | |||
./bin/pleroma_ctl database set_text_search_config english | |||
``` | |||
=== "From Source" | |||
```sh | |||
mix pleroma.database set_text_search_config english | |||
``` | |||
See [PostgreSQL documentation](https://www.postgresql.org/docs/current/textsearch-configuration.html) and `docs/configuration/howto_search_cjk.md` for more detail. |
@@ -0,0 +1,42 @@ | |||
# How to enable text search for Chinese, Japanese and Korean | |||
Pleroma's full text search feature is powered by PostgreSQL's native [text search](https://www.postgresql.org/docs/current/textsearch.html). It works well out of the box for most languages, but needs extra configuration for some Asian languages such as Chinese, Japanese and Korean (CJK). | |||
## Setup and test the new search config | |||
In most cases, you will need to install an extension to support parsing CJK text. Here are a few extensions you may choose from; you are also more than welcome to share additional ones that work for you with the rest of the Pleroma community. | |||
* [a generic n-gram parser](https://github.com/huangjimmy/pg_cjk_parser) supports Simplified/Traditional Chinese, Japanese, and Korean | |||
* [a Korean parser](https://github.com/i0seph/textsearch_ko) based on mecab | |||
* [a Japanese parser](https://www.amris.co.jp/tsja/index.html) based on mecab | |||
* [zhparser](https://github.com/amutu/zhparser/) is a PostgreSQL extension based on the Simple Chinese Word Segmentation (SCWS) | |||
* [another Chinese parser](https://github.com/jaiminpan/pg_jieba) based on Jieba Chinese Word Segmentation | |||
Once you have the new search config, make sure you test it as the `pleroma` user in PostgreSQL (change `YOUR.CONFIG` to your real configuration name): | |||
``` | |||
SELECT ts_debug('YOUR.CONFIG', '安装和配置Nginx, ElixirとErlangをインストールします'); | |||
``` | |||
Check output of the query, and see if it matches your expectation. | |||
## Update text search config and index in database | |||
=== "OTP" | |||
```sh | |||
./bin/pleroma_ctl database set_text_search_config YOUR.CONFIG | |||
``` | |||
=== "From Source" | |||
```sh | |||
mix pleroma.database set_text_search_config YOUR.CONFIG | |||
``` | |||
Note: index update may take a while. | |||
## Restart database connection | |||
Since some of the changes above only take effect on a new database connection, you will have to restart either the Pleroma or the PostgreSQL process, or use the `pg_terminate_backend` SQL function to drop the existing connections without restarting either. | |||
Now the search results for statuses should be much more friendly for your language of choice. The results for searching users and tags are not changed, as the default parsing/matching should work for most cases. |
@@ -167,4 +167,51 @@ defmodule Mix.Tasks.Pleroma.Database do | |||
end) | |||
|> Stream.run() | |||
end | |||
# Sets the database-wide `default_text_search_config` to `tsconfig` and then
# recreates the full-text search machinery on `objects` so it uses the new config.
#
# Steps:
#   1. Print the current value of `default_text_search_config`.
#   2. Run `ALTER DATABASE <current database> SET default_text_search_config = <tsconfig>`.
#   3. If PostgreSQL reported any messages for that statement, print them and stop.
#   4. Otherwise, depending on `[:database, :rum_enabled]`:
#        * RUM: replace the `objects_fts_update()` trigger function and touch every
#          row in `objects` so the trigger recomputes `fts_content`;
#        * GIN: drop and recreate the `objects_fts` index with `tsconfig`.
#
# `ALTER DATABASE` and `CREATE INDEX` are utility statements and do not accept bind
# parameters, so the database name and `tsconfig` are interpolated into the SQL text
# after being quoted by the helpers below.
def run(["set_text_search_config", tsconfig]) do
  start_pleroma()

  %{rows: [[tsc]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SHOW default_text_search_config;")
  shell_info("Current default_text_search_config: #{tsc}")

  %{rows: [[db]]} = Ecto.Adapters.SQL.query!(Pleroma.Repo, "SELECT current_database();")
  shell_info("Update default_text_search_config: #{tsconfig}")

  # The database name must be a quoted identifier: names containing hyphens or
  # upper-case letters are otherwise rejected or folded to a different name.
  alter_sql =
    "ALTER DATABASE #{quote_sql_identifier(db)} " <>
      "SET default_text_search_config = #{quote_sql_literal(tsconfig)};"

  %{messages: msg} = Ecto.Adapters.SQL.query!(Pleroma.Repo, alter_sql)

  # A non-existent config does not raise an exception; it only yields a non-empty
  # list of messages, so that list is what signals failure here.
  if msg != [] do
    shell_info("Error: #{inspect(msg, pretty: true)}")
  else
    rum_enabled = Pleroma.Config.get([:database, :rum_enabled])
    shell_info("Recreate index, RUM: #{rum_enabled}")

    # Note SQL below needs to be kept up-to-date with latest GIN or RUM index definition in future
    if rum_enabled do
      # The trigger calls to_tsvector/1 without an explicit config, so it picks up
      # whatever default_text_search_config is in effect for the connection.
      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        """
        CREATE OR REPLACE FUNCTION objects_fts_update() RETURNS trigger AS $$ BEGIN
        new.fts_content := to_tsvector(new.data->>'content');
        RETURN new;
        END
        $$ LANGUAGE plpgsql
        """
      )

      shell_info("Refresh RUM index")
      # Touching every row fires the update trigger and recomputes fts_content.
      Ecto.Adapters.SQL.query!(Pleroma.Repo, "UPDATE objects SET updated_at = NOW();")
    else
      Ecto.Adapters.SQL.query!(Pleroma.Repo, "DROP INDEX IF EXISTS objects_fts;")

      Ecto.Adapters.SQL.query!(
        Pleroma.Repo,
        "CREATE INDEX objects_fts ON objects USING gin(" <>
          "to_tsvector(#{quote_sql_literal(tsconfig)}, data->>'content'));"
      )
    end

    shell_info("Done.")
  end
end

# Wraps `name` in double quotes for use as a PostgreSQL identifier, doubling any
# embedded double quote.
defp quote_sql_identifier(name) when is_binary(name) do
  ~s("#{String.replace(name, ~s("), ~s(""))}")
end

# Wraps `value` in single quotes for use as a PostgreSQL string literal, doubling
# any embedded single quote.
defp quote_sql_literal(value) when is_binary(value) do
  "'#{String.replace(value, "'", "''")}'"
end
end |
@@ -64,7 +64,7 @@ defmodule Pleroma.Activity.Search do | |||
from([a, o] in q, | |||
where: | |||
fragment( | |||
"to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", | |||
"to_tsvector(?->>'content') @@ plainto_tsquery(?)", | |||
o.data, | |||
^search_query | |||
) | |||
@@ -75,7 +75,7 @@ defmodule Pleroma.Activity.Search do | |||
from([a, o] in q, | |||
where: | |||
fragment( | |||
"to_tsvector('english', ?->>'content') @@ websearch_to_tsquery('english', ?)", | |||
"to_tsvector(?->>'content') @@ websearch_to_tsquery(?)", | |||
o.data, | |||
^search_query | |||
) | |||
@@ -86,7 +86,7 @@ defmodule Pleroma.Activity.Search do | |||
from([a, o] in q, | |||
where: | |||
fragment( | |||
"? @@ plainto_tsquery('english', ?)", | |||
"? @@ plainto_tsquery(?)", | |||
o.fts_content, | |||
^search_query | |||
), | |||
@@ -98,7 +98,7 @@ defmodule Pleroma.Activity.Search do | |||
from([a, o] in q, | |||
where: | |||
fragment( | |||
"? @@ websearch_to_tsquery('english', ?)", | |||
"? @@ websearch_to_tsquery(?)", | |||
o.fts_content, | |||
^search_query | |||
), | |||
@@ -0,0 +1,11 @@ | |||
defmodule Pleroma.Repo.Migrations.AddDefaultTextSearchConfig do
  use Ecto.Migration

  # Pins `default_text_search_config` of the current database to 'english'.
  #
  # ALTER DATABASE needs the database name as an identifier and cannot take it
  # from an expression, so the statement is assembled and run dynamically inside
  # a DO block. The name is passed through quote_ident() so that databases whose
  # names contain hyphens, upper-case letters or other special characters are
  # addressed correctly instead of producing a syntax error.
  #
  # NOTE(review): a single-argument execute/1 inside change/0 has no automatic
  # reverse, so rolling this migration back is expected to raise — confirm that
  # this is acceptable.
  def change do
    execute("DO $$
    BEGIN
    execute 'ALTER DATABASE '||quote_ident(current_database())||' SET default_text_search_config = ''english'' ';
    END
    $$;")
  end
end
@@ -17,7 +17,7 @@ defmodule Pleroma.Repo.Migrations.AddFtsIndexToObjectsTwo do | |||
execute("CREATE FUNCTION objects_fts_update() RETURNS trigger AS $$ | |||
begin | |||
new.fts_content := to_tsvector('english', new.data->>'content'); | |||
new.fts_content := to_tsvector(new.data->>'content'); | |||
return new; | |||
end | |||
$$ LANGUAGE plpgsql") | |||