From d82c8e8e971d1e563dd93c6f8dea6edcba2337ae Mon Sep 17 00:00:00 2001 From: PrivateGER Date: Wed, 19 Feb 2025 14:55:50 +0100 Subject: [PATCH 1/6] Implement tsvector search support --- .config/example.yml | 8 ++++ packages/backend/src/config.ts | 2 +- packages/backend/src/core/SearchService.ts | 54 ++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/.config/example.yml b/.config/example.yml index 07c613f62d..297f794324 100644 --- a/.config/example.yml +++ b/.config/example.yml @@ -222,6 +222,14 @@ fulltextSearch: # You need to install pgroonga and configure it as a PostgreSQL extension. # In addition to the above, you need to create a pgroonga index on the text column of the note table. # see: https://pgroonga.github.io/tutorial/ + # - tsvector + # Use Postgres tsvectors. + # You need to create a generated column and index on the note table to use this, followed by an ANALYZE on the table. Beware, this will take a while to be created and the database will remain locked during this process. + # This also enables advanced search syntax, see documentation of websearch_to_tsquery: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES + # Support for non-English languages is currently rather poor and will be improved once post languages become a feature. + # ALTER TABLE note ADD COLUMN tsvector_embedding tsvector GENERATED ALWAYS AS ( to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(cw, '') || ' ' || COALESCE(name, ''))) STORED; + # CREATE INDEX vector_idx ON note USING GIN (tsvector_embedding); + # ANALYZE note; # - meilisearch # Use Meilisearch. # You need to install Meilisearch and configure. 
diff --git a/packages/backend/src/config.ts b/packages/backend/src/config.ts index 3c76c76469..938f44c024 100644 --- a/packages/backend/src/config.ts +++ b/packages/backend/src/config.ts @@ -254,7 +254,7 @@ export type Config = { }; }; -export type FulltextSearchProvider = 'sqlLike' | 'sqlPgroonga' | 'meilisearch'; +export type FulltextSearchProvider = 'sqlLike' | 'sqlPgroonga' | 'meilisearch' | 'tsvector'; const _filename = fileURLToPath(import.meta.url); const _dirname = dirname(_filename); diff --git a/packages/backend/src/core/SearchService.ts b/packages/backend/src/core/SearchService.ts index 6e46fb798c..efef87cfb7 100644 --- a/packages/backend/src/core/SearchService.ts +++ b/packages/backend/src/core/SearchService.ts @@ -248,6 +248,9 @@ export class SearchService { case 'meilisearch': { return this.searchNoteByMeiliSearch(q, me, opts, pagination); } + case 'tsvector': { + return this.searchNoteByTsvector(q, me, opts, pagination); + } default: { // eslint-disable-next-line @typescript-eslint/no-unused-vars const typeCheck: never = this.provider; @@ -256,6 +259,57 @@ export class SearchService { } } + @bindThis + private async searchNoteByTsvector(q: string, + me: MiUser | null, + opts: SearchOpts, + pagination: SearchPagination, + ): Promise { + const query = this.queryService.makePaginationQuery(this.notesRepository.createQueryBuilder('note'), pagination.sinceId, pagination.untilId); + + if (opts.userId) { + query.andWhere('note.userId = :userId', { userId: opts.userId }); + } else if (opts.channelId) { + query.andWhere('note.channelId = :channelId', { channelId: opts.channelId }); + } + + query + .innerJoinAndSelect('note.user', 'user') + .leftJoinAndSelect('note.reply', 'reply') + .leftJoinAndSelect('note.renote', 'renote') + .leftJoinAndSelect('reply.user', 'replyUser') + .leftJoinAndSelect('renote.user', 'renoteUser'); + + query.andWhere('note.tsvector @@ websearch_to_tsquery(:q)', { q }); + + if (opts.order === 'asc') { + query + 
.addSelect('ts_rank_cd(note.tsvector_embedding, websearch_to_tsquery(:q))', 'rank') + .orderBy('rank', 'DESC'); + } else { + query + .orderBy('note.created_at', 'DESC'); + } + + if (opts.host) { + if (opts.host === '.') { + query.andWhere('note.userHost IS NULL'); + } else { + query.andWhere('note.userHost = :host', { host: opts.host }); + } + } + + if (opts.filetype) { + query.andWhere('note."attachedFileTypes" && :types', { types: fileTypes[opts.filetype] }); + } + + this.queryService.generateVisibilityQuery(query, me); + if (me) this.queryService.generateMutedUserQuery(query, me); + if (me) this.queryService.generateBlockedUserQuery(query, me); + + return await query.limit(pagination.limit).getMany(); + } + @bindThis private async searchNoteByLike( q: string, From 4fde14d1cc6343ceb0fce6ec005f260dde972d5c Mon Sep 17 00:00:00 2001 From: PrivateGER Date: Wed, 19 Feb 2025 15:12:16 +0100 Subject: [PATCH 2/6] fix wrong column name --- packages/backend/src/core/SearchService.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/backend/src/core/SearchService.ts b/packages/backend/src/core/SearchService.ts index efef87cfb7..c99944fa51 100644 --- a/packages/backend/src/core/SearchService.ts +++ b/packages/backend/src/core/SearchService.ts @@ -280,7 +280,7 @@ export class SearchService { .leftJoinAndSelect('reply.user', 'replyUser') .leftJoinAndSelect('renote.user', 'renoteUser'); - query.andWhere('note.tsvector @@ websearch_to_tsquery(:q)', { q }); + query.andWhere('note.tsvector_embedding @@ websearch_to_tsquery(:q)', { q }); if (opts.order === 'asc') { query From 006e92a7d06e5717b5f13d001128a07145fd213d Mon Sep 17 00:00:00 2001 From: PrivateGER Date: Wed, 19 Feb 2025 15:20:48 +0100 Subject: [PATCH 3/6] adjust docs --- .config/example.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.config/example.yml b/.config/example.yml index 297f794324..fd30d8923b 100644 --- a/.config/example.yml +++ b/.config/example.yml @@ 
-226,10 +226,15 @@ fulltextSearch: # Use Postgres tsvectors. # You need to create a generated column and index on the note table to use this, followed by an ANALYZE on the table. Beware, this will take a while to be created and the database will remain locked during this process. # This also enables advanced search syntax, see documentation of websearch_to_tsquery: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES - # Support for non-English languages is currently rather poor and will be improved once post languages become a feature. + # Support for multiple languages is currently rather poor and will be improved once post languages become a feature. + # + # Example to set up tsvectors for an English instance: # ALTER TABLE note ADD COLUMN tsvector_embedding tsvector GENERATED ALWAYS AS ( to_tsvector('english', COALESCE(text, '') || ' ' || COALESCE(cw, '') || ' ' || COALESCE(name, ''))) STORED; # CREATE INDEX vector_idx ON note USING GIN (tsvector_embedding); # ANALYZE note; + # + # Note: You can opt to use a different text search configuration for better results if your main instance language is not English. + # To get a list, use "SELECT cfgname FROM pg_ts_config;" and replace 'english' with the desired configuration name. # - meilisearch # Use Meilisearch. # You need to install Meilisearch and configure. 
From b5208c2ad052ed318d75b8c85cd0463b25cf1ace Mon Sep 17 00:00:00 2001 From: PrivateGER Date: Wed, 19 Feb 2025 16:26:02 +0100 Subject: [PATCH 4/6] simplify tsvector implementation, remove cover density --- packages/backend/src/core/SearchService.ts | 61 ++-------------------- 1 file changed, 5 insertions(+), 56 deletions(-) diff --git a/packages/backend/src/core/SearchService.ts b/packages/backend/src/core/SearchService.ts index c99944fa51..2934d08c2d 100644 --- a/packages/backend/src/core/SearchService.ts +++ b/packages/backend/src/core/SearchService.ts @@ -240,7 +240,8 @@ export class SearchService { ): Promise { switch (this.provider) { case 'sqlLike': - case 'sqlPgroonga': { + case 'sqlPgroonga': + case 'tsvector': { // ほとんど内容に差がないのでsqlLikeとsqlPgroongaを同じ処理にしている. // 今後の拡張で差が出る用であれば関数を分ける. return this.searchNoteByLike(q, me, opts, pagination); @@ -248,9 +249,6 @@ export class SearchService { case 'meilisearch': { return this.searchNoteByMeiliSearch(q, me, opts, pagination); } - case 'tsvector': { - return this.searchNoteByTsvector(q, me, opts, pagination); - } default: { // eslint-disable-next-line @typescript-eslint/no-unused-vars const typeCheck: never = this.provider; @@ -259,57 +257,6 @@ export class SearchService { } } - @bindThis - private async searchNoteByTsvector(q: string, - me: MiUser | null, - opts: SearchOpts, - pagination: SearchPagination, - ): Promise { - const query = this.queryService.makePaginationQuery(this.notesRepository.createQueryBuilder('note'), pagination.sinceId, pagination.untilId); - - if (opts.userId) { - query.andWhere('note.userId = :userId', { userId: opts.userId }); - } else if (opts.channelId) { - query.andWhere('note.channelId = :channelId', { channelId: opts.channelId }); - } - - query - .innerJoinAndSelect('note.user', 'user') - .leftJoinAndSelect('note.reply', 'reply') - .leftJoinAndSelect('note.renote', 'renote') - .leftJoinAndSelect('reply.user', 'replyUser') - .leftJoinAndSelect('renote.user', 'renoteUser'); - - 
query.andWhere('note.tsvector_embedding @@ websearch_to_tsquery(:q)', { q }); - - if (opts.order === 'asc') { - query - .addSelect('ts_rank_cd(note.tsvector_embedding, websearch_to_tsquery(:q))', 'rank') - .orderBy('rank', 'DESC'); - } else { - query - .orderBy('note.created_at', 'DESC'); - } - - if (opts.host) { - if (opts.host === '.') { - query.andWhere('note.userHost IS NULL'); - } else { - query.andWhere('note.userHost = :host', { host: opts.host }); - } - } - - if (opts.filetype) { - query.andWhere('note."attachedFileTypes" && :types', { types: fileTypes[opts.filetype] }); - } - - this.queryService.generateVisibilityQuery(query, me); - if (me) this.queryService.generateMutedUserQuery(query, me); - if (me) this.queryService.generateBlockedUserQuery(query, me); - - return await query.limit(pagination.limit).getMany(); - } - @bindThis private async searchNoteByLike( q: string, @@ -333,7 +280,9 @@ export class SearchService { .leftJoinAndSelect('renote.user', 'renoteUser'); if (this.config.fulltextSearch?.provider === 'sqlPgroonga') { - query.andWhere('note.text &@~ :q', { q }); + query.andWhere('note.text &@~ :q', {q}); + } else if (this.config.fulltextSearch?.provider === "tsvector") { + query.andWhere('note.tsvector_embedding @@ websearch_to_tsquery(:q)', { q }); } else { query.andWhere('note.text ILIKE :q', { q: `%${ sqlLikeEscape(q) }%` }); } From e6464906e6f96efcc93ef2d89c29065160a70243 Mon Sep 17 00:00:00 2001 From: PrivateGER Date: Wed, 19 Feb 2025 16:26:02 +0100 Subject: [PATCH 5/6] change to sqlTsvector --- packages/backend/src/core/SearchService.ts | 61 ++-------------------- 1 file changed, 5 insertions(+), 56 deletions(-) diff --git a/packages/backend/src/core/SearchService.ts b/packages/backend/src/core/SearchService.ts index c99944fa51..2934d08c2d 100644 --- a/packages/backend/src/core/SearchService.ts +++ b/packages/backend/src/core/SearchService.ts @@ -240,7 +240,8 @@ export class SearchService { ): Promise { switch (this.provider) { case 
'sqlLike': - case 'sqlPgroonga': { + case 'sqlPgroonga': + case 'tsvector': { // ほとんど内容に差がないのでsqlLikeとsqlPgroongaを同じ処理にしている. // 今後の拡張で差が出る用であれば関数を分ける. return this.searchNoteByLike(q, me, opts, pagination); @@ -248,9 +249,6 @@ export class SearchService { case 'meilisearch': { return this.searchNoteByMeiliSearch(q, me, opts, pagination); } - case 'tsvector': { - return this.searchNoteByTsvector(q, me, opts, pagination); - } default: { // eslint-disable-next-line @typescript-eslint/no-unused-vars const typeCheck: never = this.provider; @@ -259,57 +257,6 @@ export class SearchService { } } - @bindThis - private async searchNoteByTsvector(q: string, - me: MiUser | null, - opts: SearchOpts, - pagination: SearchPagination, - ): Promise { - const query = this.queryService.makePaginationQuery(this.notesRepository.createQueryBuilder('note'), pagination.sinceId, pagination.untilId); - - if (opts.userId) { - query.andWhere('note.userId = :userId', { userId: opts.userId }); - } else if (opts.channelId) { - query.andWhere('note.channelId = :channelId', { channelId: opts.channelId }); - } - - query - .innerJoinAndSelect('note.user', 'user') - .leftJoinAndSelect('note.reply', 'reply') - .leftJoinAndSelect('note.renote', 'renote') - .leftJoinAndSelect('reply.user', 'replyUser') - .leftJoinAndSelect('renote.user', 'renoteUser'); - - query.andWhere('note.tsvector_embedding @@ websearch_to_tsquery(:q)', { q }); - - if (opts.order === 'asc') { - query - .addSelect('ts_rank_cd(note.tsvector_embedding, websearch_to_tsquery(:q))', 'rank') - .orderBy('rank', 'DESC'); - } else { - query - .orderBy('note.created_at', 'DESC'); - } - - if (opts.host) { - if (opts.host === '.') { - query.andWhere('note.userHost IS NULL'); - } else { - query.andWhere('note.userHost = :host', { host: opts.host }); - } - } - - if (opts.filetype) { - query.andWhere('note."attachedFileTypes" && :types', { types: fileTypes[opts.filetype] }); - } - - this.queryService.generateVisibilityQuery(query, me); - if (me) 
this.queryService.generateMutedUserQuery(query, me); - if (me) this.queryService.generateBlockedUserQuery(query, me); - - return await query.limit(pagination.limit).getMany(); - } - @bindThis private async searchNoteByLike( q: string, @@ -333,7 +280,9 @@ export class SearchService { .leftJoinAndSelect('renote.user', 'renoteUser'); if (this.config.fulltextSearch?.provider === 'sqlPgroonga') { - query.andWhere('note.text &@~ :q', { q }); + query.andWhere('note.text &@~ :q', {q}); + } else if (this.config.fulltextSearch?.provider === "tsvector") { + query.andWhere('note.tsvector_embedding @@ websearch_to_tsquery(:q)', { q }); } else { query.andWhere('note.text ILIKE :q', { q: `%${ sqlLikeEscape(q) }%` }); } From 285bbcb81f9b34cdd8d41e69c3be9e885511b824 Mon Sep 17 00:00:00 2001 From: PrivateGER Date: Wed, 19 Feb 2025 16:46:59 +0100 Subject: [PATCH 6/6] fix linter... --- packages/backend/src/core/SearchService.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/backend/src/core/SearchService.ts b/packages/backend/src/core/SearchService.ts index 3aafac1599..4782a6c7b0 100644 --- a/packages/backend/src/core/SearchService.ts +++ b/packages/backend/src/core/SearchService.ts @@ -280,8 +280,8 @@ export class SearchService { .leftJoinAndSelect('renote.user', 'renoteUser'); if (this.config.fulltextSearch?.provider === 'sqlPgroonga') { - query.andWhere('note.text &@~ :q', {q}); - } else if (this.config.fulltextSearch?.provider === "sqlTsvector") { + query.andWhere('note.text &@~ :q', { q }); + } else if (this.config.fulltextSearch?.provider === 'sqlTsvector') { query.andWhere('note.tsvector_embedding @@ websearch_to_tsquery(:q)', { q }); } else { query.andWhere('note.text ILIKE :q', { q: `%${ sqlLikeEscape(q) }%` });