[Patchew-devel] [PATCH 1/2] use postgresql full text search
Paolo Bonzini
pbonzini at redhat.com
Tue Nov 20 18:20:27 UTC 2018
On 20/11/18 02:45, Fam Zheng wrote:
> On Mon, 11/19 14:42, Paolo Bonzini wrote:
>> Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
>> ---
>> api/migrations/0040_postgres_fts.py | 26 +++++++++++++++++++++++++
>> api/migrations/__init__.py | 15 +++++++++++++++
>> api/search.py | 30 +++++++++++++++++++++--------
>> patchew/settings.py | 3 +++
>> 4 files changed, 66 insertions(+), 8 deletions(-)
>> create mode 100644 api/migrations/0040_postgres_fts.py
>>
>> diff --git a/api/migrations/0040_postgres_fts.py b/api/migrations/0040_postgres_fts.py
>> new file mode 100644
>> index 0000000..8d8ab95
>> --- /dev/null
>> +++ b/api/migrations/0040_postgres_fts.py
>> @@ -0,0 +1,25 @@
>> +# -*- coding: utf-8 -*-
>> +from __future__ import unicode_literals
>> +
>> +from django.db import migrations, models
>> +import django.db.models.deletion
>> +from django.contrib.postgres.operations import TrigramExtension
>> +
>> +from api.migrations import PostgresOnlyMigration
>> +
>> +
>> +class Migration(PostgresOnlyMigration):
>> +
>> + dependencies = [
>> + ('api', '0039_fix_project_attr_help_texts'),
>> + ]
>> +
>> + operations = [
>> + TrigramExtension(),
>> + migrations.RunSQL("create index api_message_subject_gin on api_message using gin(to_tsvector('english', subject::text));",
>> + "drop index api_message_subject_gin"),
>> + migrations.RunSQL("create index api_message_sender_gin on api_message using gin(upper(sender) gin_trgm_ops);",
>> + "drop index api_message_sender_gin"),
>> + migrations.RunSQL("create index api_message_recipients_gin on api_message using gin(upper(recipients) gin_trgm_ops);",
>> + "drop index api_message_recipients_gin"),
>> + ]
>> diff --git a/api/migrations/__init__.py b/api/migrations/__init__.py
>> index 510b26a..a273050 100644
>> --- a/api/migrations/__init__.py
>> +++ b/api/migrations/__init__.py
>> @@ -8,6 +8,7 @@
>> # This work is licensed under the MIT License. Please see the LICENSE file or
>> # http://opensource.org/licenses/MIT.
>>
>> +from django.db import migrations
>>
>> import json
>> from api import blobs
>> @@ -48,3 +49,17 @@ def set_property(model, name, value, **kwargs):
>> if hasattr(mp, 'blob'):
>> mp.blob = False
>> mp.save()
>> +
>> +
>> +class PostgresOnlyMigration(migrations.Migration):
>> + def apply(self, project_state, schema_editor, collect_sql=False):
>> + if schema_editor.connection.vendor == "postgresql":
>> + return super().apply(project_state, schema_editor, collect_sql=collect_sql)
>> + else:
>> + return project_state
>> +
>> + def unapply(self, project_state, schema_editor, collect_sql=False):
>> + if schema_editor.connection.vendor == "postgresql":
>> + return super().unapply(project_state, schema_editor, collect_sql=collect_sql)
>> + else:
>> + return project_state
>> diff --git a/api/search.py b/api/search.py
>> index 9b41641..488afa1 100644
>> --- a/api/search.py
>> +++ b/api/search.py
>> @@ -10,9 +10,12 @@
>>
>> from .models import Message, MessageProperty, MessageResult, Result, Review
>> from functools import reduce
>> +
>> +from django.db import connection
>> from django.db.models import Q
>>
>> -from django.db.models import Lookup
>> +from django.contrib.postgres.search import SearchQuery, SearchVector
>> +from django.db.models import Lookup, lookups
>> from django.db.models.fields import Field
>>
>>
>> @@ -215,14 +218,15 @@ Search text keyword in the email message. Example:
>> q = Q(date__lte=p)
>> return q
>>
>> - def _make_filter_keywords(self, t):
>> + def _add_to_keywords(self, t):
>> self._last_keywords.append(t)
>> - return Q(subject__icontains=t)
>> + return Q()
>
> Maybe we should simply return None and change all the callers?
Yeah, that's a matter of taste I guess. Returning None from the callers
is okay, but then it has to be filtered higher in the chain. Q() is
effectively a "queryset None".
>> def _make_filter_is(self, cond):
>> if cond == "complete":
>> return Q(is_complete=True)
>> elif cond == "pull":
>> + self._add_to_keywords('PULL')
>> return Q(subject__contains='[PULL') | Q(subject__contains='[GIT PULL')
>
> Why not call self._add_to_keywords three times like this:
>
> for kw in ['PULL', '[PULL', '[GIT PULL']:
> self._add_to_keywords(kw)
Because full-text search on subject uses word stemming (i.e. you search
"testing" and you get "test" but not "retest", so the brackets are
unused anyway.
Paolo
More information about the Patchew-devel
mailing list