[Patchew-devel] [PATCH 1/2] use postgresql full text search

Fam Zheng famz at redhat.com
Tue Nov 20 01:45:55 UTC 2018


On Mon, 11/19 14:42, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
> ---
>  api/migrations/0040_postgres_fts.py | 26 +++++++++++++++++++++++++
>  api/migrations/__init__.py          | 15 +++++++++++++++
>  api/search.py                       | 30 +++++++++++++++++++++--------
>  patchew/settings.py                 |  3 +++
>  4 files changed, 66 insertions(+), 8 deletions(-)
>  create mode 100644 api/migrations/0040_postgres_fts.py
> 
> diff --git a/api/migrations/0040_postgres_fts.py b/api/migrations/0040_postgres_fts.py
> new file mode 100644
> index 0000000..8d8ab95
> --- /dev/null
> +++ b/api/migrations/0040_postgres_fts.py
> @@ -0,0 +1,25 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +import django.db.models.deletion
> +from django.contrib.postgres.operations import TrigramExtension
> +
> +from api.migrations import PostgresOnlyMigration
> +
> +
> +class Migration(PostgresOnlyMigration):
> +
> +    dependencies = [
> +        ('api', '0039_fix_project_attr_help_texts'),
> +    ]
> +
> +    operations = [
> +        TrigramExtension(),
> +        migrations.RunSQL("create index api_message_subject_gin on api_message using gin(to_tsvector('english', subject::text));",
> +                          "drop index api_message_subject_gin"),
> +        migrations.RunSQL("create index api_message_sender_gin on api_message using gin(upper(sender) gin_trgm_ops);",
> +                          "drop index api_message_sender_gin"),
> +        migrations.RunSQL("create index api_message_recipients_gin on api_message using gin(upper(recipients) gin_trgm_ops);",
> +                          "drop index api_message_recipients_gin"),
> +    ]
> diff --git a/api/migrations/__init__.py b/api/migrations/__init__.py
> index 510b26a..a273050 100644
> --- a/api/migrations/__init__.py
> +++ b/api/migrations/__init__.py
> @@ -8,6 +8,7 @@
>  # This work is licensed under the MIT License.  Please see the LICENSE file or
>  # http://opensource.org/licenses/MIT.
>  
> +from django.db import migrations
>  
>  import json
>  from api import blobs
> @@ -48,3 +49,17 @@ def set_property(model, name, value, **kwargs):
>      if hasattr(mp, 'blob'):
>          mp.blob = False
>      mp.save()
> +
> +
> +class PostgresOnlyMigration(migrations.Migration):
> +    def apply(self, project_state, schema_editor, collect_sql=False):
> +        if schema_editor.connection.vendor == "postgresql":
> +            return super().apply(project_state, schema_editor, collect_sql=collect_sql)
> +        else:
> +            return project_state
> +
> +    def unapply(self, project_state, schema_editor, collect_sql=False):
> +        if schema_editor.connection.vendor == "postgresql":
> +            return super().unapply(project_state, schema_editor, collect_sql=collect_sql)
> +        else:
> +            return project_state
> diff --git a/api/search.py b/api/search.py
> index 9b41641..488afa1 100644
> --- a/api/search.py
> +++ b/api/search.py
> @@ -10,9 +10,12 @@
>  
>  from .models import Message, MessageProperty, MessageResult, Result, Review
>  from functools import reduce
> +
> +from django.db import connection
>  from django.db.models import Q
>  
> -from django.db.models import Lookup
> +from django.contrib.postgres.search import SearchQuery, SearchVector
> +from django.db.models import Lookup, lookups
>  from django.db.models.fields import Field
>  
>  
> @@ -215,14 +218,15 @@ Search text keyword in the email message. Example:
>              q = Q(date__lte=p)
>          return q
>  
> -    def _make_filter_keywords(self, t):
> +    def _add_to_keywords(self, t):
>          self._last_keywords.append(t)
> -        return Q(subject__icontains=t)
> +        return Q()

Maybe we should simply return None and change all the callers?
>  
>      def _make_filter_is(self, cond):
>          if cond == "complete":
>              return Q(is_complete=True)
>          elif cond == "pull":
> +            self._add_to_keywords('PULL')
>              return Q(subject__contains='[PULL') | Q(subject__contains='[GIT PULL')

Why not call self._add_to_keywords three times like this:

    for kw in ['PULL', '[PULL', '[GIT PULL']:
        self._add_to_keywords(kw)

?

>          elif cond == "reviewed":
>              return self._make_filter_subquery(MessageProperty, Q(name="reviewed", value=True))
> @@ -263,16 +267,16 @@ Search text keyword in the email message. Example:
>              return Q(recipients__icontains=cond)
>          elif term.startswith("subject:"):
>              cond = term[term.find(":") + 1:]
> -            return Q(subject__icontains=cond)
> +            return self._add_to_keywords(cond)
>          elif term.startswith("id:"):
>              cond = term[term.find(":") + 1:]
>              if cond[0] == "<" and cond[-1] == ">":
>                  cond = cond[1:-1]
>              return Q(message_id=cond)
>          elif term.startswith("is:"):
> -            return self._make_filter_is(term[3:]) or self._make_filter_keywords(term)
> +            return self._make_filter_is(term[3:]) or self._add_to_keywords(term)
>          elif term.startswith("not:"):
> -            return ~self._make_filter_is(term[4:]) or self._make_filter_keywords(term)
> +            return ~self._make_filter_is(term[4:]) or self._add_to_keywords(term)
>          elif term.startswith("has:"):
>              cond = term[term.find(":") + 1:]
>              if cond == "replies":
> @@ -305,7 +309,7 @@ Search text keyword in the email message. Example:
>              return Q(project__name=cond) | Q(project__parent_project__name=cond)
>  
>          # Keyword in subject is the default
> -        return self._make_filter_keywords(term)
> +        return self._add_to_keywords(term)
>  
>      def _process_term(self, term, user):
>          """ Return a Q object that will be applied to the query """
> @@ -316,7 +320,7 @@ Search text keyword in the email message. Example:
>              term = term[1:]
>  
>          if is_plusminus and ":" not in term:
> -            q = self._make_filter_is(term) or self._make_filter_keywords(term)
> +            q = self._make_filter_is(term) or self._add_to_keywords(term)
>          else:
>              q = self._make_filter(term, user)
>          if neg:
> @@ -340,4 +344,14 @@ Search text keyword in the email message. Example:
>          )
>          if queryset is None:
>              queryset = Message.objects.series_heads()
> +        if self._last_keywords:
> +            if connection.vendor == 'postgresql':
> +                queryset = queryset.annotate(subjsearch=SearchVector('subject', config='english'))
> +                searchq = reduce(lambda x, y: x & SearchQuery(y,config='english'),
> +                                 self._last_keywords)
> +                q = q & Q(subjsearch=searchq)
> +            else:
> +                q = reduce(lambda x, y: x & Q(subject__icontains=y),
> +                           self._last_keywords, q)
> +
>          return queryset.filter(q)
> diff --git a/patchew/settings.py b/patchew/settings.py
> index e433a9b..b09bba4 100644
> --- a/patchew/settings.py
> +++ b/patchew/settings.py
> @@ -128,6 +128,9 @@ def env_detect():
>  
>  DEBUG, DATA_DIR, DATABASES = env_detect()
>  
> +if DATABASES['default']['ENGINE'] == 'django.db.backends.postgresql':
> +    INSTALLED_APPS += ['django.contrib.postgres']
> +
>  # In production environments, we run in a container, behind nginx, which should
>  # filter the allowed host names. So be a little flexible here
>  ALLOWED_HOSTS = ["*"]
> -- 
> 2.19.1
> 
> 
> _______________________________________________
> Patchew-devel mailing list
> Patchew-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/patchew-devel

Fam




More information about the Patchew-devel mailing list