Refactor and improve resolve_discussion_suggestions

Main reason for this was to keep the most important content suggestions first. E.g. the user's own school should always be the first school that we're suggesting.
skoleapp · Feb 11, 2021 · 95e2cf7 · 95e2cf7
1 parent ab62394
commit 95e2cf7
Show file tree

Hide file tree

Showing 4 changed files with 115 additions and 68 deletions.
diff --git a/.idea/dictionaries/eero.xml b/.idea/dictionaries/eero.xml
diff --git a/skole/schemas/comment.py b/skole/schemas/comment.py
@@ -21,6 +21,7 @@
 from skole.types import ID, ResolveInfo
 from skole.utils.constants import Messages
 from skole.utils.pagination import get_paginator
+from skole.utils.shortcuts import join_queries
 
 
 class CommentObjectType(VoteMixin, DjangoObjectType):
@@ -184,88 +185,63 @@ def resolve_discussion_suggestions(
         country = getattr(city, "country", None)
         cut = settings.DISCUSSION_SUGGESTIONS_COUNT // 3
 
-        # Include schools that:
-        # - Is the user's school.
-        # - Have been commented by the user.
-        # - Have reply comments from the user.
-        schools = (
-            School.objects.filter(
-                Q(users=user)
-                | Q(comments__user=user)
-                | Q(comments__reply_comments__user=user)
-            )
-            .order_by("-comment_count")
-            .distinct()
+        # Note: the different Q object arguments passed to `join_queries` contain
+        # different priority filterings. E.g. `Q(users=user)` on the schools query is
+        # the most important one and its results should always appear first.
+
+        # Include:
+        # - Include the the user's school.
+        # - Schools that have been commented by the user or have reply comments from the user.
+        # - The best schools from the user's city.
+        # - The best schools from the user's country.
+        schools = join_queries(
+            School,
+            Q(users=user),
+            Q(comments__user=user) | Q(comments__reply_comments__user=user),
+            *([Q(city=city)] if city else []),
+            *([Q(city__country=country)] if country else []),
+            order_by=["-comment_count"],
         )
 
-        # If there are not enough schools in the queryset, add the best schools from the user's city.
-        if len(schools) < cut:
-            schools = schools.union(
-                School.objects.filter(city=city).order_by("-comment_count")
-            ).distinct()
-
-        # If there are still not enough schools, do the same based on the user's country.
-        if len(schools) < cut:
-            schools = schools.union(
-                School.objects.filter(city__country=country).order_by("-comment_count")
-            ).distinct()
-
         # Include courses that:
         # - Are created by the user.
         # - Have been starred by the user.
         # - Have been voted by the user.
         # - Have been commented by the user.
         # - Have reply comments from the user.
         # - Have resources added by the user.
-        courses = (
-            Course.objects.filter(
-                Q(user=user)
-                | Q(stars__user=user)
-                | Q(votes__user=user)
-                | Q(comments__user=user)
-                | Q(comments__reply_comments__user=user)
-                | Q(resources__user=user)
-            )
-            .order_by("-score", "-resource_count", "-comment_count")
-            .distinct()
+        courses = join_queries(
+            Course,
+            Q(user=user)
+            | Q(stars__user=user)
+            | Q(votes__user=user)
+            | Q(comments__user=user)
+            | Q(comments__reply_comments__user=user)
+            | Q(resources__user=user),
+            *([Q(subjects=user.subject)] if user.subject else []),
+            order_by=["-score", "-resource_count", "-comment_count"],
         )
 
-        # If there are not enough courses in the queryset, add the best courses from the user's subject.
-        if len(courses) < cut:
-            courses = courses.union(
-                Course.objects.filter(subjects=user.subject).order_by(
-                    "-score", "-resource_count", "-comment_count"
-                )
-            ).distinct()
-
         # Include resources that:
         # - Are created by the user.
         # - Have their course created by the user.
         # - Have been starred by the user.
         # - Have been voted by the user.
         # - Have been commented by the user.
         # - Have reply comments from the user.
-        resources = (
-            Resource.objects.filter(
-                Q(user=user)
-                | Q(course__user=user)
-                | Q(stars__user=user)
-                | Q(votes__user=user)
-                | Q(comments__user=user)
-                | Q(comments__reply_comments__user=user)
-            )
-            .order_by("-score", "-comment_count")
-            .distinct()
+        # - The best resources from the user's subject.
+        resources = join_queries(
+            Resource,
+            Q(user=user)
+            | Q(course__user=user)
+            | Q(stars__user=user)
+            | Q(votes__user=user)
+            | Q(comments__user=user)
+            | Q(comments__reply_comments__user=user),
+            *([Q(course__subjects=user.subject)] if user.subject else []),
+            order_by=["-score", "-comment_count"],
         )
 
-        # If there are not enough resources in the queryset, add the best resources from the user's subject.
-        if len(resources) < cut:
-            resources = resources.union(
-                Resource.objects.filter(course__subjects=user.subject).order_by(
-                    "-score", "-comment_count"
-                )
-            ).distinct()
-
         return [*schools[:cut], *courses[:cut], *resources[:cut]]
 
 

diff --git a/skole/tests/schemas/test_comment.py b/skole/tests/schemas/test_comment.py
@@ -1,6 +1,7 @@
 from typing import Optional
 
 from django.conf import settings
+from django.test import override_settings
 
 from skole.models import Comment, Course, Resource, School
 from skole.tests.helpers import (
@@ -139,7 +140,7 @@ def query_discussion_suggestions(
         self,
         *,
         assert_error: bool = False,
-    ) -> JsonDict:
+    ) -> list[JsonDict]:
         # language=GraphQL
         graphql = """
             query DiscussionSuggestions {
@@ -511,11 +512,26 @@ def test_discussion(self) -> None:
     def test_discussion_suggestions(self) -> None:
         res = self.query_discussion_suggestions()
         assert len(res) <= settings.DISCUSSION_SUGGESTIONS_COUNT
+        # User's own school should be the first suggestion.
+        # Ignore: Mypy doesn't cannot know that `user.school` is not None here.
+        assert res[0]["id"] == str(self.get_authenticated_user().school.pk)  # type: ignore[union-attr]
+        assert res[0]["name"] == "University of Turku"
+        assert res[1]["name"] == "Turku University of Applied Sciences"
+        assert res[2]["courseName"] == "Test Engineering Course 1"
+        # A few courses and resources are tied for the second place, since we don't
+        # order the tied scores base d on the primary keys (to reduce query overhead),
+        # the order of the results is not exactly stable and we can't test for exact match
+        # here. It's just important that the second result is distinct from the first.
+        assert res[3]["courseName"] != "Test Engineering Course 1"
+        assert res[4]["title"] == "Sample Exam 1"
+        assert res[5]["title"] != "Sample Exam 1"
+
+        # Check that no duplicates are ever returned.
+        with override_settings(DISCUSSION_SUGGESTIONS_COUNT=1000):
+            res = self.query_discussion_suggestions()
+            assert len(res) == len({tuple(elem.items()) for elem in res})
 
         # Test that anonymous users cannot query discussion suggestions.
-
         self.authenticated_user = None
         res = self.query_discussion_suggestions(assert_error=True)
         assert "permission" in get_graphql_error(res)
-
-        # TODO: Test the remaining cases for the discussion suggestions.
diff --git a/skole/utils/shortcuts.py b/skole/utils/shortcuts.py
@@ -1,7 +1,19 @@
+from collections.abc import Iterable
 from typing import Any, TypeVar
 
 from django import forms
-from django.db.models import Func, Model, OuterRef, QuerySet, Subquery
+from django.db.models import (
+    Case,
+    Func,
+    IntegerField,
+    Model,
+    OuterRef,
+    Q,
+    QuerySet,
+    Subquery,
+    Value,
+    When,
+)
 
 from skole.types import FormError, JsonDict
 from skole.utils.constants import ValidationErrors
@@ -91,6 +103,47 @@ def safe_annotation(qs: QuerySet[Any], expression: Func) -> Subquery:
     Return a subquery that can be safely used in an `annotate()` call with mixed `Sum`
     and `Count` expressions.
 
-    More info: https://stackoverflow.com/a/56619484/9835872
+    References:
+        https://stackoverflow.com/a/56619484/9835872
     """
     return Subquery(qs.annotate(res=expression).filter(pk=OuterRef("pk")).values("res"))
+
+
+def join_queries(
+    model: type[M], *expressions: Q, order_by: Iterable[str] = ()
+) -> QuerySet[M]:
+    """
+    Join queries while preserving their individual order.
+
+    If one would want to join the queries:
+    `Foo.objects.filter(name="foo")` and `Foo.objects.filter(name="bar")` so that the
+    objects from the first query would be at the start of the queryset and the objects
+    from the second query would at the end of the queryset, one could use:
+    `join_querysets(Foo, Q(name="foo"), Q(name="bar"))`
+
+    References:
+        https://stackoverflow.com/q/38583295/9835872
+    """
+    if isinstance(order_by, str):
+        raise TypeError(
+            "`order_by` should be an iterable of strings, and not a string."
+        )
+
+    def values_pk(expr: Q) -> QuerySet[M]:
+        return model.objects.filter(expr).values("pk")
+
+    when_cases = (
+        When(pk__in=values_pk(expr), then=Value(i))
+        for i, expr in enumerate(expressions)
+    )
+
+    return (
+        model.objects.annotate(
+            grouped_ordering=Case(
+                *when_cases,
+                output_field=IntegerField(),
+            ),
+        )
+        .exclude(grouped_ordering__isnull=True)
+        .order_by("grouped_ordering", *order_by)
+    )