Skip to content
This repository has been archived by the owner on Sep 13, 2021. It is now read-only.

Commit

Permalink
Refactor and improve resolve_discussion_suggestions
Browse files Browse the repository at this point in the history
Main reason for this was to keep the most important content suggestions
first. E.g. the user's own school should always be the first school that
we're suggesting.
  • Loading branch information
ruohola committed Feb 11, 2021
1 parent ab62394 commit 95e2cf7
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 68 deletions.
2 changes: 2 additions & 0 deletions .idea/dictionaries/eero.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

100 changes: 38 additions & 62 deletions skole/schemas/comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from skole.types import ID, ResolveInfo
from skole.utils.constants import Messages
from skole.utils.pagination import get_paginator
from skole.utils.shortcuts import join_queries


class CommentObjectType(VoteMixin, DjangoObjectType):
Expand Down Expand Up @@ -184,88 +185,63 @@ def resolve_discussion_suggestions(
country = getattr(city, "country", None)
cut = settings.DISCUSSION_SUGGESTIONS_COUNT // 3

# Include schools that:
# - Is the user's school.
# - Have been commented by the user.
# - Have reply comments from the user.
schools = (
School.objects.filter(
Q(users=user)
| Q(comments__user=user)
| Q(comments__reply_comments__user=user)
)
.order_by("-comment_count")
.distinct()
# Note: the different Q object arguments passed to `join_queries` contain
# different priority filterings. E.g. `Q(users=user)` on the schools query is
# the most important one and its results should always appear first.

# Include:
# - Include the the user's school.
# - Schools that have been commented by the user or have reply comments from the user.
# - The best schools from the user's city.
# - The best schools from the user's country.
schools = join_queries(
School,
Q(users=user),
Q(comments__user=user) | Q(comments__reply_comments__user=user),
*([Q(city=city)] if city else []),
*([Q(city__country=country)] if country else []),
order_by=["-comment_count"],
)

# If there are not enough schools in the queryset, add the best schools from the user's city.
if len(schools) < cut:
schools = schools.union(
School.objects.filter(city=city).order_by("-comment_count")
).distinct()

# If there are still not enough schools, do the same based on the user's country.
if len(schools) < cut:
schools = schools.union(
School.objects.filter(city__country=country).order_by("-comment_count")
).distinct()

# Include courses that:
# - Are created by the user.
# - Have been starred by the user.
# - Have been voted by the user.
# - Have been commented by the user.
# - Have reply comments from the user.
# - Have resources added by the user.
courses = (
Course.objects.filter(
Q(user=user)
| Q(stars__user=user)
| Q(votes__user=user)
| Q(comments__user=user)
| Q(comments__reply_comments__user=user)
| Q(resources__user=user)
)
.order_by("-score", "-resource_count", "-comment_count")
.distinct()
courses = join_queries(
Course,
Q(user=user)
| Q(stars__user=user)
| Q(votes__user=user)
| Q(comments__user=user)
| Q(comments__reply_comments__user=user)
| Q(resources__user=user),
*([Q(subjects=user.subject)] if user.subject else []),
order_by=["-score", "-resource_count", "-comment_count"],
)

# If there are not enough courses in the queryset, add the best courses from the user's subject.
if len(courses) < cut:
courses = courses.union(
Course.objects.filter(subjects=user.subject).order_by(
"-score", "-resource_count", "-comment_count"
)
).distinct()

# Include resources that:
# - Are created by the user.
# - Have their course created by the user.
# - Have been starred by the user.
# - Have been voted by the user.
# - Have been commented by the user.
# - Have reply comments from the user.
resources = (
Resource.objects.filter(
Q(user=user)
| Q(course__user=user)
| Q(stars__user=user)
| Q(votes__user=user)
| Q(comments__user=user)
| Q(comments__reply_comments__user=user)
)
.order_by("-score", "-comment_count")
.distinct()
# - The best resources from the user's subject.
resources = join_queries(
Resource,
Q(user=user)
| Q(course__user=user)
| Q(stars__user=user)
| Q(votes__user=user)
| Q(comments__user=user)
| Q(comments__reply_comments__user=user),
*([Q(course__subjects=user.subject)] if user.subject else []),
order_by=["-score", "-comment_count"],
)

# If there are not enough resources in the queryset, add the best resources from the user's subject.
if len(resources) < cut:
resources = resources.union(
Resource.objects.filter(course__subjects=user.subject).order_by(
"-score", "-comment_count"
)
).distinct()

return [*schools[:cut], *courses[:cut], *resources[:cut]]


Expand Down
24 changes: 20 additions & 4 deletions skole/tests/schemas/test_comment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional

from django.conf import settings
from django.test import override_settings

from skole.models import Comment, Course, Resource, School
from skole.tests.helpers import (
Expand Down Expand Up @@ -139,7 +140,7 @@ def query_discussion_suggestions(
self,
*,
assert_error: bool = False,
) -> JsonDict:
) -> list[JsonDict]:
# language=GraphQL
graphql = """
query DiscussionSuggestions {
Expand Down Expand Up @@ -511,11 +512,26 @@ def test_discussion(self) -> None:
def test_discussion_suggestions(self) -> None:
res = self.query_discussion_suggestions()
assert len(res) <= settings.DISCUSSION_SUGGESTIONS_COUNT
# User's own school should be the first suggestion.
# Ignore: Mypy doesn't cannot know that `user.school` is not None here.
assert res[0]["id"] == str(self.get_authenticated_user().school.pk) # type: ignore[union-attr]
assert res[0]["name"] == "University of Turku"
assert res[1]["name"] == "Turku University of Applied Sciences"
assert res[2]["courseName"] == "Test Engineering Course 1"
# A few courses and resources are tied for the second place, since we don't
# order the tied scores base d on the primary keys (to reduce query overhead),
# the order of the results is not exactly stable and we can't test for exact match
# here. It's just important that the second result is distinct from the first.
assert res[3]["courseName"] != "Test Engineering Course 1"
assert res[4]["title"] == "Sample Exam 1"
assert res[5]["title"] != "Sample Exam 1"

# Check that no duplicates are ever returned.
with override_settings(DISCUSSION_SUGGESTIONS_COUNT=1000):
res = self.query_discussion_suggestions()
assert len(res) == len({tuple(elem.items()) for elem in res})

# Test that anonymous users cannot query discussion suggestions.

self.authenticated_user = None
res = self.query_discussion_suggestions(assert_error=True)
assert "permission" in get_graphql_error(res)

# TODO: Test the remaining cases for the discussion suggestions.
57 changes: 55 additions & 2 deletions skole/utils/shortcuts.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
from collections.abc import Iterable
from typing import Any, TypeVar

from django import forms
from django.db.models import Func, Model, OuterRef, QuerySet, Subquery
from django.db.models import (
Case,
Func,
IntegerField,
Model,
OuterRef,
Q,
QuerySet,
Subquery,
Value,
When,
)

from skole.types import FormError, JsonDict
from skole.utils.constants import ValidationErrors
Expand Down Expand Up @@ -91,6 +103,47 @@ def safe_annotation(qs: QuerySet[Any], expression: Func) -> Subquery:
Return a subquery that can be safely used in an `annotate()` call with mixed `Sum`
and `Count` expressions.
More info: https://stackoverflow.com/a/56619484/9835872
References:
https://stackoverflow.com/a/56619484/9835872
"""
return Subquery(qs.annotate(res=expression).filter(pk=OuterRef("pk")).values("res"))


def join_queries(
model: type[M], *expressions: Q, order_by: Iterable[str] = ()
) -> QuerySet[M]:
"""
Join queries while preserving their individual order.
If one would want to join the queries:
`Foo.objects.filter(name="foo")` and `Foo.objects.filter(name="bar")` so that the
objects from the first query would be at the start of the queryset and the objects
from the second query would at the end of the queryset, one could use:
`join_querysets(Foo, Q(name="foo"), Q(name="bar"))`
References:
https://stackoverflow.com/q/38583295/9835872
"""
if isinstance(order_by, str):
raise TypeError(
"`order_by` should be an iterable of strings, and not a string."
)

def values_pk(expr: Q) -> QuerySet[M]:
return model.objects.filter(expr).values("pk")

when_cases = (
When(pk__in=values_pk(expr), then=Value(i))
for i, expr in enumerate(expressions)
)

return (
model.objects.annotate(
grouped_ordering=Case(
*when_cases,
output_field=IntegerField(),
),
)
.exclude(grouped_ordering__isnull=True)
.order_by("grouped_ordering", *order_by)
)

0 comments on commit 95e2cf7

Please sign in to comment.