-
Notifications
You must be signed in to change notification settings - Fork 521
🚸(backend) sort user search results by proximity with the active user #1802
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| """ | ||
| Unit tests for the users_sharing_documents_with utility function. | ||
| """ | ||
|
|
||
| from django.utils import timezone | ||
|
|
||
| import pytest | ||
|
|
||
| from core import factories, utils | ||
|
|
||
| pytestmark = pytest.mark.django_db | ||
|
|
||
|
|
||
| def test_utils_users_sharing_documents_with(): | ||
| """Test users_sharing_documents_with function.""" | ||
|
|
||
| user = factories.UserFactory( | ||
| email="martin.bernard@anct.gouv.fr", full_name="Martin Bernard" | ||
| ) | ||
|
|
||
| pierre_1 = factories.UserFactory( | ||
| email="pierre.dupont@beta.gouv.fr", full_name="Pierre Dupont" | ||
| ) | ||
| pierre_2 = factories.UserFactory( | ||
| email="pierre.durand@impots.gouv.fr", full_name="Pierre Durand" | ||
| ) | ||
|
|
||
| now = timezone.now() | ||
| yesterday = now - timezone.timedelta(days=1) | ||
| last_week = now - timezone.timedelta(days=7) | ||
| last_month = now - timezone.timedelta(days=30) | ||
|
|
||
| document_1 = factories.DocumentFactory(creator=user) | ||
| document_2 = factories.DocumentFactory(creator=user) | ||
| document_3 = factories.DocumentFactory(creator=user) | ||
|
|
||
| factories.UserDocumentAccessFactory(user=user, document=document_1) | ||
| factories.UserDocumentAccessFactory(user=user, document=document_2) | ||
| factories.UserDocumentAccessFactory(user=user, document=document_3) | ||
|
|
||
| # The factory cannot set the created_at directly, so we force it after creation | ||
| doc_1_pierre_1 = factories.UserDocumentAccessFactory( | ||
| user=pierre_1, document=document_1, created_at=last_week | ||
| ) | ||
| doc_1_pierre_1.created_at = last_week | ||
| doc_1_pierre_1.save() | ||
| doc_2_pierre_2 = factories.UserDocumentAccessFactory( | ||
| user=pierre_2, document=document_2 | ||
| ) | ||
| doc_2_pierre_2.created_at = last_month | ||
| doc_2_pierre_2.save() | ||
|
|
||
| doc_3_pierre_2 = factories.UserDocumentAccessFactory( | ||
| user=pierre_2, document=document_3 | ||
| ) | ||
| doc_3_pierre_2.created_at = yesterday | ||
| doc_3_pierre_2.save() | ||
|
|
||
| shared_map = utils.users_sharing_documents_with(user) | ||
|
|
||
| assert shared_map == { | ||
| pierre_1.id: last_week, | ||
| pierre_2.id: yesterday, | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4,10 +4,14 @@ | |||||
| import re | ||||||
| from collections import defaultdict | ||||||
|
|
||||||
| from django.core.exceptions import ValidationError | ||||||
| from django.core.validators import validate_email | ||||||
| from django.db import models as db | ||||||
|
|
||||||
| import pycrdt | ||||||
| from bs4 import BeautifulSoup | ||||||
|
|
||||||
| from core import enums | ||||||
| from core import enums, models | ||||||
|
|
||||||
|
|
||||||
| def get_ancestor_to_descendants_map(paths, steplen): | ||||||
|
|
@@ -96,3 +100,39 @@ def extract_attachments(content): | |||||
|
|
||||||
| xml_content = base64_yjs_to_xml(content) | ||||||
| return re.findall(enums.MEDIA_STORAGE_URL_EXTRACT, xml_content) | ||||||
|
|
||||||
|
|
||||||
| def users_sharing_documents_with(user): | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a bit afraid of this, because on each search query, you will load all user accesses, I don't have a better proposition and maybe we should let it like that but we should at least add a "timer" log to be able to see if this takes too much time. Maybe a cache could also help? |
||||||
| """ | ||||||
| Returns a map of users sharing documents with the given user, | ||||||
| sorted by last shared date. | ||||||
| """ | ||||||
|
|
||||||
| user_docs_qs = models.DocumentAccess.objects.filter(user=user).values_list( | ||||||
| "document_id", flat=True | ||||||
| ) | ||||||
| shared_qs = ( | ||||||
| models.DocumentAccess.objects.filter(document_id__in=user_docs_qs) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would using a Subquery be better here? (I mean, to prevent data from being passed to Django for nothing) |
||||||
| .exclude(user=user) | ||||||
| .values("user") | ||||||
| .annotate(last_shared=db.Max("created_at")) | ||||||
| ) | ||||||
| return {item["user"]: item["last_shared"] for item in shared_qs} | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I would suggest to use an iterator here. |
||||||
|
|
||||||
|
|
||||||
| def extract_email_domain_parts(email): | ||||||
| """ | ||||||
| Extracts the full domain and partial domain from an email address as a tuple. | ||||||
| The partial domain consists of the last two segments of the domain, eg. "gouv.fr". | ||||||
|
|
||||||
| If the email is invalid (eg, is an empty string), returns empty strings. | ||||||
| """ | ||||||
| try: | ||||||
| validate_email(email) | ||||||
| except ValidationError: | ||||||
| return "", "" | ||||||
|
|
||||||
| domain = email.split("@", 1)[1].lower() | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Such split is not "safe", the safe way is using Note: django-lasuite already provides a |
||||||
| parts = domain.split(".") | ||||||
| partial_domain = ".".join(parts[-2:]) if len(parts) >= 2 else domain | ||||||
| return domain, partial_domain | ||||||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe create an issue now in django-lasuite so we don't forget to upstream it as it will have to be shared with all our apps once it has proven its efficiency?