wip: perf_counter logs and fix cubic complexity in compute_information_gain #1290

Draft · wants to merge 5 commits into base: main
41 changes: 29 additions & 12 deletions backend/tournesol/suggestions/graph.py
@@ -16,6 +16,12 @@
from tournesol.suggestions.suggested_user_video import SuggestedUserVideo
from tournesol.suggestions.suggested_video import SuggestedVideo

from typing import List
import logging
import time

logger = logging.getLogger(__name__)


class CompleteGraph:
_local_poll: Poll
@@ -217,6 +223,7 @@ def compute_offline_parameters(self, scaling_factor_increasing_videos: list[Sugg
the video scores otherwise
"""
if self.dirty:
begin = time.perf_counter()
self.dirty = False
self.build_adjacency_matrix()
self.build_similarity_matrix()
@@ -238,35 +245,45 @@ def compute_offline_parameters(self, scaling_factor_increasing_videos: list[Sugg
.filter(contributor_rating__poll__name=self._local_poll.name)
.aggregate(mean=Avg("score"))
)["mean"] or 0.0
logger.debug("Fetch data for offline parameters: %.3fs" % (time.perf_counter() - begin))

self.compute_information_gain(scaling_factor_increasing_videos)

def compute_information_gain(self, scaling_factor_increasing_videos: list[SuggestedVideo]):
def compute_information_gain(self, scaling_factor_increasing_videos: List[SuggestedVideo]):
"""
Function used to compute the estimated information gain
"""
# First try to increase the scaling accuracy of the user if necessary
scale_uncertainty = self.local_user_scaling.scale_uncertainty
if scale_uncertainty is None:
scale_uncertainty = 1
translation_uncertainty = self.local_user_scaling.translation_uncertainty
if translation_uncertainty is None:
translation_uncertainty = 0

weighted_scaling_uncertainty = scale_uncertainty * self.local_user_mean
actual_scaling_uncertainty = weighted_scaling_uncertainty + translation_uncertainty

# For a new user with a small number of comparisons
logger.debug(f"{len(self.nodes)=}")
logger.debug(f"{actual_scaling_uncertainty=}, {self.MIN_SCALING_ACCURACY=}")
if actual_scaling_uncertainty > self.MIN_SCALING_ACCURACY or len(self.nodes) == 0:
for va in self._nodes:
for vb in self._nodes:
if (
va in scaling_factor_increasing_videos
and vb in scaling_factor_increasing_videos
):
va.video1_score = 1
va._graph_sparsity_score[vb] = 1
else:
va.video1_score = 0
va._graph_sparsity_score[vb] = 0
logger.debug(f"Number of nodes to traverse {len(self._nodes)}")
logger.debug(f"Number of nodes in scaling_factor_increasing_videos {len(scaling_factor_increasing_videos)}")
nodes_per_uid = {n.uid: n for n in self._nodes}
begin = time.perf_counter()
for va in scaling_factor_increasing_videos:
if va.uid in nodes_per_uid:
nodes_per_uid[va.uid].video1_score = 1
for vb in scaling_factor_increasing_videos:
if vb.uid in nodes_per_uid:
nodes_per_uid[va.uid]._graph_sparsity_score[nodes_per_uid[vb.uid]] = 1
logger.debug("Went through all pairs of nodes: %.3fs" % (time.perf_counter() - begin))

# Once the scaling factor is high enough, check what video should gain
# information being compared by the user
else:
logger.debug("Scaling accuracy sufficient: computing information gain from connected sub-graphs")
sub_graphs = self.find_connected_sub_graphs()
if len(sub_graphs) == 1:
sub_graphs = [self]
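Review note on the complexity fix in this file: the previous code iterated over every pair of nodes in `self._nodes` and ran `in` membership tests against the `scaling_factor_increasing_videos` list, roughly O(|V|² · |S|) work, while the new code only walks the pairs of `scaling_factor_increasing_videos` and resolves them through a `uid -> node` dict, roughly O(|V|) to build the dict plus O(|S|²) pair updates, with |S| now capped at 100 by `_get_user_comparability_augmenting_videos`. A standalone sketch of the two shapes, using a hypothetical `Node` stand-in for `SuggestedUserVideo` (not the real class), so the difference can be timed outside Django:

```python
import time
from collections import defaultdict


class Node:
    """Hypothetical stand-in for SuggestedUserVideo: only the fields used below."""

    def __init__(self, uid: str):
        self.uid = uid
        self.video1_score = 0
        self._graph_sparsity_score = defaultdict(int)  # unset pairs read as 0


def mark_pairs_all_nodes(nodes, increasing):
    # Old shape: every pair of graph nodes, with list membership tests.
    for va in nodes:
        for vb in nodes:
            flag = 1 if (va in increasing and vb in increasing) else 0
            va.video1_score = flag
            va._graph_sparsity_score[vb] = flag


def mark_pairs_increasing_only(nodes, increasing):
    # New shape: only pairs from `increasing`, resolved via a uid -> node dict.
    nodes_per_uid = {n.uid: n for n in nodes}
    for va in increasing:
        if va.uid in nodes_per_uid:
            nodes_per_uid[va.uid].video1_score = 1
            for vb in increasing:
                if vb.uid in nodes_per_uid:
                    nodes_per_uid[va.uid]._graph_sparsity_score[nodes_per_uid[vb.uid]] = 1


nodes = [Node(f"yt:video_{i:04d}") for i in range(800)]
increasing = nodes[:50]
for fn in (mark_pairs_all_nodes, mark_pairs_increasing_only):
    begin = time.perf_counter()
    fn(nodes, increasing)
    print(f"{fn.__name__}: {time.perf_counter() - begin:.3f}s")
```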
4 changes: 3 additions & 1 deletion backend/tournesol/suggestions/suggested_user_video.py
@@ -1,5 +1,7 @@
from __future__ import annotations

from collections import defaultdict

from tournesol.suggestions.suggested_user import SuggestedUser
from tournesol.suggestions.suggested_video import SuggestedVideo

@@ -13,7 +15,7 @@ def __init__(
local_user: SuggestedUser
):
super().__init__()
self._graph_sparsity_score = {}
self._graph_sparsity_score = defaultdict(int)
self.uid = parent.uid
self.nb_comparison_with = parent.nb_comparison_with
self.local_user = local_user
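The switch to `defaultdict(int)` pairs with the graph.py change above: the rewritten loop only writes `_graph_sparsity_score` entries for videos taken from `scaling_factor_increasing_videos`, whereas the old loop wrote an explicit 0 or 1 for every pair, so untouched pairs must now read as 0 instead of raising `KeyError`. A minimal illustration (keys are hypothetical):

```python
from collections import defaultdict

plain = {}
sparse = defaultdict(int)

sparse[("yt:vidA", "yt:vidB")] = 1           # only the pairs actually visited are written
print(sparse[("yt:vidA", "yt:vidC")])        # -> 0: missing pairs default to 0
print(plain.get(("yt:vidA", "yt:vidC"), 0))  # a plain dict needs an explicit default...
# plain[("yt:vidA", "yt:vidC")]              # ...or it raises KeyError
```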
36 changes: 32 additions & 4 deletions backend/tournesol/suggestions/suggestionprovider.py
@@ -1,13 +1,18 @@
import logging
import time
from typing import Optional

import numpy as np
from django.db.models import F, QuerySet

from core.models import User
from tournesol.models import Comparison, ComparisonCriteriaScore, Entity, Poll
from tournesol.suggestions.graph import CompleteGraph, Graph
from tournesol.suggestions.suggested_user import SuggestedUser as RecommendationUser
from tournesol.suggestions.suggested_user import SuggestedUser
from tournesol.suggestions.suggested_video import SuggestedVideo

logger = logging.getLogger(__name__)


class SuggestionProvider:
"""
@@ -82,12 +87,16 @@ def _get_user_comparability_augmenting_videos(self) -> list[SuggestedVideo]:
for entity_uid in comparison
)

return [
supertursted_compared_entities = [
self._entity_to_video[uid]
for uid in supertursted_compared_entities
if self._entity_to_video.get(uid)
]

np.random.shuffle(supertursted_compared_entities)

return supertursted_compared_entities[:100]

def _get_user_rate_later_video_list(self, user: User) -> list[SuggestedVideo]:
"""
Function to get the list of videos of the user's rate later list
@@ -119,7 +128,7 @@ def register_new_user(self, new_user: User):
Function used to register a new user wanting suggestions, it thus initializes its
comparison graph
"""
recommendation_user = RecommendationUser(
recommendation_user = SuggestedUser(
self._entity_to_video, new_user, self.criteria, self.poll
)
self._user_specific_graphs[new_user.id] = Graph(
@@ -166,22 +175,35 @@ def get_first_video_recommendation(
"""
# Lazily load the user graph
if user.id not in self._user_specific_graphs:
begin = time.perf_counter()
self.register_new_user(user)
logger.debug("Registered new user: %.3fs" % (time.perf_counter() - begin))
result = []

# Give the first video id to the graph so the sorting will take that into account
user_graph = self._user_specific_graphs[user.id]
user_graph.compute_offline_parameters(self._get_user_comparability_augmenting_videos())
begin = time.perf_counter()
scaling_factor_increasing_videos = self._get_user_comparability_augmenting_videos()
user_graph.compute_offline_parameters(scaling_factor_increasing_videos)
logger.debug("Offline parameters for user graph: %.3fs" % (time.perf_counter() - begin))
begin = time.perf_counter()
self._complete_graph.compute_offline_parameters()
logger.debug("Offline parameters for complete graph: %.3fs" % (time.perf_counter() - begin))

# Prepare the set of videos to sort, taking the videos present in the graph
# and append the ones that are not yet compared by the user
considered_vid_list = self._prepare_video_list(user, None)

# TODO Get rate later videos for the user
rate_later = []

max_vid_pref = 0
# Todo : take into account the rate later list / already seen videos ?
for v in considered_vid_list:
v.user_pref = max(v.nb_comparison_with.values()) / v.comparison_nb
v.user_pref += v.score/100
if v in rate_later:
v.user_pref += 0.5
if v.user_pref > max_vid_pref:
max_vid_pref = v.user_pref

@@ -224,9 +246,15 @@ def get_second_video_recommendation(
# the ones that are not yet compared by the user
considered_vid_list = self._prepare_video_list(user, first_video)

# TODO Get rate later videos for the user
rate_later = []

max_vid_pref = 0
for v in considered_vid_list:
v.user_pref = v.nb_comparison_with[first_video_id] / v.comparison_nb
v.user_pref += v.score/100
if v in rate_later:
v.user_pref += 0.5
if v.user_pref > max_vid_pref:
max_vid_pref = v.user_pref

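The timing pattern repeated in this file (`begin = time.perf_counter()` followed by a `logger.debug(...)` with the elapsed time) could be factored into a small context manager if these logs are meant to outlive the WIP stage. This is only a suggestion sketch, not part of the diff; `log_duration` is a hypothetical helper:

```python
import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)


@contextmanager
def log_duration(label: str):
    """Log at DEBUG level how long the wrapped block took."""
    begin = time.perf_counter()
    try:
        yield
    finally:
        logger.debug("%s: %.3fs", label, time.perf_counter() - begin)


# Usage sketch, mirroring get_first_video_recommendation:
# with log_duration("Offline parameters for user graph"):
#     user_graph.compute_offline_parameters(scaling_factor_increasing_videos)
```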
23 changes: 16 additions & 7 deletions backend/tournesol/views/entities_to_compare.py
@@ -1,3 +1,6 @@
import logging
import time

from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view
from rest_framework.exceptions import ValidationError
@@ -10,6 +13,7 @@
from tournesol.suggestions.suggester_store import SuggesterStore
from tournesol.views import PollScopedViewMixin

logger = logging.getLogger(__name__)

@extend_schema_view(
get=extend_schema(
@@ -28,25 +32,30 @@ class EntitiesToCompareView(PollScopedViewMixin, ListAPIView):
serializer_class = EntityNoExtraFieldSerializer

def list(self, request, *args, **kwargs):
begin = time.perf_counter()
poll = self.poll_from_url


if poll.name != DEFAULT_POLL_NAME:
raise ValidationError({"detail": "only poll 'videos' is supported"})

user = self.request.user
suggester = SuggesterStore.actual_store.get_suggester(poll)
logger.debug("Built SuggesterStore: %.3fs" % (time.perf_counter() - begin))

opt_first_entity = self.request.query_params.get("first_entity_uid")
limit = int(self.request.query_params.get("limit", 10))
if opt_first_entity is None:
begin = time.perf_counter()
logger.debug("Start getting first video")
suggestions = suggester.get_first_video_recommendation(user, limit)
logger.debug("Got first video: %.3fs" % (time.perf_counter() - begin))

else:
suggestions = suggester.get_second_video_recommendation(user, opt_first_entity, limit)

entities = {
e.uid: e
for e in Entity.objects.filter(
uid__in=(s.uid for s in suggestions)
)
}
ser = self.get_serializer([entities[s.uid] for s in suggestions], many=True)

sorted_entity_ids = [s.uid for s in suggestions]
entities = {e.uid: e for e in Entity.objects.filter(uid__in=sorted_entity_ids)}
ser = self.get_serializer([entities[e_id] for e_id in sorted_entity_ids], many=True)
return Response({"results": ser.data})
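The rewritten lookup at the end of `list()` exists because `Entity.objects.filter(uid__in=...)` returns rows in database order, not in the order produced by the suggester, so the queryset is re-indexed by uid and re-ordered by `sorted_entity_ids` before serialization. The same re-ordering in plain Python, with made-up uids and dicts standing in for Entity rows:

```python
# Suggester output, already in the order we want to return to the client.
sorted_entity_ids = ["yt:abc", "yt:def", "yt:ghi"]

# Rows come back from the database in arbitrary order.
rows = [{"uid": "yt:ghi"}, {"uid": "yt:abc"}, {"uid": "yt:def"}]

entities = {row["uid"]: row for row in rows}
ordered = [entities[uid] for uid in sorted_entity_ids]
print([row["uid"] for row in ordered])  # -> ['yt:abc', 'yt:def', 'yt:ghi']
```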
4 changes: 2 additions & 2 deletions infra/ansible/roles/monitoring/tasks/main.yml
@@ -168,12 +168,12 @@

- name: Add Grafana repository signing key
apt_key:
url: https://packages.grafana.com/gpg.key
url: https://apt.grafana.com/gpg.key
state: present

- name: Add Grafana repository
apt_repository:
repo: deb https://packages.grafana.com/oss/deb stable main
repo: deb https://apt.grafana.com stable main
state: present

- name: Gather the package facts