wip: perf_counter logs and fix cubic complexity in compute_information_gain #1290

Draft · wants to merge 5 commits into base: main
41 changes: 29 additions & 12 deletions backend/tournesol/suggestions/graph.py
@@ -16,6 +16,12 @@
from tournesol.suggestions.suggested_user_video import SuggestedUserVideo
from tournesol.suggestions.suggested_video import SuggestedVideo

from typing import List
import logging
import time

logger = logging.getLogger(__name__)


class CompleteGraph:
_local_poll: Poll
@@ -217,6 +223,7 @@ def compute_offline_parameters(self, scaling_factor_increasing_videos: list[Sugg
the video scores otherwise
"""
if self.dirty:
begin = time.perf_counter()
self.dirty = False
self.build_adjacency_matrix()
self.build_similarity_matrix()
@@ -238,35 +245,45 @@ def compute_offline_parameters(self, scaling_factor_increasing_videos: list[Sugg
.filter(contributor_rating__poll__name=self._local_poll.name)
.aggregate(mean=Avg("score"))
)["mean"] or 0.0
logger.debug("Fetch data for offline parameters: %.3fs" % (time.perf_counter() - begin))

self.compute_information_gain(scaling_factor_increasing_videos)

def compute_information_gain(self, scaling_factor_increasing_videos: list[SuggestedVideo]):
def compute_information_gain(self, scaling_factor_increasing_videos: List[SuggestedVideo]):
"""
Function used to compute the estimated information gain
"""
# First try to increase the scaling accuracy of the user if necessary
scale_uncertainty = self.local_user_scaling.scale_uncertainty
if scale_uncertainty is None:
scale_uncertainty = 1
translation_uncertainty = self.local_user_scaling.translation_uncertainty
if translation_uncertainty is None:
translation_uncertainty = 0

weighted_scaling_uncertainty = scale_uncertainty * self.local_user_mean
actual_scaling_uncertainty = weighted_scaling_uncertainty + translation_uncertainty

# For a new user with a small number of comparisons
logger.debug(f"{len(self.nodes)=}")
logger.debug(f"{actual_scaling_uncertainty=}, {self.MIN_SCALING_ACCURACY=}")
if actual_scaling_uncertainty > self.MIN_SCALING_ACCURACY or len(self.nodes) == 0:
for va in self._nodes:
for vb in self._nodes:
if (
va in scaling_factor_increasing_videos
and vb in scaling_factor_increasing_videos
):
va.video1_score = 1
va._graph_sparsity_score[vb] = 1
else:
va.video1_score = 0
va._graph_sparsity_score[vb] = 0
logger.debug(f"Number of nodes to traverse {len(self._nodes)}")
logger.debug(f"Number of nodes in scaling_factor_increasing_videos {len(scaling_factor_increasing_videos)}")
nodes_per_uid = {n.uid: n for n in self._nodes}
begin = time.perf_counter()
for va in scaling_factor_increasing_videos:
if va.uid in nodes_per_uid:
nodes_per_uid[va.uid].video1_score = 1
for vb in scaling_factor_increasing_videos:
if vb.uid in nodes_per_uid:
nodes_per_uid[va.uid]._graph_sparsity_score[nodes_per_uid[vb.uid]] = 1
logger.debug("Went through all pairs of nodes: %.3fs" % (time.perf_counter() - begin))

# Once the scaling factor is high enough, check what video should gain
# information being compared by the user
else:
logger.debug("Scaling accuracy sufficient: computing information gain from connected sub-graphs")
sub_graphs = self.find_connected_sub_graphs()
if len(sub_graphs) == 1:
sub_graphs = [self]
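Review note on the complexity fix in this file: the previous code iterated over every pair of nodes in `self._nodes` and ran `in` membership tests against the `scaling_factor_increasing_videos` list, roughly O(|V|² · |S|) work, while the new code only walks the pairs of `scaling_factor_increasing_videos` and resolves them through a `uid -> node` dict, roughly O(|V|) to build the dict plus O(|S|²) pair updates, with |S| now capped at 100 by `_get_user_comparability_augmenting_videos`. A standalone sketch of the two shapes, using a hypothetical `Node` stand-in for `SuggestedUserVideo` (not the real class), so the difference can be timed outside Django:

```python
import time
from collections import defaultdict


class Node:
    """Hypothetical stand-in for SuggestedUserVideo: only the fields used below."""

    def __init__(self, uid: str):
        self.uid = uid
        self.video1_score = 0
        self._graph_sparsity_score = defaultdict(int)  # unset pairs read as 0


def mark_pairs_all_nodes(nodes, increasing):
    # Old shape: every pair of graph nodes, with list membership tests.
    for va in nodes:
        for vb in nodes:
            flag = 1 if (va in increasing and vb in increasing) else 0
            va.video1_score = flag
            va._graph_sparsity_score[vb] = flag


def mark_pairs_increasing_only(nodes, increasing):
    # New shape: only pairs from `increasing`, resolved via a uid -> node dict.
    nodes_per_uid = {n.uid: n for n in nodes}
    for va in increasing:
        if va.uid in nodes_per_uid:
            nodes_per_uid[va.uid].video1_score = 1
            for vb in increasing:
                if vb.uid in nodes_per_uid:
                    nodes_per_uid[va.uid]._graph_sparsity_score[nodes_per_uid[vb.uid]] = 1


nodes = [Node(f"yt:video_{i:04d}") for i in range(800)]
increasing = nodes[:50]
for fn in (mark_pairs_all_nodes, mark_pairs_increasing_only):
    begin = time.perf_counter()
    fn(nodes, increasing)
    print(f"{fn.__name__}: {time.perf_counter() - begin:.3f}s")
```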
4 changes: 3 additions & 1 deletion backend/tournesol/suggestions/suggested_user_video.py
@@ -1,5 +1,7 @@
from __future__ import annotations

from collections import defaultdict

from tournesol.suggestions.suggested_user import SuggestedUser
from tournesol.suggestions.suggested_video import SuggestedVideo

@@ -13,7 +15,7 @@ def __init__(
local_user: SuggestedUser
):
super().__init__()
self._graph_sparsity_score = {}
self._graph_sparsity_score = defaultdict(int)
self.uid = parent.uid
self.nb_comparison_with = parent.nb_comparison_with
self.local_user = local_user
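The switch to `defaultdict(int)` pairs with the graph.py change above: the rewritten loop only writes `_graph_sparsity_score` entries for videos taken from `scaling_factor_increasing_videos`, whereas the old loop wrote an explicit 0 or 1 for every pair, so untouched pairs must now read as 0 instead of raising `KeyError`. A minimal illustration (keys are hypothetical):

```python
from collections import defaultdict

plain = {}
sparse = defaultdict(int)

sparse[("yt:vidA", "yt:vidB")] = 1           # only the pairs actually visited are written
print(sparse[("yt:vidA", "yt:vidC")])        # -> 0: missing pairs default to 0
print(plain.get(("yt:vidA", "yt:vidC"), 0))  # a plain dict needs an explicit default...
# plain[("yt:vidA", "yt:vidC")]              # ...or it raises KeyError
```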
36 changes: 32 additions & 4 deletions backend/tournesol/suggestions/suggestionprovider.py
@@ -1,13 +1,18 @@
import logging
import time
from typing import Optional

import numpy as np
from django.db.models import F, QuerySet

from core.models import User
from tournesol.models import Comparison, ComparisonCriteriaScore, Entity, Poll
from tournesol.suggestions.graph import CompleteGraph, Graph
from tournesol.suggestions.suggested_user import SuggestedUser as RecommendationUser
from tournesol.suggestions.suggested_user import SuggestedUser
from tournesol.suggestions.suggested_video import SuggestedVideo

logger = logging.getLogger(__name__)


class SuggestionProvider:
"""
@@ -82,12 +87,16 @@ def _get_user_comparability_augmenting_videos(self) -> list[SuggestedVideo]:
for entity_uid in comparison
)

return [
supertursted_compared_entities = [
self._entity_to_video[uid]
for uid in supertursted_compared_entities
if self._entity_to_video.get(uid)
]

np.random.shuffle(supertursted_compared_entities)

return supertursted_compared_entities[:100]

def _get_user_rate_later_video_list(self, user: User) -> list[SuggestedVideo]:
"""
Function to get the list of videos of the user's rate later list
@@ -119,7 +128,7 @@ def register_new_user(self, new_user: User):
Function used to register a new user wanting suggestions, it thus initializes its
comparison graph
"""
recommendation_user = RecommendationUser(
recommendation_user = SuggestedUser(
self._entity_to_video, new_user, self.criteria, self.poll
)
self._user_specific_graphs[new_user.id] = Graph(
@@ -166,22 +175,35 @@ def get_first_video_recommendation(
"""
# Lazily load the user graph
if user.id not in self._user_specific_graphs:
begin = time.perf_counter()
self.register_new_user(user)
logger.debug("Registered new user: %.3fs" % (time.perf_counter() - begin))
result = []

# Give the first video id to the graph so the sorting will take that into account
user_graph = self._user_specific_graphs[user.id]
user_graph.compute_offline_parameters(self._get_user_comparability_augmenting_videos())
begin = time.perf_counter()
scaling_factor_increasing_videos = self._get_user_comparability_augmenting_videos()
user_graph.compute_offline_parameters(scaling_factor_increasing_videos)
logger.debug("Offline parameters for user graph: %.3fs" % (time.perf_counter() - begin))
begin = time.perf_counter()
self._complete_graph.compute_offline_parameters()
logger.debug("Offline parameters for complete graph: %.3fs" % (time.perf_counter() - begin))

# Prepare the set of videos to sort, taking the videos present in the graph
# and append the ones that are not yet compared by the user
considered_vid_list = self._prepare_video_list(user, None)

# TODO Get rate later videos for the user
rate_later = []

max_vid_pref = 0
# Todo : take into account the rate later list / already seen videos ?
for v in considered_vid_list:
v.user_pref = max(v.nb_comparison_with.values()) / v.comparison_nb
v.user_pref += v.score/100
if v in rate_later:
v.user_pref += 0.5
if v.user_pref > max_vid_pref:
max_vid_pref = v.user_pref

@@ -224,9 +246,15 @@ def get_second_video_recommendation(
# the ones that are not yet compared by the user
considered_vid_list = self._prepare_video_list(user, first_video)

# TODO Get rate later videos for the user
rate_later = []

max_vid_pref = 0
for v in considered_vid_list:
v.user_pref = v.nb_comparison_with[first_video_id] / v.comparison_nb
v.user_pref += v.score/100
if v in rate_later:
v.user_pref += 0.5
if v.user_pref > max_vid_pref:
max_vid_pref = v.user_pref

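The timing pattern repeated in this file (`begin = time.perf_counter()` followed by a `logger.debug(...)` with the elapsed time) could be factored into a small context manager if these logs are meant to outlive the WIP stage. This is only a suggestion sketch, not part of the diff; `log_duration` is a hypothetical helper:

```python
import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)


@contextmanager
def log_duration(label: str):
    """Log at DEBUG level how long the wrapped block took."""
    begin = time.perf_counter()
    try:
        yield
    finally:
        logger.debug("%s: %.3fs", label, time.perf_counter() - begin)


# Usage sketch, mirroring get_first_video_recommendation:
# with log_duration("Offline parameters for user graph"):
#     user_graph.compute_offline_parameters(scaling_factor_increasing_videos)
```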
23 changes: 16 additions & 7 deletions backend/tournesol/views/entities_to_compare.py
@@ -1,3 +1,6 @@
import logging
import time

from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import OpenApiParameter, extend_schema, extend_schema_view
from rest_framework.exceptions import ValidationError
@@ -10,6 +13,7 @@
from tournesol.suggestions.suggester_store import SuggesterStore
from tournesol.views import PollScopedViewMixin

logger = logging.getLogger(__name__)

@extend_schema_view(
get=extend_schema(
@@ -28,25 +32,30 @@ class EntitiesToCompareView(PollScopedViewMixin, ListAPIView):
serializer_class = EntityNoExtraFieldSerializer

def list(self, request, *args, **kwargs):
begin = time.perf_counter()
poll = self.poll_from_url


if poll.name != DEFAULT_POLL_NAME:
raise ValidationError({"detail": "only poll 'videos' is supported"})

user = self.request.user
suggester = SuggesterStore.actual_store.get_suggester(poll)
logger.debug("Built SuggesterStore: %.3fs" % (time.perf_counter() - begin))

opt_first_entity = self.request.query_params.get("first_entity_uid")
limit = int(self.request.query_params.get("limit", 10))
if opt_first_entity is None:
begin = time.perf_counter()
logger.debug("Start getting first video")
suggestions = suggester.get_first_video_recommendation(user, limit)
logger.debug("Got first video: %.3fs" % (time.perf_counter() - begin))

else:
suggestions = suggester.get_second_video_recommendation(user, opt_first_entity, limit)

entities = {
e.uid: e
for e in Entity.objects.filter(
uid__in=(s.uid for s in suggestions)
)
}
ser = self.get_serializer([entities[s.uid] for s in suggestions], many=True)

sorted_entity_ids = [s.uid for s in suggestions]
entities = {e.uid: e for e in Entity.objects.filter(uid__in=sorted_entity_ids)}
ser = self.get_serializer([entities[e_id] for e_id in sorted_entity_ids], many=True)
return Response({"results": ser.data})
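The rewritten lookup at the end of `list()` exists because `Entity.objects.filter(uid__in=...)` returns rows in database order, not in the order produced by the suggester, so the queryset is re-indexed by uid and re-ordered by `sorted_entity_ids` before serialization. The same re-ordering in plain Python, with made-up uids and dicts standing in for Entity rows:

```python
# Suggester output, already in the order we want to return to the client.
sorted_entity_ids = ["yt:abc", "yt:def", "yt:ghi"]

# Rows come back from the database in arbitrary order.
rows = [{"uid": "yt:ghi"}, {"uid": "yt:abc"}, {"uid": "yt:def"}]

entities = {row["uid"]: row for row in rows}
ordered = [entities[uid] for uid in sorted_entity_ids]
print([row["uid"] for row in ordered])  # -> ['yt:abc', 'yt:def', 'yt:ghi']
```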
4 changes: 2 additions & 2 deletions infra/ansible/roles/monitoring/tasks/main.yml
@@ -168,12 +168,12 @@

- name: Add Grafana repository signing key
apt_key:
url: https://packages.grafana.com/gpg.key
url: https://apt.grafana.com/gpg.key
state: present

- name: Add Grafana repository
apt_repository:
repo: deb https://packages.grafana.com/oss/deb stable main
repo: deb https://apt.grafana.com stable main
state: present

- name: Gather the package facts