Commit 1d94e4a2 authored by Richard Glosner's avatar Richard Glosner
Browse files

Merge branch 'llm-cache-and-serialization' into 'main'

Llm cache and serialization

See merge request inject/backend!483
parents eefffb40 a8710a6e
Loading
Loading
Loading
Loading
+87 −6
Original line number Diff line number Diff line
@@ -18,7 +18,13 @@ from exercise.serializers import (
    ExerciseSerializer,
    UserSerializer,
)
from exercise_definition.models import Feature, FileInfo
from exercise_definition.models import (
    Feature,
    FileInfo,
    EmailAddress,
    FreeFormQuestion,
    LLMAssessment,
)
from exercise_definition.serializers import (
    MilestoneSerializer,
    ToolSerializer,
@@ -27,12 +33,21 @@ from exercise_definition.serializers import (
    QuestionnaireSerializer,
    LearningObjectiveSerializer,
    ChannelSerializer,
    LLMAssessmentSerializer,
)
from running_exercise.models import (
    EmailThread,
    LLMEmailSuggestion,
    LLMFreeFormEvaluation,
    LLMEmailEvaluation,
)
from running_exercise.models import EmailThread
from running_exercise.serializers import (
    ActionLogSerializer,
    EmailThreadSerializer,
    TeamQuestionnaireStateSerializer,
    LLMEmailSuggestionSerializer,
    LLMFreeFormEvaluationSerializer,
    LLMEmailEvaluationSerializer,
)


@@ -65,6 +80,7 @@ class TeamLogSerializer:
    team: Team
    team_log_path: str
    team_upload_path: str
    team_llm_eval_path: str
    context: Dict[str, object]

    def __init__(
@@ -81,8 +97,13 @@ class TeamLogSerializer:
        )
        os.makedirs(self.team_upload_path)

        self.team_llm_eval_path = os.path.join(
            self.team_log_path, "llm-evaluations"
        )
        os.makedirs(self.team_llm_eval_path)

    def serialize_all(
        self, with_emails: bool, with_questionnaires: bool
        self, with_emails: bool, with_questionnaires: bool, with_llm: bool
    ) -> None:
        self.serialize_action_log()
        self.serialize_milestone_states()
@@ -91,9 +112,14 @@ class TeamLogSerializer:

        if with_emails:
            self.serialize_emails()
            if with_llm:
                self.serialize_llm_email_suggestions()
                self.serialize_llm_email_evaluations()

        if with_questionnaires:
            self.serialize_questionnaires()
            if with_llm:
                self.serialize_llm_free_form_evaluations()

    def serialize_action_log(self) -> None:
        with open(
@@ -158,6 +184,38 @@ class TeamLogSerializer:
                self.team.team_questionnaire_states.all(),
            )

    def serialize_llm_email_suggestions(self) -> None:
        """Dump every LLM email suggestion triggered by this team's senders
        into ``email_suggestions.jsonl`` under the team's LLM-eval folder."""
        out_path = os.path.join(
            self.team_llm_eval_path, "email_suggestions.jsonl"
        )
        team_suggestions = LLMEmailSuggestion.objects.filter(
            trigger_email__sender__team_id=self.team.id
        )
        with open(out_path, "w") as out_file:
            _serialize_data(
                out_file, LLMEmailSuggestionSerializer, team_suggestions
            )

    def serialize_llm_email_evaluations(self) -> None:
        """Dump LLM email evaluations attached to this team's action logs
        into ``email_evaluations.jsonl`` under the team's LLM-eval folder."""
        out_path = os.path.join(
            self.team_llm_eval_path, "email_evaluations.jsonl"
        )
        team_evaluations = LLMEmailEvaluation.objects.filter(
            action_log__team_id=self.team.id
        )
        with open(out_path, "w") as out_file:
            _serialize_data(
                out_file, LLMEmailEvaluationSerializer, team_evaluations
            )

    def serialize_llm_free_form_evaluations(self) -> None:
        """Dump LLM free-form evaluations for this team's questionnaire
        answers into ``free_form_evaluations.jsonl``."""
        out_path = os.path.join(
            self.team_llm_eval_path, "free_form_evaluations.jsonl"
        )
        team_evaluations = LLMFreeFormEvaluation.objects.filter(
            questionnaire_answer__submission__team_questionnaire_state__team_id=self.team.id
        )
        with open(out_path, "w") as out_file:
            _serialize_data(
                out_file, LLMFreeFormEvaluationSerializer, team_evaluations
            )


class ExerciseLogSerializer:
    exercise: Exercise
@@ -191,7 +249,11 @@ class ExerciseLogSerializer:
        if with_emails:
            self.serialize_email_participants()

        self.serialize_teams(with_emails, with_questionnaires)
        with_llm = self.exercise.llm
        if with_llm:
            self.serialize_llm_assessments()

        self.serialize_teams(with_emails, with_questionnaires, with_llm)
        self.serialize_instructors()
        self.serialize_file_infos()
        self.copy_definition_files()
@@ -243,7 +305,7 @@ class ExerciseLogSerializer:
            )

    def serialize_teams(
        self, with_emails: bool, with_questionnaires: bool
        self, with_emails: bool, with_questionnaires: bool, with_llm: bool
    ) -> None:
        with open(os.path.join(self.log_path, "teams.jsonl"), "w") as teams:
            _serialize_data(
@@ -255,7 +317,7 @@ class ExerciseLogSerializer:

        for team in self.exercise.teams.all():
            serializer = TeamLogSerializer(team, self.log_path, self.context)
            serializer.serialize_all(with_emails, with_questionnaires)
            serializer.serialize_all(with_emails, with_questionnaires, with_llm)

    def serialize_instructors(self) -> None:
        with open(
@@ -323,6 +385,25 @@ class ExerciseLogSerializer:
                self.exercise.definition.channels.all(),
            )

    def serialize_llm_assessments(self) -> None:
        """Dump the LLM assessment definitions referenced by this exercise's
        email addresses and free-form questions to ``llm_assessments.jsonl``.

        Both referencing models are queried the same way, so collect their
        ``llm_assessment_id`` values in one loop before fetching the rows.
        """
        referenced_ids = set()
        for referencing_model in (EmailAddress, FreeFormQuestion):
            referenced_ids.update(
                referencing_model.objects.filter(
                    definition_id=self.exercise.definition_id,
                    llm_assessment__isnull=False,
                ).values_list("llm_assessment_id", flat=True)
            )

        assessments = LLMAssessment.objects.filter(id__in=referenced_ids)
        out_path = os.path.join(self.log_path, "llm_assessments.jsonl")
        with open(out_path, "w") as out_file:
            _serialize_data(out_file, LLMAssessmentSerializer, assessments)


class LogManager:
    log_path: str
+7 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ from exercise_definition.models import (
    MultipleChoiceQuestion,
    Repeatable,
    Overlay,
    LLMAssessment,
)


@@ -358,3 +359,9 @@ class LearningObjectiveSerializer(ModelSerializer):

    def get_total_score(self, obj):
        return obj.total_score()


class LLMAssessmentSerializer(ModelSerializer):
    """Serializes an LLMAssessment's id, persona and assessment prompt."""
    class Meta:
        model = LLMAssessment
        fields = ["id", "persona", "assessment"]
+11 −0
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@ import threading

from django.apps import AppConfig
from django.conf import settings
from django.db import connection


def test_llm_connection():
@@ -15,6 +16,8 @@ def test_llm_connection():
        logger.info("LLM connection test successful.")
    except Exception as e:
        logger.error(f"LLM connection test failed: {e}")
    finally:
        connection.close()


class LlmConfig(AppConfig):
@@ -34,11 +37,19 @@ class LlmConfig(AppConfig):
            )
            return

        if settings.LLM_URL == "":
            logger.info(
                "LLM URL not configured, LLM client will not be initialized."
            )
            return

        # TODO: implement switch for OpenAI child class and possibly other
        client_module.LLM_CLIENT = client_module.OllamaLLM(
            model=settings.LLM_MODEL,
            llm_url=settings.LLM_URL,
            timeout=settings.LLM_TIMEOUT,
            api_key_header=settings.LLM_API_KEY_HEADER,
            api_key_value=settings.LLM_API_KEY_VALUE,
        )

        threading.Thread(target=test_llm_connection).start()
+30 −3
Original line number Diff line number Diff line
@@ -17,17 +17,44 @@ LLM_CLIENT: Optional[LLMInterface] = None


class OllamaLLM(LLMInterface):
    """LLM client for an Ollama-style chat endpoint with optional API-key
    header support.

    ``query_llm`` also accepts OpenAI-style ("choices") and Gemini-style
    ("candidates") response bodies, so the same client works behind
    compatible gateways.
    """

    def __init__(
        self,
        model: str,
        llm_url: str,
        timeout: float,
        api_key_header: str,
        api_key_value: str,
    ):
        self.model = model
        self.llm_url = llm_url
        self.timeout = timeout
        self.api_key_header = api_key_header
        self.api_key_value = api_key_value

    def query_llm(self, messages: List[LLMMessage]) -> str:
        """POST *messages* to the configured endpoint and return the reply
        text.

        Raises an HTTP error for non-2xx responses and ``ValueError`` when
        the body matches none of the known response formats.
        """
        # Build the request body in a single literal instead of creating
        # the dict and mutating it afterwards.
        payload = {
            "model": self.model,
            "stream": False,
            "messages": messages,
        }
        # Attach the auth header only when both its name and value are set.
        headers = (
            {self.api_key_header: self.api_key_value}
            if self.api_key_header and self.api_key_value
            else {}
        )
        response = post(
            self.llm_url, json=payload, timeout=self.timeout, headers=headers
        )
        response.raise_for_status()
        data = response.json()

        if "message" in data:
            return data["message"]["content"]

        if "choices" in data and len(data["choices"]) > 0:
            return data["choices"][0]["message"]["content"]

        if "candidates" in data and len(data["candidates"]) > 0:
            return data["candidates"][0]["content"]["parts"][0]["text"]

        raise ValueError("Unexpected response format from LLM.")


# TODO: Implement OpenAILLM child class and possibly other
+127 −19
Original line number Diff line number Diff line
from typing import Optional
import time

from common_lib.exceptions import RunningExerciseOperationException
from common_lib.utils import ensure_exists
from common_lib.logger import logger
from llm.context_prompts import EMAIL_SUGGESTION_SYSTEM, EMAIL_SUGGESTION_TASK
from llm.serializer.template_suggestor import TemplateSuggestor
from llm.prompter import Prompter
from llm.schema.validation import extract_email_suggestion_response
from running_exercise.models import (
    EmailThread,
    QuestionnaireAnswer,
    ActionLog,
    Email,
    LLMEmailSuggestion,
    LLMFreeFormEvaluation,
    LLMEmailEvaluation,
)
from exercise_definition.models import (
    FreeFormQuestion,
    QuestionTypes,
    LLMAssessment,
)
from exercise.models import EmailParticipant


def wrap_trainee_text(text: str) -> str:
@@ -24,10 +31,13 @@ def wrap_trainee_text(text: str) -> str:

class LLMOrchestrator:
    @staticmethod
    def suggest_email_response(
        thread: EmailThread, participant_id: Optional[int] = None
    def _generate_and_save_email_suggestion(
        thread: EmailThread,
        trigger_email: Email,
        participant_id: Optional[int] = None,
    ) -> str:
        response = Prompter.prompt_llm(
        start_time = time.time()
        response_text = Prompter.prompt_llm(
            EMAIL_SUGGESTION_SYSTEM,
            TemplateSuggestor(
                thread,
@@ -35,7 +45,81 @@ class LLMOrchestrator:
            ).serialize(),
            EMAIL_SUGGESTION_TASK,
        )
        # TODO: add saving the suggestion to DB
        logger.info(
            f"LLM email suggestion response for thread id '{thread.id}' "
            f"and participant id '{participant_id}' received. "
            f"Time taken: {time.time() - start_time:.2f} seconds."
        )

        data = extract_email_suggestion_response(response_text)
        suggested_participant_id = int(data["participant_id"])
        participant = ensure_exists(
            EmailParticipant.objects.filter(id=suggested_participant_id)
        )

        LLMEmailSuggestion.objects.update_or_create(
            trigger_email=trigger_email,
            email_participant=participant,
            defaults={"thread": thread, "response": response_text},
        )
        return response_text

    @staticmethod
    def suggest_email_response(
        thread: EmailThread, participant_id: Optional[int] = None
    ) -> str:
        """Return an LLM-suggested reply for *thread*, reusing a cached
        suggestion for the latest trainee email when one exists; otherwise
        generate, persist and return a fresh one."""
        trigger = (
            thread.emails.select_related("sender")
            .filter(sender__definition_address__isnull=True)
            .order_by("timestamp")
            .last()
        )

        # Placeholder response for threads without trainee emails.
        if not trigger:
            return (
                '{"participant_id": "-1", '
                '"reasoning": "No trainee emails found in the thread.", '
                '"new_suggestion": "No suggestion can be made."}'
            )

        cached_qs = LLMEmailSuggestion.objects.filter(trigger_email=trigger)
        if participant_id:
            cached_qs = cached_qs.filter(email_participant=participant_id)
        cached = cached_qs.first()

        if cached is not None:
            logger.info(
                f"Using cached LLM email suggestion for thread id '{thread.id}' "
                f"and participant id '{participant_id}'"
            )
            return cached.response

        return LLMOrchestrator._generate_and_save_email_suggestion(
            thread, trigger, participant_id
        )

    @staticmethod
    def _generate_and_save_free_form_assessment(
        answer: QuestionnaireAnswer, free_form_details: FreeFormQuestion
    ) -> str:
        """Prompt the LLM to assess a free-form answer, persist the result
        as an LLMFreeFormEvaluation, and return the raw response text."""
        assessment = free_form_details.llm_assessment
        started = time.time()
        response = Prompter.prompt_llm(
            assessment.persona,
            wrap_trainee_text(answer.answer[0]),
            assessment.assessment,
        )
        logger.info(
            f"LLM free-form assessment response for answer id '{answer.id}' received. "
            f"Time taken: {time.time() - started:.2f} seconds."
        )
        LLMFreeFormEvaluation.objects.update_or_create(
            questionnaire_answer=answer,
            assessment_definition=assessment,
            defaults={"response": response},
        )
        return response

    @staticmethod
@@ -49,18 +133,23 @@ class LLMOrchestrator:
                id=answer.question.details_id
            )
        )
        response = Prompter.prompt_llm(
            free_form_details.llm_assessment.persona,
            wrap_trainee_text(answer.answer[0]),
            free_form_details.llm_assessment.assessment,
        if not free_form_details.llm_assessment:
            return "This free-form question does not have LLM assessment configured."

        if (
            cached := LLMFreeFormEvaluation.objects.filter(
                questionnaire_answer=answer,
                assessment_definition=free_form_details.llm_assessment,
            ).first()
        ) is not None:
            logger.info(
                f"Using cached LLM free-form assessment for answer id '{answer.id}'"
            )
            return cached.response

        return LLMOrchestrator._generate_and_save_free_form_assessment(
            answer, free_form_details
        )
        # TODO: add saving the suggestion to DB
        # questionnaire_state_id
        # question_id
        # answer_id
        # assessment_id
        # response
        return response

    @staticmethod
    def assess_email(action_log: ActionLog, assessment_id: int) -> str:
@@ -72,13 +161,32 @@ class LLMOrchestrator:
        assessment: LLMAssessment = ensure_exists(
            LLMAssessment.objects.filter(id=assessment_id)
        )
        if (
            cached := LLMEmailEvaluation.objects.filter(
                action_log=action_log, assessment_definition=assessment
            ).first()
        ) is not None:
            logger.info(
                f"Using cached LLM email assessment for action log id '{action_log.id}' "
                f"and assessment id '{assessment.id}'"
            )
            return cached.response

        start_time = time.time()
        response = Prompter.prompt_llm(
            assessment.persona,
            wrap_trainee_text(email.content.raw),
            assessment.assessment,
        )
        # TODO: add saving the suggestion to DB
        # action_log_id
        # assessment_id
        # response
        logger.info(
            f"LLM email assessment response for action log id '{action_log.id}' "
            f"and assessment id '{assessment.id}' received. "
            f"Time taken: {time.time() - start_time:.2f} seconds."
        )

        LLMEmailEvaluation.objects.update_or_create(
            action_log=action_log,
            assessment_definition=assessment,
            defaults={"response": response},
        )
        return response
Loading