Commit 1d94e4a2 authored by Richard Glosner's avatar Richard Glosner
Browse files

Merge branch 'llm-cache-and-serialization' into 'main'

Llm cache and serialization

See merge request inject/backend!483
parents eefffb40 a8710a6e
Loading
Loading
Loading
Loading
+87 −6
Original line number Diff line number Diff line
@@ -18,7 +18,13 @@ from exercise.serializers import (
    ExerciseSerializer,
    UserSerializer,
)
from exercise_definition.models import Feature, FileInfo
from exercise_definition.models import (
    Feature,
    FileInfo,
    EmailAddress,
    FreeFormQuestion,
    LLMAssessment,
)
from exercise_definition.serializers import (
    MilestoneSerializer,
    ToolSerializer,
@@ -27,12 +33,21 @@ from exercise_definition.serializers import (
    QuestionnaireSerializer,
    LearningObjectiveSerializer,
    ChannelSerializer,
    LLMAssessmentSerializer,
)
from running_exercise.models import (
    EmailThread,
    LLMEmailSuggestion,
    LLMFreeFormEvaluation,
    LLMEmailEvaluation,
)
from running_exercise.models import EmailThread
from running_exercise.serializers import (
    ActionLogSerializer,
    EmailThreadSerializer,
    TeamQuestionnaireStateSerializer,
    LLMEmailSuggestionSerializer,
    LLMFreeFormEvaluationSerializer,
    LLMEmailEvaluationSerializer,
)


@@ -65,6 +80,7 @@ class TeamLogSerializer:
    team: Team
    team_log_path: str
    team_upload_path: str
    team_llm_eval_path: str
    context: Dict[str, object]

    def __init__(
@@ -81,8 +97,13 @@ class TeamLogSerializer:
        )
        os.makedirs(self.team_upload_path)

        self.team_llm_eval_path = os.path.join(
            self.team_log_path, "llm-evaluations"
        )
        os.makedirs(self.team_llm_eval_path)

    def serialize_all(
        self, with_emails: bool, with_questionnaires: bool
        self, with_emails: bool, with_questionnaires: bool, with_llm: bool
    ) -> None:
        self.serialize_action_log()
        self.serialize_milestone_states()
@@ -91,9 +112,14 @@ class TeamLogSerializer:

        if with_emails:
            self.serialize_emails()
            if with_llm:
                self.serialize_llm_email_suggestions()
                self.serialize_llm_email_evaluations()

        if with_questionnaires:
            self.serialize_questionnaires()
            if with_llm:
                self.serialize_llm_free_form_evaluations()

    def serialize_action_log(self) -> None:
        with open(
@@ -158,6 +184,38 @@ class TeamLogSerializer:
                self.team.team_questionnaire_states.all(),
            )

    def serialize_llm_email_suggestions(self) -> None:
        """Dump every LLM email suggestion triggered by this team's senders
        into ``email_suggestions.jsonl`` under the team's LLM-eval folder."""
        out_path = os.path.join(
            self.team_llm_eval_path, "email_suggestions.jsonl"
        )
        team_suggestions = LLMEmailSuggestion.objects.filter(
            trigger_email__sender__team_id=self.team.id
        )
        with open(out_path, "w") as out_file:
            _serialize_data(
                out_file, LLMEmailSuggestionSerializer, team_suggestions
            )

    def serialize_llm_email_evaluations(self) -> None:
        """Dump LLM email evaluations attached to this team's action logs
        into ``email_evaluations.jsonl`` under the team's LLM-eval folder."""
        out_path = os.path.join(
            self.team_llm_eval_path, "email_evaluations.jsonl"
        )
        team_evaluations = LLMEmailEvaluation.objects.filter(
            action_log__team_id=self.team.id
        )
        with open(out_path, "w") as out_file:
            _serialize_data(
                out_file, LLMEmailEvaluationSerializer, team_evaluations
            )

    def serialize_llm_free_form_evaluations(self) -> None:
        """Dump LLM free-form evaluations for this team's questionnaire
        answers into ``free_form_evaluations.jsonl``."""
        out_path = os.path.join(
            self.team_llm_eval_path, "free_form_evaluations.jsonl"
        )
        team_evaluations = LLMFreeFormEvaluation.objects.filter(
            questionnaire_answer__submission__team_questionnaire_state__team_id=self.team.id
        )
        with open(out_path, "w") as out_file:
            _serialize_data(
                out_file, LLMFreeFormEvaluationSerializer, team_evaluations
            )


class ExerciseLogSerializer:
    exercise: Exercise
@@ -191,7 +249,11 @@ class ExerciseLogSerializer:
        if with_emails:
            self.serialize_email_participants()

        self.serialize_teams(with_emails, with_questionnaires)
        with_llm = self.exercise.llm
        if with_llm:
            self.serialize_llm_assessments()

        self.serialize_teams(with_emails, with_questionnaires, with_llm)
        self.serialize_instructors()
        self.serialize_file_infos()
        self.copy_definition_files()
@@ -243,7 +305,7 @@ class ExerciseLogSerializer:
            )

    def serialize_teams(
        self, with_emails: bool, with_questionnaires: bool
        self, with_emails: bool, with_questionnaires: bool, with_llm: bool
    ) -> None:
        with open(os.path.join(self.log_path, "teams.jsonl"), "w") as teams:
            _serialize_data(
@@ -255,7 +317,7 @@ class ExerciseLogSerializer:

        for team in self.exercise.teams.all():
            serializer = TeamLogSerializer(team, self.log_path, self.context)
            serializer.serialize_all(with_emails, with_questionnaires)
            serializer.serialize_all(with_emails, with_questionnaires, with_llm)

    def serialize_instructors(self) -> None:
        with open(
@@ -323,6 +385,25 @@ class ExerciseLogSerializer:
                self.exercise.definition.channels.all(),
            )

    def serialize_llm_assessments(self) -> None:
        """Dump the LLM assessment definitions referenced by this exercise's
        email addresses and free-form questions to ``llm_assessments.jsonl``.

        Both referencing models are queried the same way, so collect their
        ``llm_assessment_id`` values in one loop before fetching the rows.
        """
        referenced_ids = set()
        for referencing_model in (EmailAddress, FreeFormQuestion):
            referenced_ids.update(
                referencing_model.objects.filter(
                    definition_id=self.exercise.definition_id,
                    llm_assessment__isnull=False,
                ).values_list("llm_assessment_id", flat=True)
            )

        assessments = LLMAssessment.objects.filter(id__in=referenced_ids)
        out_path = os.path.join(self.log_path, "llm_assessments.jsonl")
        with open(out_path, "w") as out_file:
            _serialize_data(out_file, LLMAssessmentSerializer, assessments)


class LogManager:
    log_path: str
+7 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ from exercise_definition.models import (
    MultipleChoiceQuestion,
    Repeatable,
    Overlay,
    LLMAssessment,
)


@@ -358,3 +359,9 @@ class LearningObjectiveSerializer(ModelSerializer):

    def get_total_score(self, obj):
        return obj.total_score()


class LLMAssessmentSerializer(ModelSerializer):
    """Serializes an LLMAssessment's id, persona and assessment prompt."""
    class Meta:
        model = LLMAssessment
        fields = ["id", "persona", "assessment"]
+11 −0
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@ import threading

from django.apps import AppConfig
from django.conf import settings
from django.db import connection


def test_llm_connection():
@@ -15,6 +16,8 @@ def test_llm_connection():
        logger.info("LLM connection test successful.")
    except Exception as e:
        logger.error(f"LLM connection test failed: {e}")
    finally:
        connection.close()


class LlmConfig(AppConfig):
@@ -34,11 +37,19 @@ class LlmConfig(AppConfig):
            )
            return

        if settings.LLM_URL == "":
            logger.info(
                "LLM URL not configured, LLM client will not be initialized."
            )
            return

        # TODO: implement switch for OpenAI child class and possibly other
        client_module.LLM_CLIENT = client_module.OllamaLLM(
            model=settings.LLM_MODEL,
            llm_url=settings.LLM_URL,
            timeout=settings.LLM_TIMEOUT,
            api_key_header=settings.LLM_API_KEY_HEADER,
            api_key_value=settings.LLM_API_KEY_VALUE,
        )

        threading.Thread(target=test_llm_connection).start()
+30 −3
Original line number Diff line number Diff line
@@ -17,17 +17,44 @@ LLM_CLIENT: Optional[LLMInterface] = None


class OllamaLLM(LLMInterface):
    """LLM client for an Ollama-style chat endpoint with optional API-key
    header support.

    ``query_llm`` also accepts OpenAI-style ("choices") and Gemini-style
    ("candidates") response bodies, so the same client works behind
    compatible gateways.
    """

    def __init__(
        self,
        model: str,
        llm_url: str,
        timeout: float,
        api_key_header: str,
        api_key_value: str,
    ):
        self.model = model
        self.llm_url = llm_url
        self.timeout = timeout
        self.api_key_header = api_key_header
        self.api_key_value = api_key_value

    def query_llm(self, messages: List[LLMMessage]) -> str:
        """POST *messages* to the configured endpoint and return the reply
        text.

        Raises an HTTP error for non-2xx responses and ``ValueError`` when
        the body matches none of the known response formats.
        """
        # Build the request body in a single literal instead of creating
        # the dict and mutating it afterwards.
        payload = {
            "model": self.model,
            "stream": False,
            "messages": messages,
        }
        # Attach the auth header only when both its name and value are set.
        headers = (
            {self.api_key_header: self.api_key_value}
            if self.api_key_header and self.api_key_value
            else {}
        )
        response = post(
            self.llm_url, json=payload, timeout=self.timeout, headers=headers
        )
        response.raise_for_status()
        data = response.json()

        if "message" in data:
            return data["message"]["content"]

        if "choices" in data and len(data["choices"]) > 0:
            return data["choices"][0]["message"]["content"]

        if "candidates" in data and len(data["candidates"]) > 0:
            return data["candidates"][0]["content"]["parts"][0]["text"]

        raise ValueError("Unexpected response format from LLM.")


# TODO: Implement OpenAILLM child class and possibly other
+127 −19
Original line number Diff line number Diff line
from typing import Optional
import time

from common_lib.exceptions import RunningExerciseOperationException
from common_lib.utils import ensure_exists
from common_lib.logger import logger
from llm.context_prompts import EMAIL_SUGGESTION_SYSTEM, EMAIL_SUGGESTION_TASK
from llm.serializer.template_suggestor import TemplateSuggestor
from llm.prompter import Prompter
from llm.schema.validation import extract_email_suggestion_response
from running_exercise.models import (
    EmailThread,
    QuestionnaireAnswer,
    ActionLog,
    Email,
    LLMEmailSuggestion,
    LLMFreeFormEvaluation,
    LLMEmailEvaluation,
)
from exercise_definition.models import (
    FreeFormQuestion,
    QuestionTypes,
    LLMAssessment,
)
from exercise.models import EmailParticipant


def wrap_trainee_text(text: str) -> str:
@@ -24,10 +31,13 @@ def wrap_trainee_text(text: str) -> str:

class LLMOrchestrator:
    @staticmethod
    def suggest_email_response(
        thread: EmailThread, participant_id: Optional[int] = None
    def _generate_and_save_email_suggestion(
        thread: EmailThread,
        trigger_email: Email,
        participant_id: Optional[int] = None,
    ) -> str:
        response = Prompter.prompt_llm(
        start_time = time.time()
        response_text = Prompter.prompt_llm(
            EMAIL_SUGGESTION_SYSTEM,
            TemplateSuggestor(
                thread,
@@ -35,7 +45,81 @@ class LLMOrchestrator:
            ).serialize(),
            EMAIL_SUGGESTION_TASK,
        )
        # TODO: add saving the suggestion to DB
        logger.info(
            f"LLM email suggestion response for thread id '{thread.id}' "
            f"and participant id '{participant_id}' received. "
            f"Time taken: {time.time() - start_time:.2f} seconds."
        )

        data = extract_email_suggestion_response(response_text)
        suggested_participant_id = int(data["participant_id"])
        participant = ensure_exists(
            EmailParticipant.objects.filter(id=suggested_participant_id)
        )

        LLMEmailSuggestion.objects.update_or_create(
            trigger_email=trigger_email,
            email_participant=participant,
            defaults={"thread": thread, "response": response_text},
        )
        return response_text

    @staticmethod
    def suggest_email_response(
        thread: EmailThread, participant_id: Optional[int] = None
    ) -> str:
        """Return an LLM-suggested reply for *thread*, reusing a cached
        suggestion for the latest trainee email when one exists; otherwise
        generate, persist and return a fresh one."""
        trigger = (
            thread.emails.select_related("sender")
            .filter(sender__definition_address__isnull=True)
            .order_by("timestamp")
            .last()
        )

        # Placeholder response for threads without trainee emails.
        if not trigger:
            return (
                '{"participant_id": "-1", '
                '"reasoning": "No trainee emails found in the thread.", '
                '"new_suggestion": "No suggestion can be made."}'
            )

        cached_qs = LLMEmailSuggestion.objects.filter(trigger_email=trigger)
        if participant_id:
            cached_qs = cached_qs.filter(email_participant=participant_id)
        cached = cached_qs.first()

        if cached is not None:
            logger.info(
                f"Using cached LLM email suggestion for thread id '{thread.id}' "
                f"and participant id '{participant_id}'"
            )
            return cached.response

        return LLMOrchestrator._generate_and_save_email_suggestion(
            thread, trigger, participant_id
        )

    @staticmethod
    def _generate_and_save_free_form_assessment(
        answer: QuestionnaireAnswer, free_form_details: FreeFormQuestion
    ) -> str:
        """Prompt the LLM to assess a free-form answer, persist the result
        as an LLMFreeFormEvaluation, and return the raw response text."""
        assessment = free_form_details.llm_assessment
        started = time.time()
        response = Prompter.prompt_llm(
            assessment.persona,
            wrap_trainee_text(answer.answer[0]),
            assessment.assessment,
        )
        logger.info(
            f"LLM free-form assessment response for answer id '{answer.id}' received. "
            f"Time taken: {time.time() - started:.2f} seconds."
        )
        LLMFreeFormEvaluation.objects.update_or_create(
            questionnaire_answer=answer,
            assessment_definition=assessment,
            defaults={"response": response},
        )
        return response

    @staticmethod
@@ -49,18 +133,23 @@ class LLMOrchestrator:
                id=answer.question.details_id
            )
        )
        response = Prompter.prompt_llm(
            free_form_details.llm_assessment.persona,
            wrap_trainee_text(answer.answer[0]),
            free_form_details.llm_assessment.assessment,
        if not free_form_details.llm_assessment:
            return "This free-form question does not have LLM assessment configured."

        if (
            cached := LLMFreeFormEvaluation.objects.filter(
                questionnaire_answer=answer,
                assessment_definition=free_form_details.llm_assessment,
            ).first()
        ) is not None:
            logger.info(
                f"Using cached LLM free-form assessment for answer id '{answer.id}'"
            )
            return cached.response

        return LLMOrchestrator._generate_and_save_free_form_assessment(
            answer, free_form_details
        )
        # TODO: add saving the suggestion to DB
        # questionnaire_state_id
        # question_id
        # answer_id
        # assessment_id
        # response
        return response

    @staticmethod
    def assess_email(action_log: ActionLog, assessment_id: int) -> str:
@@ -72,13 +161,32 @@ class LLMOrchestrator:
        assessment: LLMAssessment = ensure_exists(
            LLMAssessment.objects.filter(id=assessment_id)
        )
        if (
            cached := LLMEmailEvaluation.objects.filter(
                action_log=action_log, assessment_definition=assessment
            ).first()
        ) is not None:
            logger.info(
                f"Using cached LLM email assessment for action log id '{action_log.id}' "
                f"and assessment id '{assessment.id}'"
            )
            return cached.response

        start_time = time.time()
        response = Prompter.prompt_llm(
            assessment.persona,
            wrap_trainee_text(email.content.raw),
            assessment.assessment,
        )
        # TODO: add saving the suggestion to DB
        # action_log_id
        # assessment_id
        # response
        logger.info(
            f"LLM email assessment response for action log id '{action_log.id}' "
            f"and assessment id '{assessment.id}' received. "
            f"Time taken: {time.time() - start_time:.2f} seconds."
        )

        LLMEmailEvaluation.objects.update_or_create(
            action_log=action_log,
            assessment_definition=assessment,
            defaults={"response": response},
        )
        return response
Loading