Add Prometheus histograms for some parts of the code (#114)

* Add support for exporting code block times as histograms * Adjust perf metrics * Add perf metrics for message processing and title gen * Add metric for opening app home views * Performance metrics are now defined individually * Remove performance metric from slack.py * New metrics for ticket title gen and app home rendering * Prevent .labels({}) from happening * Make import consistent
2026-04-19 16:28:16 +00:00 · 2025-11-17 11:23:09 +00:00 · 2025-11-17 11:23:09 +00:00 · 527fafe2b4
commit 527fafe2b4
parent 2cd5ad4b09
4 changed files with 34 additions and 5 deletions
--- a/nephthys/events/app_home_opened.py
+++ b/nephthys/events/app_home_opened.py
@ -2,6 +2,7 @@ import logging
 import traceback
 from typing import Any

+from prometheus_client import Histogram
 from slack_sdk.web.async_client import AsyncWebClient

 from nephthys.utils.env import env
@ -21,6 +22,13 @@ async def on_app_home_opened(event: dict[str, Any], client: AsyncWebClient):
    await open_app_home("default", client, user_id)


+APP_HOME_RENDER_DURATION = Histogram(
+    "nephthys_app_home_render_duration_seconds",
+    "How long it takes to load the app home screen",
+    ["home_type"],
+)
+
+
 async def open_app_home(home_type: str, client: AsyncWebClient, user_id: str):
    try:
        await client.views_publish(view=get_loading_view(), user_id=user_id)
@ -37,7 +45,11 @@ async def open_app_home(home_type: str, client: AsyncWebClient, user_id: str):
            view = get_unknown_user_view(name)
        else:
            logging.info(f"Opening {home_type} for {user_id}")
-            async with perf_timer(f"Rendering app home (type={home_type})"):
+            async with perf_timer(
+                f"Rendering app home (type={home_type})",
+                APP_HOME_RENDER_DURATION,
+                home_type=home_type,
+            ):
                match home_type:
                    case "default" | "dashboard":
                        view = await get_helper_view(user)
--- a/nephthys/events/message.py
+++ b/nephthys/events/message.py
@ -4,6 +4,7 @@ from typing import Any
 from typing import Dict

 from openai import OpenAIError
+from prometheus_client import Histogram
 from slack_sdk.errors import SlackApiError
 from slack_sdk.web.async_client import AsyncWebClient

@ -19,6 +20,11 @@ from prisma.models import User
 # Message subtypes that should be handled by on_message (messages with no subtype are always handled)
 ALLOWED_SUBTYPES = ["file_share", "me_message", "thread_broadcast"]

+TICKET_TITLE_GENERATION_DURATION = Histogram(
+    "nephthys_ticket_title_generation_duration_seconds",
+    "How long it takes to generate a ticket title using AI",
+)
+

 async def handle_message_sent_to_channel(event: Dict[str, Any], client: AsyncWebClient):
    """Tell a non-helper off because they sent a thread message with the 'send to channel' box checked."""
@ -185,7 +191,9 @@ async def handle_new_question(
            event, client, text=user_facing_message_text, ticket_url=ticket_url
        )

-    async with perf_timer("AI ticket title generation"):
+    async with perf_timer(
+        "AI ticket title generation", TICKET_TITLE_GENERATION_DURATION
+    ):
        title = await generate_ticket_title(text)

    user_facing_message_ts = user_facing_message["ts"]
--- a/nephthys/utils/performance.py
+++ b/nephthys/utils/performance.py
@ -3,12 +3,21 @@ import logging
 from contextlib import asynccontextmanager
 from time import perf_counter

+from prometheus_client import Histogram
+

@asynccontextmanager
-async def perf_timer(name: str):
+async def perf_timer(
+    name: str, histogram_metric: Histogram | None = None, **metric_labels
+):
    start_time = perf_counter()
    yield
    duration = perf_counter() - start_time
    # Get the name of the function that called the `perf_timer()`
    function_name = inspect.stack()[2].function
    logging.debug(f"{function_name}: {name} took {duration:.3f}s")
+    if histogram_metric:
+        if metric_labels:
+            histogram_metric.labels(**metric_labels).observe(duration)
+        else:
+            histogram_metric.observe(duration)
--- a/nephthys/utils/slack.py
+++ b/nephthys/utils/slack.py
@ -33,8 +33,8 @@ async def handle_message(event: Dict[str, Any], client: AsyncWebClient):
        and event["message"].get("subtype") == "tombstone"
    ) or event.get("subtype") == "message_deleted"

-    async with perf_timer("Processing message event (total time)"):
-        if event["channel"] == env.slack_help_channel:
+    if event["channel"] == env.slack_help_channel:
+        async with perf_timer("Processing message event (total time)"):
            if is_message_deletion:
                await on_message_deletion(event, client)
            else: