Fast leaderboards (#954)

* Fast leaderboards

* oop

* CUT index

* Update leaderboard_update_job.rb

* Update db/migrate/20260215223000_add_leaderboard_heartbeat_index.rb

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Update app/models/concerns/heartbeatable.rb

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
Mahad Kalam 2026-02-16 00:05:14 +00:00 committed by GitHub
parent 43060a0d76
commit ac32690ee9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 72 additions and 25 deletions

View file

@ -2,6 +2,7 @@ class LeaderboardUpdateJob < ApplicationJob
queue_as :latency_10s
include GoodJob::ActiveJobExtensions::Concurrency
ENTRY_INSERT_BATCH_SIZE = ENV.fetch("LEADERBOARD_ENTRY_INSERT_BATCH_SIZE", 800).to_i
# Limits concurrency to 1 job per period/date combination
good_job_control_concurrency_with(
@ -18,6 +19,7 @@ class LeaderboardUpdateJob < ApplicationJob
private
def build_leaderboard(date, period, force_update = false)
build_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
board = ::Leaderboard.find_or_create_by!(
start_date: date,
period_type: period,
@ -29,20 +31,24 @@ class LeaderboardUpdateJob < ApplicationJob
Rails.logger.info "Building leaderboard for #{period} on #{date}"
range = LeaderboardDateRange.calculate(date, period)
now = Time.current
entries_count = 0
eligible_user_ids = User.where.not(github_uid: nil)
.where.not(trust_level: User.trust_levels[:red])
.select(:id)
ActiveRecord::Base.transaction do
# Build the base heartbeat query
heartbeat_query = Heartbeat.where(time: range)
.with_valid_timestamps
.joins(:user)
.coding_only
.where.not(users: { github_uid: nil })
.where.not(users: { trust_level: User.trust_levels[:red] })
.where(user_id: eligible_user_ids)
data = heartbeat_query.group(:user_id).duration_seconds
.filter { |_, seconds| seconds > 60 }
data = heartbeat_query.group(:user_id)
.duration_seconds(minimum_seconds: 60)
streaks = Heartbeat.daily_streaks_for_users(data.keys)
streaks = data.keys.any? ? Heartbeat.daily_streaks_for_users(data.keys) : {}
entries = data.map do |user_id, seconds|
{
@ -50,27 +56,26 @@ class LeaderboardUpdateJob < ApplicationJob
user_id: user_id,
total_seconds: seconds,
streak_count: streaks[user_id] || 0,
created_at: Time.current,
updated_at: Time.current
created_at: now,
updated_at: now
}
end
LeaderboardEntry.upsert_all(entries, unique_by: %i[leaderboard_id user_id]) if entries.any?
if data.keys.any?
board.entries.where.not(user_id: data.keys).delete_all
else
board.entries.delete_all
board.entries.delete_all
entries.each_slice(ENTRY_INSERT_BATCH_SIZE) do |entry_batch|
LeaderboardEntry.insert_all(entry_batch) if entry_batch.any?
end
board.update!(finished_generating_at: Time.current)
board.update!(finished_generating_at: now)
entries_count = entries.length
end
# Cache the board
cache_key = LeaderboardCache.global_key(period, date)
LeaderboardCache.write(cache_key, board)
Rails.logger.debug "Persisted leaderboard for #{period} with #{board.entries.count} entries"
build_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - build_started_at
Rails.logger.info("Persisted leaderboard for #{period} with #{entries_count} entries in #{build_elapsed.round(2)}s")
board
end

View file

@ -123,10 +123,15 @@ module Heartbeatable
)
# Then aggregate the results
current_time = Time.current
current_dates_by_timezone = {}
timezone_validity_cache = {}
daily_durations = connection.select_all(
"SELECT user_id, user_timezone, day_group, COALESCE(SUM(diff), 0)::integer as duration
FROM (#{raw_durations.to_sql}) AS diffs
GROUP BY user_id, user_timezone, day_group"
GROUP BY user_id, user_timezone, day_group
ORDER BY user_id, day_group DESC"
).group_by { |row| row["user_id"] }
.transform_values do |rows|
timezone = rows.first["user_timezone"]
@ -135,20 +140,27 @@ module Heartbeatable
Rails.logger.warn "nil tz, going to utc."
timezone = "UTC"
else
begin
TZInfo::Timezone.get(timezone)
rescue TZInfo::InvalidTimezoneIdentifier, ArgumentError
timezone_is_valid = timezone_validity_cache.fetch(timezone) do
timezone_validity_cache[timezone] = begin
TZInfo::Timezone.get(timezone)
true
rescue TZInfo::InvalidTimezoneIdentifier, ArgumentError
false
end
end
unless timezone_is_valid
Rails.logger.warn "Invalid timezone for streak calculation: #{timezone}. Defaulting to UTC."
timezone = "UTC"
end
end
current_date = Time.current.in_time_zone(timezone).to_date
current_date = current_dates_by_timezone[timezone] ||= current_time.in_time_zone(timezone).to_date
{
current_date: current_date,
days: rows.map do |row|
[ row["day_group"].to_date, row["duration"].to_i ]
end.sort_by { |date, _| date }.reverse
end
}
end
@ -207,7 +219,7 @@ module Heartbeatable
.map { |date, duration| [ date.to_date, duration ] }
end
def duration_seconds(scope = all)
def duration_seconds(scope = all, minimum_seconds: nil)
scope = scope.with_valid_timestamps
timeout = heartbeat_timeout_duration.to_i
@ -229,10 +241,16 @@ module Heartbeatable
.where.not(time: nil)
.unscope(:group)
having_clause = if minimum_seconds
" HAVING COALESCE(SUM(diff), 0)::integer > #{ActiveRecord::Base.connection.quote(minimum_seconds.to_i)}"
else
""
end
connection.select_all(
"SELECT grouped_time, COALESCE(SUM(diff), 0)::integer as duration
FROM (#{capped_diffs.to_sql}) AS diffs
GROUP BY grouped_time"
GROUP BY grouped_time#{having_clause}"
).each_with_object({}) do |row, hash|
hash[row["grouped_time"]] = row["duration"].to_i
end

View file

@ -0,0 +1,11 @@
# Adds a partial index on heartbeats (time, user_id), limited to rows where
# deleted_at IS NULL and category = 'coding'.
class AddLeaderboardHeartbeatIndex < ActiveRecord::Migration[8.1]
  # The index is built with algorithm: :concurrently, which PostgreSQL does not
  # allow inside a transaction block, so the migration's DDL transaction is
  # switched off.
  disable_ddl_transaction!

  def change
    index_options = {
      name: "idx_heartbeats_coding_time_user",
      where: "(deleted_at IS NULL AND category = 'coding')",
      algorithm: :concurrently
    }

    add_index(:heartbeats, %i[time user_id], **index_options)
  end
end

View file

@ -0,0 +1,11 @@
# Adds a partial index on heartbeats (user_id, time), limited to rows where
# deleted_at IS NULL and category = 'coding'.
class AddCodingUserTimeIndexForHeartbeats < ActiveRecord::Migration[8.1]
  # The index is built with algorithm: :concurrently, which PostgreSQL does not
  # allow inside a transaction block, so the migration's DDL transaction is
  # switched off.
  disable_ddl_transaction!

  def change
    index_options = {
      name: "idx_heartbeats_coding_user_time",
      where: "(deleted_at IS NULL AND category = 'coding')",
      algorithm: :concurrently
    }

    add_index(:heartbeats, %i[user_id time], **index_options)
  end
end

4
db/schema.rb generated
View file

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.1].define(version: 2026_02_15_220822) do
ActiveRecord::Schema[8.1].define(version: 2026_02_15_234652) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_catalog.plpgsql"
enable_extension "pg_stat_statements"
@ -308,6 +308,7 @@ ActiveRecord::Schema[8.1].define(version: 2026_02_15_220822) do
t.index ["project_id"], name: "index_heartbeats_on_project_id"
t.index ["raw_heartbeat_upload_id"], name: "index_heartbeats_on_raw_heartbeat_upload_id"
t.index ["source_type", "time", "user_id", "project"], name: "index_heartbeats_on_source_type_time_user_project"
t.index ["time", "user_id"], name: "idx_heartbeats_coding_time_user", where: "((deleted_at IS NULL) AND ((category)::text = 'coding'::text))"
t.index ["user_agent_id"], name: "index_heartbeats_on_user_agent_id"
t.index ["user_id", "category", "time"], name: "idx_heartbeats_user_category_time", where: "(deleted_at IS NULL)"
t.index ["user_id", "editor", "time"], name: "idx_heartbeats_user_editor_time", where: "(deleted_at IS NULL)"
@ -321,6 +322,7 @@ ActiveRecord::Schema[8.1].define(version: 2026_02_15_220822) do
t.index ["user_id", "time", "language_id"], name: "idx_heartbeats_user_time_language_id", where: "(deleted_at IS NULL)"
t.index ["user_id", "time", "project"], name: "idx_heartbeats_user_time_project_stats", where: "(deleted_at IS NULL)"
t.index ["user_id", "time", "project_id"], name: "idx_heartbeats_user_time_project_id", where: "(deleted_at IS NULL)"
t.index ["user_id", "time"], name: "idx_heartbeats_coding_user_time", where: "((deleted_at IS NULL) AND ((category)::text = 'coding'::text))"
t.index ["user_id", "time"], name: "idx_heartbeats_user_time_active", where: "(deleted_at IS NULL)"
t.index ["user_id"], name: "index_heartbeats_on_user_id"
end