rework lb generation (#467)

This commit is contained in:
Echo 2025-08-08 22:10:33 -04:00 committed by GitHub
parent 2c8cfb549d
commit e30e9862a7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 297 additions and 217 deletions

View file

@ -78,35 +78,31 @@ class LeaderboardsController < ApplicationController
def generate_regional_leaderboard
return nil unless current_user&.timezone_utc_offset
LeaderboardGenerator.generate_timezone_offset_leaderboard(
start_date, current_user.timezone_utc_offset, @period_type
LeaderboardService.get(
period: @period_type,
date: start_date,
offset: current_user.timezone_utc_offset
)
end
def generate_timezone_leaderboard
return nil unless current_user&.timezone
LeaderboardGenerator.generate_timezone_leaderboard(
start_date, current_user.timezone, @period_type
offset = current_user.timezone_utc_offset
return nil unless offset
LeaderboardService.get(
period: @period_type,
date: start_date,
offset: offset
)
end
def find_or_generate_global_leaderboard
cache_key = "leaderboard_#{@period_type}_#{start_date}"
leaderboard = Rails.cache.fetch(cache_key, expires_in: 1.minute) do
Leaderboard.where.not(finished_generating_at: nil)
.find_by(start_date: start_date, period_type: @period_type, deleted_at: nil)
end
Rails.cache.delete(cache_key) if leaderboard.nil?
if leaderboard.nil?
LeaderboardUpdateJob.perform_later(@period_type)
nil
else
leaderboard
end
LeaderboardService.get(
period: @period_type,
date: start_date
)
end
def start_date

View file

@ -89,31 +89,23 @@ class StaticPagesController < ApplicationController
if use_timezone_leaderboard && current_user&.timezone_utc_offset
# we now doing it by default wooo
@leaderboard = LeaderboardGenerator.generate_timezone_offset_leaderboard(
Date.current, current_user.timezone_utc_offset, :daily
@leaderboard = LeaderboardService.get(
period: :daily,
date: Date.current,
offset: current_user.timezone_utc_offset
)
if @leaderboard&.entries&.empty?
Rails.logger.warn "[MiniLeaderboard] Regional leaderboard empty for offset #{current_user.timezone_utc_offset}"
@leaderboard = nil
end
else
# Use global leaderboard
@leaderboard = Leaderboard.where.associated(:entries)
.where(start_date: Date.current)
.where(deleted_at: nil)
.where(period_type: :daily)
.distinct
.first
end
if @leaderboard.nil? || @leaderboard.entries.empty?
Rails.logger.info "[MiniLeaderboard] Falling back to global leaderboard"
@leaderboard = Leaderboard.where.associated(:entries)
.where(start_date: Date.current)
.where(deleted_at: nil)
.where(period_type: :daily)
.distinct
.first
if @leaderboard.nil?
@leaderboard = LeaderboardService.get(
period: :daily,
date: Date.current
)
end
@active_projects = Cache::ActiveProjectsJob.perform_now

View file

@ -1,79 +1,111 @@
class LeaderboardUpdateJob < ApplicationJob
queue_as :latency_10s
BATCH_SIZE = 1000
include GoodJob::ActiveJobExtensions::Concurrency
# Limits concurrency to 1 job per date
# Limits concurrency to 1 job per period/date combination
good_job_control_concurrency_with(
key: -> { "#{arguments[0] || 'daily'}_#{arguments[1] || Date.current.to_s}" },
key: -> { "leaderboard_#{arguments[0] || 'daily'}_#{arguments[1] || Date.current.to_s}" },
total: 1,
drop: true
)
def perform(period_type = :daily, date = Date.current)
parsed_date = date.is_a?(Date) ? date : Date.parse(date.to_s)
def perform(period = :daily, date = Date.current)
date = LeaderboardDateRange.normalize_date(date, period)
parsed_date = parsed_date.beginning_of_week if period_type == :weekly
Rails.logger.info "Starting leaderboard generation for #{period} on #{date}"
leaderboard = Leaderboard.create!(
start_date: parsed_date,
period_type: period_type
)
board = build_global(date, period)
build_timezones(date, period)
date_range = case period_type
when :weekly
(parsed_date.beginning_of_day...(parsed_date + 7.days).beginning_of_day)
when :last_7_days
((parsed_date - 6.days).beginning_of_day...parsed_date.end_of_day)
else
parsed_date.all_day
Rails.logger.info "Completed leaderboard generation for #{period} on #{date}"
board
rescue => e
Rails.logger.error "Failed to update leaderboard: #{e.message}"
Honeybadger.notify(e, context: { period: period, date: date })
raise
end
private
def build_global(date, period)
range = LeaderboardDateRange.calculate(date, period)
board = ::Leaderboard.find_or_create_by!(
start_date: date,
period_type: period,
timezone_offset: nil
) do |lb|
lb.finished_generating_at = nil
end
Rails.logger.info "Starting leaderboard generation for #{period_type} on #{parsed_date}"
return board if board.finished_generating_at.present?
ActiveRecord::Base.transaction do
entries_data = Heartbeat.where(time: date_range)
.coding_only
.with_valid_timestamps
.joins(:user)
.where.not(users: { github_uid: nil })
.group(:user_id)
.duration_seconds
board.entries.delete_all
data = Heartbeat.where(time: range)
.with_valid_timestamps
.joins(:user)
.coding_only
.where.not(users: { github_uid: nil })
.group(:user_id)
.duration_seconds
entries_data = entries_data.filter { |_, total_seconds| total_seconds > 60 }
data = data.filter { |_, seconds| seconds > 60 }
convicted_users = User.where(trust_level: User.trust_levels[:red]).pluck(:id)
entries_data = entries_data.reject { |user_id, _| convicted_users.include?(user_id) }
convicted = User.where(trust_level: User.trust_levels[:red]).pluck(:id)
data = data.reject { |user_id, _| convicted.include?(user_id) }
streaks = Heartbeat.daily_streaks_for_users(entries_data.keys)
streaks = Heartbeat.daily_streaks_for_users(data.keys)
entries_data = entries_data.map do |user_id, total_seconds|
entries = data.map do |user_id, seconds|
{
leaderboard_id: leaderboard.id,
leaderboard_id: board.id,
user_id: user_id,
total_seconds: total_seconds,
streak_count: streaks[user_id] || 0
total_seconds: seconds,
streak_count: streaks[user_id] || 0,
created_at: Time.current,
updated_at: Time.current
}
end
LeaderboardEntry.insert_all!(entries_data) if entries_data.any?
LeaderboardEntry.insert_all!(entries) if entries.any?
board.update!(finished_generating_at: Time.current)
end
leaderboard.finished_generating_at = Time.current
leaderboard.save!
key = LeaderboardCache.global_key(period, date)
LeaderboardCache.write(key, board)
Leaderboard.where.not(id: leaderboard.id)
.where(start_date: parsed_date, period_type: period_type)
.where(deleted_at: nil)
.update_all(deleted_at: Time.current)
board
end
leaderboard
rescue => e
Rails.logger.error "Failed to update current leaderboard: #{e.message}"
raise
rescue Date::Error
raise ArgumentError, "Invalid date format provided"
def build_timezones(date, period)
range = LeaderboardDateRange.calculate(date, period)
offsets = User.joins(:heartbeats)
.where(heartbeats: { time: range })
.where.not(timezone_utc_offset: nil)
.distinct
.pluck(:timezone_utc_offset)
.compact
Rails.logger.info "Generating timezone leaderboards for #{offsets.size} active UTC offsets"
offsets.each do |offset|
build_timezone(date, period, offset)
end
end
def build_timezone(date, period, offset)
key = LeaderboardCache.timezone_key(offset, date, period)
data = LeaderboardCache.fetch(key) do
users = User.users_in_timezone_offset(offset).not_convicted
LeaderboardBuilder.build_for_users(users, date, "UTC#{offset >= 0 ? '+' : ''}#{offset}", period)
end
Rails.logger.debug "Cached timezone leaderboard for UTC#{offset >= 0 ? '+' : ''}#{offset} with #{data&.entries&.size || 0} entries"
data
end
end

View file

@ -0,0 +1,41 @@
# Background job that builds the leaderboard for a single UTC offset and
# stores the result through LeaderboardCache.
class TimezoneLeaderboardJob < ApplicationJob
  queue_as :latency_5m

  include GoodJob::ActiveJobExtensions::Concurrency

  # Limits concurrency to 1 job per timezone/period/date combination.
  # The key is built from the raw job arguments, in the order they were passed.
  good_job_control_concurrency_with(
    key: -> { "timezone_#{arguments[0]}_#{arguments[1]}_#{arguments[2]}" },
    total: 1,
    drop: true
  )

  # Builds and caches the leaderboard for one UTC offset.
  #
  # @param period [Symbol] leaderboard period (defaults to :daily)
  # @param date [Date, String] reference date; normalized via LeaderboardDateRange
  # @param offset [Integer] UTC offset of the users to include
  # @return the board produced by LeaderboardBuilder
  # @raise re-raises any StandardError after logging it and notifying Honeybadger
  def perform(period = :daily, date = Date.current, offset = 0)
    date = LeaderboardDateRange.normalize_date(date, period)
    label = utc_label(offset)

    Rails.logger.info "Generating timezone leaderboard for #{label} (#{period}, #{date})"

    cache_key = LeaderboardCache.timezone_key(offset, date, period)
    board = build_timezone(date, period, offset)

    # Expiry is whatever LeaderboardCache.write applies.
    LeaderboardCache.write(cache_key, board)

    entry_count = board&.entries&.size || 0
    Rails.logger.info "Cached timezone leaderboard for #{label} with #{entry_count} entries"

    board
  rescue => e
    # The error line intentionally keeps the unsigned "UTC<offset>" form.
    Rails.logger.error "Failed to generate timezone leaderboard for UTC#{offset}: #{e.message}"
    Honeybadger.notify(e, context: { period: period, date: date, offset: offset })
    raise
  end

  private

  # Builds the board for every non-convicted user in the given UTC offset.
  def build_timezone(date, period, offset)
    members = User.users_in_timezone_offset(offset).not_convicted
    LeaderboardBuilder.build_for_users(members, date, utc_label(offset), period)
  end

  # Human-readable offset label, e.g. "UTC+2" or "UTC-5".
  def utc_label(offset)
    sign = offset >= 0 ? "+" : ""
    "UTC#{sign}#{offset}"
  end
end

View file

@ -1,20 +0,0 @@
# Pre-generates the daily timezone-offset leaderboards for a fixed list of
# UTC offsets so that later reads find them already cached.
class WarmMiniLeaderboardCacheJob < ApplicationJob
  queue_as :default

  # Warms each offset in turn; a failure on one offset is logged and does not
  # stop the remaining offsets from being processed.
  def perform
    [ -8, -7, -6, -5, -4, -3, 0, 1, 2, 8, 9, 10, 11, 12 ].each { |offset| warm(offset) }
  end

  private

  # Generates today's daily leaderboard for one offset, logging the outcome.
  def warm(offset)
    label = "UTC#{offset >= 0 ? '+' : ''}#{offset}"
    LeaderboardGenerator.generate_timezone_offset_leaderboard(Date.current, offset, :daily)
    Rails.logger.info "Warmed mini leaderboard cache for #{label}"
  rescue => e
    Rails.logger.error "Failed to warm cache for #{label}: #{e.message}"
  end
end

View file

@ -0,0 +1,49 @@
# Builds an unsaved, in-memory Leaderboard for an arbitrary set of users.
module LeaderboardBuilder
  module_function

  # Assembles a leaderboard for the given users over the window that
  # LeaderboardDateRange.calculate returns for date/period.
  #
  # @param users collection responding to pluck(:id) and index_by
  # @param date [Date, nil] reference date; Date.current when blank
  # @param scope [String] label exposed through the board's #scope_name
  # @param period [Symbol] leaderboard period
  # @return [Leaderboard] unsaved board; when at least one user is given it
  #   also answers #entries (sorted by total_seconds, highest first) and
  #   #scope_name via singleton methods
  def build_for_users(users, date, scope, period)
    date = Date.current if date.blank?

    board = ::Leaderboard.new(start_date: date, period_type: period, finished_generating_at: Time.current)

    user_ids = users.pluck(:id)
    return board if user_ids.empty?

    users_by_id = users.index_by(&:id)
    window = LeaderboardDateRange.calculate(date, period)

    # Only users with more than 60 seconds in the window make the board.
    seconds_by_user = Heartbeat.where(user_id: user_ids, time: window)
                               .coding_only
                               .with_valid_timestamps
                               .joins(:user)
                               .where.not(users: { github_uid: nil })
                               .group(:user_id)
                               .duration_seconds
                               .select { |_user_id, seconds| seconds > 60 }

    # Streaks are only looked up for users that actually appear on the board.
    ranked_ids = seconds_by_user.keys
    streaks =
      if ranked_ids.any?
        Heartbeat.daily_streaks_for_users(ranked_ids, start_date: 30.days.ago)
      else
        {}
      end

    unsorted = seconds_by_user.map do |user_id, seconds|
      LeaderboardEntry.new(
        leaderboard: board,
        user_id: user_id,
        total_seconds: seconds,
        streak_count: streaks[user_id] || 0
      ).tap { |entry| entry.user = users_by_id[user_id] }
    end
    ranked = unsorted.sort_by(&:total_seconds).reverse

    # The board is never persisted, so entries and scope are attached directly.
    board.define_singleton_method(:entries) { ranked }
    board.define_singleton_method(:scope_name) { scope }
    board
  end
end

View file

@ -0,0 +1,25 @@
# Thin wrapper around Rails.cache that centralizes the leaderboard cache keys
# and the expiry applied to cached boards.
module LeaderboardCache
# Expiry passed to every write/fetch made through this module.
CACHE_EXPIRATION = 10.minutes
module_function
# Cache key for the global leaderboard of the given period and date.
def global_key(period, date)
"leaderboard_#{period}_#{date}"
end
# Cache key for a timezone leaderboard.
# Note the argument order (offset, date, period) differs from global_key.
def timezone_key(offset, date, period)
"tz_leaderboard_#{offset}_#{date}_#{period}"
end
# Stores data under key with the shared expiry.
def write(key, data)
Rails.cache.write(key, data, expires_in: CACHE_EXPIRATION)
end
# Returns whatever Rails.cache.read yields for key.
def read(key)
Rails.cache.read(key)
end
# Delegates to Rails.cache.fetch with the shared expiry, forwarding the block.
def fetch(key, &block)
Rails.cache.fetch(key, expires_in: CACHE_EXPIRATION, &block)
end
end

View file

@ -0,0 +1,21 @@
# Date helpers shared by the leaderboard jobs, builder and service: one to
# normalize the reference date, one to turn it into a time window.
module LeaderboardDateRange
  module_function

  # Returns the time window covered by a leaderboard.
  #
  # The period is matched by name, so both :weekly and "weekly" select the
  # weekly window; a String period no longer falls through to the daily one.
  #
  # @param date [Date] reference date (expected to be already normalized)
  # @param period [Symbol, String] :weekly, :last_7_days, or anything else for daily
  # @return [Range] weekly: 7 days starting at date (end exclusive);
  #   last_7_days: from 6 days before date up to date's end of day (end exclusive);
  #   otherwise date.all_day
  def calculate(date, period)
    case period.to_s
    when "weekly"
      (date.beginning_of_day...(date + 7.days).beginning_of_day)
    when "last_7_days"
      ((date - 6.days).beginning_of_day...date.end_of_day)
    else
      date.all_day
    end
  end

  # Coerces the input into a Date suitable for keys and lookups.
  #
  # @param date [Date, String, nil, #to_s] blank values become Date.current;
  #   non-Date values are parsed from their string form
  # @param period [Symbol, String] weekly periods snap the date to the start of its week
  # @return [Date]
  # @raise [Date::Error] when the value cannot be parsed as a date
  def normalize_date(date, period)
    date = Date.current if date.blank?
    date = Date.parse(date.to_s) unless date.is_a?(Date)
    date = date.beginning_of_week if period.to_s == "weekly"
    date
  end
end

View file

@ -1,100 +0,0 @@
# Generates unsaved, in-memory leaderboards for users in a UTC offset or a
# named timezone, caching each result in Rails.cache for 5 minutes.
class LeaderboardGenerator
  include TimezoneRegions

  class << self
    # Convenience entry point; see the instance method of the same name.
    def generate_timezone_offset_leaderboard(date, utc_offset, period_type = :daily)
      new.generate_timezone_offset_leaderboard(date, utc_offset, period_type)
    end

    # Convenience entry point; see the instance method of the same name.
    def generate_timezone_leaderboard(date, timezone, period_type = :daily)
      new.generate_timezone_leaderboard(date, timezone, period_type)
    end
  end

  # Leaderboard for all non-convicted users in the given UTC offset.
  #
  # @param date [Date, nil] reference date; Date.current when blank
  # @param utc_offset [Integer] offset used to select users and label the board
  # @param period_type [Symbol] :daily, :weekly or :last_7_days
  # @return [Leaderboard] cached or freshly generated board
  def generate_timezone_offset_leaderboard(date, utc_offset, period_type = :daily)
    date = Date.current if date.blank?

    Rails.cache.fetch("timezone_leaderboard_#{utc_offset}_#{date}_#{period_type}", expires_in: 5.minutes) do
      members = User.users_in_timezone_offset(utc_offset).not_convicted
      sign = utc_offset >= 0 ? "+" : ""
      generate_leaderboard_for_users(members, date, "UTC#{sign}#{utc_offset}", period_type)
    end
  end

  # Leaderboard for all non-convicted users in the given named timezone.
  #
  # @param date [Date, nil] reference date; Date.current when blank
  # @param timezone [String] timezone name; "/" is replaced by "_" in the cache key
  # @param period_type [Symbol] :daily, :weekly or :last_7_days
  # @return [Leaderboard] cached or freshly generated board
  def generate_timezone_leaderboard(date, timezone, period_type = :daily)
    date = Date.current if date.blank?
    zone_slug = timezone.tr("/", "_")

    Rails.cache.fetch("timezone_leaderboard_#{zone_slug}_#{date}_#{period_type}", expires_in: 5.minutes) do
      members = User.users_in_timezone(timezone).not_convicted
      generate_leaderboard_for_users(members, date, timezone, period_type)
    end
  end

  private

  # Builds the virtual board: totals per user over the period window, streaks
  # for the users that qualify, and entries sorted by total time, highest first.
  def generate_leaderboard_for_users(users, date, scope_name, period_type = :daily)
    date = Date.current if date.blank?

    # Virtual leaderboard object; it is never saved to the database.
    board = Leaderboard.new(
      start_date: date,
      period_type: period_type,
      finished_generating_at: Time.current
    )

    member_ids = users.pluck(:id)
    return board if member_ids.empty?

    # Hash of users keyed by id so entries can be wired up without extra lookups.
    members_by_id = users.index_by(&:id)
    window = period_window(date, period_type)

    totals = Heartbeat.where(user_id: member_ids, time: window)
                      .coding_only
                      .with_valid_timestamps
                      .joins(:user)
                      .where.not(users: { github_uid: nil })
                      .group(:user_id)
                      .duration_seconds
                      .select { |_user_id, total_seconds| total_seconds > 60 }

    # Streaks are only computed for users that made it onto the board.
    ranked_ids = totals.keys
    streaks =
      if ranked_ids.any?
        Heartbeat.daily_streaks_for_users(ranked_ids, start_date: 30.days.ago)
      else
        {}
      end

    unsorted = totals.map do |user_id, total_seconds|
      LeaderboardEntry.new(
        leaderboard: board,
        user_id: user_id,
        total_seconds: total_seconds,
        streak_count: streaks[user_id] || 0
      ).tap { |entry| entry.user = members_by_id[user_id] }
    end
    ranked = unsorted.sort_by(&:total_seconds).reverse

    board.define_singleton_method(:entries) { ranked }
    board.define_singleton_method(:scope_name) { scope_name }
    board
  end

  # Time window for the period.
  # NOTE(review): the :last_7_days window ends at Date.current rather than at
  # `date`, so for a past `date` it spans more than 7 days — confirm intent.
  def period_window(date, period_type)
    case period_type
    when :weekly
      date.beginning_of_week.beginning_of_day..date.end_of_week.end_of_day
    when :last_7_days
      (date - 6.days).beginning_of_day..Date.current.end_of_day
    else
      date.all_day
    end
  end
end

View file

@ -0,0 +1,53 @@
# Read-side entry point for leaderboards: serves boards from cache or the
# database and enqueues background generation when nothing is available yet.
class LeaderboardService
  include TimezoneRegions

  # Class-level shortcut for #get.
  def self.get(period: :daily, date: Date.current, offset: nil)
    new.get(period: period, date: date, offset: offset)
  end

  # Fetches a leaderboard without generating it inline.
  #
  # @param period [Symbol] leaderboard period
  # @param date [Date, String, nil] reference date; Date.current when blank
  # @param offset [Integer, nil] UTC offset; when present a timezone board is
  #   looked up first, otherwise the global board is used
  # @return the board, or nil when none exists yet (generation is enqueued)
  def get(period: :daily, date: Date.current, offset: nil)
    date = Date.current if date.blank?

    if offset.present?
      get_timezone(date, period, offset)
    else
      get_global(date, period)
    end
  end

  private

  # Returns the cached timezone board; on a miss enqueues
  # TimezoneLeaderboardJob and falls back to the global board.
  def get_timezone(date, period, offset)
    # Normalize first so the key matches the one TimezoneLeaderboardJob and
    # LeaderboardUpdateJob write under (both normalize the date before keying).
    # Without this, a weekly or string date would never hit the cache and the
    # job would be enqueued again on every request.
    date = LeaderboardDateRange.normalize_date(date, period)
    key = LeaderboardCache.timezone_key(offset, date, period)
    board = LeaderboardCache.read(key)
    label = "UTC#{offset >= 0 ? '+' : ''}#{offset}"

    if board.present?
      Rails.logger.debug "Cache HIT for timezone leaderboard #{label}"
      return board
    end

    Rails.logger.debug "Cache MISS for timezone leaderboard #{label}"
    TimezoneLeaderboardJob.perform_later(period, date, offset)

    Rails.logger.info "Falling back to global leaderboard for #{label}"
    get_global(date, period)
  end

  # Returns the global board from cache, then from the database (only boards
  # that finished generating and are not soft-deleted); otherwise enqueues
  # LeaderboardUpdateJob and returns nil.
  def get_global(date, period)
    date = LeaderboardDateRange.normalize_date(date, period)
    key = LeaderboardCache.global_key(period, date)

    board = LeaderboardCache.read(key)
    return board if board.present?

    board = ::Leaderboard.where.not(finished_generating_at: nil)
                         .find_by(start_date: date, period_type: period, timezone_offset: nil, deleted_at: nil)

    if board.present?
      LeaderboardCache.write(key, board)
      return board
    end

    Rails.logger.info "No leaderboard found for #{period} #{date}, triggering background generation"
    LeaderboardUpdateJob.perform_later(period, date)
    nil
  end
end

View file

@ -31,7 +31,7 @@
<div>
<label class="block text-md font-bold text-white mb-2">how to use it?</label>
<p class="text-md text-gray-400 mb-2">
most likely you are not crazy enough to build your own api client but you can use <a href="https://fraudcheck.3kh0.net/" class="text-blue-400 hover:underline" target="_blank">rowan's fraud check tool</a> to use the admin api.
most likely you are not crazy enough to build your own api client but you can use rowan's fraud check tool to use the admin api.
</p>
<p class="text-md text-gray-400">
if you are building your own client, just use the token as a bearer token in the auth header of your requests, check the actual source code for more details. or you could just be normal and use the fraud check tool i already made for you.

View file

@ -1,11 +0,0 @@
# Registers the recurring job that keeps the mini leaderboard cache warm.
# The cron expression below fires every 5 minutes.
Rails.application.config.after_initialize do
  # Nothing to schedule when sidekiq-cron is not loaded.
  next unless defined?(Sidekiq::Cron::Job)

  Sidekiq::Cron::Job.create(
    name: "Warm Mini Leaderboard Cache",
    cron: "*/5 * * * *",
    class: "WarmMiniLeaderboardCacheJob"
  )
end

4
db/schema.rb generated
View file

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.0].define(version: 2025_07_09_001202) do
ActiveRecord::Schema[8.0].define(version: 2025_07_22_233948) do
create_schema "pganalyze"
# These are extensions that must be enabled in order to support this database
@ -258,6 +258,8 @@ ActiveRecord::Schema[8.0].define(version: 2025_07_09_001202) do
t.bigint "raw_heartbeat_upload_id"
t.index ["category", "time"], name: "index_heartbeats_on_category_and_time"
t.index ["fields_hash"], name: "index_heartbeats_on_fields_hash_when_not_deleted", unique: true, where: "(deleted_at IS NULL)"
t.index ["project", "time"], name: "index_heartbeats_on_project_and_time"
t.index ["project"], name: "index_heartbeats_on_project"
t.index ["raw_heartbeat_upload_id"], name: "index_heartbeats_on_raw_heartbeat_upload_id"
t.index ["source_type", "time", "user_id", "project"], name: "index_heartbeats_on_source_type_time_user_project"
t.index ["user_id", "project", "time"], name: "idx_heartbeats_user_project_time_stats", where: "((deleted_at IS NULL) AND (project IS NOT NULL))"