hackatime/app/jobs/process_commit_job.rb
Mahad Kalam 28fa174861
Add Sentry monitoring for previously unreported errors (#1066)
* Add Sentry monitoring for previously unreported errors

* Fix

* Fixes

* whoops!
2026-03-13 11:06:12 +00:00

131 lines
6.5 KiB
Ruby

require "http"
require "json"
# Background job that fetches one commit from a provider API (currently only
# GitHub) and stores it as a Commit row attributed to the given user.
#
# `report_message` and `report_error` are not defined in this file; they are
# presumably error-reporting helpers inherited from ApplicationJob — confirm.
class ProcessCommitJob < ApplicationJob
  # Raised for 5xx GitHub responses so the matching retry_on below can
  # reschedule the job. Subclasses RuntimeError because that is what the
  # previous bare `raise "..."` produced.
  class GitHubServerError < RuntimeError; end

  # Lower bound (seconds) for the delay before re-enqueueing a rate-limited job.
  MIN_RATE_LIMIT_DELAY_SECONDS = 5

  queue_as :literally_whenever

  # Retry on common network issues or temporary API errors.
  # NOTE(review): newer Rails releases renamed :exponentially_longer to
  # :polynomially_longer — confirm against the application's Rails version.
  retry_on HTTP::TimeoutError, HTTP::ConnectionError, wait: :exponentially_longer, attempts: 5
  # NOTE(review): process_github_commit rescues JSON::ParserError without
  # re-raising, so this rule only fires if the error escapes from elsewhere.
  retry_on JSON::ParserError, wait: 10.seconds, attempts: 3 # If API returns malformed JSON
  # Server-side (5xx) failures are raised as GitHubServerError below.
  retry_on GitHubServerError, wait: 30.seconds, attempts: 5
  discard_on ActiveJob::DeserializationError # If User record is gone

  # @param user_id [Integer] id of the User the commit is attributed to
  # @param commit_sha [String] SHA the caller expects the API to return
  # @param commit_api_url [String] provider API URL for this commit
  # @param provider_string [String] provider name; only "github" is handled
  # @param repository_id [Integer, nil] optional id of the owning Repository
  # @return [void]
  def perform(user_id, commit_sha, commit_api_url, provider_string, repository_id = nil)
    provider_sym = provider_string.to_sym # Convert string back to symbol

    user = User.find_by(id: user_id)
    unless user
      Rails.logger.warn "[ProcessCommitJob] User ##{user_id} not found. Skipping commit #{commit_sha}."
      return
    end

    # Idempotency: nothing to do when the commit is already stored.
    return if Commit.exists?(sha: commit_sha)

    # Looked up only after the guards above so skipped jobs cost no extra query.
    repository = Repository.find_by(id: repository_id) if repository_id

    Rails.logger.info "[ProcessCommitJob] Processing commit #{commit_sha} for User ##{user_id} via #{provider_sym} from URL: #{commit_api_url}"

    case provider_sym
    when :github
      process_github_commit(user, commit_sha, commit_api_url, repository)
    # Add other providers like :gitlab later
    else
      report_message("[ProcessCommitJob] Unknown provider '#{provider_sym}' for commit #{commit_sha}.")
    end
  end

  private

  # Fetches the commit from GitHub and dispatches on the response status.
  #
  # HTTP errors are reported and re-raised (so retry_on can act on them);
  # JSON parse errors and validation failures are reported and swallowed.
  #
  # @raise [HTTP::Error] on network-level failures
  # @raise [GitHubServerError] when GitHub answers with a 5xx status
  def process_github_commit(user, commit_sha, commit_api_url, repository)
    unless user.github_access_token.present?
      Rails.logger.warn "[ProcessCommitJob] User ##{user.id} missing GitHub token for commit #{commit_sha}. Skipping."
      return
    end

    response = fetch_github_commit(user, commit_api_url)

    if response.status.success?
      store_github_commit(user, commit_sha, commit_api_url, repository, response.parse)
    elsif response.status.code == 401 # Unauthorized
      report_message("[ProcessCommitJob] Unauthorized (401) for User ##{user.id}. GitHub token expired/invalid. URL: #{commit_api_url}")
      user.update!(github_access_token: nil)
      Rails.logger.info "[ProcessCommitJob] Cleared invalid GitHub token for User ##{user.id}. User will need to re-authenticate."
    elsif response.status.code == 404
      Rails.logger.warn "[ProcessCommitJob] Commit #{commit_sha} not found (404) at #{commit_api_url} for User ##{user.id}."
    elsif response.status.code == 403 # Forbidden, could be rate limit or permissions
      handle_github_forbidden(user, commit_sha, commit_api_url, repository, response)
    else
      report_message("[ProcessCommitJob] GitHub API error for User ##{user.id}. Status: #{response.status}. URL: #{commit_api_url}. Response: #{response.body.to_s.truncate(500)}")
      # Typed error so the class-level retry_on actually reschedules the job.
      raise GitHubServerError, "GitHub API Error: Status #{response.status}" if response.status.server_error?
    end
  rescue HTTP::Error => e # Covers TimeoutError, ConnectionError
    report_error(e, message: "[ProcessCommitJob] HTTP Error fetching commit #{commit_sha} for User ##{user.id}. URL: #{commit_api_url}")
    raise # Re-raise so the retry_on rules above can reschedule the job
  rescue JSON::ParserError => e
    # Malformed JSON usually isn't temporary, so it is reported but not re-raised.
    report_error(e, message: "[ProcessCommitJob] JSON Parse Error for commit #{commit_sha} (User ##{user.id}). URL: #{commit_api_url}. Body: #{response&.body&.to_s&.truncate(200)}")
  rescue ActiveRecord::RecordInvalid => e
    report_error(e, message: "[ProcessCommitJob] Validation failed for commit #{commit_sha} (User ##{user.id})")
  end

  # Performs the authenticated GET against the GitHub commit endpoint.
  #
  # @return [HTTP::Response]
  def fetch_github_commit(user, commit_api_url)
    HTTP.headers(
      "Accept" => "application/vnd.github.v3+json",
      "Authorization" => "Bearer #{user.github_access_token}",
      "X-GitHub-Api-Version" => "2022-11-28"
    ).timeout(connect: 5, read: 10).get(commit_api_url)
  end

  # Validates the parsed API payload and persists it as a Commit.
  #
  # Reports and returns without saving when the SHA does not match, or when
  # the committer date is missing or unparseable.
  #
  # @param commit_data_json [Hash] parsed body of the GitHub response
  # @raise [ActiveRecord::RecordInvalid] when the new Commit fails validation
  def store_github_commit(user, commit_sha, commit_api_url, repository, commit_data_json)
    api_commit_sha = commit_data_json["sha"]
    unless api_commit_sha == commit_sha
      report_message("[ProcessCommitJob] SHA mismatch for User ##{user.id}. Expected #{commit_sha}, API returned #{api_commit_sha}. URL: #{commit_api_url}")
      return # Critical data integrity issue
    end

    committer_date_str = commit_data_json.dig("commit", "committer", "date")
    unless committer_date_str
      report_message("[ProcessCommitJob] Committer date not found in API response for commit #{commit_sha}.")
      return
    end

    begin
      commit_actual_created_at = Time.zone.parse(committer_date_str)
    rescue ArgumentError => e
      report_error(e, message: "[ProcessCommitJob] Invalid committer date format '#{committer_date_str}' for commit #{commit_sha}.")
      return
    end

    # Time.zone.parse returns nil (instead of raising) when the string holds
    # no recognisable date parts; a nil created_at must not reach the record.
    if commit_actual_created_at.nil?
      report_message("[ProcessCommitJob] Invalid committer date format '#{committer_date_str}' for commit #{commit_sha}.")
      return
    end

    # Bang variant: a validation failure raises RecordInvalid (rescued by the
    # caller) rather than silently returning an unsaved record.
    Commit.find_or_create_by!(sha: api_commit_sha) do |c|
      c.user_id = user.id
      c.repository_id = repository&.id
      c.github_raw = sanitize_json_data(commit_data_json)
      c.created_at = commit_actual_created_at
      c.updated_at = Time.current
    end

    Rails.logger.info "[ProcessCommitJob] Successfully processed commit #{api_commit_sha} for User ##{user.id}."
  end

  # Handles a 403 response: re-enqueues the job when the rate limit is
  # exhausted, otherwise reports the response as a permissions problem.
  def handle_github_forbidden(user, commit_sha, commit_api_url, repository, response)
    # Require the header to be present: nil.to_i is 0, which would otherwise
    # make every header-less 403 look like an exhausted rate limit.
    rate_limited = response.headers["X-RateLimit-Remaining"].to_s.strip == "0"

    unless rate_limited
      report_message("[ProcessCommitJob] GitHub API forbidden (403) for User ##{user.id}. URL: #{commit_api_url}. Response: #{response.body.to_s.truncate(500)}")
      return
    end

    reset_time = Time.at(response.headers["X-RateLimit-Reset"].to_i)
    delay_seconds = [ (reset_time - Time.current).ceil, MIN_RATE_LIMIT_DELAY_SECONDS ].max # at least 5s delay
    Rails.logger.warn "[ProcessCommitJob] GitHub API rate limit exceeded for User ##{user.id}. Retrying in #{delay_seconds}s. URL: #{commit_api_url}"
    self.class.set(wait: delay_seconds.seconds).perform_later(user.id, commit_sha, commit_api_url, "github", repository&.id)
  end

  # Returns a copy of parsed JSON data with NUL characters removed from every
  # string (hash keys included). Hashes and arrays are walked recursively;
  # any other value is returned unchanged.
  #
  # Working on the parsed structure avoids regex edits on serialized JSON,
  # which could also hit an escaped backslash followed by the text "u0000".
  #
  # @param data [Hash, Array, String, Object] parsed JSON value
  # @return [Hash, Array, String, Object] sanitized value of the same shape
  def sanitize_json_data(data)
    case data
    when Hash
      data.each_with_object({}) do |(key, value), cleaned|
        cleaned[sanitize_json_data(key)] = sanitize_json_data(value)
      end
    when Array
      data.map { |element| sanitize_json_data(element) }
    when String
      data.delete("\u0000")
    else
      data
    end
  end
end