feat(storage): public R2 URLs with Cloudflare edge caching (#28)

Co-authored-by: 24c02 <163450896+24c02@users.noreply.github.com>
This commit is contained in:
End 2026-02-05 11:11:12 -07:00 committed by GitHub
parent 56687e4fc1
commit ae1ffadfcd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 218 additions and 23 deletions

View file

@ -8,7 +8,7 @@ R2_ENDPOINT=https://YOUR_ACCOUNT_ID.r2.cloudflarestorage.com
# Public hostname for CDN URLs (used in generated links) # Public hostname for CDN URLs (used in generated links)
CDN_HOST=cdn.hackclub.com CDN_HOST=cdn.hackclub.com
CDN_ASSETS_HOST=cdn.hackclub-assets.com
# ============================================================================= # =============================================================================
# Hack Club OAuth # Hack Club OAuth
# ============================================================================= # =============================================================================

View file

@ -114,7 +114,7 @@ GEM
argon2-kdf (>= 0.2) argon2-kdf (>= 0.2)
bootsnap (1.21.1) bootsnap (1.21.1)
msgpack (~> 1.2) msgpack (~> 1.2)
brakeman (8.0.1) brakeman (8.0.2)
racc racc
builder (3.3.0) builder (3.3.0)
capybara (3.40.0) capybara (3.40.0)

View file

@ -78,11 +78,19 @@ See `.env.example` for the full list. Key variables:
| `R2_BUCKET_NAME` | R2 bucket name | | `R2_BUCKET_NAME` | R2 bucket name |
| `R2_ENDPOINT` | R2 endpoint URL | | `R2_ENDPOINT` | R2 endpoint URL |
| `CDN_HOST` | Public hostname for CDN URLs | | `CDN_HOST` | Public hostname for CDN URLs |
| `CDN_ASSETS_HOST` | Public R2 bucket hostname |
| `HACKCLUB_CLIENT_ID` | OAuth client ID from Hack Club Auth | | `HACKCLUB_CLIENT_ID` | OAuth client ID from Hack Club Auth |
| `HACKCLUB_CLIENT_SECRET` | OAuth client secret | | `HACKCLUB_CLIENT_SECRET` | OAuth client secret |
| `LOCKBOX_MASTER_KEY` | 64-char hex key for encrypting API keys | | `LOCKBOX_MASTER_KEY` | 64-char hex key for encrypting API keys |
| `BLIND_INDEX_MASTER_KEY` | 64-char hex key for searchable encryption | | `BLIND_INDEX_MASTER_KEY` | 64-char hex key for searchable encryption |
## DNS Setup
| Domain | Points to |
|--------|-----------|
| `cdn.hackclub.com` | Rails app (Heroku/Fly/etc.) |
| `cdn.hackclub-assets.com` | R2 bucket (custom domain in R2 settings) |
## API ## API
The API uses bearer token authentication. Create an API key from the web dashboard after logging in. The API uses bearer token authentication. Create an API key from the web dashboard after logging in.

View file

@ -16,13 +16,19 @@ module API
content_type = Marcel::MimeType.for(file.tempfile, name: file.original_filename) || file.content_type || "application/octet-stream" content_type = Marcel::MimeType.for(file.tempfile, name: file.original_filename) || file.content_type || "application/octet-stream"
# Pre-gen upload ID for predictable storage path
upload_id = SecureRandom.uuid_v7
sanitized_filename = ActiveStorage::Filename.new(file.original_filename).sanitized
storage_key = "#{upload_id}/#{sanitized_filename}"
blob = ActiveStorage::Blob.create_and_upload!( blob = ActiveStorage::Blob.create_and_upload!(
io: file.tempfile, io: file.tempfile,
filename: file.original_filename, filename: file.original_filename,
content_type: content_type content_type: content_type,
key: storage_key
) )
upload = current_user.uploads.create!(blob: blob, provenance: :api) upload = current_user.uploads.create!(id: upload_id, blob: blob, provenance: :api)
render json: upload_json(upload), status: :created render json: upload_json(upload), status: :created
rescue => e rescue => e

View file

@ -5,8 +5,8 @@ class ExternalUploadsController < ApplicationController
def show def show
upload = Upload.includes(:blob).find(params[:id]) upload = Upload.includes(:blob).find(params[:id])
expires_in ActiveStorage.service_urls_expire_in, public: true expires_in 1.year, public: true
redirect_to upload.blob.url(disposition: :inline), allow_other_host: true redirect_to upload.assets_url, allow_other_host: true
rescue ActiveRecord::RecordNotFound rescue ActiveRecord::RecordNotFound
head :not_found head :not_found
end end

View file

@ -24,16 +24,23 @@ class UploadsController < ApplicationController
content_type = Marcel::MimeType.for(uploaded_file.tempfile, name: uploaded_file.original_filename) || uploaded_file.content_type || "application/octet-stream" content_type = Marcel::MimeType.for(uploaded_file.tempfile, name: uploaded_file.original_filename) || uploaded_file.content_type || "application/octet-stream"
blob = ActiveStorage::Blob.create_and_upload!( # pre-gen upload ID for predictable storage path
io: uploaded_file.tempfile, upload_id = SecureRandom.uuid_v7
filename: uploaded_file.original_filename, sanitized_filename = ActiveStorage::Filename.new(uploaded_file.original_filename).sanitized
content_type: content_type storage_key = "#{upload_id}/#{sanitized_filename}"
)
@upload = current_user.uploads.create!( blob = ActiveStorage::Blob.create_and_upload!(
blob: blob, io: uploaded_file.tempfile,
provenance: :web filename: uploaded_file.original_filename,
) content_type: content_type,
key: storage_key
)
@upload = current_user.uploads.create!(
id: upload_id,
blob: blob,
provenance: :web
)
redirect_to uploads_path, notice: "File uploaded successfully!" redirect_to uploads_path, notice: "File uploaded successfully!"
rescue StandardError => e rescue StandardError => e

View file

@ -10,7 +10,7 @@ module QuotaHelper
render Primer::Beta::Flash.new(scheme: :danger) do render Primer::Beta::Flash.new(scheme: :danger) do
<<~EOM <<~EOM
You've exceeded your storage quota. You've exceeded your storage quota.
You're using #{number_to_human_size(usage[:storage_used])} of #{number_to_human_size(usage[:storage_limit])}. You're using #{number_to_human_size(usage[:storage_used])} of #{number_to_human_size(usage[:storage_limit])}.#{' '}
Please delete some files to continue uploading. Please delete some files to continue uploading.
EOM EOM
end end

View file

@ -56,6 +56,12 @@ class Upload < ApplicationRecord
ActiveSupport::NumberHelper.number_to_human_size(byte_size) ActiveSupport::NumberHelper.number_to_human_size(byte_size)
end end
# Direct URL to this upload's object in the public R2 bucket.
#
# The host is read from the CDN_ASSETS_HOST environment variable and falls
# back to "cdn.hackclub-assets.com" when that variable is unset.
#
# Each "/"-separated segment of the blob key is percent-encoded on its own,
# so characters such as spaces, "#" or "?" inside a stored filename cannot
# truncate or corrupt the URL, while the "/" separators stay intact.
#
# @return [String] absolute https URL for the stored object
def assets_url
  host = ENV.fetch("CDN_ASSETS_HOST", "cdn.hackclub-assets.com")
  # Limit -1 keeps trailing empty segments, so the key's shape is preserved.
  encoded_key = blob.key.split("/", -1).map { |segment| ERB::Util.url_encode(segment) }.join("/")
  "https://#{host}/#{encoded_key}"
end
# Get CDN URL (uses external uploads controller) # Get CDN URL (uses external uploads controller)
def cdn_url def cdn_url
Rails.application.routes.url_helpers.external_upload_url( Rails.application.routes.url_helpers.external_upload_url(
@ -71,7 +77,6 @@ class Upload < ApplicationRecord
f.response :follow_redirects, limit: 5 f.response :follow_redirects, limit: 5
f.adapter Faraday.default_adapter f.adapter Faraday.default_adapter
end end
# Disable CRL checking which fails on some servers
conn.options.open_timeout = 30 conn.options.open_timeout = 30
conn.options.timeout = 120 conn.options.timeout = 120
@ -89,14 +94,21 @@ class Upload < ApplicationRecord
body = response.body body = response.body
content_type = Marcel::MimeType.for(StringIO.new(body), name: filename) || response.headers["content-type"] || "application/octet-stream" content_type = Marcel::MimeType.for(StringIO.new(body), name: filename) || response.headers["content-type"] || "application/octet-stream"
# Pre-generate upload ID for predictable storage path
upload_id = SecureRandom.uuid_v7
sanitized_filename = ActiveStorage::Filename.new(filename).sanitized
storage_key = "#{upload_id}/#{sanitized_filename}"
blob = ActiveStorage::Blob.create_and_upload!( blob = ActiveStorage::Blob.create_and_upload!(
io: StringIO.new(body), io: StringIO.new(body),
filename: filename, filename: filename,
content_type: content_type, content_type: content_type,
identify: false identify: false,
key: storage_key
) )
create!( create!(
id: upload_id,
user: user, user: user,
blob: blob, blob: blob,
provenance: provenance, provenance: provenance,

View file

@ -1,5 +1,28 @@
{ {
"ignored_warnings": [ "ignored_warnings": [
{
"warning_type": "Redirect",
"warning_code": 18,
"fingerprint": "1b547d3d3a3da6fb3a8813588bc1cc46dec4d4383cab676fbabdf68254550bad",
"check_name": "Redirect",
"message": "Possible unprotected redirect",
"file": "app/controllers/external_uploads_controller.rb",
"line": 9,
"link": "https://brakemanscanner.org/docs/warning_types/redirect/",
"code": "redirect_to(Upload.includes(:blob).find(params[:id]).assets_url, :allow_other_host => true)",
"render_path": null,
"location": {
"type": "method",
"class": "ExternalUploadsController",
"method": "show"
},
"user_input": "Upload.includes(:blob).find(params[:id]).assets_url",
"confidence": "Weak",
"cwe_id": [
601
],
"note": "Redirect target is CDN_ASSETS_HOST env var + blob.key from database, not user input"
},
{ {
"warning_type": "Redirect", "warning_type": "Redirect",
"warning_code": 18, "warning_code": 18,
@ -7,7 +30,7 @@
"check_name": "Redirect", "check_name": "Redirect",
"message": "Possible unprotected redirect", "message": "Possible unprotected redirect",
"file": "app/controllers/external_uploads_controller.rb", "file": "app/controllers/external_uploads_controller.rb",
"line": 24, "line": 26,
"link": "https://brakemanscanner.org/docs/warning_types/redirect/", "link": "https://brakemanscanner.org/docs/warning_types/redirect/",
"code": "redirect_to(Upload.includes(:blob).find_by(:original_url => params[:url]).cdn_url, :allow_other_host => true)", "code": "redirect_to(Upload.includes(:blob).find_by(:original_url => params[:url]).cdn_url, :allow_other_host => true)",
"render_path": null, "render_path": null,
@ -21,8 +44,8 @@
"cwe_id": [ "cwe_id": [
601 601
], ],
"note": "" "note": "Redirect to cdn_url which points to our own CDN_HOST domain, not user input"
} }
], ],
"brakeman_version": "8.0.1" "brakeman_version": "8.0.2"
} }

View file

@ -66,7 +66,6 @@ Rails.application.configure do
# Raise error when a before_action's only/except options reference missing actions. # Raise error when a before_action's only/except options reference missing actions.
config.action_controller.raise_on_missing_callback_actions = true config.action_controller.raise_on_missing_callback_actions = true
# Apply autocorrection by RuboCop to files generated by `bin/rails generate`. # Apply autocorrection by RuboCop to files generated by `bin/rails generate`.
# config.generators.apply_rubocop_autocorrect_after_generate! # config.generators.apply_rubocop_autocorrect_after_generate!
end end

View file

@ -0,0 +1,84 @@
# frozen_string_literal: true
namespace :storage do
# Phase 1 of the key migration: for every upload whose blob key is not yet
# "<upload id>/<sanitized filename>", copy the stored object to that new key.
# Nothing is deleted and no database rows are changed by this task.
#
# Environment:
#   COPY_THREADS - number of concurrent copy workers (default 67).
#
# Exits non-zero when the storage service is not S3-compatible or when any
# copy fails, so the database keys are not switched over to objects that were
# never written.
#
# NOTE(review): this task does not write tmp/old_storage_keys.txt, which
# storage:delete_old_keys reads and tells the operator to produce by running
# this task - confirm how that file is meant to be generated.
desc "Phase 1: Copy existing blobs to new key structure (safe, no deletions)"
task copy_to_public_keys: :environment do
  require "aws-sdk-s3"
  require "concurrent"

  service = ActiveStorage::Blob.service
  unless service.is_a?(ActiveStorage::Service::S3Service)
    abort "This task only works with S3/R2 storage. Current service: #{service.class}"
  end

  client = service.client.client
  bucket = service.bucket

  # Plan the work up front so the totals can be reported before copying.
  migrations = []
  Upload.select(:id, :blob_id).includes(:blob).find_each(batch_size: 5000) do |upload|
    blob = upload.blob
    old_key = blob.key
    new_key = "#{upload.id}/#{blob.filename.sanitized}"
    next if old_key == new_key

    migrations << { upload_id: upload.id, blob: blob, old_key: old_key, new_key: new_key }
  end

  puts "Found #{migrations.size} files to migrate (#{Upload.count - migrations.size} already migrated)"
  # `next` leaves this task only; `exit` would also cancel any tasks chained after it.
  next if migrations.empty?

  copied = Concurrent::AtomicFixnum.new(0)
  errors = Concurrent::Array.new
  progress = Concurrent::AtomicFixnum.new(0)
  pool = Concurrent::FixedThreadPool.new(Integer(ENV.fetch("COPY_THREADS", "67")))

  migrations.each do |m|
    pool.post do
      begin
        blob = m[:blob]
        client.copy_object(
          bucket: bucket.name,
          # NOTE(review): the source key is prefixed with "hackclub-cdn/" while the
          # destination key is not - confirm this matches where existing objects live.
          copy_source: "#{bucket.name}/hackclub-cdn/#{m[:old_key]}",
          key: m[:new_key],
          content_type: blob.content_type || "application/octet-stream",
          content_disposition: "inline",
          # REPLACE makes the copy take the content type/disposition given here
          # instead of inheriting the source object's metadata.
          metadata_directive: "REPLACE"
        )
        copied.increment
      rescue StandardError => e
        errors << { upload_id: m[:upload_id], old_key: m[:old_key], error: e.message }
      end
      print "\r[#{progress.increment}/#{migrations.size}] Copying..."
    end
  end

  pool.shutdown
  pool.wait_for_termination

  puts "\nCopied: #{copied.value}, Errors: #{errors.size}"
  if errors.any?
    errors.each { |err| puts "  - #{err[:upload_id]}: #{err[:error]}" }
    abort "\n#{errors.size} copies failed. Fix the errors and re-run this task before running storage:update_blob_keys"
  end

  puts "\nRun `bin/rails storage:update_blob_keys` to update database keys"
end
# Phase 2 of the key migration: point each blob row at its new storage key,
# "<upload id>/<sanitized filename>".
#
# A key is only rewritten when the storage service reports that an object
# already exists at the new key; otherwise the blob keeps its current key and
# the upload id is reported. This prevents a failed or skipped copy from
# leaving a blob that points at a missing object.
#
# Exits non-zero when any upload had to be skipped.
desc "Phase 2: Update blob keys in database to point to new locations"
task update_blob_keys: :environment do
  service = ActiveStorage::Blob.service
  updated = 0
  missing = []

  Upload.select(:id, :blob_id).includes(:blob).find_each(batch_size: 5000) do |upload|
    blob = upload.blob
    new_key = "#{upload.id}/#{blob.filename.sanitized}"
    next if blob.key == new_key

    unless service.exist?(new_key)
      missing << upload.id
      next
    end

    # update_column writes the key directly, bypassing validations and callbacks.
    blob.update_column(:key, new_key)
    updated += 1
    print "\r[#{updated}] Updating keys..."
  end

  puts "\nUpdated #{updated} blob keys"

  if missing.any?
    missing.each { |upload_id| puts "  - #{upload_id}: no object at new key, key left unchanged" }
    abort "\nSkipped #{missing.size} uploads with no object at the new key. Re-run storage:copy_to_public_keys, then this task"
  end
end
end

56
lib/tasks/del_old.rake Normal file
View file

@ -0,0 +1,56 @@
# frozen_string_literal: true
namespace :storage do
# Phase 3 of the key migration: delete the objects stored under the old keys.
#
# Reads one object key per line from tmp/old_storage_keys.txt and issues a
# delete for each against the configured S3/R2 bucket. When every delete
# succeeds the key file is removed. When some fail, the key file is rewritten
# to contain only the failed keys, so a re-run retries just those, and the
# task exits non-zero.
#
# NOTE(review): the missing-file message points at storage:copy_to_public_keys,
# but that task is not seen writing this file - confirm how the key list is
# meant to be produced.
desc "Phase 3: Delete old keys (run after deploy)"
task delete_old_keys: :environment do
  require "aws-sdk-s3"

  key_file = Rails.root.join("tmp/old_storage_keys.txt")
  unless File.exist?(key_file)
    puts "No old keys file found at #{key_file}"
    puts "Run storage:copy_to_public_keys first."
    abort
  end

  old_keys = File.readlines(key_file, chomp: true).reject(&:blank?)
  puts "Found #{old_keys.size} old keys to delete"
  if old_keys.empty?
    puts "Nothing to delete."
    # `next` leaves this task only; `exit` would also cancel any tasks chained after it.
    next
  end

  service = ActiveStorage::Blob.service
  unless service.is_a?(ActiveStorage::Service::S3Service)
    abort "This task only works with S3/R2 storage. Current service: #{service.class}"
  end

  client = service.client.client
  bucket = service.bucket

  deleted = 0
  errors = []
  old_keys.each_with_index do |key, idx|
    print "\r[#{idx + 1}/#{old_keys.size}] Deleting..."
    client.delete_object(bucket: bucket.name, key: key)
    deleted += 1
  rescue StandardError => e
    puts "\n  ERROR deleting #{key}: #{e.message}"
    errors << { key: key, error: e.message }
  end

  puts "\nDeleted: #{deleted}, Errors: #{errors.size}"
  if errors.empty?
    File.delete(key_file)
    puts "Cleanup complete!"
    next
  end

  errors.each { |err| puts "  - #{err[:key]}: #{err[:error]}" }
  # Keep only the keys that still need deleting so the next run retries just those.
  File.write(key_file, errors.map { |err| "#{err[:key]}\n" }.join)
  abort "\n#{errors.size} deletes failed. Remaining keys kept in #{key_file}"
end
end

View file

@ -133,7 +133,7 @@ namespace :import do
error_log_path = "import_errors_#{Time.now.strftime('%Y%m%d_%H%M%S')}.csv" error_log_path = "import_errors_#{Time.now.strftime('%Y%m%d_%H%M%S')}.csv"
CSV.open(error_log_path, "w") do |csv| CSV.open(error_log_path, "w") do |csv|
csv << %w[id original_url error] csv << %w[id original_url error]
errors.each { |err| csv << [err[:id], err[:original_url], err[:error]] } errors.each { |err| csv << [ err[:id], err[:original_url], err[:error] ] }
end end
puts "Full error log written to: #{error_log_path}" puts "Full error log written to: #{error_log_path}"
end end