From ae1ffadfcd20d84d113889f6f2f11aaa958b46ba Mon Sep 17 00:00:00 2001 From: End Date: Thu, 5 Feb 2026 11:11:12 -0700 Subject: [PATCH] feat(storage): public R2 URLs with Cloudflare edge caching (#28) Co-authored-by: 24c02 <163450896+24c02@users.noreply.github.com> --- .env.example | 2 +- Gemfile.lock | 2 +- README.md | 8 ++ app/controllers/api/v4/uploads_controller.rb | 10 ++- .../external_uploads_controller.rb | 4 +- app/controllers/uploads_controller.rb | 25 ++++-- app/helpers/quota_helper.rb | 2 +- app/models/upload.rb | 16 +++- config/brakeman.ignore | 29 ++++++- config/environments/development.rb | 1 - lib/tasks/copy_to_pub.rake | 84 +++++++++++++++++++ lib/tasks/del_old.rake | 56 +++++++++++++ lib/tasks/import_slack_files.rake | 2 +- 13 files changed, 218 insertions(+), 23 deletions(-) create mode 100644 lib/tasks/copy_to_pub.rake create mode 100644 lib/tasks/del_old.rake diff --git a/.env.example b/.env.example index f85a825..180fd78 100644 --- a/.env.example +++ b/.env.example @@ -8,7 +8,7 @@ R2_ENDPOINT=https://YOUR_ACCOUNT_ID.r2.cloudflarestorage.com # Public hostname for CDN URLs (used in generated links) CDN_HOST=cdn.hackclub.com - +CDN_ASSETS_HOST=cdn.hackclub-assets.com # ============================================================================= # Hack Club OAuth # ============================================================================= diff --git a/Gemfile.lock b/Gemfile.lock index d7cb5c1..ee8ac11 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -114,7 +114,7 @@ GEM argon2-kdf (>= 0.2) bootsnap (1.21.1) msgpack (~> 1.2) - brakeman (8.0.1) + brakeman (8.0.2) racc builder (3.3.0) capybara (3.40.0) diff --git a/README.md b/README.md index 166f6d8..56e1a6a 100644 --- a/README.md +++ b/README.md @@ -78,11 +78,19 @@ See `.env.example` for the full list. Key variables: | `R2_BUCKET_NAME` | R2 bucket name | | `R2_ENDPOINT` | R2 endpoint URL | | `CDN_HOST` | Public hostname for CDN URLs | +| `CDN_ASSETS_HOST` | Public R2 bucket hostname | | `HACKCLUB_CLIENT_ID` | OAuth client ID from Hack Club Auth | | `HACKCLUB_CLIENT_SECRET` | OAuth client secret | | `LOCKBOX_MASTER_KEY` | 64-char hex key for encrypting API keys | | `BLIND_INDEX_MASTER_KEY` | 64-char hex key for searchable encryption | +## DNS Setup + +| Domain | Points to | +|--------|-----------| +| `cdn.hackclub.com` | Rails app (Heroku/Fly/etc.) | +| `cdn.hackclub-assets.com` | R2 bucket (custom domain in R2 settings) | + ## API The API uses bearer token authentication. Create an API key from the web dashboard after logging in. diff --git a/app/controllers/api/v4/uploads_controller.rb b/app/controllers/api/v4/uploads_controller.rb index 60a6331..e496705 100644 --- a/app/controllers/api/v4/uploads_controller.rb +++ b/app/controllers/api/v4/uploads_controller.rb @@ -16,13 +16,19 @@ module API content_type = Marcel::MimeType.for(file.tempfile, name: file.original_filename) || file.content_type || "application/octet-stream" + # Pre-gen upload ID for predictable storage path + upload_id = SecureRandom.uuid_v7 + sanitized_filename = ActiveStorage::Filename.new(file.original_filename).sanitized + storage_key = "#{upload_id}/#{sanitized_filename}" + blob = ActiveStorage::Blob.create_and_upload!( io: file.tempfile, filename: file.original_filename, - content_type: content_type + content_type: content_type, + key: storage_key ) - upload = current_user.uploads.create!(blob: blob, provenance: :api) + upload = current_user.uploads.create!(id: upload_id, blob: blob, provenance: :api) render json: upload_json(upload), status: :created rescue => e diff --git a/app/controllers/external_uploads_controller.rb b/app/controllers/external_uploads_controller.rb index 6952c05..0d4dd19 100644 --- a/app/controllers/external_uploads_controller.rb +++ b/app/controllers/external_uploads_controller.rb @@ -5,8 +5,8 @@ class ExternalUploadsController < ApplicationController def show upload = Upload.includes(:blob).find(params[:id]) - expires_in ActiveStorage.service_urls_expire_in, public: true - redirect_to upload.blob.url(disposition: :inline), allow_other_host: true + expires_in 1.year, public: true + redirect_to upload.assets_url, allow_other_host: true rescue ActiveRecord::RecordNotFound head :not_found end diff --git a/app/controllers/uploads_controller.rb b/app/controllers/uploads_controller.rb index e959398..c284262 100644 --- a/app/controllers/uploads_controller.rb +++ b/app/controllers/uploads_controller.rb @@ -24,16 +24,23 @@ class UploadsController < ApplicationController content_type = Marcel::MimeType.for(uploaded_file.tempfile, name: uploaded_file.original_filename) || uploaded_file.content_type || "application/octet-stream" - blob = ActiveStorage::Blob.create_and_upload!( - io: uploaded_file.tempfile, - filename: uploaded_file.original_filename, - content_type: content_type - ) + # pre-gen upload ID for predictable storage path + upload_id = SecureRandom.uuid_v7 + sanitized_filename = ActiveStorage::Filename.new(uploaded_file.original_filename).sanitized + storage_key = "#{upload_id}/#{sanitized_filename}" - @upload = current_user.uploads.create!( - blob: blob, - provenance: :web - ) + blob = ActiveStorage::Blob.create_and_upload!( + io: uploaded_file.tempfile, + filename: uploaded_file.original_filename, + content_type: content_type, + key: storage_key + ) + + @upload = current_user.uploads.create!( + id: upload_id, + blob: blob, + provenance: :web + ) redirect_to uploads_path, notice: "File uploaded successfully!" rescue StandardError => e diff --git a/app/helpers/quota_helper.rb b/app/helpers/quota_helper.rb index 0a76083..93c71e9 100644 --- a/app/helpers/quota_helper.rb +++ b/app/helpers/quota_helper.rb @@ -10,7 +10,7 @@ module QuotaHelper render Primer::Beta::Flash.new(scheme: :danger) do <<~EOM You've exceeded your storage quota. - You're using #{number_to_human_size(usage[:storage_used])} of #{number_to_human_size(usage[:storage_limit])}. + You're using #{number_to_human_size(usage[:storage_used])} of #{number_to_human_size(usage[:storage_limit])}.#{' '} Please delete some files to continue uploading. EOM end diff --git a/app/models/upload.rb b/app/models/upload.rb index 8d96df6..1e3e0fc 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -56,6 +56,12 @@ class Upload < ApplicationRecord ActiveSupport::NumberHelper.number_to_human_size(byte_size) end + # Direct URL to public R2 bucket + def assets_url + host = ENV.fetch("CDN_ASSETS_HOST", "cdn.hackclub-assets.com") + "https://#{host}/#{blob.key}" + end + # Get CDN URL (uses external uploads controller) def cdn_url Rails.application.routes.url_helpers.external_upload_url( @@ -71,7 +77,6 @@ class Upload < ApplicationRecord f.response :follow_redirects, limit: 5 f.adapter Faraday.default_adapter end - # Disable CRL checking which fails on some servers conn.options.open_timeout = 30 conn.options.timeout = 120 @@ -89,14 +94,21 @@ class Upload < ApplicationRecord body = response.body content_type = Marcel::MimeType.for(StringIO.new(body), name: filename) || response.headers["content-type"] || "application/octet-stream" + # Pre-generate upload ID for predictable storage path + upload_id = SecureRandom.uuid_v7 + sanitized_filename = ActiveStorage::Filename.new(filename).sanitized + storage_key = "#{upload_id}/#{sanitized_filename}" + blob = ActiveStorage::Blob.create_and_upload!( io: StringIO.new(body), filename: filename, content_type: content_type, - identify: false + identify: false, + key: storage_key ) create!( + id: upload_id, user: user, blob: blob, provenance: provenance, diff --git a/config/brakeman.ignore b/config/brakeman.ignore index d19f2c1..266ff34 100644 --- a/config/brakeman.ignore +++ b/config/brakeman.ignore @@ -1,5 +1,28 @@ { "ignored_warnings": [ + { + "warning_type": "Redirect", + "warning_code": 18, + "fingerprint": "1b547d3d3a3da6fb3a8813588bc1cc46dec4d4383cab676fbabdf68254550bad", + "check_name": "Redirect", + "message": "Possible unprotected redirect", + "file": "app/controllers/external_uploads_controller.rb", + "line": 9, + "link": "https://brakemanscanner.org/docs/warning_types/redirect/", + "code": "redirect_to(Upload.includes(:blob).find(params[:id]).assets_url, :allow_other_host => true)", + "render_path": null, + "location": { + "type": "method", + "class": "ExternalUploadsController", + "method": "show" + }, + "user_input": "Upload.includes(:blob).find(params[:id]).assets_url", + "confidence": "Weak", + "cwe_id": [ + 601 + ], + "note": "Redirect target is CDN_ASSETS_HOST env var + blob.key from database, not user input" + }, { "warning_type": "Redirect", "warning_code": 18, @@ -7,7 +30,7 @@ "check_name": "Redirect", "message": "Possible unprotected redirect", "file": "app/controllers/external_uploads_controller.rb", - "line": 24, + "line": 26, "link": "https://brakemanscanner.org/docs/warning_types/redirect/", "code": "redirect_to(Upload.includes(:blob).find_by(:original_url => params[:url]).cdn_url, :allow_other_host => true)", "render_path": null, @@ -21,8 +44,8 @@ "cwe_id": [ 601 ], - "note": "" + "note": "Redirect to cdn_url which points to our own CDN_HOST domain, not user input" } ], - "brakeman_version": "8.0.1" + "brakeman_version": "8.0.2" } diff --git a/config/environments/development.rb b/config/environments/development.rb index 4cc21c4..05a6a5a 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -66,7 +66,6 @@ Rails.application.configure do # Raise error when a before_action's only/except options reference missing actions. config.action_controller.raise_on_missing_callback_actions = true - # Apply autocorrection by RuboCop to files generated by `bin/rails generate`. # config.generators.apply_rubocop_autocorrect_after_generate! end diff --git a/lib/tasks/copy_to_pub.rake b/lib/tasks/copy_to_pub.rake new file mode 100644 index 0000000..9c37a7d --- /dev/null +++ b/lib/tasks/copy_to_pub.rake @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +namespace :storage do + desc "Phase 1: Copy existing blobs to new key structure (safe, no deletions)" + task copy_to_public_keys: :environment do + require "aws-sdk-s3" + + service = ActiveStorage::Blob.service + unless service.is_a?(ActiveStorage::Service::S3Service) + puts "This task only works with S3/R2 storage. Current service: #{service.class}" + exit 1 + end + + client = service.client.client + bucket = service.bucket + i=0 + migrations = [] + Upload.select(:id, :blob_id).includes(:blob).find_each(batch_size: 5000) do |upload| + blob = upload.blob + old_key = blob.key + new_key = "#{upload.id}/#{blob.filename.sanitized}" + next if old_key == new_key + + migrations << { upload_id: upload.id, blob: blob, old_key: old_key, new_key: new_key } + puts i+=1 + end + + puts "Found #{migrations.size} files to migrate (#{Upload.count - migrations.size} already migrated)" + exit 0 if migrations.empty? + + require "concurrent" + + copied = Concurrent::AtomicFixnum.new(0) + errors = Concurrent::Array.new + progress = Concurrent::AtomicFixnum.new(0) + + pool = Concurrent::FixedThreadPool.new(67) + + migrations.each do |m| + pool.post do + begin + blob = m[:blob] + + client.copy_object( + bucket: bucket.name, + copy_source: "#{bucket.name}/hackclub-cdn/#{m[:old_key]}", + key: m[:new_key], + content_type: blob.content_type || "application/octet-stream", + content_disposition: "inline", + metadata_directive: "REPLACE" + ) + copied.increment + rescue StandardError => e + errors << { upload_id: m[:upload_id], old_key: m[:old_key], error: e.message } + end + print "\r[#{progress.increment}/#{migrations.size}] Copying..." + end + end + + pool.shutdown + pool.wait_for_termination + + puts "\nCopied: #{copied.value}, Errors: #{errors.size}" + errors.each { |err| puts " - #{err[:upload_id]}: #{err[:error]}" } if errors.any? + + puts "\nRun `bin/rails storage:update_blob_keys` to update database keys" + end + + desc "Phase 2: Update blob keys in database to point to new locations" + task update_blob_keys: :environment do + updated = 0 + Upload.select(:id, :blob_id).includes(:blob).find_each(batch_size: 5000) do |upload| + blob = upload.blob + new_key = "#{upload.id}/#{blob.filename.sanitized}" + next if blob.key == new_key + + blob.update_column(:key, new_key) + updated += 1 + print "\r[#{updated}] Updating keys..." + end + + puts "\nUpdated #{updated} blob keys" + end +end diff --git a/lib/tasks/del_old.rake b/lib/tasks/del_old.rake new file mode 100644 index 0000000..2e7a41b --- /dev/null +++ b/lib/tasks/del_old.rake @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +namespace :storage do + desc "Phase 2: Delete old keys (run after deploy)" + task delete_old_keys: :environment do + require "aws-sdk-s3" + + key_file = Rails.root.join("tmp/old_storage_keys.txt") + unless File.exist?(key_file) + puts "No old keys file found at #{key_file}" + puts "Run storage:copy_to_public_keys first." + exit 1 + end + + old_keys = File.read(key_file).split("\n").reject(&:blank?) + puts "Found #{old_keys.size} old keys to delete" + + if old_keys.empty? + puts "Nothing to delete." + exit 0 + end + + service = ActiveStorage::Blob.service + unless service.is_a?(ActiveStorage::Service::S3Service) + puts "This task only works with S3/R2 storage. Current service: #{service.class}" + exit 1 + end + + client = service.client.client + bucket = service.bucket + + deleted = 0 + errors = [] + + old_keys.each_with_index do |key, idx| + print "\r[#{idx + 1}/#{old_keys.size}] Deleting..." + + begin + client.delete_object(bucket: bucket.name, key: key) + deleted += 1 + rescue StandardError => e + puts "\n ERROR deleting #{key}: #{e.message}" + errors << { key: key, error: e.message } + end + end + + puts "\nDeleted: #{deleted}, Errors: #{errors.size}" + + if errors.empty? + File.delete(key_file) + puts "Cleanup complete!" + else + errors.each { |err| puts " - #{err[:key]}: #{err[:error]}" } + end + end +end diff --git a/lib/tasks/import_slack_files.rake b/lib/tasks/import_slack_files.rake index 03f00a1..eeaa8ad 100644 --- a/lib/tasks/import_slack_files.rake +++ b/lib/tasks/import_slack_files.rake @@ -133,7 +133,7 @@ namespace :import do error_log_path = "import_errors_#{Time.now.strftime('%Y%m%d_%H%M%S')}.csv" CSV.open(error_log_path, "w") do |csv| csv << %w[id original_url error] - errors.each { |err| csv << [err[:id], err[:original_url], err[:error]] } + errors.each { |err| csv << [ err[:id], err[:original_url], err[:error] ] } end puts "Full error log written to: #{error_log_path}" end