From 2f02ae096183bc4241ddf12e6ed367a27c0fb550 Mon Sep 17 00:00:00 2001 From: 24c02 <163450896+24c02@users.noreply.github.com> Date: Fri, 20 Mar 2026 15:45:58 -0400 Subject: [PATCH] charset on content-type if not provided --- app/controllers/api/v4/uploads_controller.rb | 1 + app/controllers/uploads_controller.rb | 1 + app/models/upload.rb | 11 +++++++++++ 3 files changed, 13 insertions(+) diff --git a/app/controllers/api/v4/uploads_controller.rb b/app/controllers/api/v4/uploads_controller.rb index e496705..df5bc02 100644 --- a/app/controllers/api/v4/uploads_controller.rb +++ b/app/controllers/api/v4/uploads_controller.rb @@ -15,6 +15,7 @@ module API end content_type = Marcel::MimeType.for(file.tempfile, name: file.original_filename) || file.content_type || "application/octet-stream" + content_type = Upload.normalize_content_type(content_type) # Pre-gen upload ID for predictable storage path upload_id = SecureRandom.uuid_v7 diff --git a/app/controllers/uploads_controller.rb b/app/controllers/uploads_controller.rb index c284262..3de3df4 100644 --- a/app/controllers/uploads_controller.rb +++ b/app/controllers/uploads_controller.rb @@ -23,6 +23,7 @@ class UploadsController < ApplicationController end content_type = Marcel::MimeType.for(uploaded_file.tempfile, name: uploaded_file.original_filename) || uploaded_file.content_type || "application/octet-stream" + content_type = Upload.normalize_content_type(content_type) # pre-gen upload ID for predictable storage path upload_id = SecureRandom.uuid_v7 diff --git a/app/models/upload.rb b/app/models/upload.rb index 1e3e0fc..1ff4aeb 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -36,6 +36,16 @@ class Upload < ApplicationRecord alias_method :file_size, :byte_size alias_method :mime_type, :content_type + # Ensure text/* content types carry charset=utf-8 so browsers don't fall back + # to ISO-8859-1 and mangle unicode; type and parameter names match case-insensitively + def self.normalize_content_type(content_type) + if content_type&.match?(%r{\Atext/}i) && 
!content_type.match?(/;\s*charset\s*=/i) + "#{content_type}; charset=utf-8" + else + content_type + end + end + # Provenance enum enum :provenance, { slack: "slack", @@ -93,6 +103,7 @@ class Upload < ApplicationRecord filename ||= File.basename(URI.parse(url).path) body = response.body content_type = Marcel::MimeType.for(StringIO.new(body), name: filename) || response.headers["content-type"] || "application/octet-stream" + content_type = normalize_content_type(content_type) # Pre-generate upload ID for predictable storage path upload_id = SecureRandom.uuid_v7