Add charset to text content-type if not provided

This commit is contained in:
24c02 2026-03-20 15:45:58 -04:00
parent 2eeddb620a
commit 2f02ae0961
3 changed files with 13 additions and 0 deletions

View file

@ -15,6 +15,7 @@ module API
end
content_type = Marcel::MimeType.for(file.tempfile, name: file.original_filename) || file.content_type || "application/octet-stream"
content_type = Upload.normalize_content_type(content_type)
# Pre-generate upload ID for predictable storage path
upload_id = SecureRandom.uuid_v7

View file

@ -23,6 +23,7 @@ class UploadsController < ApplicationController
end
content_type = Marcel::MimeType.for(uploaded_file.tempfile, name: uploaded_file.original_filename) || uploaded_file.content_type || "application/octet-stream"
content_type = Upload.normalize_content_type(content_type)
# Pre-generate upload ID for predictable storage path
upload_id = SecureRandom.uuid_v7

View file

@ -36,6 +36,16 @@ class Upload < ApplicationRecord
alias_method :file_size, :byte_size
alias_method :mime_type, :content_type
# Ensure text content types include charset=utf-8 so browsers
# don't fall back to ISO-8859-1 and mangle unicode.
#
# Media type and parameter names are case-insensitive, so both the
# "text/" prefix and an existing charset parameter are matched regardless
# of case (e.g. "TEXT/HTML", "text/html; Charset=UTF-8").
#
# content_type - media type String, or nil.
#
# Returns content_type with "; charset=utf-8" appended when it is a text/*
# type that has no charset parameter; otherwise returns content_type
# unchanged (nil stays nil).
def self.normalize_content_type(content_type)
  return content_type unless content_type&.match?(%r{\Atext/}i)
  # Match an actual "; charset=" parameter rather than the bare substring,
  # so values that merely contain the word "charset" are still normalized.
  return content_type if content_type.match?(/;\s*charset\s*=/i)

  "#{content_type}; charset=utf-8"
end
# Provenance enum
enum :provenance, {
slack: "slack",
@ -93,6 +103,7 @@ class Upload < ApplicationRecord
filename ||= File.basename(URI.parse(url).path)
body = response.body
content_type = Marcel::MimeType.for(StringIO.new(body), name: filename) || response.headers["content-type"] || "application/octet-stream"
content_type = normalize_content_type(content_type)
# Pre-generate upload ID for predictable storage path
upload_id = SecureRandom.uuid_v7