From 5c5bce93398c7222ce258c24f2cf30cf09e4cf53 Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Wed, 19 Jul 2023 12:55:26 +0100 Subject: [PATCH] Update attachment masking Always start with the unmasked attachment body when applying the masks and censor rules. We're going to make changes which mean `FoiAttachment#body` will return masked body, fetch this with `default_body` could result in attachment with old or superseded censor rules still applied. Needed to force HTML attachment into CRLF line endings due to an issue with the mail gem which results in a different hexdigest after rebuilding the raw emails from a file on disk in the specs. Once https://github.com/mikel/mail/pull/1512 is merged we can revert the FoiAttachment factory change. --- app/jobs/foi_attachment_mask_job.rb | 2 +- app/models/foi_attachment.rb | 10 ++++++++ lib/mail_handler/backends/mail_backend.rb | 7 ++++++ .../attachments_controller_spec.rb | 1 + spec/factories/foi_attchments.rb | 9 ++++++- .../prominence/viewing_raw_attachment_spec.rb | 2 ++ spec/integration/view_request_spec.rb | 2 ++ spec/jobs/foi_attachment_mask_job_spec.rb | 2 ++ .../backends/mail_backend_spec.rb | 25 +++++++++++++++++++ spec/models/foi_attachment_spec.rb | 15 +++++++++++ spec/support/email_helpers.rb | 19 ++++++++++++++ 11 files changed, 92 insertions(+), 2 deletions(-) diff --git a/app/jobs/foi_attachment_mask_job.rb b/app/jobs/foi_attachment_mask_job.rb index 4bbd08e8408..bca6429bbea 100644 --- a/app/jobs/foi_attachment_mask_job.rb +++ b/app/jobs/foi_attachment_mask_job.rb @@ -18,7 +18,7 @@ def perform(attachment) @attachment = attachment body = AlaveteliTextMasker.apply_masks( - attachment.default_body, + attachment.unmasked_body, attachment.content_type, masks ) diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index e8ac31e8aef..f13367a7567 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -32,6 +32,7 @@ class FoiAttachment < ApplicationRecord belongs_to :incoming_message, inverse_of: :foi_attachments + has_one :raw_email, through: :incoming_message, source: :raw_email has_one_attached :file, service: :attachments @@ -123,6 +124,15 @@ def default_body text_type? ? body_as_text.string : body end + # return the body as it is in the raw email, unmasked without censor rules + # applied + def unmasked_body + MailHandler.attachment_body_for_hexdigest( + raw_email.mail, + hexdigest: hexdigest + ) + end + def main_body_part? self == incoming_message.get_main_body_text_part end diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 9354f9ff882..f20b99a8df3 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -380,6 +380,13 @@ def get_attachment_attributes(mail) end end + def attachment_body_for_hexdigest(mail, hexdigest:) + attributes = get_attachment_attributes(mail).find do |attrs| + attrs[:hexdigest] == hexdigest + end + attributes&.fetch(:body) + end + # Format def address_from_name_and_email(name, email) unless MySociety::Validate.is_valid_email(email) diff --git a/spec/controllers/attachments_controller_spec.rb b/spec/controllers/attachments_controller_spec.rb index 8cb5a6c3e48..6000a8652c0 100644 --- a/spec/controllers/attachments_controller_spec.rb +++ b/spec/controllers/attachments_controller_spec.rb @@ -41,6 +41,7 @@ def show(params = {}) file_name: attachment.display_filename } default_params[:id] = info_request.id unless params[:public_token] + rebuild_raw_emails(info_request) get :show, params: default_params.merge(params) end diff --git a/spec/factories/foi_attchments.rb b/spec/factories/foi_attchments.rb index 888928b8a38..5268c83eb1c 100644 --- a/spec/factories/foi_attchments.rb +++ b/spec/factories/foi_attchments.rb @@ -21,7 +21,14 @@ factory :html_attachment do content_type { 'text/html' } filename { 'interesting.html' } - body { load_file_fixture('interesting.html') } + body { + # Needed to force HTML attachment into CRLF line endings due to an issue + # with the mail gem which results in a different hexdigest after + # rebuilding the raw emails. + # Once https://github.com/mikel/mail/pull/1512 is merged we can revert + # the FoiAttachment factory change. + Mail::Utilities.to_crlf(load_file_fixture('interesting.html')) + } end factory :jpeg_attachment do content_type { 'image/jpeg' } diff --git a/spec/integration/prominence/viewing_raw_attachment_spec.rb b/spec/integration/prominence/viewing_raw_attachment_spec.rb index fef97f5f0a2..a432f31c45f 100644 --- a/spec/integration/prominence/viewing_raw_attachment_spec.rb +++ b/spec/integration/prominence/viewing_raw_attachment_spec.rb @@ -7,6 +7,8 @@ let(:within_session) do -> { + rebuild_raw_emails(info_request) + visit get_attachment_url( incoming_message_id: attachment.incoming_message_id, part: attachment.url_part_number, diff --git a/spec/integration/view_request_spec.rb b/spec/integration/view_request_spec.rb index 350f1db628d..931b1f2f223 100644 --- a/spec/integration/view_request_spec.rb +++ b/spec/integration/view_request_spec.rb @@ -41,6 +41,8 @@ info_request = FactoryBot.create(:info_request_with_incoming_attachments) incoming_message = info_request.incoming_messages.first attachment_url = "/es/request/#{info_request.id}/response/#{incoming_message.id}/attach/2/interesting.pdf" + rebuild_raw_emails(info_request) + using_session(non_owner) { visit(attachment_url) } expect(cache_directories_exist?(info_request)).to be true diff --git a/spec/jobs/foi_attachment_mask_job_spec.rb b/spec/jobs/foi_attachment_mask_job_spec.rb index ab144bc702f..b916acb11cd 100644 --- a/spec/jobs/foi_attachment_mask_job_spec.rb +++ b/spec/jobs/foi_attachment_mask_job_spec.rb @@ -6,6 +6,8 @@ let(:attachment) { incoming_message.foi_attachments.last } let(:body) { described_class.new.perform(attachment) } + before { rebuild_raw_emails(info_request) } + it 'sanitises HTML attachments' do # Nokogiri adds the meta tag; see # https://github.com/sparklemotion/nokogiri/issues/1008 diff --git a/spec/lib/mail_handler/backends/mail_backend_spec.rb b/spec/lib/mail_handler/backends/mail_backend_spec.rb index 9a161e061d8..97c0c125949 100644 --- a/spec/lib/mail_handler/backends/mail_backend_spec.rb +++ b/spec/lib/mail_handler/backends/mail_backend_spec.rb @@ -406,4 +406,29 @@ to eq(['aperson@domain.abc']) end end + + describe 'attachment_body_for_hexdigest' do + let(:mail) do + Mail.new do + add_file filename: 'file.txt', content: 'hereisthetext' + end + end + + context 'matching hexdigest' do + it 'returns the body of the attachment' do + body = attachment_body_for_hexdigest( + mail, hexdigest: Digest::MD5.hexdigest('hereisthetext') + ) + + expect(body).to eq('hereisthetext') + end + end + + context 'non-matching hexdigest' do + it 'returns nil' do + body = attachment_body_for_hexdigest(mail, hexdigest: 'incorrect') + expect(body).to be_nil + end + end + end end diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb index a9555a84dc5..c6b6bf3221d 100644 --- a/spec/models/foi_attachment_spec.rb +++ b/spec/models/foi_attachment_spec.rb @@ -144,6 +144,21 @@ end + describe '#unmasked_body' do + + it 'returns the attachment body from the raw email' do + foi_attachment = FactoryBot.build(:body_text) + + allow(foi_attachment).to receive(:raw_email). + and_return(double.as_null_object) + allow(MailHandler).to receive(:attachment_body_for_hexdigest). + and_return('hereistheunmaskedtext') + + expect(foi_attachment.unmasked_body).to eq('hereistheunmaskedtext') + end + + end + describe '#main_body_part?' do subject { attachment.main_body_part? } diff --git a/spec/support/email_helpers.rb b/spec/support/email_helpers.rb index 03dbed0e999..b723164876b 100644 --- a/spec/support/email_helpers.rb +++ b/spec/support/email_helpers.rb @@ -49,3 +49,22 @@ def gsub_addresses(content, **kargs) end content end + +def rebuild_raw_emails(info_request) + info_request.incoming_messages.each do |im| + mail = Mail.new + + mail.to = info_request.incoming_email + mail.from = "#{im.from_name} <#{im.from_email}>" + mail.subject = im.subject + mail.date = im.sent_at + mail.body = im.cached_main_body_text_unfolded + + im.foi_attachments.each do |a| + mail.add_file filename: a.filename, content: a.body + end + + im.raw_email.data = mail + im.raw_email.save! + end +end