diff --git a/app/jobs/foi_attachment_mask_job.rb b/app/jobs/foi_attachment_mask_job.rb index 4bbd08e8408..bca6429bbea 100644 --- a/app/jobs/foi_attachment_mask_job.rb +++ b/app/jobs/foi_attachment_mask_job.rb @@ -18,7 +18,7 @@ def perform(attachment) @attachment = attachment body = AlaveteliTextMasker.apply_masks( - attachment.default_body, + attachment.unmasked_body, attachment.content_type, masks ) diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index e8ac31e8aef..f13367a7567 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -32,6 +32,7 @@ class FoiAttachment < ApplicationRecord belongs_to :incoming_message, inverse_of: :foi_attachments + has_one :raw_email, through: :incoming_message, source: :raw_email has_one_attached :file, service: :attachments @@ -123,6 +124,15 @@ def default_body text_type? ? body_as_text.string : body end + # return the body as it is in the raw email, unmasked without censor rules + # applied + def unmasked_body + MailHandler.attachment_body_for_hexdigest( + raw_email.mail, + hexdigest: hexdigest + ) + end + def main_body_part? self == incoming_message.get_main_body_text_part end diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 9354f9ff882..f20b99a8df3 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -380,6 +380,13 @@ def get_attachment_attributes(mail) end end + def attachment_body_for_hexdigest(mail, hexdigest:) + attributes = get_attachment_attributes(mail).find do |attrs| + attrs[:hexdigest] == hexdigest + end + attributes&.fetch(:body) + end + # Format def address_from_name_and_email(name, email) unless MySociety::Validate.is_valid_email(email) diff --git a/spec/controllers/attachments_controller_spec.rb b/spec/controllers/attachments_controller_spec.rb index 8cb5a6c3e48..6000a8652c0 100644 --- a/spec/controllers/attachments_controller_spec.rb +++ b/spec/controllers/attachments_controller_spec.rb @@ -41,6 +41,7 @@ def show(params = {}) file_name: attachment.display_filename } default_params[:id] = info_request.id unless params[:public_token] + rebuild_raw_emails(info_request) get :show, params: default_params.merge(params) end diff --git a/spec/factories/foi_attchments.rb b/spec/factories/foi_attchments.rb index 888928b8a38..5268c83eb1c 100644 --- a/spec/factories/foi_attchments.rb +++ b/spec/factories/foi_attchments.rb @@ -21,7 +21,14 @@ factory :html_attachment do content_type { 'text/html' } filename { 'interesting.html' } - body { load_file_fixture('interesting.html') } + body { + # Needed to force HTML attachment into CRLF line endings due to an issue + # with the mail gem which results in a different hexdigest after + # rebuilding the raw emails. + # Once https://github.com/mikel/mail/pull/1512 is merged we can revert + # the FoiAttachment factory change. + Mail::Utilities.to_crlf(load_file_fixture('interesting.html')) + } end factory :jpeg_attachment do content_type { 'image/jpeg' } diff --git a/spec/integration/prominence/viewing_raw_attachment_spec.rb b/spec/integration/prominence/viewing_raw_attachment_spec.rb index fef97f5f0a2..a432f31c45f 100644 --- a/spec/integration/prominence/viewing_raw_attachment_spec.rb +++ b/spec/integration/prominence/viewing_raw_attachment_spec.rb @@ -7,6 +7,8 @@ let(:within_session) do -> { + rebuild_raw_emails(info_request) + visit get_attachment_url( incoming_message_id: attachment.incoming_message_id, part: attachment.url_part_number, diff --git a/spec/integration/view_request_spec.rb b/spec/integration/view_request_spec.rb index 350f1db628d..931b1f2f223 100644 --- a/spec/integration/view_request_spec.rb +++ b/spec/integration/view_request_spec.rb @@ -41,6 +41,8 @@ info_request = FactoryBot.create(:info_request_with_incoming_attachments) incoming_message = info_request.incoming_messages.first attachment_url = "/es/request/#{info_request.id}/response/#{incoming_message.id}/attach/2/interesting.pdf" + rebuild_raw_emails(info_request) + using_session(non_owner) { visit(attachment_url) } expect(cache_directories_exist?(info_request)).to be true diff --git a/spec/jobs/foi_attachment_mask_job_spec.rb b/spec/jobs/foi_attachment_mask_job_spec.rb index ab144bc702f..b916acb11cd 100644 --- a/spec/jobs/foi_attachment_mask_job_spec.rb +++ b/spec/jobs/foi_attachment_mask_job_spec.rb @@ -6,6 +6,8 @@ let(:attachment) { incoming_message.foi_attachments.last } let(:body) { described_class.new.perform(attachment) } + before { rebuild_raw_emails(info_request) } + it 'sanitises HTML attachments' do # Nokogiri adds the meta tag; see # https://github.com/sparklemotion/nokogiri/issues/1008 diff --git a/spec/lib/mail_handler/backends/mail_backend_spec.rb b/spec/lib/mail_handler/backends/mail_backend_spec.rb index 9a161e061d8..97c0c125949 100644 --- a/spec/lib/mail_handler/backends/mail_backend_spec.rb +++ b/spec/lib/mail_handler/backends/mail_backend_spec.rb @@ -406,4 +406,29 @@ to eq(['aperson@domain.abc']) end end + + describe 'attachment_body_for_hexdigest' do + let(:mail) do + Mail.new do + add_file filename: 'file.txt', content: 'hereisthetext' + end + end + + context 'matching hexdigest' do + it 'returns the body of the attachment' do + body = attachment_body_for_hexdigest( + mail, hexdigest: Digest::MD5.hexdigest('hereisthetext') + ) + + expect(body).to eq('hereisthetext') + end + end + + context 'non-matching hexdigest' do + it 'returns nil' do + body = attachment_body_for_hexdigest(mail, hexdigest: 'incorrect') + expect(body).to be_nil + end + end + end end diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb index a9555a84dc5..c6b6bf3221d 100644 --- a/spec/models/foi_attachment_spec.rb +++ b/spec/models/foi_attachment_spec.rb @@ -144,6 +144,21 @@ end + describe '#unmasked_body' do + + it 'returns the attachment body from the raw email' do + foi_attachment = FactoryBot.build(:body_text) + + allow(foi_attachment).to receive(:raw_email). + and_return(double.as_null_object) + allow(MailHandler).to receive(:attachment_body_for_hexdigest). + and_return('hereistheunmaskedtext') + + expect(foi_attachment.unmasked_body).to eq('hereistheunmaskedtext') + end + + end + describe '#main_body_part?' do subject { attachment.main_body_part? } diff --git a/spec/support/email_helpers.rb b/spec/support/email_helpers.rb index 03dbed0e999..b723164876b 100644 --- a/spec/support/email_helpers.rb +++ b/spec/support/email_helpers.rb @@ -49,3 +49,22 @@ def gsub_addresses(content, **kargs) end content end + +def rebuild_raw_emails(info_request) + info_request.incoming_messages.each do |im| + mail = Mail.new + + mail.to = info_request.incoming_email + mail.from = "#{im.from_name} <#{im.from_email}>" + mail.subject = im.subject + mail.date = im.sent_at + mail.body = im.cached_main_body_text_unfolded + + im.foi_attachments.each do |a| + mail.add_file filename: a.filename, content: a.body + end + + im.raw_email.data = mail + im.raw_email.save! + end +end