From 59d016a9368edf18d5db9a124b3680203f245c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dafydd=20Ll=C5=B7r=20Pearson?= Date: Fri, 5 Jan 2024 09:41:23 +0000 Subject: [PATCH] fix: Unescape HTML entities for alternate DOMPurify check (#2630) * fix: Unescape quotations before DOMPurify check * fix: Handle nbsp * feat: Log unclean HTML for future troubleshooting * fix: Ensure original error is also logged --- api.planx.uk/helpers.ts | 2 +- .../service/validateInput/utils.test.ts | 1 + .../webhooks/service/validateInput/utils.ts | 31 ++++++++++++++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/api.planx.uk/helpers.ts b/api.planx.uk/helpers.ts index 6ee538e977..7e09603aa4 100644 --- a/api.planx.uk/helpers.ts +++ b/api.planx.uk/helpers.ts @@ -77,7 +77,7 @@ const insertFlow = async ( return { id }; } catch (error) { throw Error( - `User ${creatorId} failed to insert flow to teamId ${teamId}. Please check permissions.`, + `User ${creatorId} failed to insert flow to teamId ${teamId}. Please check permissions. Error: ${error}`, ); } }; diff --git a/api.planx.uk/modules/webhooks/service/validateInput/utils.test.ts b/api.planx.uk/modules/webhooks/service/validateInput/utils.test.ts index c3d58232ec..957a5b7e5c 100644 --- a/api.planx.uk/modules/webhooks/service/validateInput/utils.test.ts +++ b/api.planx.uk/modules/webhooks/service/validateInput/utils.test.ts @@ -104,6 +104,7 @@ describe("isCleanHTML() helper function", () => { `

Subheading

This is a paragraph under the subheading.

`, `

Main Title

This is a nested paragraph with a nested element.

`, `

Content with image

Dog photo
`, + `

Content with HTML entitie's<>"&

`, ]; for (const example of cleanHTML) { diff --git a/api.planx.uk/modules/webhooks/service/validateInput/utils.ts b/api.planx.uk/modules/webhooks/service/validateInput/utils.ts index 507c4d1ccb..ba597c7ad2 100644 --- a/api.planx.uk/modules/webhooks/service/validateInput/utils.ts +++ b/api.planx.uk/modules/webhooks/service/validateInput/utils.ts @@ -1,6 +1,7 @@ import { isObject } from "lodash"; import { JSDOM } from "jsdom"; import createDOMPurify from "dompurify"; +import { userContext } from "../../../auth/middleware"; // Setup JSDOM and DOMPurify const window = new JSDOM("").window; @@ -39,7 +40,35 @@ export const isCleanHTML = (input: unknown): boolean => { if (typeof input !== "string") return true; const cleanHTML = DOMPurify.sanitize(input, { ADD_ATTR: ["target"] }); + // DOMPurify has not removed any attributes or values - const isClean = cleanHTML.length === input.length; + const isClean = + cleanHTML.length === input.length || + unescapeHTML(cleanHTML).length === unescapeHTML(input).length; + + if (!isClean) logUncleanHTMLError(input, cleanHTML); + return isClean; }; + +/** + * Explicity log error when unsafe HTML is encountered + * This is very likely a content / sanitation error as opposed to a security issue + * Logging this should help us identify and resolve these + */ +const logUncleanHTMLError = (input: string, cleanHTML: string) => { + const userId = userContext.getStore()?.user.sub; + + console.error({ + message: `Warning: Unclean HTML submitted!`, + userId, + input, + cleanHTML, + }); +}; + +const unescapeHTML = (input: string): string => + input + .replace(/"/gi, '"') + .replace(/'/gi, "'") + .replace(/ /gi, " ");