From ab2f9be0f8e70ab5c7f7784a72e269ded83cb2f0 Mon Sep 17 00:00:00 2001
From: gentlementlegen <fernand.veyrier@epitech.eu>
Date: Tue, 19 Nov 2024 20:16:54 +0900
Subject: [PATCH] fix(parser): remove footnotes and improve URL extraction

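Added a regex to remove footnotes in data-purge-module.

In formatting-evaluator-module, plain-text http(s) URLs found in an element's
text content are now added to the URL set (with any "#fragment" stripped), and
each unique URL is counted under the "a" tag. The leftover console.log debug
output is removed.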
---
 src/parser/data-purge-module.ts           | 2 ++
 src/parser/formatting-evaluator-module.ts | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/parser/data-purge-module.ts b/src/parser/data-purge-module.ts
index 15a41b3b..81bac909 100644
--- a/src/parser/data-purge-module.ts
+++ b/src/parser/data-purge-module.ts
@@ -33,6 +33,8 @@ export class DataPurgeModule extends BaseModule {
           .replace(/^\/.+/g, "")
           // Remove HTML comments
           .replace(/<!--[\s\S]*?-->/g, "")
+          // Remove the footnotes
+          .replace(/^###### .*?\[\^\d+\^][\s\S]*$/gm, "")
           // Keep only one new line needed by markdown-it package to convert to html
           .replace(/\n\s*\n/g, "\n")
           .trim();
diff --git a/src/parser/formatting-evaluator-module.ts b/src/parser/formatting-evaluator-module.ts
index 72a5ed62..f6c4cea9 100644
--- a/src/parser/formatting-evaluator-module.ts
+++ b/src/parser/formatting-evaluator-module.ts
@@ -175,11 +175,18 @@ export class FormattingEvaluatorModule extends BaseModule {
           urlSet.add(url.split("#")[0]);
         }
       } else {
+        const bodyContent = element.textContent;
+        const urlPattern = /https?:\/\/\S+/g;
+        const matches = bodyContent?.match(urlPattern);
+        matches?.map((url) => url.split("#")[0]).forEach((url) => urlSet.add(url));
         this._updateTagCount(formatting, tagName, score);
       }
     }
-    console.log(urlSet);
+    urlSet.forEach(() => {
+      this._updateTagCount(formatting, "a", this._multipliers[commentType].html["a"].score ?? 0);
+    });
     const words = this._countWordsFromRegex(htmlElement.textContent ?? "", this._multipliers[commentType]?.wordValue);
+
     return { formatting, words };
   }