Skip to content

Commit

Permalink
feat: urls in css now replaced by the base64 content (#215)
Browse files Browse the repository at this point in the history
* feat: urls in css now replaced by the base64 content

Refs: SchweizerischeBundesbahnen/weasyprint-service#61
  • Loading branch information
Jumas authored Sep 23, 2024
1 parent fe7c7ea commit 8bc75d6
Show file tree
Hide file tree
Showing 26 changed files with 537 additions and 195 deletions.
8 changes: 8 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
<exec-maven-plugin.version>3.3.0</exec-maven-plugin.version>
<weasyprint.version>62.3</weasyprint.version>
<awaitility.version>4.2.2</awaitility.version>
<tika.version>1.28.5</tika.version>

<maven-jar-plugin.Extension-Context>pdf-exporter</maven-jar-plugin.Extension-Context>
<maven-jar-plugin.Automatic-Module-Name>ch.sbb.polarion.extension.pdf_exporter</maven-jar-plugin.Automatic-Module-Name>
Expand Down Expand Up @@ -94,6 +95,13 @@
<scope>provided</scope>
</dependency>

<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>${tika.version}</version>
<scope>provided</scope>
</dependency>

<!-- 3rd-party libraries to be packaged into resulting jar -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ String preprocessHtml(String origHtml, Orientation orientation, PaperSize paperS
} else {
html = replaceTagContent(origHtml, "head", head);
}
html = htmlProcessor.replaceImagesAsBase64Encoded(html);
html = htmlProcessor.replaceResourcesAsBase64Encoded(html);
html = htmlProcessor.internalizeLinks(html);

return html;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ String getCssContent(
String processed = velocityEvaluator.evaluateVelocityExpressions(documentData, content);

String cssContent = (exportParams.getDocumentType() != DocumentType.LIVE_DOC) ? appendWikiCss(processed) : processed;
return htmlProcessor.replaceImagesAsBase64Encoded(cssContent);
return htmlProcessor.replaceResourcesAsBase64Encoded(cssContent);
}

@VisibleForTesting
Expand All @@ -310,7 +310,7 @@ String getHeaderFooterContent(
.toList();

String headerFooterContent = String.format(ScopeUtils.getFileContent("webapp/pdf-exporter/html/headerAndFooter.html"), nonNullHeaderFooterContents.toArray());
return htmlProcessor.replaceImagesAsBase64Encoded(headerFooterContent);
return htmlProcessor.replaceResourcesAsBase64Encoded(headerFooterContent);
}

private String appendWikiCss(String css) {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package ch.sbb.polarion.extension.pdf_exporter.util;

import com.polarion.alm.tracker.internal.url.IUrlResolver;
import com.polarion.core.util.logging.Logger;
import lombok.SneakyThrows;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.VisibleForTesting;

import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import static java.net.HttpURLConnection.*;

/**
 * Custom version of {@link com.polarion.alm.tracker.internal.url.GenericUrlResolver} with redirect support.
 * <p>
 * Resolves {@code http://} and {@code https://} URLs to an {@link InputStream} of the response body.
 * Responses with status 301 or 302 are followed manually via their {@code Location} header, up to
 * {@link #MAX_REDIRECTS} hops, so that a redirect loop cannot recurse without bound.
 * Any failure is logged as a warning and reported to the caller as {@code null}.
 */
public class CustomResourceUrlResolver implements IUrlResolver {

    private static final int CONNECTION_TIMEOUT_MS = 3_000;
    private static final int READ_TIMEOUT_MS = 3_000;

    /** Maximum number of 301/302 redirects followed manually for a single resource. */
    private static final int MAX_REDIRECTS = 5;

    /**
     * Tells whether this resolver handles the given URL.
     *
     * @param url URL string to check
     * @return {@code true} if the URL starts with {@code http://} or {@code https://}
     */
    public boolean canResolve(@NotNull String url) {
        return url.startsWith("http://") || url.startsWith("https://");
    }

    /**
     * Opens the resource behind the given URL.
     *
     * @param urlStr URL string; it is normalized (spaces and encoded underscores) before use
     * @return stream of the response body, or {@code null} if the resource could not be loaded;
     *         the caller is responsible for closing the returned stream
     */
    public InputStream resolve(@NotNull String urlStr) {
        try {
            URL url = new URL(normalizeUrl(urlStr));
            return resolveImpl(url);
        } catch (Exception e) {
            Logger.getLogger(this).warn("Failed to load resource: " + urlStr, e);
        }
        return null;
    }

    /**
     * Performs the HTTP request for the given URL, following at most {@link #MAX_REDIRECTS} redirects.
     *
     * @param url URL to request
     * @return stream of the response body on HTTP 200, otherwise {@code null}
     */
    @SneakyThrows
    @VisibleForTesting
    public InputStream resolveImpl(@NotNull URL url) {
        return fetch(url, MAX_REDIRECTS);
    }

    /**
     * Requests the URL once and, on a 301/302 answer, continues with the redirect target.
     * The connection is disconnected unless its body stream is handed over to the caller.
     */
    @SneakyThrows
    private InputStream fetch(URL url, int redirectsLeft) {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        String redirectTarget = null;
        boolean streamHandedOver = false;
        try {
            connection.setConnectTimeout(CONNECTION_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            connection.connect();
            int responseCode = connection.getResponseCode();
            if (responseCode == HTTP_OK) {
                InputStream body = connection.getInputStream();
                streamHandedOver = true; // the caller now owns the stream and, through it, the connection
                return body;
            }
            if (responseCode == HTTP_MOVED_PERM || responseCode == HTTP_MOVED_TEMP) {
                redirectTarget = connection.getHeaderField("Location");
            }
        } finally {
            if (!streamHandedOver) {
                // Nothing will be read from this connection anymore: release it before any further request.
                connection.disconnect();
            }
        }
        if (redirectTarget != null && canResolve(redirectTarget)) {
            return followRedirect(redirectTarget, redirectsLeft);
        }
        return null;
    }

    /**
     * Resolves a redirect target while consuming one unit of the redirect budget.
     * Failures, including an exhausted budget, are logged and reported as {@code null}.
     */
    private InputStream followRedirect(String location, int redirectsLeft) {
        try {
            if (redirectsLeft <= 0) {
                throw new IllegalStateException("Too many redirects (limit is " + MAX_REDIRECTS + ")");
            }
            return fetch(new URL(normalizeUrl(location)), redirectsLeft - 1);
        } catch (Exception e) {
            Logger.getLogger(this).warn("Failed to load resource: " + location, e);
        }
        return null;
    }

    /** Encodes spaces as {@code %20} and decodes {@code %5F} back to an underscore. */
    private String normalizeUrl(String urlStr) {
        return urlStr.replace(" ", "%20").replace("%5F", "_");
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
package ch.sbb.polarion.extension.pdf_exporter.util;

import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/**
 * Provides access to the content of resources referenced by their location string.
 */
public interface FileResourceProvider {

    /**
     * Returns the content of the given resource in Base64 form.
     * NOTE(review): the exact format of the returned string (raw Base64 vs. a complete data URI)
     * is not visible from this interface - confirm against the implementation.
     *
     * @param resource location of the resource, never {@code null}
     * @return Base64 representation of the resource, or {@code null} (the method is declared nullable)
     */
    @Nullable
    String getResourceAsBase64String(@NotNull String resource);

    /**
     * Returns the raw content of the given resource.
     *
     * @param resource location of the resource, never {@code null}
     * @return bytes of the resource; callers should be prepared for a {@code null} or empty result
     *         when the resource cannot be resolved
     */
    byte[] getResourceAsBytes(@NotNull String resource);

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,7 @@
import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.VisibleForTesting;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
Expand Down Expand Up @@ -192,8 +186,8 @@ public String processHtmlForPDF(@NotNull String html, @NotNull ExportParams expo
yield removeFloatLeftFromReports(processingHtml);
}
};
html = replaceImagesAsBase64Encoded(html);
html = removeSvgUnsupportedFeatureHint(html); //note that there is one more replacement attempt before replacing images with base64 representation
html = replaceResourcesAsBase64Encoded(html);
html = MediaUtils.removeSvgUnsupportedFeatureHint(html); //note that there is one more replacement attempt before replacing images with base64 representation
html = properTableHeads(html);
html = cleanExtraTableContent(html);
html = switch (exportParams.getDocumentType()) {
Expand Down Expand Up @@ -980,58 +974,14 @@ private int findTableEnd(String html, int tableStart) {

@SneakyThrows
@SuppressWarnings({"java:S5852", "java:S5857"}) //need by design
public String replaceImagesAsBase64Encoded(String html) {
StringBuilder result = new StringBuilder();

//Retrieves data of 'src' attribute from 'img' tags
IRegexEngine imageRegexEngine = RegexMatcher.get("<img[^<>]*src=(\"|')(?<url>[^(\"|')]*)(\"|')").createEngine(html);
while (imageRegexEngine.find()) {
String url = imageRegexEngine.group("url");
if (url.startsWith("data:")) {
continue;
}
byte[] imgBytes = fileResourceProvider.getResourceAsBytes(url.replace("%5F", "_")); // Replace encoded underscore symbol in 'src' attribute of images
if (imgBytes != null && imgBytes.length != 0) { // Don't make any manipulations if image wasn't resolved
try (InputStream is = new BufferedInputStream(new ByteArrayInputStream(imgBytes))) {
String mimeType = URLConnection.guessContentTypeFromStream(is);

// looks like sometimes mime type for svg isn't recognized
if (url.contains(".svg") && (mimeType == null || mimeType.equals(MIME_TYPE_SVG))) {
imgBytes = processPossibleSvgImage(imgBytes);
}

String encodedImage = String.format("data:%s;base64, %s", mimeType, Base64.getEncoder().encodeToString(imgBytes));

imageRegexEngine.appendReplacement(result, imageRegexEngine.group().replace(url, encodedImage));
}
}
}
imageRegexEngine.appendTail(result);

return result.toString();
public String replaceResourcesAsBase64Encoded(String html) {
return MediaUtils.inlineBase64Resources(html, fileResourceProvider);
}

/**
 * Passes the given HTML through {@code httpLinksHelper.internalizeLinks} and returns its result.
 *
 * @param html HTML to process
 * @return HTML as returned by the links helper
 */
public String internalizeLinks(String html) {
    final String internalized = httpLinksHelper.internalizeLinks(html);
    return internalized;
}

/**
 * Treats the given bytes as UTF-8 text, strips the SVG "unsupported feature" hint from it and
 * returns the re-encoded result. If anything goes wrong the input is returned untouched.
 *
 * @param possibleSvgImageBytes bytes of an image that may be an SVG document
 * @return UTF-8 bytes of the cleaned content, or the original array when processing failed
 */
@VisibleForTesting
@SuppressWarnings("squid:S1166") // no need to log or rethrow exception by design
public byte[] processPossibleSvgImage(byte[] possibleSvgImageBytes) {
    byte[] outcome = possibleSvgImageBytes;
    try {
        String cleaned = removeSvgUnsupportedFeatureHint(new String(possibleSvgImageBytes, StandardCharsets.UTF_8));
        outcome = cleaned.getBytes(StandardCharsets.UTF_8);
    } catch (Exception e) {
        // content could not be processed as text: fall back to the original bytes
    }
    return outcome;
}

/**
 * Removes every {@code <switch>...</switch>} section that starts with a self-closed
 * {@code <g requiredFeatures="..."/>} element from the given markup.
 *
 * @param html markup to clean
 * @return markup without the matching {@code <switch>} sections
 */
@VisibleForTesting
public String removeSvgUnsupportedFeatureHint(String html) {
    final String unsupportedFeatureSwitch = "(?s)<switch>[^<]*?<g requiredFeatures=\"[^\"]+?\"/>.*?</switch>";
    return java.util.regex.Pattern.compile(unsupportedFeatureSwitch).matcher(html).replaceAll("");
}

/**
 * Checks whether the given HTML contains {@code PAGE_BREAK_MARK}.
 *
 * @param html HTML to inspect
 * @return {@code true} if the mark occurs at least once
 */
private boolean hasCustomPageBreaks(String html) {
    final boolean markPresent = html.contains(PAGE_BREAK_MARK);
    return markPresent;
}
Expand Down
Loading

0 comments on commit 8bc75d6

Please sign in to comment.