This repository has been archived by the owner on Apr 27, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 546
/
miniProxy.php
602 lines (531 loc) · 25.9 KB
/
miniProxy.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
<?php
/*
NOTE: miniProxy IS NO LONGER MAINTAINED AS OF APRIL 26th, 2020.
IF YOU USE IT, YOU DO SO ENTIRELY AT YOUR OWN RISK.
More information is available at <https://github.com/joshdick/miniProxy>.
*/
/*
miniProxy - A simple PHP web proxy. <https://github.com/joshdick/miniProxy>
Written and maintained by Joshua Dick <http://joshdick.net>.
miniProxy is licensed under the GNU GPL v3 <https://www.gnu.org/licenses/gpl-3.0.html>.
*/
/****************************** START CONFIGURATION ******************************/
//NOTE: If a given URL matches a pattern in both $whitelistPatterns and $blacklistPatterns,
//that URL will be treated as blacklisted.
//To allow proxying any URL, set $whitelistPatterns to an empty array (the default).
//To only allow proxying of specific URLs (whitelist), add corresponding regular expressions
//to the $whitelistPatterns array. To prevent possible abuse, enter the narrowest/most-specific patterns possible.
//Every entry is handed to preg_match() unchanged, so it must be a complete pattern, delimiters included.
//You can optionally use the "getHostnamePattern()" helper function to build a regular expression that
//matches all URLs for a given hostname.
$whitelistPatterns = [
//Usage example: To whitelist any URL at example.net, including sub-domains, uncomment the
//line below (the helper builds a case-insensitive pattern for http[s] URLs at that hostname):
//getHostnamePattern("example.net")
];
//To disallow proxying of specific URLs (blacklist), add corresponding regular expressions
//to the $blacklistPatterns array. To prevent possible abuse, enter the broadest/least-specific patterns possible.
//Every entry is handed to preg_match() unchanged, so it must be a complete pattern, delimiters included.
//You can optionally use the "getHostnamePattern()" helper function to build a regular expression that
//matches all URLs for a given hostname.
$blacklistPatterns = [
//Usage example: To blacklist any URL at example.net, including sub-domains, uncomment the
//line below (the helper builds a case-insensitive pattern for http[s] URLs at that hostname):
//getHostnamePattern("example.net")
];
//To enable CORS (cross-origin resource sharing) for proxied sites, set $forceCORS to true.
//When enabled, miniProxy sends its own Access-Control-* response headers after the proxied ones.
$forceCORS = false;
//Set to false to allow sites on the local network (where miniProxy is running) to be proxied.
//While true, URLs whose host is, or resolves to, a private or reserved IP address are rejected.
$disallowLocal = true;
//Set to false to report the client machine's IP address to proxied sites via the HTTP `x-forwarded-for` header.
//Setting to false may improve compatibility with some sites, but also exposes more information about end users to proxied sites.
$anonymize = true;
//Start/default URL that will be proxied when miniProxy is first loaded in a browser/accessed directly with no URL to proxy.
//If empty, miniProxy will show its own landing page.
$startURL = "";
//When no $startURL is configured above, miniProxy will show its own landing page with a URL form field
//and the configured example URL. The example URL appears in the instructional text on the miniProxy landing page,
//and is proxied when pressing the 'Proxy It!' button on the landing page if its URL form is left blank.
$landingExampleURL = "https://example.net";
/****************************** END CONFIGURATION ******************************/
//Buffer all output so that response headers can still be sent after content has been generated.
//The "ob_gzhandler" callback compresses the buffered output for clients that accept it.
ob_start("ob_gzhandler");

//Refuse to run on PHP versions older than the minimum miniProxy supports.
if (version_compare(PHP_VERSION, "5.4.7", "<")) {
    die("miniProxy requires PHP version 5.4.7 or later.");
}

//Refuse to run unless every PHP extension used by this script is loaded.
//"dom" is listed explicitly because the DOMDocument/DOMXPath classes used to rewrite HTML
//live in that extension; without it the script would otherwise fail with a fatal error
//only once an HTML page is proxied.
$requiredExtensions = ["curl", "dom", "mbstring", "xml"];
foreach ($requiredExtensions as $requiredExtension) {
    if (!extension_loaded($requiredExtension)) {
        die("miniProxy requires PHP's \"" . $requiredExtension . "\" extension. Please install/enable it on your server and try again.");
    }
}
//Helper function for use inside $whitelistPatterns/$blacklistPatterns.
//Returns a case-insensitive regex that matches all HTTP[S] URLs for a given hostname,
//including any of its sub-domains.
//
//$hostname: plain hostname such as "example.net" (not a pattern; it is escaped here).
//Returns: a complete PCRE pattern string using "@" as its delimiter.
function getHostnamePattern($hostname) {
    //preg_quote() escapes every regex metacharacter (not just dots) as well as the "@" delimiter.
    $escapedHostname = preg_quote($hostname, "@");
    //The hostname must be followed by an optional numeric port and then by the end of the URL or
    //the start of the path/query/fragment. Without this boundary, "example.net" would also match
    //look-alike URLs such as "http://example.net.evil.com/" or "http://example.net@evil.com/".
    return '@^https?://([a-z0-9-]+\.)*' . $escapedHostname . '(?::\d+)?(?:[/?#]|\z)@i';
}
//Validates a URL against the whitelist.
//Returns true when $whitelistPatterns is empty or when at least one of its patterns matches.
function passesWhitelist($url) {
    $patterns = $GLOBALS['whitelistPatterns'];
    if (count($patterns) === 0) return true;
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $url)) {
            return true;
        }
    }
    return false;
}

//Validates a URL against the blacklist.
//Returns false as soon as any pattern in $blacklistPatterns matches, true otherwise.
function passesBlacklist($url) {
    foreach ($GLOBALS['blacklistPatterns'] as $pattern) {
        if (preg_match($pattern, $url)) {
            return false;
        }
    }
    return true;
}

//Determines whether a URL points at a private or reserved ("local") IP address.
//Returns true when the URL has no usable host, or when the host is, or resolves to,
//at least one address in a private or reserved range.
function isLocal($url) {
    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === "") {
        //Without a host there is nothing to verify, so fail closed.
        return true;
    }
    //parse_url() keeps the square brackets around IPv6 literals ("[::1]"), which
    //FILTER_VALIDATE_IP does not accept; strip them so such hosts are checked as IPs.
    $host = trim($host, "[]");

    //First, generate a list of IP addresses that correspond to the requested URL.
    $ips = [];
    if (filter_var($host, FILTER_VALIDATE_IP)) {
        //The supplied host is already a valid IP address.
        $ips[] = $host;
    } else {
        //The host is not a valid IP address; attempt to resolve it to one.
        //dns_get_record() returns false on failure, so only iterate over real results.
        $dnsResult = dns_get_record($host, DNS_A | DNS_AAAA);
        if (is_array($dnsResult)) {
            foreach ($dnsResult as $dnsRecord) {
                if (isset($dnsRecord['ip'])) {
                    $ips[] = $dnsRecord['ip'];
                } else if (isset($dnsRecord['ipv6'])) {
                    $ips[] = $dnsRecord['ipv6'];
                }
            }
        }
        //Also ask the system resolver, which may know names that a plain DNS query
        //does not return (for example entries from the hosts file such as "localhost").
        $resolverResult = gethostbynamel($host);
        if (is_array($resolverResult)) {
            $ips = array_merge($ips, $resolverResult);
        }
    }

    //Determine whether any of the IPs are in the private or reserved range.
    foreach ($ips as $ip) {
        if (!filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
            return true;
        }
    }
    return false;
}

//Helper function that determines whether to allow proxying of a given URL.
//A URL is allowed when it passes the whitelist, passes the blacklist and, if the
//$disallowLocal setting is enabled, does not point at a local address.
//The three checks are ordinary top-level functions (rather than functions declared
//inside this one) so that isValidURL() can safely be called more than once.
function isValidURL($url) {
    return passesWhitelist($url) && passesBlacklist($url) && ($GLOBALS['disallowLocal'] ? !isLocal($url) : true);
}
//Removes entries from an associative array, matching the given key names case-insensitively.
//
//$assoc:       array to modify in place (passed by reference).
//$keys2remove: list of key names to remove; their letter case is irrelevant.
//Returns: the removed keys, spelled as they appeared in $assoc, in the order
//         in which they were requested.
function removeKeys(&$assoc, $keys2remove) {
    //Index the array's real keys by their lower-cased spelling.
    $originalByLower = [];
    foreach (array_keys($assoc) as $originalKey) {
        $originalByLower[strtolower($originalKey)] = $originalKey;
    }

    $removedKeys = [];
    foreach ($keys2remove as $requestedKey) {
        $lookup = strtolower($requestedKey);
        if (!isset($originalByLower[$lookup])) {
            continue;
        }
        $originalKey = $originalByLower[$lookup];
        unset($assoc[$originalKey]);
        $removedKeys[] = $originalKey;
    }
    return $removedKeys;
}
if (!function_exists("getallheaders")) {
    //Fallback for server APIs that do not provide getallheaders().
    //Adapted from http://www.php.net/manual/en/function.getallheaders.php#99814
    //Rebuilds the request headers from $_SERVER and returns them as an
    //associative array of "Header-Name" => value.
    function getallheaders() {
        $result = [];
        foreach ($_SERVER as $key => $value) {
            if (substr($key, 0, 5) == "HTTP_") {
                //"HTTP_ACCEPT_LANGUAGE" becomes "Accept-Language".
                $key = str_replace(" ", "-", ucwords(strtolower(str_replace("_", " ", substr($key, 5)))));
                $result[$key] = $value;
            }
        }
        //The request's Content-Type and Content-Length are stored in $_SERVER without the
        //"HTTP_" prefix, so the loop above misses them; add them when they carry a value.
        $unprefixed = ["CONTENT_TYPE" => "Content-Type", "CONTENT_LENGTH" => "Content-Length"];
        foreach ($unprefixed as $serverKey => $headerName) {
            if (!isset($result[$headerName]) && isset($_SERVER[$serverKey]) && $_SERVER[$serverKey] !== "") {
                $result[$headerName] = $_SERVER[$serverKey];
            }
        }
        return $result;
    }
}
//Builds the URL prefix under which this script is reachable, ending in "?", so that a
//target URL can simply be appended to it.
//
//$server: array shaped like $_SERVER; the HTTPS, SERVER_PORT, HTTP_HOST and SCRIPT_NAME entries are read.
//Returns: for example "https://proxy.example/miniProxy.php?".
function miniProxyBuildPrefix($server) {
    //Some servers (e.g. IIS) set HTTPS to "off" for plain HTTP requests, so the mere
    //presence of the key is not enough to detect HTTPS.
    $isHttps = isset($server["HTTPS"]) && $server["HTTPS"] !== "" && strtolower($server["HTTPS"]) !== "off";

    //SERVER_PORT is provided as a string; compare it numerically so that the default
    //ports are actually recognised and left out of the prefix.
    $port = isset($server["SERVER_PORT"]) ? (int) $server["SERVER_PORT"] : 0;
    $usingDefaultPort = $port === 0 || (!$isHttps && $port === 80) || ($isHttps && $port === 443);
    $prefixPort = $usingDefaultPort ? "" : ":" . $port;

    //Use HTTP_HOST to support client-configured DNS (instead of SERVER_NAME), but remove the port if one is present.
    //Only a trailing ":digits" is removed, which keeps bracketed IPv6 hosts such as "[::1]" intact.
    $prefixHost = isset($server["HTTP_HOST"]) ? $server["HTTP_HOST"] : "";
    $prefixHost = preg_replace('/:\d+$/', "", $prefixHost);

    $scriptName = isset($server["SCRIPT_NAME"]) ? $server["SCRIPT_NAME"] : "";
    return "http" . ($isHttps ? "s" : "") . "://" . $prefixHost . $prefixPort . $scriptName . "?";
}

define("PROXY_PREFIX", miniProxyBuildPrefix($_SERVER));
//Makes an HTTP request via cURL, using request data that was passed directly to this script.
//
//$url: absolute http[s] URL to request.
//Returns: an array with three entries:
//  "headers"      => raw response headers as a string (one block per request cURL made),
//  "body"         => response body as a string (empty if the request failed),
//  "responseInfo" => the array returned by curl_getinfo() for the transfer.
function makeRequest($url) {
    global $anonymize;

    //Tell cURL to make the request using the browser's user-agent if there is one, or a fallback user-agent otherwise.
    $user_agent = isset($_SERVER["HTTP_USER_AGENT"]) ? $_SERVER["HTTP_USER_AGENT"] : "";
    if (empty($user_agent)) {
        $user_agent = "Mozilla/5.0 (compatible; miniProxy)";
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);

    //miniProxy only supports http[s]; make sure neither the initial request nor any
    //redirect followed by cURL can switch to another protocol.
    if (defined("CURLOPT_PROTOCOLS") && defined("CURLOPT_REDIR_PROTOCOLS")) {
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    }

    //Get ready to proxy the browser's request headers...
    $browserRequestHeaders = getallheaders();

    //...but let cURL set some headers on its own.
    $removedHeaders = removeKeys(
        $browserRequestHeaders,
        [
            "Accept-Encoding", //Throw away the browser's Accept-Encoding header if any and let cURL make the request using gzip if possible.
            "Content-Length",
            "Host",
            "Origin"
        ]
    );
    $removedHeaders = array_map("strtolower", $removedHeaders);

    //An empty string makes cURL advertise, and transparently decode, every encoding it supports.
    curl_setopt($ch, CURLOPT_ENCODING, "");

    //Transform the associative array from getallheaders() into an
    //indexed array of header strings to be passed to cURL.
    $curlRequestHeaders = [];
    foreach ($browserRequestHeaders as $name => $value) {
        $curlRequestHeaders[] = $name . ": " . $value;
    }
    if (!$anonymize && isset($_SERVER["REMOTE_ADDR"])) {
        $curlRequestHeaders[] = "X-Forwarded-For: " . $_SERVER["REMOTE_ADDR"];
    }

    //Any `origin` header sent by the browser will refer to the proxy itself.
    //If an `origin` header is present in the request, rewrite it to point to the correct origin.
    if (in_array("origin", $removedHeaders, true)) {
        $urlParts = parse_url($url);
        if (is_array($urlParts) && isset($urlParts["scheme"], $urlParts["host"])) {
            //The port is optional in a URL; only append it when one was given.
            $port = isset($urlParts["port"]) ? $urlParts["port"] : "";
            $curlRequestHeaders[] = "Origin: " . $urlParts["scheme"] . "://" . $urlParts["host"] . (empty($port) ? "" : ":" . $port);
        }
    }
    curl_setopt($ch, CURLOPT_HTTPHEADER, $curlRequestHeaders);

    //Proxy any received GET/POST/PUT data.
    $putStream = null;
    switch ($_SERVER["REQUEST_METHOD"]) {
        case "POST":
            curl_setopt($ch, CURLOPT_POST, true);
            //The request body is read from php://input.
            //NOTE: PHP does not expose multipart/form-data bodies through php://input,
            //so such requests are forwarded with an empty body.
            $rawBody = (string) file_get_contents("php://input");
            $requestContentType = isset($_SERVER["CONTENT_TYPE"]) ? $_SERVER["CONTENT_TYPE"] : "";
            $isFormEncoded = $requestContentType === "" || stripos($requestContentType, "application/x-www-form-urlencoded") === 0;
            $postBody = $rawBody;
            if ($isFormEncoded) {
                //If the miniProxyFormAction field appears in the POST data, remove it so the destination server doesn't receive it.
                $postData = [];
                parse_str($rawBody, $postData);
                if (isset($postData["miniProxyFormAction"])) {
                    unset($postData["miniProxyFormAction"]);
                    $postBody = http_build_query($postData);
                }
            }
            //Bodies that are not form-encoded (JSON, XML, ...) and form bodies without the
            //miniProxy field are forwarded untouched; re-encoding them would corrupt them.
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postBody);
            break;
        case "PUT":
            curl_setopt($ch, CURLOPT_PUT, true);
            $putStream = fopen("php://input", "r");
            curl_setopt($ch, CURLOPT_INFILE, $putStream);
            break;
    }

    //Other cURL options.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    //Set the request URL.
    curl_setopt($ch, CURLOPT_URL, $url);

    //Make the request.
    $response = curl_exec($ch);
    $responseInfo = curl_getinfo($ch);
    $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    curl_close($ch);
    if (is_resource($putStream)) {
        fclose($putStream);
    }

    //curl_exec() returns false when the transfer fails; treat that as an empty response.
    if ($response === false) {
        $response = "";
    }

    //Setting CURLOPT_HEADER to true above forces the response headers and body
    //to be output together--separate them.
    $responseHeaders = (string) substr($response, 0, $headerSize);
    $responseBody = (string) substr($response, $headerSize);

    return ["headers" => $responseHeaders, "body" => $responseBody, "responseInfo" => $responseInfo];
}
//Converts relative URLs to absolute ones, given a base URL.
//Modified version of code found at http://nashruddin.com/PHP_Script_for_Converting_Relative_to_Absolute_URL
//
//$rel:  URL reference as found in a document (may be relative, absolute or empty).
//$base: absolute URL of the document the reference was found in.
//Returns: $rel unchanged if it already has a scheme, starts with "//", or if $base cannot be
//         parsed into a scheme and host; otherwise the reference resolved against $base.
function rel2abs($rel, $base) {
    //Only a truly empty reference means "the current directory"; empty() would also
    //swallow the perfectly valid relative URL "0".
    $rel = (string) $rel;
    if ($rel === "") $rel = ".";

    if (parse_url($rel, PHP_URL_SCHEME) != "" || strpos($rel, "//") === 0) return $rel; //Return if already an absolute URL

    //Anchors replace the base URL's fragment; queries replace its query and fragment.
    if ($rel[0] == "#") {
        $fragmentStart = strpos($base, "#");
        return ($fragmentStart === false ? $base : substr($base, 0, $fragmentStart)) . $rel;
    }
    if ($rel[0] == "?") {
        return substr($base, 0, strcspn($base, "?#")) . $rel;
    }

    $baseParts = parse_url($base);
    if (!is_array($baseParts) || !isset($baseParts["scheme"], $baseParts["host"])) {
        //Without a usable base there is nothing to resolve against.
        return $rel;
    }
    $scheme = $baseParts["scheme"];

    $path = isset($baseParts["path"]) ? preg_replace("#/[^/]*$#", "", $baseParts["path"]) : "/"; //Remove non-directory element from path
    if ($rel[0] == "/") $path = ""; //Destroy path if relative url points to root

    //Port 80 is only redundant for plain HTTP; keep every other explicitly given port.
    $port = "";
    if (isset($baseParts["port"]) && !(strtolower($scheme) === "http" && $baseParts["port"] == 80)) {
        $port = ":" . $baseParts["port"];
    }

    $auth = "";
    if (isset($baseParts["user"])) {
        $auth = $baseParts["user"];
        if (isset($baseParts["pass"])) {
            $auth .= ":" . $baseParts["pass"];
        }
        $auth .= "@";
    }

    //Split off the reference's query/fragment so that the path clean-up below
    //cannot alter slashes or dots that belong to them.
    $suffixStart = strcspn($rel, "?#");
    $suffix = (string) substr($rel, $suffixStart);
    $relPath = substr($rel, 0, $suffixStart);

    $absPath = $path . "/" . $relPath; //Dirty absolute path
    do {
        //Replace '//' or '/./' or '/foo/../' with '/' until nothing is left to replace.
        $absPath = preg_replace(['#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'], "/", $absPath, -1, $replacements);
    } while ($replacements > 0);

    return $scheme . "://" . $auth . $baseParts["host"] . $port . $absPath . $suffix; //Absolute URL is ready.
}
//Proxify contents of url() references in blocks of CSS text.
//
//$css:     CSS source text (a stylesheet, a <style> body or a style attribute value).
//$baseURL: absolute URL that relative references inside $css are resolved against.
//Returns: the CSS text with every url() reference pointing back at the proxy, except for
//         empty references, data: URIs and fragment-only references, which are left untouched.
function proxifyCSS($css, $baseURL) {
    //Add a "url()" wrapper to any CSS @import rules that only specify a URL without the wrapper,
    //so that they're proxified when searching for "url()" wrappers below.
    $normalizedLines = [];
    foreach (explode("\n", $css) as $line) {
        if (preg_match("/@import\s+url/i", $line)) {
            $normalizedLines[] = $line;
        } else {
            $normalizedLines[] = preg_replace_callback(
                "/(@import\s+)([^;\s]+)([\s;])/i",
                function($matches) {
                    return $matches[1] . "url(" . $matches[2] . ")" . $matches[3];
                },
                $line);
        }
    }
    $normalizedCSS = implode("\n", $normalizedLines);

    return preg_replace_callback(
        "/url\((.*?)\)/i",
        function($matches) use ($baseURL) {
            //CSS allows whitespace between the parentheses and the (optionally quoted) URL.
            $url = trim($matches[1]);
            //Remove any surrounding single or double quotes from the URL so it can be passed to rel2abs - the quotes are optional in CSS
            //Assume that if there is a leading quote then there should be a trailing quote, so just use trim() to remove them
            if (strpos($url, "'") === 0) {
                $url = trim($url, "'");
            }
            if (strpos($url, "\"") === 0) {
                $url = trim($url, "\"");
            }
            $url = trim($url);
            //Nothing to proxify: an empty reference, inline data (which is returned exactly as written so
            //that quoted data URIs stay valid), or a reference to an element of the current document such
            //as url(#gradient).
            if ($url === "" || stripos($url, "data:") === 0 || $url[0] === "#") {
                return $matches[0];
            }
            return "url(" . PROXY_PREFIX . rel2abs($url, $baseURL) . ")";
        },
        $normalizedCSS);
}
//Proxify "srcset" attributes (normally associated with <img> tags.)
//
//$srcset:  raw attribute value: comma-separated image candidates, each being a URL
//          optionally followed by whitespace and a descriptor such as "2x" or "480w".
//$baseURL: absolute URL that relative candidate URLs are resolved against.
//Returns: the srcset value with every candidate URL pointing back at the proxy.
function proxifySrcset($srcset, $baseURL) {
    $proxifiedSources = [];
    foreach (explode(",", $srcset) as $source) {
        $source = trim($source);
        if ($source === "") {
            //Skip empty candidates, e.g. those produced by a trailing comma.
            continue;
        }
        //A URL cannot contain whitespace, so everything after the first run of
        //whitespace is the descriptor (which may be absent).
        $components = preg_split('/\s+/', $source, 2);
        $descriptor = isset($components[1]) ? trim($components[1]) : "";
        //The URL is resolved as written: leading slashes are significant, because they
        //mark root-relative ("/img/a.png") and protocol-relative ("//cdn/a.png") URLs.
        $proxifiedURL = PROXY_PREFIX . rel2abs($components[0], $baseURL);
        $proxifiedSources[] = $descriptor === "" ? $proxifiedURL : $proxifiedURL . " " . $descriptor;
    }
    return implode(", ", $proxifiedSources); //Recombine the sources into a single "srcset"
}
//Extract and sanitize the requested URL, handling cases where forms have been rewritten to point to the proxy.
if (isset($_POST["miniProxyFormAction"]) && is_string($_POST["miniProxyFormAction"])) {
    //A rewritten POST form: the original form action travels in a hidden field.
    $url = $_POST["miniProxyFormAction"];
    unset($_POST["miniProxyFormAction"]);
} else {
    $queryParams = [];
    parse_str(isset($_SERVER["QUERY_STRING"]) ? $_SERVER["QUERY_STRING"] : "", $queryParams);
    //If the miniProxyFormAction field appears in the query string, make $url start with its value, and rebuild the query string without it.
    if (isset($queryParams["miniProxyFormAction"]) && is_string($queryParams["miniProxyFormAction"])) {
        $formAction = $queryParams["miniProxyFormAction"];
        unset($queryParams["miniProxyFormAction"]);
        //When a browser submits a GET form, the submitted fields replace whatever query string the
        //form's action already had. Do the same here; simply appending "?" would produce a URL with
        //two question marks whenever the original action contained a query.
        $formAction = substr($formAction, 0, strcspn($formAction, "?#"));
        $url = $formAction . "?" . http_build_query($queryParams);
    } else {
        //A direct request: everything after "<script name>?" is the URL to proxy.
        $url = (string) substr($_SERVER["REQUEST_URI"], strlen($_SERVER["SCRIPT_NAME"]) + 1);
    }
}

if (empty($url)) {
    if (empty($startURL)) {
        //Nothing to proxy and no start URL configured: show the landing page.
        die("<html><head><title>miniProxy</title></head><body><h1>Welcome to miniProxy!</h1>miniProxy can be directly invoked like this: <a href=\"" . PROXY_PREFIX . $landingExampleURL . "\">" . PROXY_PREFIX . $landingExampleURL . "</a><br /><br />Or, you can simply enter a URL below:<br /><br /><form onsubmit=\"if (document.getElementById('site').value) { window.location.href='" . PROXY_PREFIX . "' + document.getElementById('site').value; return false; } else { window.location.href='" . PROXY_PREFIX . $landingExampleURL . "'; return false; }\" autocomplete=\"off\"><input id=\"site\" type=\"text\" size=\"50\" /><input type=\"submit\" value=\"Proxy It!\" /></form></body></html>");
    } else {
        $url = $startURL;
    }
} else if (strpos($url, ":/") !== strpos($url, "://")) {
    //Work around the fact that some web servers (e.g. IIS 8.5) change double slashes appearing in the URL to a single slash.
    //See https://github.com/joshdick/miniProxy/pull/14
    $pos = strpos($url, ":/");
    $url = substr_replace($url, "://", $pos, strlen(":/"));
}

$scheme = parse_url($url, PHP_URL_SCHEME);
if (empty($scheme)) {
    if (strpos($url, "//") === 0) {
        //Assume that any supplied URLs starting with // are HTTP URLs.
        $url = "http:" . $url;
    } else {
        //Assume that any supplied URLs without a scheme (just a host) are HTTP URLs.
        $url = "http://" . $url;
    }
} else if (!preg_match("/^https?$/i", $scheme)) {
    die('Error: Detected a "' . $scheme . '" URL. miniProxy exclusively supports http[s] URLs.');
}

//Apply the whitelist, the blacklist and the local-address check.
if (!isValidURL($url)) {
    die("Error: The requested URL was disallowed by the server administrator.");
}
//Perform the request on behalf of the browser.
$response = makeRequest($url);
$rawResponseHeaders = $response["headers"];
$responseBody = $response["body"];
$responseInfo = $response["responseInfo"];

//If CURLOPT_FOLLOWLOCATION landed the proxy at a different URL than
//what was requested, explicitly redirect the proxy there.
$responseURL = $responseInfo["url"];
if ($responseURL !== $url) {
    header("Location: " . PROXY_PREFIX . $responseURL, true);
    exit(0);
}

//A regex that indicates which server response headers should be stripped out of the proxified response.
//The body handed to the browser has already been decoded by cURL and may be rewritten below, so the
//original length and transfer/content encodings (gzip, deflate, br, ...) no longer describe it.
$header_blacklist_pattern = "/^(?:Content-Length|Transfer-Encoding|Content-Encoding)\s*:/i";

//cURL can make multiple requests internally (for example, if CURLOPT_FOLLOWLOCATION is enabled), and reports
//headers for every request it makes. Only proxy the last set of received response headers,
//corresponding to the final request made by cURL for any given call to makeRequest().
$responseHeaderBlocks = array_filter(explode("\r\n\r\n", $rawResponseHeaders));
$lastHeaderBlock = empty($responseHeaderBlocks) ? "" : end($responseHeaderBlocks);
$headerLines = explode("\r\n", $lastHeaderBlock);
foreach ($headerLines as $header) {
    $header = trim($header);
    if ($header === "") {
        //Nothing was received (e.g. the request failed); there is no header to forward.
        continue;
    }
    if (!preg_match($header_blacklist_pattern, $header)) {
        header($header, false);
    }
}

//Prevent robots from indexing proxified pages
header("X-Robots-Tag: noindex, nofollow", true);

if ($forceCORS) {
    //This logic is based on code found at: http://stackoverflow.com/a/9866124/278810
    //CORS headers sent below may conflict with CORS headers from the original response,
    //so these headers are sent after the original response headers to ensure their values
    //are the ones that actually end up getting sent to the browser.
    //Explicit [ $replace = true ] is used for these headers even though this is PHP's default behavior.

    //Allow access from any origin.
    //NOTE(review): browsers are documented to reject a wildcard origin on credentialed requests,
    //so the credentials header below presumably has no effect together with "*" - confirm.
    header("Access-Control-Allow-Origin: *", true);
    header("Access-Control-Allow-Credentials: true", true);

    //Handle CORS headers received during OPTIONS requests.
    if ($_SERVER["REQUEST_METHOD"] == "OPTIONS") {
        if (isset($_SERVER["HTTP_ACCESS_CONTROL_REQUEST_METHOD"])) {
            header("Access-Control-Allow-Methods: GET, POST, OPTIONS", true);
        }
        if (isset($_SERVER["HTTP_ACCESS_CONTROL_REQUEST_HEADERS"])) {
            header("Access-Control-Allow-Headers: {$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']}", true);
        }
        //No further action is needed for OPTIONS requests.
        exit(0);
    }
}
//Decide how to treat the response body based on the content type reported by cURL.
$contentType = "";
if (isset($responseInfo["content_type"])) $contentType = $responseInfo["content_type"];

//This is presumably a web page, so attempt to proxify the DOM.
if (stripos($contentType, "text/html") !== false) {

    //Attempt to normalize character encoding: replace every non-ASCII character with a numeric
    //HTML entity so the DOM parser does not have to guess the document's encoding.
    //(mb_encode_numericentity() is used because converting to the "HTML-ENTITIES"
    //pseudo-encoding with mb_convert_encoding() is deprecated in current PHP versions.)
    $detectedEncoding = mb_detect_encoding($responseBody, "UTF-8, ISO-8859-1");
    if ($detectedEncoding) {
        $responseBody = mb_encode_numericentity($responseBody, [0x80, 0x10FFFF, 0, 0x1FFFFF], $detectedEncoding);
    }

    //Parse the DOM. Real-world markup is rarely valid, so parser warnings are suppressed.
    $doc = new DomDocument();
    @$doc->loadHTML($responseBody);
    $xpath = new DOMXPath($doc);

    //Rewrite forms so that their actions point back to the proxy.
    foreach ($xpath->query("//form") as $form) {
        $action = $form->getAttribute("action");
        //If the form doesn't have an action, the action is the page itself.
        //Otherwise, change an existing action to an absolute version.
        $action = empty($action) ? $url : rel2abs($action, $url);
        //Rewrite the form action to point back at the proxy.
        $form->setAttribute("action", rtrim(PROXY_PREFIX, "?"));
        //Add a hidden form field that the proxy can later use to retrieve the original form action.
        $actionInput = $doc->createDocumentFragment();
        $actionInput->appendXML('<input type="hidden" name="miniProxyFormAction" value="' . htmlspecialchars($action) . '" />');
        $form->appendChild($actionInput);
    }

    //Proxify <meta> tags with an 'http-equiv="refresh"' attribute.
    foreach ($xpath->query("//meta[@http-equiv]") as $element) {
        if (strcasecmp($element->getAttribute("http-equiv"), "refresh") === 0) {
            $content = $element->getAttribute("content");
            if (!empty($content)) {
                //Split at the first "=" only ("5; url=<target>"): the target URL may itself
                //contain "=" characters in its query string.
                $splitContent = preg_split("/=/", $content, 2);
                if (isset($splitContent[1])) {
                    //The target may be surrounded by whitespace or quotes.
                    $refreshTarget = trim($splitContent[1], " \t\r\n\"'");
                    $element->setAttribute("content", $splitContent[0] . "=" . PROXY_PREFIX . rel2abs($refreshTarget, $url));
                }
            }
        }
    }

    //Proxify <style> tags.
    foreach ($xpath->query("//style") as $style) {
        $style->nodeValue = proxifyCSS($style->nodeValue, $url);
    }

    //Proxify tags with a "style" attribute.
    foreach ($xpath->query("//*[@style]") as $element) {
        $element->setAttribute("style", proxifyCSS($element->getAttribute("style"), $url));
    }

    //Proxify "srcset" attributes wherever they appear (<img> as well as <source> inside <picture>).
    foreach ($xpath->query("//*[@srcset]") as $element) {
        $element->setAttribute("srcset", proxifySrcset($element->getAttribute("srcset"), $url));
    }

    //Proxify any of these attributes appearing in any tag.
    $proxifyAttributes = ["href", "src"];
    foreach ($proxifyAttributes as $attrName) {
        foreach ($xpath->query("//*[@" . $attrName . "]") as $element) { //For every element with the given attribute...
            $attrContent = $element->getAttribute($attrName);
            //Leave non-HTTP links and links to anchors within the current page alone.
            //The "#" alternative is anchored too, so that ordinary links which merely
            //contain a fragment ("page.html#section") are still proxified.
            if ($attrName == "href" && preg_match("/^(?:(?:about|javascript|magnet|mailto):|#)/i", $attrContent)) continue;
            if ($attrName == "src" && preg_match("/^(data):/i", $attrContent)) continue;
            $attrContent = rel2abs($attrContent, $url);
            $attrContent = PROXY_PREFIX . $attrContent;
            $element->setAttribute($attrName, $attrContent);
        }
    }

    //Attempt to force AJAX requests to be made through the proxy by
    //wrapping window.XMLHttpRequest.prototype.open in order to make
    //all request URLs absolute and point back to the proxy.
    //The rel2abs() JavaScript function serves the same purpose as the server-side one in this file,
    //but is used in the browser to ensure all AJAX request URLs are absolute and not relative.
    //Uses code from these sources:
    //http://stackoverflow.com/questions/7775767/javascript-overriding-xmlhttprequest-open
    //https://gist.github.com/1088850
    //TODO: This is obviously only useful for browsers that use XMLHttpRequest but
    //it's better than nothing.
    $head = $xpath->query("//head")->item(0);
    $body = $xpath->query("//body")->item(0);
    $prependElem = $head != null ? $head : $body;

    //Only bother trying to apply this hack if the DOM has a <head> or <body> element;
    //insert some JavaScript at the top of whichever is available first.
    //Protects against cases where the server sends a Content-Type of "text/html" when
    //what's coming back is most likely not actually HTML.
    //TODO: Do this check before attempting to do any sort of DOM parsing?
    if ($prependElem != null) {
        //Embed the PHP values as JavaScript string literals via json_encode(), which escapes quotes,
        //backslashes and slashes, so that a quote or "</script>" in the proxied URL cannot break
        //out of the string or the script element.
        $jsBaseURL = json_encode($url);
        if ($jsBaseURL === false) $jsBaseURL = '""';
        $jsProxyPrefix = json_encode(PROXY_PREFIX);
        if ($jsProxyPrefix === false) $jsProxyPrefix = '""';

        //The script source is attached as a text node: the value argument of createElement()
        //is not escaped, so "&" characters in the script or the URL would be misread as entities.
        $scriptElem = $doc->createElement("script");
        $scriptElem->appendChild($doc->createTextNode(
'(function() {
if (window.XMLHttpRequest) {
function parseURI(url) {
var m = String(url).replace(/^\s+|\s+$/g, "").match(/^([^:\/?#]+:)?(\/\/(?:[^:@]*(?::[^:@]*)?@)?(([^:\/?#]*)(?::(\d*))?))?([^?#]*)(\?[^#]*)?(#[\s\S]*)?/);
// authority = "//" + user + ":" + pass "@" + hostname + ":" port
return (m ? {
href : m[0] || "",
protocol : m[1] || "",
authority: m[2] || "",
host : m[3] || "",
hostname : m[4] || "",
port : m[5] || "",
pathname : m[6] || "",
search : m[7] || "",
hash : m[8] || ""
} : null);
}
function rel2abs(base, href) { // RFC 3986
function removeDotSegments(input) {
var output = [];
input.replace(/^(\.\.?(\/|$))+/, "")
.replace(/\/(\.(\/|$))+/g, "/")
.replace(/\/\.\.$/, "/../")
.replace(/\/?[^\/]*/g, function (p) {
if (p === "/..") {
output.pop();
} else {
output.push(p);
}
});
return output.join("").replace(/^\//, input.charAt(0) === "/" ? "/" : "");
}
href = parseURI(href || "");
base = parseURI(base || "");
return !href || !base ? null : (href.protocol || base.protocol) +
(href.protocol || href.authority ? href.authority : base.authority) +
removeDotSegments(href.protocol || href.authority || href.pathname.charAt(0) === "/" ? href.pathname : (href.pathname ? ((base.authority && !base.pathname ? "/" : "") + base.pathname.slice(0, base.pathname.lastIndexOf("/") + 1) + href.pathname) : base.pathname)) +
(href.protocol || href.authority || href.pathname ? href.search : (href.search || base.search)) +
href.hash;
}
var proxied = window.XMLHttpRequest.prototype.open;
window.XMLHttpRequest.prototype.open = function() {
if (arguments[1] !== null && arguments[1] !== undefined) {
var url = arguments[1];
url = rel2abs(' . $jsBaseURL . ', url);
if (url.indexOf(' . $jsProxyPrefix . ') == -1) {
url = ' . $jsProxyPrefix . ' + url;
}
arguments[1] = url;
}
return proxied.apply(this, [].slice.call(arguments));
};
}
})();'
        ));
        $scriptElem->setAttribute("type", "text/javascript");
        $prependElem->insertBefore($scriptElem, $prependElem->firstChild);
    }

    echo "<!-- Proxified page constructed by miniProxy -->\n" . $doc->saveHTML();
} else if (stripos($contentType, "text/css") !== false) { //This is CSS, so proxify url() references.
    echo proxifyCSS($responseBody, $url);
} else { //This isn't a web page or CSS, so serve unmodified through the proxy with the correct headers (images, JavaScript, etc.)
    header("Content-Length: " . strlen($responseBody), true);
    echo $responseBody;
}