Skip to content

Commit

Permalink
Added functions uri_encode() and uri_decode() (#941)
Browse files Browse the repository at this point in the history
  • Loading branch information
jesper-friis authored Sep 5, 2024
1 parent 415ce82 commit 32f09e5
Show file tree
Hide file tree
Showing 6 changed files with 371 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ set(sources
bson.c
session.c
rng.c
uri_encode.c

md5.c
sha1.c
Expand Down
36 changes: 36 additions & 0 deletions src/utils/LICENSES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ the version available at <http://www.jhweiss.de/software/snprintf.html>.
However, this is not a requirement for using or redistributing (possibly
modified) versions of this file, nor is leaving this notice intact mandatory.


License for pymongo
-------------------
Apache License
Expand Down Expand Up @@ -341,3 +342,38 @@ License for pymongo
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


License for uri_encode.c/uri_encode.h
-------------------------------------
This software is Copyright (c) 2016 by David Farrell.

This is free software, licensed under:

The (two-clause) FreeBSD License

The FreeBSD License

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1 change: 1 addition & 0 deletions src/utils/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ set(tests
test_bson
test_session
test_rng
test_uri_encode

tgen_example
)
Expand Down
174 changes: 174 additions & 0 deletions src/utils/tests/test_uri_encode.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "uri_encode.h"

#include "minunit/minunit.h"


/* Output buffer that every test below passes as `dst` to
   uri_encode()/uri_decode(); each call overwrites it from the start. */
char buf[256];

/* tests for uri_encode */
/* Empty input produces an empty, NUL-terminated output. */
MU_TEST(test_encode_empty)
{
  const char *input = "";
  const int nbytes = (int)uri_encode(input, 0, buf);
  mu_assert_string_eq("", buf);
  mu_assert_int_eq(0, nbytes);
}

/* Plain lower-case letters pass through unchanged. */
MU_TEST(test_encode_something)
{
  const char *input = "something";
  const int nbytes = (int)uri_encode(input, 9, buf);
  mu_assert_string_eq("something", buf);
  mu_assert_int_eq(9, nbytes);
}

/* A space is escaped as "%20". */
MU_TEST(test_encode_space)
{
  const char *input = " ";
  const int nbytes = (int)uri_encode(input, 1, buf);
  mu_assert_string_eq("%20", buf);
  mu_assert_int_eq(3, nbytes);
}

/* Every '%' in the input is itself escaped as "%25". */
MU_TEST(test_encode_percent)
{
  const char *input = "%%20";
  const int nbytes = (int)uri_encode(input, 4, buf);
  mu_assert_string_eq("%25%2520", buf);
  mu_assert_int_eq(8, nbytes);
}

/* '|' and the two UTF-8 bytes of the non-ASCII letter are escaped. */
MU_TEST(test_encode_latin1)
{
  const char *input = "|abcå";
  const int nbytes = (int)uri_encode(input, 6, buf);
  mu_assert_string_eq("%7Cabc%C3%A5", buf);
  mu_assert_int_eq(12, nbytes);
}

/* '~' is kept as-is while "*'()" are escaped. */
MU_TEST(test_encode_symbols)
{
  const char *input = "~*'()";
  const int nbytes = (int)uri_encode(input, 5, buf);
  mu_assert_string_eq("~%2A%27%28%29", buf);
  mu_assert_int_eq(13, nbytes);
}

/* Angle brackets and the double quote are escaped. */
MU_TEST(test_encode_angles)
{
  const char *input = "<\">";
  const int nbytes = (int)uri_encode(input, 3, buf);
  mu_assert_string_eq("%3C%22%3E", buf);
  mu_assert_int_eq(9, nbytes);
}

/* Only the first `len` bytes are read; the NUL and "DEF" are ignored. */
MU_TEST(test_encode_middle_null)
{
  const char *input = "ABC\0DEF";
  const int nbytes = (int)uri_encode(input, 3, buf);
  mu_assert_string_eq("ABC", buf);
  mu_assert_int_eq(3, nbytes);
}

/* With the full length given, the embedded NUL is escaped as "%00". */
MU_TEST(test_encode_middle_null_len)
{
  const char *input = "ABC\0DEF";
  const int nbytes = (int)uri_encode(input, 7, buf);
  mu_assert_string_eq("ABC%00DEF", buf);
  mu_assert_int_eq(9, nbytes);
}

/* Three two-byte UTF-8 characters become six escapes. */
MU_TEST(test_encode_latin1_utf8)
{
  const char *input = "åäö";
  const int nbytes = (int)uri_encode(input, strlen(input), buf);
  mu_assert_string_eq("%C3%A5%C3%A4%C3%B6", buf);
  mu_assert_int_eq(18, nbytes);
}

/* One three-byte UTF-8 character becomes three escapes. */
MU_TEST(test_encode_utf8)
{
  const char *input = "❤";
  const int nbytes = (int)uri_encode(input, strlen(input), buf);
  mu_assert_string_eq("%E2%9D%A4", buf);
  mu_assert_int_eq(9, nbytes);
}

/* tests for uri_decode */
/* Empty input produces an empty, NUL-terminated output. */
MU_TEST(test_decode_empty)
{
  const char *input = "";
  const int nbytes = (int)uri_decode(input, 0, buf);
  mu_assert_string_eq("", buf);
  mu_assert_int_eq(0, nbytes);
}

/* Text without escapes is copied unchanged. */
MU_TEST(test_decode_something)
{
  const char *input = "something";
  const int nbytes = (int)uri_decode(input, 9, buf);
  mu_assert_string_eq("something", buf);
  mu_assert_int_eq(9, nbytes);
}

/* A trailing '%' with nothing after it is copied literally. */
MU_TEST(test_decode_something_percent)
{
  const char *input = "something%";
  const int nbytes = (int)uri_decode(input, 10, buf);
  mu_assert_string_eq("something%", buf);
  mu_assert_int_eq(10, nbytes);
}

/* A '%' followed by only one character is copied literally. */
MU_TEST(test_decode_something_percenta)
{
  const char *input = "something%a";
  const int nbytes = (int)uri_decode(input, 11, buf);
  mu_assert_string_eq("something%a", buf);
  mu_assert_int_eq(11, nbytes);
}

/* A '%' followed by two non-hex characters is copied literally. */
MU_TEST(test_decode_something_zslash)
{
  const char *input = "something%Z/";
  const int nbytes = (int)uri_decode(input, 12, buf);
  mu_assert_string_eq("something%Z/", buf);
  mu_assert_int_eq(12, nbytes);
}

/* "%20" decodes to a single space. */
MU_TEST(test_decode_space)
{
  const char *input = "%20";
  const int nbytes = (int)uri_decode(input, 3, buf);
  mu_assert_string_eq(" ", buf);
  mu_assert_int_eq(1, nbytes);
}

/* "%25" decodes to '%'; the result is not decoded a second time. */
MU_TEST(test_decode_percents)
{
  const char *input = "%25%2520";
  const int nbytes = (int)uri_decode(input, 8, buf);
  mu_assert_string_eq("%%20", buf);
  mu_assert_int_eq(4, nbytes);
}

/* Escaped '|' and an escaped two-byte UTF-8 character are restored. */
MU_TEST(test_decode_latin1)
{
  const char *input = "%7Cabc%C3%A5";
  const int nbytes = (int)uri_decode(input, 12, buf);
  mu_assert_string_eq("|abcå", buf);
  mu_assert_int_eq(6, nbytes);
}

/* Escaped punctuation is restored; the literal '~' is kept. */
MU_TEST(test_decode_symbols)
{
  const char *input = "~%2A%27%28%29";
  const int nbytes = (int)uri_decode(input, 13, buf);
  mu_assert_string_eq("~*'()", buf);
  mu_assert_int_eq(5, nbytes);
}

/* Escaped angle brackets and double quote are restored. */
MU_TEST(test_decode_angles)
{
  const char *input = "%3C%22%3E";
  const int nbytes = (int)uri_decode(input, 9, buf);
  mu_assert_string_eq("<\">", buf);
  mu_assert_int_eq(3, nbytes);
}
/* "%00" decodes to a real NUL byte.  Only the first 6 input bytes are
   consumed, so the output is 'A','B','C','\0' followed by the terminator. */
MU_TEST(test_decode_middle_null) {
  int n;
  /* Pre-fill the buffer so stale NULs from earlier tests cannot make the
     byte-wise check below pass by accident. */
  memset(buf, '#', sizeof(buf));
  n = uri_decode("ABC%00DEF", 6, buf);
  mu_assert_string_eq("ABC\0", buf);
  mu_assert_int_eq(4, n);
  /* mu_assert_string_eq() stops at the first NUL, so also compare the raw
     bytes: the decoded NUL plus the terminating NUL (5 bytes in total). */
  mu_assert_int_eq(0, memcmp("ABC\0", buf, 5));
}

/* With only 5 input bytes the escape is cut off after "%0" and is therefore
   copied literally. */
MU_TEST(test_decode_middle_null2) {
  int n;
  memset(buf, '#', sizeof(buf));
  n = uri_decode("ABC%00DEF", 5, buf);
  mu_assert_string_eq("ABC%0", buf);
  mu_assert_int_eq(5, n);
}

/* The whole input is decoded; the output contains an embedded NUL. */
MU_TEST(test_decode_middle_full) {
  int n;
  memset(buf, '#', sizeof(buf));
  n = uri_decode("ABC%00DEF", 9, buf);
  mu_assert_string_eq("ABC\0DEF", buf);
  mu_assert_int_eq(7, n);
  /* Verify the bytes after the embedded NUL and the terminator too
     (7 decoded bytes + 1 terminating NUL). */
  mu_assert_int_eq(0, memcmp("ABC\0DEF", buf, 8));
}


/***********************************************************************/

/* Registers every test exactly once: first the uri_encode() tests, then the
   uri_decode() tests. */
MU_TEST_SUITE(test_suite)
{
  /* uri_encode() */
  MU_RUN_TEST(test_encode_empty);
  MU_RUN_TEST(test_encode_something);
  MU_RUN_TEST(test_encode_percent);
  MU_RUN_TEST(test_encode_space);
  MU_RUN_TEST(test_encode_latin1);
  MU_RUN_TEST(test_encode_symbols);
  MU_RUN_TEST(test_encode_angles);
  MU_RUN_TEST(test_encode_middle_null);
  MU_RUN_TEST(test_encode_middle_null_len);
  MU_RUN_TEST(test_encode_latin1_utf8);
  MU_RUN_TEST(test_encode_utf8);

  /* uri_decode() */
  MU_RUN_TEST(test_decode_empty);
  MU_RUN_TEST(test_decode_something);
  MU_RUN_TEST(test_decode_something_percent);
  MU_RUN_TEST(test_decode_something_percenta);
  MU_RUN_TEST(test_decode_something_zslash);
  MU_RUN_TEST(test_decode_space);
  MU_RUN_TEST(test_decode_percents);
  MU_RUN_TEST(test_decode_latin1);
  MU_RUN_TEST(test_decode_symbols);
  MU_RUN_TEST(test_decode_angles);
  MU_RUN_TEST(test_decode_middle_null);
  MU_RUN_TEST(test_decode_middle_null2);
  MU_RUN_TEST(test_decode_middle_full);
}


/* Runs the test suite, reports the results and exits with status 1 if any
   test failed, 0 otherwise. */
int main(void)
{
  MU_RUN_SUITE(test_suite);
  MU_REPORT();
  if (minunit_fail) {
    return 1;
  }
  return 0;
}
120 changes: 120 additions & 0 deletions src/utils/uri_encode.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/* uri_encode.c -- C library for URI percent encoding/decoding
*
* This software is Copyright (c) 2016 by David Farrell
*
* Distributed under terms of the (two-clause) FreeBSD License
*
* See: https://github.com/dnmfarrell/URI-Encode-C
*
* Modified by Jesper Friis, 2024
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "uri_encode.h"

#include <inttypes.h>

/*
  Encoding lookup table with one 4-byte slot per possible byte value
  (256 slots, 1024 bytes in total); slot `c` starts at offset 4*c.

  A slot holds the escape "%XX" (upper-case hex) followed by a NUL for
  bytes that must be percent-encoded, and four NUL bytes (written as
  `_______` below) for bytes that are copied verbatim: ASCII letters,
  digits, '-', '.', '_' and '~'.

  The very last slot is written as "%FF" without an explicit "\0"; its
  fourth byte is the implicit terminator of the string literal, which
  makes the initializer exactly fill the array.
*/
#define _______ "\0\0\0\0"
static const char uri_encode_tbl[ sizeof(int32_t) * 0x100 ] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f   (low nibble; one row per high nibble) */
"%00\0" "%01\0" "%02\0" "%03\0" "%04\0" "%05\0" "%06\0" "%07\0" "%08\0" "%09\0" "%0A\0" "%0B\0" "%0C\0" "%0D\0" "%0E\0" "%0F\0" /* 0: 0 ~ 15 */
"%10\0" "%11\0" "%12\0" "%13\0" "%14\0" "%15\0" "%16\0" "%17\0" "%18\0" "%19\0" "%1A\0" "%1B\0" "%1C\0" "%1D\0" "%1E\0" "%1F\0" /* 1: 16 ~ 31 */
"%20\0" "%21\0" "%22\0" "%23\0" "%24\0" "%25\0" "%26\0" "%27\0" "%28\0" "%29\0" "%2A\0" "%2B\0" "%2C\0" _______ _______ "%2F\0" /* 2: 32 ~ 47 */
_______ _______ _______ _______ _______ _______ _______ _______ _______ _______ "%3A\0" "%3B\0" "%3C\0" "%3D\0" "%3E\0" "%3F\0" /* 3: 48 ~ 63 */
"%40\0" _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ /* 4: 64 ~ 79 */
_______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ "%5B\0" "%5C\0" "%5D\0" "%5E\0" _______ /* 5: 80 ~ 95 */
"%60\0" _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ /* 6: 96 ~ 111 */
_______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ "%7B\0" "%7C\0" "%7D\0" _______ "%7F\0" /* 7: 112 ~ 127 */
"%80\0" "%81\0" "%82\0" "%83\0" "%84\0" "%85\0" "%86\0" "%87\0" "%88\0" "%89\0" "%8A\0" "%8B\0" "%8C\0" "%8D\0" "%8E\0" "%8F\0" /* 8: 128 ~ 143 */
"%90\0" "%91\0" "%92\0" "%93\0" "%94\0" "%95\0" "%96\0" "%97\0" "%98\0" "%99\0" "%9A\0" "%9B\0" "%9C\0" "%9D\0" "%9E\0" "%9F\0" /* 9: 144 ~ 159 */
"%A0\0" "%A1\0" "%A2\0" "%A3\0" "%A4\0" "%A5\0" "%A6\0" "%A7\0" "%A8\0" "%A9\0" "%AA\0" "%AB\0" "%AC\0" "%AD\0" "%AE\0" "%AF\0" /* A: 160 ~ 175 */
"%B0\0" "%B1\0" "%B2\0" "%B3\0" "%B4\0" "%B5\0" "%B6\0" "%B7\0" "%B8\0" "%B9\0" "%BA\0" "%BB\0" "%BC\0" "%BD\0" "%BE\0" "%BF\0" /* B: 176 ~ 191 */
"%C0\0" "%C1\0" "%C2\0" "%C3\0" "%C4\0" "%C5\0" "%C6\0" "%C7\0" "%C8\0" "%C9\0" "%CA\0" "%CB\0" "%CC\0" "%CD\0" "%CE\0" "%CF\0" /* C: 192 ~ 207 */
"%D0\0" "%D1\0" "%D2\0" "%D3\0" "%D4\0" "%D5\0" "%D6\0" "%D7\0" "%D8\0" "%D9\0" "%DA\0" "%DB\0" "%DC\0" "%DD\0" "%DE\0" "%DF\0" /* D: 208 ~ 223 */
"%E0\0" "%E1\0" "%E2\0" "%E3\0" "%E4\0" "%E5\0" "%E6\0" "%E7\0" "%E8\0" "%E9\0" "%EA\0" "%EB\0" "%EC\0" "%ED\0" "%EE\0" "%EF\0" /* E: 224 ~ 239 */
"%F0\0" "%F1\0" "%F2\0" "%F3\0" "%F4\0" "%F5\0" "%F6\0" "%F7\0" "%F8\0" "%F9\0" "%FA\0" "%FB\0" "%FC\0" "%FD\0" "%FE\0" "%FF" /* F: 240 ~ 255 */
};
#undef _______

/*
  Decoding lookup table indexed by character code.  Entries for the hex
  digits '0'-'9', 'A'-'F' and 'a'-'f' hold the digit's value (0-15); all
  other entries hold 0xFF (written as `__` below) to mark "not a hex digit".
*/
#define __ 0xFF
static const unsigned char hexval[0x100] = {
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 00-0F */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 10-1F */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 20-2F */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,__,__,__,__,__,__, /* 30-3F: '0'-'9' */
__,10,11,12,13,14,15,__,__,__,__,__,__,__,__,__, /* 40-4F: 'A'-'F' */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 50-5F */
__,10,11,12,13,14,15,__,__,__,__,__,__,__,__,__, /* 60-6F: 'a'-'f' */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 70-7F */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 80-8F */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 90-9F */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* A0-AF */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* B0-BF */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* C0-CF */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* D0-DF */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* E0-EF */
__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* F0-FF */
};
#undef __



/*
uri_encode.c - functions for URI percent encoding / decoding
*/

/*
  Percent-encode the `len` bytes starting at `src`.

  Bytes whose slot in `uri_encode_tbl` is non-empty are emitted as the
  three characters "%XX"; all other bytes are copied unchanged.

  If `dst` is not NULL the encoded, NUL-terminated string is written to
  it; the caller must provide room for the result plus the terminator
  (at most 3*len + 1 bytes).  If `dst` is NULL nothing is written.

  Returns the length of the encoded string, not counting the
  terminating NUL.

  The table and the output are accessed byte-wise / through memcpy()
  rather than through `int32_t *` casts: neither the `char` table nor
  `dst + j` is guaranteed to be 4-byte aligned, and reading a `char`
  array through an `int32_t` lvalue violates the aliasing rules.
*/
size_t uri_encode(const char *src, const size_t len, char *dst)
{
  size_t i, j = 0;

  for (i = 0; i < len; i++) {
    /* Slot for this byte: "%XX\0" if it must be escaped, all zeros if not. */
    const char *code = &uri_encode_tbl[4 * (size_t)(unsigned char)src[i]];

    if (code[0]) {
      if (dst) memcpy(dst + j, code, 3);
      j += 3;
    } else {
      if (dst) dst[j] = src[i];
      j++;
    }
  }
  if (dst) dst[j] = '\0';
  return j;
}

/*
  Percent-decode the `len` bytes starting at `src`.

  A '%' followed by two hex digits (both within the first `len` bytes)
  is replaced by the byte it encodes.  Any other byte, including a '%'
  that does not start a valid escape, is copied unchanged.

  If `dst` is not NULL the decoded bytes plus a terminating NUL are
  written to it; if `dst` is NULL nothing is written.

  Returns the number of decoded bytes, not counting the terminating NUL.
*/
size_t uri_decode(const char *src, const size_t len, char *dst)
{
  size_t in = 0, out = 0;

  while (in < len) {
    char ch = src[in];   /* byte to emit; defaults to a literal copy */
    size_t used = 1;     /* number of input bytes consumed this round */

    if (ch == '%' && in + 2 < len) {
      const unsigned char hi = hexval[ (unsigned char)src[in + 1] ];
      const unsigned char lo = hexval[ (unsigned char)src[in + 2] ];

      /* Valid digits are 0-15, so the OR is 0xFF only if one is invalid. */
      if ((hi | lo) != 0xFF) {
        ch = (hi << 4) | lo;
        used = 3;
      }
    }

    if (dst) dst[out] = ch;
    out++;
    in += used;
  }
  if (dst) dst[out] = '\0';
  return out;
}
39 changes: 39 additions & 0 deletions src/utils/uri_encode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/* uri_encode.h -- C library for URI percent encoding/decoding
*
* This software is Copyright (c) 2016 by David Farrell
*
* Distributed under terms of the (two-clause) FreeBSD License
*
* See: https://github.com/dnmfarrell/URI-Encode-C
*
* Modified by Jesper Friis, 2024
*/
#ifndef _URI_ENCODE_H
#define _URI_ENCODE_H

#include <stdlib.h>

/**
Percent-encode `src`, which is a buffer of length `len`, and write
the NUL-terminated result to `dst`.

Every input byte yields either itself or a three-character "%XX"
escape, so `dst` must have room for at most `3*len + 1` bytes.

If `dst` is NULL, nothing is written and only the length the encoded
string would have is returned.

Returns the number of bytes written to `dst`, not including the
terminating NUL.
*/
size_t uri_encode(const char *src, const size_t len, char *dst);


/**
Percent-decode `src`, which is a buffer of length `len`, and write
the NUL-terminated result to `dst`.

A '%' that is not followed by two hex digits within `src` is copied
unchanged.  The result is never longer than the input, so `dst` must
have room for at most `len + 1` bytes.  Since "%00" decodes to a NUL
byte, the result may contain embedded NULs; use the return value
rather than strlen() to get its length.

If `dst` is NULL, nothing is written and only the length the decoded
data would have is returned.

Returns the number of bytes written to `dst`, not including the
terminating NUL.
*/
size_t uri_decode(const char *src, const size_t len, char *dst);

#endif /* _URI_ENCODE_H */

0 comments on commit 32f09e5

Please sign in to comment.