Skip to content

Commit

Permalink
Do not allow invalid UTF8 in method, uri, header names
Browse files Browse the repository at this point in the history
  • Loading branch information
cpq committed Nov 25, 2023
1 parent 28162f8 commit bd53e46
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 28 deletions.
35 changes: 22 additions & 13 deletions mongoose.c
Original file line number Diff line number Diff line change
Expand Up @@ -1948,13 +1948,20 @@ struct mg_str *mg_http_get_header(struct mg_http_message *h, const char *name) {
return NULL;
}

// Get character length. Used to parse method, URI, headers
static size_t clen(const char *s) {
uint8_t c = *(uint8_t *) s;
// Is it a valid utf-8 continuation byte (bit pattern 10xxxxxx)
static bool vcb(uint8_t c) {
  return (c & 0xc0) == 0x80;
}

// Get the byte length of the character starting at `s`, never looking at or
// past `end`. Used to parse method, URI, headers.
//
// Returns:
//   1     for a printable ASCII character, '!' through '~' inclusive
//   2..4  for a UTF-8 lead byte that is followed, inside [s, end), by the
//         required number of continuation bytes
//   0     for anything else: space, control characters, DEL, a stray
//         continuation byte, or a truncated / malformed multi-byte sequence
//
// The first byte is read unconditionally, so the caller must ensure s < end.
// Only the lead/continuation bit patterns are checked: overlong encodings and
// surrogate code points are not rejected here.
static size_t clen(const char *s, const char *end) {
  const unsigned char *u = (const unsigned char *) s;
  const unsigned char c = *u;
  // Bytes available starting at the lead byte. Kept as size_t so that a large
  // buffer cannot be truncated the way a `long` could be on LLP64 targets.
  size_t avail = end > s ? (size_t) (end - s) : 0;
  if (c > ' ' && c <= '~') return 1;  // Usual ascii printed char, incl. '~'
  // Every multi-byte form must be fully present and well-formed; an
  // unvalidated lead byte must never be reported as a complete character.
  if ((c & 0xe0) == 0xc0 && avail > 1 && vcb(u[1])) return 2;  // 2-byte UTF8
  if ((c & 0xf0) == 0xe0 && avail > 2 && vcb(u[1]) && vcb(u[2])) return 3;
  if ((c & 0xf8) == 0xf0 && avail > 3 && vcb(u[1]) && vcb(u[2]) && vcb(u[3]))
    return 4;
  return 0;
}

Expand All @@ -1976,10 +1983,12 @@ static bool mg_http_parse_headers(const char *s, const char *end,
if (s >= end) return false;
if (s[0] == '\n' || (s[0] == '\r' && s[1] == '\n')) break;
k.ptr = s;
while (s < end && s[0] != ':' && (n = clen(s)) > 0) s += n, k.len += n;
if (k.len == 0) return false; // Empty name
if (s >= end || *s++ != ':') return false; // Invalid, not followed by :
while (s < end && s[0] == ' ') s++; // Skip spaces
while (s < end && s[0] != ':' && (n = clen(s, end)) > 0) s += n, k.len += n;
if (k.len == 0) return false; // Empty name
if (s >= end || clen(s, end) == 0) return false; // Invalid UTF-8
if (*s++ != ':') return false; // Invalid, not followed by :
// if (clen(s, end) == 0) return false; // Invalid UTF-8
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &v)) == NULL) return false;
while (v.len > 0 && v.ptr[v.len - 1] == ' ') v.len--; // Trim spaces
// MG_INFO(("--HH [%.*s] [%.*s]", (int) k.len, k.ptr, (int) v.len, v.ptr));
Expand All @@ -2004,10 +2013,10 @@ int mg_http_parse(const char *s, size_t len, struct mg_http_message *hm) {

// Parse request line
hm->method.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->method.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->method.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
hm->uri.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->uri.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->uri.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &hm->proto)) == NULL) return false;

Expand Down Expand Up @@ -2764,7 +2773,7 @@ static void http_cb(struct mg_connection *c, int ev, void *evd, void *fnd) {
if (dl == 0) break;
}
ofs += (size_t) (n + o);
} else { // Normal, non-chunked data
} else { // Normal, non-chunked data
size_t len = c->recv.len - ofs - (size_t) n;
if (hm.body.len > len) break; // Buffer more data
ofs += (size_t) n + hm.body.len;
Expand Down
35 changes: 22 additions & 13 deletions src/http.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,20 @@ struct mg_str *mg_http_get_header(struct mg_http_message *h, const char *name) {
return NULL;
}

// Get character length. Used to parse method, URI, headers
static size_t clen(const char *s) {
uint8_t c = *(uint8_t *) s;
// Is it a valid utf-8 continuation byte (bit pattern 10xxxxxx)
static bool vcb(uint8_t c) {
  return (c & 0xc0) == 0x80;
}

// Get the byte length of the character starting at `s`, never looking at or
// past `end`. Used to parse method, URI, headers.
//
// Returns:
//   1     for a printable ASCII character, '!' through '~' inclusive
//   2..4  for a UTF-8 lead byte that is followed, inside [s, end), by the
//         required number of continuation bytes
//   0     for anything else: space, control characters, DEL, a stray
//         continuation byte, or a truncated / malformed multi-byte sequence
//
// The first byte is read unconditionally, so the caller must ensure s < end.
// Only the lead/continuation bit patterns are checked: overlong encodings and
// surrogate code points are not rejected here.
static size_t clen(const char *s, const char *end) {
  const unsigned char *u = (const unsigned char *) s;
  const unsigned char c = *u;
  // Bytes available starting at the lead byte. Kept as size_t so that a large
  // buffer cannot be truncated the way a `long` could be on LLP64 targets.
  size_t avail = end > s ? (size_t) (end - s) : 0;
  if (c > ' ' && c <= '~') return 1;  // Usual ascii printed char, incl. '~'
  // Every multi-byte form must be fully present and well-formed; an
  // unvalidated lead byte must never be reported as a complete character.
  if ((c & 0xe0) == 0xc0 && avail > 1 && vcb(u[1])) return 2;  // 2-byte UTF8
  if ((c & 0xf0) == 0xe0 && avail > 2 && vcb(u[1]) && vcb(u[2])) return 3;
  if ((c & 0xf8) == 0xf0 && avail > 3 && vcb(u[1]) && vcb(u[2]) && vcb(u[3]))
    return 4;
  return 0;
}

Expand All @@ -221,10 +228,12 @@ static bool mg_http_parse_headers(const char *s, const char *end,
if (s >= end) return false;
if (s[0] == '\n' || (s[0] == '\r' && s[1] == '\n')) break;
k.ptr = s;
while (s < end && s[0] != ':' && (n = clen(s)) > 0) s += n, k.len += n;
if (k.len == 0) return false; // Empty name
if (s >= end || *s++ != ':') return false; // Invalid, not followed by :
while (s < end && s[0] == ' ') s++; // Skip spaces
while (s < end && s[0] != ':' && (n = clen(s, end)) > 0) s += n, k.len += n;
if (k.len == 0) return false; // Empty name
if (s >= end || clen(s, end) == 0) return false; // Invalid UTF-8
if (*s++ != ':') return false; // Invalid, not followed by :
// if (clen(s, end) == 0) return false; // Invalid UTF-8
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &v)) == NULL) return false;
while (v.len > 0 && v.ptr[v.len - 1] == ' ') v.len--; // Trim spaces
// MG_INFO(("--HH [%.*s] [%.*s]", (int) k.len, k.ptr, (int) v.len, v.ptr));
Expand All @@ -249,10 +258,10 @@ int mg_http_parse(const char *s, size_t len, struct mg_http_message *hm) {

// Parse request line
hm->method.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->method.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->method.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
hm->uri.ptr = s;
while (s < end && (n = clen(s)) > 0) s += n, hm->uri.len += n;
while (s < end && (n = clen(s, end)) > 0) s += n, hm->uri.len += n;
while (s < end && s[0] == ' ') s++; // Skip spaces
if ((s = skiptorn(s, end, &hm->proto)) == NULL) return false;

Expand Down Expand Up @@ -1009,7 +1018,7 @@ static void http_cb(struct mg_connection *c, int ev, void *evd, void *fnd) {
if (dl == 0) break;
}
ofs += (size_t) (n + o);
} else { // Normal, non-chunked data
} else { // Normal, non-chunked data
size_t len = c->recv.len - ofs - (size_t) n;
if (hm.body.len > len) break; // Buffer more data
ofs += (size_t) n + hm.body.len;
Expand Down
10 changes: 8 additions & 2 deletions test/unit_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -1482,8 +1482,8 @@ static void test_http_parse(void) {
}

// #2292: fail on stray \r inside the headers
ASSERT(mg_http_parse("a є\n\n", 6, &req) > 0);
ASSERT(mg_http_parse("a b\n\n", 5, &req) > 0);
ASSERT(mg_http_parse("a є\n\n", 6, &req) == 6);
ASSERT(mg_http_parse("a b\n\n", 5, &req) == 5);
ASSERT(mg_http_parse("a b\na:\n\n", 8, &req) > 0);
ASSERT(mg_http_parse("a b\na:\r\n\n", 9, &req) > 0);
ASSERT(mg_http_parse("a b\n\ra:\r\n\n", 10, &req) == -1);
Expand Down Expand Up @@ -1602,6 +1602,12 @@ static void test_http_parse(void) {
ASSERT(mg_http_parse(s, strlen(s), &hm) == (int) strlen(s));
s = "a\nb:b\nc:c\n\n";
ASSERT(mg_http_parse(s, strlen(s), &hm) < 0);
s = "a b\nc: \xc0\n\n"; // Invalid UTF in the header value: accept
ASSERT(mg_http_parse(s, strlen(s), &hm) == (int) strlen(s));
ASSERT((v = mg_http_get_header(&hm, "c")) != NULL);
ASSERT(mg_vcmp(v, "\xc0") == 0);
s = "a b\n\xc0: 2\n\n"; // Invalid UTF in the header name: do NOT accept
ASSERT(mg_http_parse(s, strlen(s), &hm) == -1);
}
}

Expand Down

0 comments on commit bd53e46

Please sign in to comment.