Skip to content

Commit

Permalink
fix mishandled case of "|hello" / initial quote at end of buff
Browse files Browse the repository at this point in the history
add test for "|hello"
fix select w named columns crash if some column names are empty
  • Loading branch information
liquidaty committed Jan 21, 2022
1 parent 87258b4 commit 273451e
Show file tree
Hide file tree
Showing 12 changed files with 69 additions and 24 deletions.
6 changes: 1 addition & 5 deletions app/2json.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ int MAIN(int argc, const char *argv[]) {

FILE *out = NULL;
int err = 0;
struct zsv_opts opts = zsv_get_default_opts(); // leave up here so that below goto stmts do not cross initialization
for(int i = 1; !err && i < argc; i++) {
if(!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) {
for(int j = 0; usage[j]; j++)
Expand Down Expand Up @@ -196,7 +197,6 @@ int MAIN(int argc, const char *argv[]) {
if(!out)
out = stdout;

struct zsv_opts opts = zsv_get_default_opts();
opts.cell = zsv_2json_cell;
opts.row = zsv_2json_row;
opts.ctx = &data;
Expand Down Expand Up @@ -228,7 +228,3 @@ int MAIN(int argc, const char *argv[]) {
exit_2json:
return err;
}




2 changes: 1 addition & 1 deletion app/builtin/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ static int main_help(int argc, const char *argv[]) {
"",
"Options common to all commands:",
" -c,--max-column-count: set the maximum number of columns parsed per row. defaults to 1024",
" -r,--max-row-size: set the minimum supported maximum row size. defaults to 128k",
" -r,--max-row-size: set the minimum supported maximum row size. defaults to 64k",
" -B,--buff-size: set internal buffer size. defaults to 256k",
" -t,--tab-delim: set column delimiter to tab",
" -O,--other-delim: set column delimiter to other column",
Expand Down
2 changes: 1 addition & 1 deletion app/cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ static enum zsv_ext_status ext_init(struct zsv_ext *ext) {
struct zsv_ext_callbacks cb;
zsv_ext_callbacks_init(&cb);
ext->commands_next = &ext->commands;

struct zsv_execution_data d;
memset(&d, 0, sizeof(d));
d.ext = ext;
Expand Down
5 changes: 2 additions & 3 deletions app/cli_ini.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static void write_extension_config(struct zsv_ext *ext, FILE *f) {
fprintf(f, "[%s]\n\n", ext->id);
}

// config_save: return error
// config_save: return error
static int config_save(struct cli_config *config) {
int err = 1;
char *tmp;
Expand Down Expand Up @@ -124,7 +124,7 @@ static int add_extension(const char *id, struct zsv_ext **exts, char ignore_err,
} else {
ext->next = *exts;
*exts = ext;
}
}
}
free(extension_id);
}
Expand Down Expand Up @@ -207,4 +207,3 @@ static void ext_command_delete(struct zsv_ext_command *cmd) {
free(cmd);
}
}

2 changes: 1 addition & 1 deletion app/count.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,4 @@ int MAIN(int argc, const char *argv[]) {
fclose(opts.stream);

return err;
}
}
14 changes: 10 additions & 4 deletions app/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
* https://opensource.org/licenses/MIT
*/

#ifdef _WIN32
#define _CRT_RAND_S
#endif

#include <zsv.h>
#include <zsv/utils/writer.h>
#include <zsv/utils/signal.h>
Expand All @@ -15,9 +19,6 @@

#include <stdio.h>

#ifdef _WIN32
#define _CRT_RAND_S
#endif
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
Expand Down Expand Up @@ -183,9 +184,14 @@ static void zsv_select_add_output_col(struct zsv_select_data *data, unsigned in_
/**
 * str_array_ifind: locate `needle` in an array of strings.
 *
 * Comparison of non-empty strings is delegated to zsv_stricmp()
 * (presumably a case-insensitive compare -- confirm against its definition).
 * A NULL or empty needle matches the first NULL or empty haystack entry.
 * NULL or empty values are never passed to zsv_stricmp().
 *
 * @param needle    string to look for; may be NULL or empty
 * @param haystack  array of `hay_count` strings; entries may be NULL or empty
 * @param hay_count number of entries in `haystack`
 * @return 1-based position of the first matching entry, or 0 if none matches
 */
static inline unsigned int str_array_ifind(const unsigned char *needle,
unsigned char *haystack[],
unsigned hay_count) {
  /* needle does not change while scanning, so classify it once up front */
  const int needle_is_blank = !(needle && *needle);

  /* single pass over the haystack: the first match wins */
  for(unsigned int i = 0; i < hay_count; i++) {
    const unsigned char *candidate = haystack[i];
    const int candidate_is_blank = !(candidate && *candidate);

    if(needle_is_blank || candidate_is_blank) {
      /* blank only matches blank; a blank/non-blank pair can never match */
      if(needle_is_blank && candidate_is_blank)
        return i + 1;
      continue;
    }

    /* both sides are non-empty: safe to hand to the string compare */
    if(!zsv_stricmp(needle, candidate))
      return i + 1;
  }
  return 0;
}

Expand Down
10 changes: 8 additions & 2 deletions app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,18 @@ test-echo : ${BUILD_DIR}/bin/zsv_echo
worldcitiespop_mil.csv:
curl -LOk 'https://burntsushi.net/stuff/worldcitiespop_mil.csv'

test-count: ${BUILD_DIR}/bin/zsv_count worldcitiespop_mil.csv
test-count: test-count-1 test-count-2

test-count-1: ${BUILD_DIR}/bin/zsv_count worldcitiespop_mil.csv
@${TEST_NAME}
@cat worldcitiespop_mil.csv | ${PREFIX} $< ${REDIRECT} /tmp/$@.out
@${CMP} /tmp/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-count-2: ${BUILD_DIR}/bin/zsv_count${EXE} ${TEST_DATA_DIR}/test/buffsplit_quote.csv
@${TEST_NAME}
@for x in 5000 5002 5004 5006 5008 5010 5013 5015 5017 5019 5021 5101 5105 5111 5113 5115 5117 5119 5121 5123 5125 5127 5129 5131 5211 5213 5215 5217 5311 5313 5315 5317 5413 5431 5433 5455 6133 ; do $< -r $$x ${TEST_DATA_DIR}/test/buffsplit_quote.csv ; done > /tmp/$@.out
@${CMP} /tmp/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-select: test-select-n test-select-6 test-select-7 test-select-8

test-select-n: ${BUILD_DIR}/bin/zsv_select${EXE}
Expand Down Expand Up @@ -146,7 +153,6 @@ test-select-8: ${BUILD_DIR}/bin/zsv_select${EXE}
@${PREFIX} $< --whitespace-clean-no-newline ${TEST_DATA_DIR}/test/white_utf8.csv ${REDIRECT} /tmp/$@.out3
@${CMP} /tmp/$@.out3 expected/$@.out3 && ${TEST_PASS} || ${TEST_FAIL}


test-stack: test-stack1 test-stack2

test-stack1: ${BUILD_DIR}/bin/zsv_stack${EXE}
Expand Down
File renamed without changes.
37 changes: 37 additions & 0 deletions app/test/expected/test-count-2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
999
4 changes: 2 additions & 2 deletions include/zsv/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ zsv_opts_new(
void *ctx, /* pointer passed to row / cell handler(s) */
zsv_generic_read read, /* defaults to fread */
void *stream, /* defaults to stdin */

unsigned char *buff, /* user-provided buff */
size_t buffsize, /* size of user-provided buff */

Expand All @@ -131,7 +131,7 @@ zsv_opts_new(
* buffsize = 2 * max_row_size
*/
unsigned max_row_size,

char delimiter, /* defaults to comma */
char no_quotes /* defaults to false */
);
Expand Down
3 changes: 2 additions & 1 deletion src/zsv.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ enum zsv_status zsv_parse_more(struct zsv_scanner *scanner) {
if(scanner->old_bytes_read) {
if(scanner->row_start < scanner->old_bytes_read) {
size_t len = scanner->old_bytes_read - scanner->row_start;

if(len < scanner->row_start)
memcpy(scanner->buff.buff, scanner->buff.buff + scanner->row_start, len);
else
Expand Down Expand Up @@ -69,7 +70,7 @@ enum zsv_status zsv_parse_more(struct zsv_scanner *scanner) {

scanner->opts.row = zsv_throwaway_row;
scanner->opts.ctx = scanner;

scanner->partial_row_length = 0;
capacity = scanner->buff.size;
}
Expand Down
8 changes: 4 additions & 4 deletions src/zsv_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ __attribute__((always_inline)) static inline void cell1(struct zsv_scanner * sca
scanner->quoted = ZSV_PARSER_QUOTE_NEEDED;
}
// end quote handling

if(VERY_UNLIKELY(scanner->waiting_for_end != 0)) { // overflow: cell size exceeds allocated memory
if(scanner->opts.overflow)
scanner->opts.overflow(scanner->opts.ctx, s, n);
Expand All @@ -136,7 +136,7 @@ __attribute__((always_inline)) static inline void cell1(struct zsv_scanner * sca
row->cells[row->used++] = c;
} else
scanner->row.overflow++;
}
}
scanner->waiting_for_end = !is_end;
scanner->have_cell = 1;

Expand Down Expand Up @@ -239,11 +239,11 @@ static enum zsv_status zsv_scan(struct zsv_scanner *scanner,
memset(&qt_v, 0, sizeof(qt_v));
}

// check if we have an embedded dbl-quote past the initial opening quote, which was
// case "hel"|"o": check if we have an embedded dbl-quote past the initial opening quote, which was
// split between the last buffer and this one e.g. "hel""o" where the last buffer ended
// with "hel" and this one starts with "o"
if((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED)
&& i > scanner->cell_start
&& i > scanner->cell_start + 1 // case "|hello": need the + 1 in case split after first char of quoted value e.g. "hello" => " and hello"
&& scanner->last == quote) {
if(buff[i] != quote) {
scanner->quoted |= ZSV_PARSER_QUOTE_CLOSED;
Expand Down

0 comments on commit 273451e

Please sign in to comment.