diff --git a/app/2json.c b/app/2json.c index c25dd424..195292b6 100644 --- a/app/2json.c +++ b/app/2json.c @@ -150,6 +150,7 @@ int MAIN(int argc, const char *argv[]) { FILE *out = NULL; int err = 0; + struct zsv_opts opts = zsv_get_default_opts(); // leave up here so that below goto stmts do not cross initialization for(int i = 1; !err && i < argc; i++) { if(!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h")) { for(int j = 0; usage[j]; j++) @@ -196,7 +197,6 @@ int MAIN(int argc, const char *argv[]) { if(!out) out = stdout; - struct zsv_opts opts = zsv_get_default_opts(); opts.cell = zsv_2json_cell; opts.row = zsv_2json_row; opts.ctx = &data; @@ -228,7 +228,3 @@ int MAIN(int argc, const char *argv[]) { exit_2json: return err; } - - - - diff --git a/app/builtin/help.c b/app/builtin/help.c index 727956ed..d66d40e1 100644 --- a/app/builtin/help.c +++ b/app/builtin/help.c @@ -24,7 +24,7 @@ static int main_help(int argc, const char *argv[]) { "", "Options common to all commands:", " -c,--max-column-count: set the maximum number of columns parsed per row. defaults to 1024", - " -r,--max-row-size: set the minimum supported maximum row size. defaults to 128k", + " -r,--max-row-size: set the minimum supported maximum row size. defaults to 64k", " -B,--buff-size: set internal buffer size. 
defaults to 256k", " -t,--tab-delim: set column delimiter to tab", " -O,--other-delim: set column delimiter to other column", diff --git a/app/cli.c b/app/cli.c index 8535926f..f6adb54e 100644 --- a/app/cli.c +++ b/app/cli.c @@ -327,7 +327,7 @@ static enum zsv_ext_status ext_init(struct zsv_ext *ext) { struct zsv_ext_callbacks cb; zsv_ext_callbacks_init(&cb); ext->commands_next = &ext->commands; - + struct zsv_execution_data d; memset(&d, 0, sizeof(d)); d.ext = ext; diff --git a/app/cli_ini.c b/app/cli_ini.c index 7e51eb6b..0ad24833 100644 --- a/app/cli_ini.c +++ b/app/cli_ini.c @@ -32,7 +32,7 @@ static void write_extension_config(struct zsv_ext *ext, FILE *f) { fprintf(f, "[%s]\n\n", ext->id); } -// config_save: return error +// config_save: return error static int config_save(struct cli_config *config) { int err = 1; char *tmp; @@ -124,7 +124,7 @@ static int add_extension(const char *id, struct zsv_ext **exts, char ignore_err, } else { ext->next = *exts; *exts = ext; - } + } } free(extension_id); } @@ -207,4 +207,3 @@ static void ext_command_delete(struct zsv_ext_command *cmd) { free(cmd); } } - diff --git a/app/count.c b/app/count.c index 94ea3e5f..d3968510 100644 --- a/app/count.c +++ b/app/count.c @@ -94,4 +94,4 @@ int MAIN(int argc, const char *argv[]) { fclose(opts.stream); return err; -} +} diff --git a/app/select.c b/app/select.c index 61ce13c2..9193302c 100644 --- a/app/select.c +++ b/app/select.c @@ -5,6 +5,10 @@ * https://opensource.org/licenses/MIT */ +#ifdef _WIN32 +#define _CRT_RAND_S +#endif + #include #include #include @@ -15,9 +19,6 @@ #include -#ifdef _WIN32 -#define _CRT_RAND_S -#endif #include #include #include @@ -183,9 +184,14 @@ static void zsv_select_add_output_col(struct zsv_select_data *data, unsigned in_ static inline unsigned int str_array_ifind(const unsigned char *needle, unsigned char *haystack[], unsigned hay_count) { - for(unsigned int i = 0; i < hay_count; i++) + for(unsigned int i = 0; i < hay_count; i++) { + if(!(needle && 
*needle) && !(haystack[i] && *haystack[i])) + return i + 1; + if(!(needle && *needle && haystack[i] && *haystack[i])) + continue; if(!zsv_stricmp(needle, haystack[i])) return i + 1; + } return 0; } diff --git a/app/test/Makefile b/app/test/Makefile index c6e51359..159ab9c2 100644 --- a/app/test/Makefile +++ b/app/test/Makefile @@ -103,11 +103,18 @@ test-echo : ${BUILD_DIR}/bin/zsv_echo worldcitiespop_mil.csv: curl -LOk 'https://burntsushi.net/stuff/worldcitiespop_mil.csv' -test-count: ${BUILD_DIR}/bin/zsv_count worldcitiespop_mil.csv +test-count: test-count-1 test-count-2 + +test-count-1: ${BUILD_DIR}/bin/zsv_count worldcitiespop_mil.csv @${TEST_NAME} @cat worldcitiespop_mil.csv | ${PREFIX} $< ${REDIRECT} /tmp/$@.out @${CMP} /tmp/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL} +test-count-2: ${BUILD_DIR}/bin/zsv_count${EXE} ${TEST_DATA_DIR}/test/buffsplit_quote.csv + @${TEST_NAME} + @for x in 5000 5002 5004 5006 5008 5010 5013 5015 5017 5019 5021 5101 5105 5111 5113 5115 5117 5119 5121 5123 5125 5127 5129 5131 5211 5213 5215 5217 5311 5313 5315 5317 5413 5431 5433 5455 6133 ; do $< -r $$x ${TEST_DATA_DIR}/test/buffsplit_quote.csv ; done > /tmp/$@.out + @${CMP} /tmp/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL} + test-select: test-select-n test-select-6 test-select-7 test-select-8 test-select-n: ${BUILD_DIR}/bin/zsv_select${EXE} @@ -146,7 +153,6 @@ test-select-8: ${BUILD_DIR}/bin/zsv_select${EXE} @${PREFIX} $< --whitespace-clean-no-newline ${TEST_DATA_DIR}/test/white_utf8.csv ${REDIRECT} /tmp/$@.out3 @${CMP} /tmp/$@.out3 expected/$@.out3 && ${TEST_PASS} || ${TEST_FAIL} - test-stack: test-stack1 test-stack2 test-stack1: ${BUILD_DIR}/bin/zsv_stack${EXE} diff --git a/app/test/expected/test-count.out b/app/test/expected/test-count-1.out similarity index 100% rename from app/test/expected/test-count.out rename to app/test/expected/test-count-1.out diff --git a/app/test/expected/test-count-2.out b/app/test/expected/test-count-2.out new file mode 100644 
index 00000000..581b3e4e --- /dev/null +++ b/app/test/expected/test-count-2.out @@ -0,0 +1,37 @@ +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 diff --git a/include/zsv/api.h b/include/zsv/api.h index e36b07ff..c0affb44 100644 --- a/include/zsv/api.h +++ b/include/zsv/api.h @@ -119,7 +119,7 @@ zsv_opts_new( void *ctx, /* pointer passed to row / cell handler(s) */ zsv_generic_read read, /* defaults to fread */ void *stream, /* defaults to stdin */ - + unsigned char *buff, /* user-provided buff */ size_t buffsize, /* size of user-provided buff */ @@ -131,7 +131,7 @@ zsv_opts_new( * buffsize = 2 * max_row_size */ unsigned max_row_size, - + char delimiter, /* defaults to comma */ char no_quotes /* defaults to false */ ); diff --git a/src/zsv.c b/src/zsv.c index a3eb3f6f..034121a7 100644 --- a/src/zsv.c +++ b/src/zsv.c @@ -36,6 +36,7 @@ enum zsv_status zsv_parse_more(struct zsv_scanner *scanner) { if(scanner->old_bytes_read) { if(scanner->row_start < scanner->old_bytes_read) { size_t len = scanner->old_bytes_read - scanner->row_start; + if(len < scanner->row_start) memcpy(scanner->buff.buff, scanner->buff.buff + scanner->row_start, len); else @@ -69,7 +70,7 @@ enum zsv_status zsv_parse_more(struct zsv_scanner *scanner) { scanner->opts.row = zsv_throwaway_row; scanner->opts.ctx = scanner; - + scanner->partial_row_length = 0; capacity = scanner->buff.size; } diff --git a/src/zsv_internal.c b/src/zsv_internal.c index d8b9c5a2..b3737d0f 100644 --- a/src/zsv_internal.c +++ b/src/zsv_internal.c @@ -123,7 +123,7 @@ __attribute__((always_inline)) static inline void cell1(struct zsv_scanner * sca scanner->quoted = ZSV_PARSER_QUOTE_NEEDED; } // end quote handling - + if(VERY_UNLIKELY(scanner->waiting_for_end != 0)) { // overflow: cell size exceeds allocated memory if(scanner->opts.overflow) scanner->opts.overflow(scanner->opts.ctx, s, n); @@ 
-136,7 +136,7 @@ __attribute__((always_inline)) static inline void cell1(struct zsv_scanner * sca row->cells[row->used++] = c; } else scanner->row.overflow++; - } + } scanner->waiting_for_end = !is_end; scanner->have_cell = 1; @@ -239,11 +239,11 @@ static enum zsv_status zsv_scan(struct zsv_scanner *scanner, memset(&qt_v, 0, sizeof(qt_v)); } - // check if we have an embedded dbl-quote past the initial opening quote, which was + // case "hel"|"o": check if we have an embedded dbl-quote past the initial opening quote, which was // split between the last buffer and this one e.g. "hel""o" where the last buffer ended // with "hel" and this one starts with "o" if((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) - && i > scanner->cell_start + && i > scanner->cell_start + 1 // case "|hello": the + 1 is needed when the buffer split falls immediately after the opening quote, e.g. "hello" split into one buffer ending with " and the next starting with hello" && scanner->last == quote) { if(buff[i] != quote) { scanner->quoted |= ZSV_PARSER_QUOTE_CLOSED;