Skip to content

Commit

Permalink
initial commit: only applies to select cmd (to do: test, apply to all… (#50)
Browse files (browse the repository at this point in the history)

* move header offset/depth generic options (-R,--skip-head / -d,--header-row-span) into parser and make available for all commands, instead of only for select command
* other miscellaneous cleanup
  • Loading branch information
liquidaty authored Aug 19, 2022
1 parent 7e48c93 commit 421f84a
Show file tree
Hide file tree
Showing 14 changed files with 283 additions and 138 deletions.
6 changes: 3 additions & 3 deletions app/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ endif

ZSV=$(BINDIR)/zsv${EXE}

SOURCES= echo count select 2json serialize flatten pretty stack desc 2tsv sql 2db
CLI_SOURCES=select desc count pretty sql flatten 2json 2tsv serialize stack 2db
SOURCES= echo count select 2tsv 2json serialize flatten pretty stack desc sql 2db
CLI_SOURCES=select desc count 2tsv pretty sql flatten 2json serialize stack 2db

ifneq ($(LDFLAGS_JQ),)
SOURCES+= jq
Expand Down Expand Up @@ -210,7 +210,7 @@ help:
@echo "which will build and test all apps, or to build/test a single app:"
@echo " ${MAKE} test-xx"
@echo "where xx is any of:"
@echo " echo count select 2json serialize flatten pretty stack desc 2tsv sql 2db"
@echo " echo count select 2tsv 2json serialize flatten pretty stack desc sql 2db"
@echo ""

install: ${ZSV}
Expand Down
4 changes: 2 additions & 2 deletions app/benchmark/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ endif
CCBN=$(shell basename ${CC})
ZSVBIN=../../build/${BUILD_SUBDIR}/${CCBN}/bin/zsv_

QUICK=
QUICK=1

help:
@echo "To run all tests (set QUICK to skip mlr and csvcut):"
@echo " make all [QUICK=1]"
@echo " make all [QUICK=0]"
@echo " make CLI"

CLI: ZSVBIN="zsv "
Expand Down
2 changes: 2 additions & 0 deletions app/builtin/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ static int main_help(int argc, const char *argv[]) {
" -t,--tab-delim: set column delimiter to tab",
" -O,--other-delim <char>: set column delimiter to specified character",
" -q,--no-quote: turn off quote handling",
" -R,--skip-head <n>: skip specified number of initial rows",
" -d,--header-row-span <n>: apply header depth (rowspan) of n",
" -v,--verbose: verbose output",
"",
"Commands:",
Expand Down
4 changes: 4 additions & 0 deletions app/ext_example/test/expected/zsvext-test-3.out
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Options common to all commands:
-t,--tab-delim: set column delimiter to tab
-O,--other-delim <char>: set column delimiter to specified character
-q,--no-quote: turn off quote handling
-R,--skip-head <n>: skip specified number of initial rows
-d,--header-row-span <n>: apply header depth (rowspan) of n
-v,--verbose: verbose output

Commands:
Expand Down Expand Up @@ -68,6 +70,8 @@ Options common to all commands:
-t,--tab-delim: set column delimiter to tab
-O,--other-delim <char>: set column delimiter to specified character
-q,--no-quote: turn off quote handling
-R,--skip-head <n>: skip specified number of initial rows
-d,--header-row-span <n>: apply header depth (rowspan) of n
-v,--verbose: verbose output

Commands:
Expand Down
9 changes: 2 additions & 7 deletions app/jq.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@
#include <string.h>

#include "jq_internal.h"

/*
 * write1-style callback that writes to a stdio stream.
 *
 * FILE_ptr: destination stream (a FILE * passed as void *)
 * buff:     bytes to write
 * len:      number of bytes in buff
 *
 * The buffer is written as a single item of `len` bytes, so the return value
 * is fwrite()'s item count (1 if the whole item was written, otherwise 0),
 * not the number of bytes written.
 */
size_t fwrite1(void *restrict FILE_ptr, const void *restrict buff, size_t len) {
  FILE *stream = FILE_ptr;
  size_t items_written = fwrite(buff, len, 1, stream);
  return items_written;
}

#include "jq_internal.c"

#ifndef APPNAME
Expand Down Expand Up @@ -82,11 +77,11 @@ int MAIN(int argc, const char *argv[]) {
if(!err) {
void (*jqfunc)(jv, void *) = to_csv ? jv_to_csv : jv_to_json_func;
struct jv_to_json_ctx ctx;
ctx.write1 = fwrite1;
ctx.write1 = zsv_jq_fwrite1;
ctx.ctx = f_out;
ctx.flags = JV_PRINT_PRETTY | JV_PRINT_SPACE1;

void *jqctx = to_csv ? f_out : &ctx;
void *jqctx = to_csv ? (void *)f_out : (void *)&ctx;
enum zsv_jq_status jqstat;
zsv_jq_handle zjq = zsv_jq_new(jqfilter, jqfunc, jqctx, &jqstat);
if(jqstat != zsv_jq_status_ok) {
Expand Down
27 changes: 3 additions & 24 deletions app/jq_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,21 +118,13 @@ static void jv_to_csv_aux(jv value, FILE *f, int inside_string) {
jv_free(value);
}

/*
void jv_to_json_w_ctx(jv value, void *ctx) {
struct jv_to_json_ctx *c = ctx;
jv_dumpf(value, ctx->out, c->flags);
/*
 * write1-style callback that writes to a stdio stream.
 *
 * FILE_ptr: destination stream (a FILE * passed as void *)
 * buff:     bytes to write
 * len:      number of bytes in buff
 *
 * The buffer is written as a single item of `len` bytes, so the return value
 * is fwrite()'s item count (1 if the whole item was written, otherwise 0),
 * not the number of bytes written.
 */
size_t zsv_jq_fwrite1(void *restrict FILE_ptr, const void *restrict buff, size_t len) {
  FILE *stream = FILE_ptr;
  size_t items_written = fwrite(buff, len, 1, stream);
  return items_written;
}

void jv_to_json(jv value, void *file) {
FILE *f = file;
jv_dumpf(value, f, 0);
}
*/

void jv_to_json_func(jv value, void *ctx) {
struct jv_to_json_ctx *data = ctx;
if(data->write1 == fwrite1)
if(data->write1 == zsv_jq_fwrite1)
jv_dumpf(value, data->ctx, data->flags);
else {
// jv_dump_string is memory-inefficient
Expand Down Expand Up @@ -184,7 +176,6 @@ void jv_to_csv(jv value, void *file) {
jv_free(value);
}

///
static void jv_to_txt_aux(jv value, FILE *f) {
f = f ? f : stdout;
if(!jv_print_scalar(jv_copy(value), 0, f, 0)) {
Expand Down Expand Up @@ -262,8 +253,6 @@ void jv_to_lqjq(jv value, void *h) {
jv_free(jv_s);
}

///

struct zsv_jq_data {
void *jq;
struct jv_parser *parser;
Expand Down Expand Up @@ -398,7 +387,6 @@ static int zsv_jq_process(jq_state *jq,
return ret;
}


void jv_to_bool(jv value, void *char_result) {
char *c = char_result;
switch(jv_get_kind(value)) {
Expand All @@ -416,12 +404,3 @@ void jv_to_bool(jv value, void *char_result) {
}
jv_free(value);
}

/*
 * strrchr() for unsigned-char strings.
 *
 * Returns a pointer to the last occurrence of `c` in the NUL-terminated
 * string `s`, or NULL if `c` does not occur. As with strrchr(), the
 * terminating NUL is considered part of the string.
 */
static const unsigned char *strrchru(const unsigned char *s, char c) {
  const char *last = strrchr((const char *)s, c);
  return (const unsigned char *)last;
}

/*
 * Adapter around zsv_jq_parse(): forwards (h, s, len) unchanged and returns
 * the resulting status converted to size_t.
 *
 * NOTE(review): the signature has the same shape as the write1 callback in
 * struct jv_to_json_ctx, so this presumably lets a jq handle be used as a
 * write target -- confirm against callers. The return value is a status
 * code, not a byte or item count.
 */
static size_t zsv_jq_parse1(void *restrict h, const void *restrict s, size_t len) {
enum zsv_jq_status stat = zsv_jq_parse(h, s, len);
return (size_t) stat;
}
4 changes: 3 additions & 1 deletion app/jq_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ enum zsv_jq_status {
zsv_jq_status_error
};

size_t zsv_jq_fwrite1(void *restrict FILE_ptr, const void *restrict buff, size_t len);

struct jv_to_json_ctx {
size_t (*write1)(void *restrict ctx, const void *restrict buff, size_t len); // e.g. common/write1
size_t (*write1)(void *restrict ctx, const void *restrict buff, size_t len); // e.g. zsv_jq_fwrite1
void *ctx; // e.g. FILE *
int flags; // passed on to jv_dumpf / jv_dump_string
};
Expand Down
63 changes: 19 additions & 44 deletions app/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ struct zsv_select_data {

unsigned int output_col_index; // num of cols printed in current row
size_t file_row_count;
size_t header_rows_processed;

// output columns:
const char **col_argv;
Expand Down Expand Up @@ -112,10 +111,8 @@ struct zsv_select_data {

double sample_pct;

unsigned char skip_rows;
unsigned char skip_rows_orig;
unsigned char sample_every_n;
unsigned char header_depth;

size_t data_rows_limit;
size_t skip_data_rows;

Expand Down Expand Up @@ -522,34 +519,27 @@ static void zsv_select_header_row(void *ctx) {
if(data->cancelled)
return;

if(data->skip_rows > 0)
data->skip_rows--;
else {
data->header_rows_processed++;
unsigned int cols = zsv_column_count(data->parser);
unsigned int max_header_ix = 0;
for(unsigned int i = 0; i < cols; i++) {
struct zsv_cell cell = zsv_get_cell(data->parser, i);
cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
if(i < data->opts.max_columns) {
zsv_select_append_spaced_word(&data->header_names[i], cell.str, cell.len);
if(cell.len)
max_header_ix = i+1;
}
unsigned int cols = zsv_column_count(data->parser);
unsigned int max_header_ix = 0;
for(unsigned int i = 0; i < cols; i++) {
struct zsv_cell cell = zsv_get_cell(data->parser, i);
cell.str = zsv_select_cell_clean(data, cell.str, cell.quoted, &cell.len);
if(i < data->opts.max_columns) {
zsv_select_append_spaced_word(&data->header_names[i], cell.str, cell.len);
if(cell.len)
max_header_ix = i+1;
}
}

// in case we want to make this an option later
char trim_trailing_columns = 1;
if(!trim_trailing_columns)
max_header_ix = cols;
// in case we want to make this an option later
char trim_trailing_columns = 1;
if(!trim_trailing_columns)
max_header_ix = cols;

if(max_header_ix > data->header_name_count)
data->header_name_count = max_header_ix;
if(max_header_ix > data->header_name_count)
data->header_name_count = max_header_ix;

// if this was the last row in the header, finish header processing
if(data->header_rows_processed >= data->header_depth)
zsv_select_header_finish(data);
}
zsv_select_header_finish(data);
}

#define ZSV_SELECT_MAX_COLS_DEFAULT 1024
Expand Down Expand Up @@ -586,8 +576,6 @@ const char *zsv_select_usage_msg[] =
" --distinct: skip subsequent occurrences of columns with the same name",
" --merge: merge subsequent occurrences of columns with the same name, outputting first non-null value",
// --rename: like distinct, but instead of removing cols with dupe names, renames them, trying _<n> for n up to max cols
" -R, --skip-head <n>: skip specified number of rows",
" -D, --skip-data <n>: skip the specified number of data rows",
" -e <embedded lineend char>: char to replace embedded lineend. if none provided, embedded lineends are preserved",
" If the provided string begins with 0x, it will be interpreted as the hex representation of a string",
" -x <column>: exclude the indicated column. can be specified more than once",
Expand Down Expand Up @@ -658,8 +646,6 @@ int MAIN(int argc, const char *argv[]) {
data.opts = zsv_get_default_opts();

struct zsv_csv_writer_options writer_opts = zsv_writer_get_default_opts();
data.header_depth = 1;

int col_index_arg_i = 0;
const char *insert_header_row = NULL;
for(int arg_i = 1; !err && arg_i < argc; arg_i++) {
Expand Down Expand Up @@ -733,13 +719,8 @@ int MAIN(int argc, const char *argv[]) {
else if(!strcmp(argv[arg_i], "--whitespace-clean-no-newline")) {
data.clean_white = 1;
data.whitspace_clean_flags = 1;
} else if(!strcmp(argv[arg_i], "-W") || !strcmp(argv[arg_i], "--no-trim"))
} else if(!strcmp(argv[arg_i], "-W") || !strcmp(argv[arg_i], "--no-trim")) {
data.no_trim_whitespace = 1;
else if(!strcmp(argv[arg_i], "-d") || !strcmp(argv[arg_i], "--header-row-span")) {
if(!(arg_i + 1 < argc && atoi(argv[arg_i+1]) >= 0 && atoi(argv[arg_i+1]) < 256))
err = zsv_printerr(1, "%s option value invalid: should be integer between 1 and 255; got %s", argv[arg_i], arg_i + 1 < argc ? argv[arg_i+1] : "");
else
data.header_depth = (unsigned char)atoi(argv[++arg_i]);
} else if(!strcmp(argv[arg_i], "--header-row")) {
arg_i++;
if(!(arg_i < argc))
Expand Down Expand Up @@ -768,12 +749,6 @@ int MAIN(int argc, const char *argv[]) {
err = zsv_printerr(1, "%s option value invalid: should be positive integer; got %s", argv[arg_i], arg_i + 1 < argc ? argv[arg_i+1] : "");
else
data.data_rows_limit = atoi(argv[++arg_i]) + 1;
} else if(!strcmp(argv[arg_i], "-R") || !strcmp(argv[arg_i], "--skip-head")) {
++arg_i;
if(!(arg_i < argc && atoi(argv[arg_i]) >= 0 && atoi(argv[arg_i]) < 256))
err = zsv_printerr(1, "-R option value invalid: should be positive integer smaller than 256");
else
data.skip_rows = data.skip_rows_orig = atoi(argv[arg_i]);
} else if(!strcmp(argv[arg_i], "-D") || !strcmp(argv[arg_i], "--skip-data")) {
++arg_i;
if(!(arg_i < argc && atoi(argv[arg_i]) >= 0))
Expand Down
Loading

0 comments on commit 421f84a

Please sign in to comment.