From 058a990e2086e639d1e11ed8b2ae81b03e4bfcac Mon Sep 17 00:00:00 2001 From: liquidaty Date: Fri, 11 Nov 2022 18:26:55 -0800 Subject: [PATCH] re-enable clang --- .github/workflows/ci.yml | 106 +++++++++++----------- app/Makefile | 25 +++--- configure | 58 +++++------- src/Makefile | 2 - src/zsv_internal.c | 187 --------------------------------------- src/zsv_scan_delim.c | 22 +---- 6 files changed, 94 insertions(+), 306 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f57c617d..0fd889be 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: env: TAG: "0.3.2" AMD64_LINUX_GCC: amd64-linux-gcc -# AMD64_LINUX_CLANG: amd64-linux-clang + AMD64_LINUX_CLANG: amd64-linux-clang AMD64_WINDOWS_MINGW: amd64-windows-mingw AMD64_MACOSX_GCC: amd64-macosx-gcc AMD64_FREEBSD_GCC: amd64-freebsd-gcc @@ -98,18 +98,18 @@ jobs: ./scripts/ci-create-debian-package.sh ./scripts/ci-create-rpm-package.sh - # - name: Build on Linux (${{ env.AMD64_LINUX_CLANG }}) - # if: runner.os == 'Linux' - # env: - # PREFIX: ${{ env.AMD64_LINUX_CLANG }} - # CC: clang - # MAKE: make - # RUN_TESTS: true - # shell: bash - # run: | - # ./scripts/ci-build.sh - # ./scripts/ci-create-debian-package.sh - # ./scripts/ci-create-rpm-package.sh + - name: Build on Linux (${{ env.AMD64_LINUX_CLANG }}) + if: runner.os == 'Linux' + env: + PREFIX: ${{ env.AMD64_LINUX_CLANG }} + CC: clang + MAKE: make + RUN_TESTS: true + shell: bash + run: | + ./scripts/ci-build.sh + ./scripts/ci-create-debian-package.sh + ./scripts/ci-create-rpm-package.sh - name: Build on Linux (${{ env.AMD64_WINDOWS_MINGW }}) if: runner.os == 'Linux' @@ -151,16 +151,16 @@ jobs: retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} if-no-files-found: error - # - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip) - # if: runner.os == 'Linux' - # uses: actions/upload-artifact@v3 - # env: - # ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip - # with: - # name: ${{ env.ARTIFACT_NAME }} - # path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} - # retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} - # if-no-files-found: error + - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip) + if: runner.os == 'Linux' + uses: actions/upload-artifact@v3 + env: + ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} + retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} + if-no-files-found: error - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.deb) if: runner.os == 'Linux' @@ -173,16 +173,16 @@ jobs: retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} if-no-files-found: error - # - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb) - # if: runner.os == 'Linux' - # uses: actions/upload-artifact@v3 - # env: - # ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb - # with: - # name: ${{ env.ARTIFACT_NAME }} - # path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} - # retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} - # if-no-files-found: error + - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb) + if: runner.os == 'Linux' + uses: actions/upload-artifact@v3 + env: + ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} + retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} + if-no-files-found: error - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.rpm) if: runner.os == 'Linux' @@ -195,16 +195,16 @@ jobs: retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} if-no-files-found: error - # - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm) - # if: runner.os == 'Linux' - # uses: actions/upload-artifact@v3 - # env: - # ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm - # with: - # name: ${{ env.ARTIFACT_NAME }} - # path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} - # retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} - # if-no-files-found: error + - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm) + if: runner.os == 'Linux' + uses: actions/upload-artifact@v3 + env: + ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} + retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} + if-no-files-found: error - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.zip) if: runner.os == 'Linux' @@ -261,16 +261,16 @@ jobs: retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} if-no-files-found: error - # - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz) - # if: runner.os == 'Linux' - # uses: actions/upload-artifact@v3 - # env: - # ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz - # with: - # name: ${{ env.ARTIFACT_NAME }} - # path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} - # retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} - # if-no-files-found: error + - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz) + if: runner.os == 'Linux' + uses: actions/upload-artifact@v3 + env: + ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }} + retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} + if-no-files-found: error - name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.tar.gz) if: runner.os == 'Linux' diff --git a/app/Makefile b/app/Makefile index 752f646a..de2259f7 100755 --- a/app/Makefile +++ b/app/Makefile @@ -115,7 +115,6 @@ THIS_LIB_BASE=$(shell cd .. && pwd) INCLUDE_DIR=${THIS_LIB_BASE}/include BUILD_DIR=${THIS_LIB_BASE}/build/${BUILD_SUBDIR}/${CCBN} UTILS1=writer file err signal mem clock arg dl string dirs prop cache jq -# LDFLAGS= ZSV_EXTRAS ?= ifneq ($(WIN),0) @@ -201,14 +200,17 @@ YAJL_HELPER_INCLUDE=-I${THIS_MAKEFILE_DIR}/external/yajl_helper JQ_TARBALL=${THIS_MAKEFILE_DIR}/external/jq-1.6.tar.bz2 JQ_SRC=${BUILD_DIR}/external/jq-src -JQ_PREFIX= +JQ_PREFIX ?= ifeq ($(JQ_PREFIX),) JQ_PREFIX=${BUILD_DIR}-external/jq-build + JQ_INCLUDE_DIR=${JQ_PREFIX}/include + JQ_BUNDLE_LIB=${JQ_PREFIX}/lib/libjq.a + JQ_LIB=${JQ_BUNDLE_LIB} +else + JQ_INCLUDE_DIR=${JQ_PREFIX}/include + JQ_LIB= endif -JQ_INCLUDE_DIR=${JQ_PREFIX}/include -JQ_LIB=${JQ_PREFIX}/lib/libjq.a - ## json writer JSONWRITER_SRC=${THIS_MAKEFILE_DIR}/external/json_writer-1.0 JSONWRITER_INCLUDE=-I${THIS_MAKEFILE_DIR}/external/json_writer-1.0 @@ -286,7 +288,7 @@ ${CLEANS}: clean-%: .SUFFIXES: .SUFFIXES: .o .c .a -${BUILD_DIR}/objs/utils/%.o : utils/%.c ${INCLUDE_DIR}/zsv/utils/%.h ${JQ_INCLUDE_DIR} +${BUILD_DIR}/objs/utils/%.o : utils/%.c ${INCLUDE_DIR}/zsv/utils/%.h ${JQ_LIB} @mkdir -p `dirname "$@"` ${CC} ${CFLAGS} -I${INCLUDE_DIR} -I${UTF8PROC_INCLUDE} -DINCLUDE_SRC -o $@ -c utils/$*.c ${MORE_SOURCE} @@ -302,7 +304,7 @@ ${INIH_OBJECT}: ${INIH_SRC}/ini.c @mkdir -p `dirname "$@"` ${CC} ${CFLAGS} -I${INIH_INCLUDE} -DINI_HANDLER_LINENO=1 -DINI_CALL_HANDLER_ON_NEW_SECTION=1 -c $< -o $@ -${CLI_APP_OBJECT} : cli_ini.c builtin/*.c ${JQ_INCLUDE_DIR} +${CLI_APP_OBJECT} : cli_ini.c builtin/*.c ${JQ_LIB} ${CLI_APP_OBJECT} ${CLI_OBJECTS}: ${CLI_OBJ_PFX}%.o: %.c ${UTF8PROC_SRC}/utf8proc.c # ${MORE_OBJECTS} @mkdir -p `dirname "$@"` ${CC} ${CFLAGS} -DVERSION=\"${VERSION}\" -DZSV_CLI ${CLI_INCLUDE} -I${THIS_MAKEFILE_DIR}/external/sglib -I${INCLUDE_DIR} -c $< -o $@ ${MORE_SOURCE} @@ -312,8 +314,6 @@ ${CLI}: cli_internal.c.in cli_internal.h cli_internal.h.in ${CLI_APP_OBJECT} ${C ${CC} ${CFLAGS} ${CFLAGS_EXE} -I${INCLUDE_DIR} -o $@ ${CLI_APP_OBJECT} ${CLI_OBJECTS} ${OBJECTS} ${UTF8PROC_OBJECT} ${INIH_OBJECT} -L${LIBDIR} ${LIBZSV_L} ${LDFLAGS} ${LDFLAGS_OPT} ${MORE_OBJECTS} ${MORE_SOURCE} ${MORE_LIBS} ${STATIC_LIB_FLAGS} @echo Built $@ -# cli_internal: cli_internal.c.in cli_internal.h cli_internal.h.in - cli_internal.h.in: ${THIS_MAKEFILE_DIR}/../include/zsv/ext/implementation_private.h cat $< | perl -ne 'print if /ZSV_EXT_EXPORT/ .. /;/' | sed 's/ZSV_EXT_EXPORT *//g' | sed $$'s/ *;.*/;\\\n/' | sed 's/(/)(/' | sed 's/zsv_ext_\([a-z]*\))/(*\1)/' | grep -v '^$$' > $@ @@ -322,7 +322,7 @@ cli_internal.c.in: cli_internal.h.in ${SQLITE_EXT}: ${SQLITE_SRC} -${JQ_INCLUDE_DIR}: ${JQ_LIB} +# ${JQ_INCLUDE_DIR}: ${JQ_LIB} ${JQ_SRC}: ${JQ_TARBALL} @rm -rf $@-tmp @@ -337,12 +337,13 @@ ${JQ_SRC}: ${JQ_TARBALL} lib-jq: ${JQ_LIB} @echo "Using jq library ${JQ_LIB}" -${JQ_LIB}: ${JQ_SRC} ${JQ_INCLUDE_DIR} +${JQ_BUNDLE_LIB}: ${JQ_SRC} # -D_REENTRANT needed for clang to not break cd ${JQ_SRC} \ - && CC="${CC}" CFLAGS="${CFLAGS}" ./configure \ + && CC="${CC}" CFLAGS="${CFLAGS} -D_REENTRANT" ./configure \ --prefix="${JQ_PREFIX}" \ --disable-maintainer-mode \ --without-oniguruma \ + --no-recursion \ --disable-docs \ --disable-shared \ --enable-static \ diff --git a/configure b/configure index f4876b9a..045f366d 100755 --- a/configure +++ b/configure @@ -300,10 +300,16 @@ for arg ; do case "$arg" in --help|-h) usage ;; --host=*) host=${arg#*=} ;; + --arch=*) ARCH=${arg#*=} ;; ARCH=*) ARCH=${arg#*=} ;; + --config-file=*) CONFIGFILE=${arg#*=} ;; CONFIGFILE=*) CONFIGFILE=${arg#*=} ;; + + --jq-prefix=*) JQ_PREFIX=${arg#*=} ;; + JQ_PREFIX=*) JQ_PREFIX=${arg#*=} ;; + --prefix=*) ;; # already handled --exec-prefix=*) ;; # already handled EXEC_PREFIX=${arg#*=} ;; --docdir=*) DOCDIR=${arg#*=} ;; @@ -410,8 +416,8 @@ CCSEARCHPATH=$(echo | ${CC} -E -Wp,-v - 2>&1 | ${AWK} '/ \//{print substr($0,2); # Check if it is clang, and the llvm tools instead compiler=$(${CC} -v 2>&1 | ${AWK} '/ +version +/{for(i=1;i<=NF;i++){if($i=="version"){printf("%s\n",(last=="LLVM")?"clang":last);exit 0;}last=$i;}}') if test "$compiler" = "clang"; then - arlist="$CC-llvm-ar $host-llvm-ar $CC-ar $host-ar llvm-ar `ls /usr/bin/llvm-ar*` ar" - ranliblist="$CC-llvm-ranlib $host-llvm-ranlib $CC-ranlib $host-ranlib llvm-ranlib `ls /usr/bin/llvm-ranlib*` ranlib" + arlist="$CC-llvm-ar $host-llvm-ar $CC-ar $host-ar llvm-ar `ls /usr/bin/llvm-ar* 2>/dev/null` ar" + ranliblist="$CC-llvm-ranlib $host-llvm-ranlib $CC-ranlib $host-ranlib llvm-ranlib `ls /usr/bin/llvm-ranlib* 2>/dev/null` ranlib" else arlist="$CC-ar $host-$compiler-ar $host-ar $compiler-ar ar" ranliblist="$CC-ranlib $host-$compiler-ranlib $host-ranlib $compiler-ranlib $compiler-ranlib ranlib" @@ -504,14 +510,6 @@ test "$usepic" = "no" && tryflag CFLAGS_PIC -fno-pic test "$usepie" = "no" && tryldflag LDFLAGS_PIE -no-pie test "$usepic" = "no" && trysharedldflag LDFLAGS_PIC -fno-pic -#if test $MINGW -eq 0 ; then -#LDFLAGS_STD="-lc" -#tryflag CFLAGS_AUTO -fstack-protector-all -#case "$CFLAGS_AUTO" in -#*-fstack-protector*) CFLAGS_AUTO="-D_FORTIFY_SOURCE=2"; ;; -#esac -#fi - tryflag CFLAGS -pipe # Try flags to optimize speed @@ -594,17 +592,6 @@ if [ "$TRY_AVX512" = "yes" ]; then fi fi -#if [ "$HAVE_AVX256" != "1" ]; then -# tryccfn CFLAGS_AVX_256 "_mm256_movemask_epi8" "immintrin.h" && trycchdr CFLAGS_AVX_256 "immintrin.h" && ( tryccfn CFLAGS_AVX_256 "_blsr_u32" "immintrin.h" || tryccfn CFLAGS_AVX_256 "__blsr_u32" "immintrin.h" ) && HAVE_AVX256=1 -# fi - -#if [ "$HAVE_AVX256" = "1" ] && [ "$CFLAGS_AVX" != "-mavx2" ] ; then -# echo "AVX256 requested, but -mavx2 compiler flag not supported" -# exit 1 -#elif [ "$HAVE_AVX256" != "1" ]; then -# CFLAGS_AVX= -#fi - tryccfn CFLAGS_AUTO "memmem" "string.h" if [ "$usetermcap" = "yes" ] || [ "$usetermcap" = "auto" ] ; then @@ -615,12 +602,16 @@ if [ "$usetermcap" = "yes" ] || [ "$usetermcap" = "auto" ] ; then fi fi -# if [ "$usejq" = "yes" ] || [ "$usejq" = "auto" ] ; then -# tryldflag LDFLAGS_JQ -ljq -L${PREFIX}/lib || \ -# if test "$usejq" = "yes"; then -# echo "Error: --enable-jq specified, but not found" -# exit 1 -# fi +if [ "$JQ_PREFIX" != "" ] && [ "$ARCH" = "native" ]; then + echo "checking --prefix-jq ${JQ_PREFIX}" + if ! tryldflag LDFLAGS_JQ -ljq -L${JQ_PREFIX}/lib ; then + echo "Error: Failed to compile with -ljq and -L${JQ_PREFIX}/lib" + exit 1 + else + LDFLAGS_JQ="$LDFLAGS_JQ -L$JQ_PREFIX/lib" + fi +fi + tryldflag LDFLAGS_JQ -lm tryldflag LDFLAGS_JQ -lshlwapi if [ "$MINGW" = "1" ]; then @@ -628,7 +619,6 @@ if [ "$MINGW" = "1" ]; then else tryldflag LDFLAGS_JQ -pthread fi -# fi tryccfn CFLAGS_AUTO "arc4random_uniform" "stdlib.h" || tryccfn CFLAGS_AUTO "rand_s" "stdlib.h" "" "#define _CRT_RAND_S" tryccfn1 CFLAGS_AUTO "__builtin_expect" "0,0" @@ -678,6 +668,7 @@ LDFLAGS_STD = $LDFLAGS_STD CFLAGS_OPT = $CFLAGS_OPT LDFLAGS_OPT = $LDFLAGS_OPT LDFLAGS_TERMCAP = $LDFLAGS_TERMCAP +JQ_PREFIX = $JQ_PREFIX LDFLAGS_JQ = $LDFLAGS_JQ STATIC_LIBS = $STATIC_LIBS CFLAGS_AUTO = $CFLAGS_AUTO @@ -715,16 +706,15 @@ echo "" echo "****************************************************************" echo "* zsv configuration *" echo "****************************************************************" -# if [ "$LDFLAGS_JQ" = "" ]; then -# echo "* - libjq (-ljq): no. \`jq\` command will be disabled *" -# else -# echo "* - libjq: yes *" -# fi + +if [ "$JQ_PREFIX" != "" ]; then + echo "* - jq-prefix: $JQ_PREFIX *" +fi if [ "$LDFLAGS_TERMCAP" = "" ]; then echo "* - termcap: no. \`pretty\` will use default width assumption *" else - echo "* - termcap: yes *" + echo "* - termcap: yes *" fi if [ "$HAVE_AVX512" = "1" ]; then diff --git a/src/Makefile b/src/Makefile index 5a71ddbe..7b884176 100644 --- a/src/Makefile +++ b/src/Makefile @@ -111,8 +111,6 @@ ifeq ($(NO_UTF8_CHECK),1) endif -# CFLAGS+= -Dfwrite=zsv_fwrite_clock -Dfflush=zsv_fflush_clock -Dfread=zsv_fread_clock - help: @echo "${MAKE} [CONFIGFILE=config.mk] [NO_UTF8_CHECK=1] [VERBOSE=1] [LIBDIR=${LIBDIR}] [INCLUDEDIR=${INCLUDEDIR}] [LIB_SUFFIX=]" diff --git a/src/zsv_internal.c b/src/zsv_internal.c index 4a0e30bf..b8689056 100644 --- a/src/zsv_internal.c +++ b/src/zsv_internal.c @@ -71,12 +71,6 @@ struct collate_header { }; struct zsv_scan_delim_regs { - struct { - zsv_uc_vector dl; - zsv_uc_vector nl; - zsv_uc_vector cr; - zsv_uc_vector qt; - } v; size_t i; size_t bytes_chunk_end; size_t bytes_read; @@ -409,187 +403,6 @@ static inline zsv_mask_t movemask_pseudo(zsv_uc_vector v) { # include "vector_delim.c" -/* -static enum zsv_status zsv_scan_delim(struct zsv_scanner *scanner, - unsigned char *buff, - size_t bytes_read - ) { - struct { - zsv_uc_vector dl; - zsv_uc_vector nl; - zsv_uc_vector cr; - zsv_uc_vector qt; - } v; - - size_t i; - size_t bytes_chunk_end; - char delimiter; - unsigned char c; - char skip_next_delim; - int quote; - size_t mask_total_offset; - zsv_mask_t mask; - int mask_last_start; - - bytes_read += scanner->partial_row_length; - i = scanner->partial_row_length; - skip_next_delim = 0; - bytes_chunk_end = bytes_read >= sizeof(zsv_uc_vector) ? bytes_read - sizeof(zsv_uc_vector) + 1 : 0; - delimiter = scanner->opts.delimiter; - - scanner->partial_row_length = 0; - - quote = scanner->opts.no_quotes > 0 ? -1 : '"'; // ascii code 34 - memset(&v.dl, delimiter, sizeof(zsv_uc_vector)); // ascii 44 - memset(&v.nl, '\n', sizeof(zsv_uc_vector)); // ascii code 10 - memset(&v.cr, '\r', sizeof(zsv_uc_vector)); // ascii code 13 - memset(&v.qt, scanner->opts.no_quotes > 0 ? 0 : '"', sizeof(v.qt)); - - // case "hel"|"o": check if we have an embedded dbl-quote past the initial opening quote, which was - // split between the last buffer and this one e.g. "hel""o" where the last buffer ended - // with "hel" and this one starts with "o" - if((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) - && i > scanner->cell_start + 1 // case "|hello": need the + 1 in case split after first char of quoted value e.g. "hello" => " and hello" - && scanner->last == quote) { - if(buff[i] != quote) { - scanner->quoted |= ZSV_PARSER_QUOTE_CLOSED; - scanner->quoted -= ZSV_PARSER_QUOTE_UNCLOSED; - scanner->quote_close_position = i - scanner->cell_start - 1; - } else { - scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED; - scanner->quoted |= ZSV_PARSER_QUOTE_EMBEDDED; - i++; - } - } - -#define scanner_last (i ? buff[i-1] : scanner->last) - - mask_total_offset = 0; - mask = 0; - scanner->buffer_end = bytes_read; - for(; i < bytes_read; i++) { - if(UNLIKELY(mask == 0)) { - mask_last_start = i; - if(VERY_LIKELY(i < bytes_chunk_end)) { - // keep going until we get a delim or we are at the eof - mask_total_offset = vec_delims(buff + i, - bytes_read - i, - &v.dl, - &v.nl, - &v.cr, - &v.qt, - &mask); - if(LIKELY(mask_total_offset != 0)) { - i += mask_total_offset; - if(VERY_UNLIKELY(mask == 0 && i == bytes_read)) - break; // vector processing ended on exactly our buffer end - } - } else if(skip_next_delim) { - skip_next_delim = 0; - continue; - } - } - if(VERY_LIKELY(mask)) { - size_t next_offset = NEXT_BIT(mask); - i = mask_last_start + next_offset - 1; - mask = clear_lowest_bit(mask); - if(VERY_UNLIKELY(skip_next_delim)) { - skip_next_delim = 0; - continue; - } - } - - // to do: consolidate csv and tsv/scanner->delimiter parsers - c = buff[i]; - if(LIKELY(c == delimiter)) { // case ',': - if((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) == 0) { - scanner->scanned_length = i; - cell_dl(scanner, buff + scanner->cell_start, i - scanner->cell_start); - scanner->cell_start = i + 1; - c = 0; - continue; // this char is not part of the cell content - } else - // we are inside an open quote, which is needed to escape this char - scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED; - } else if(UNLIKELY(c == '\r')) { - if((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) == 0) { - scanner->scanned_length = i; - enum zsv_status stat = cell_and_row_dl(scanner, buff + scanner->cell_start, i - scanner->cell_start); - if(VERY_UNLIKELY(stat)) - return stat; - - scanner->cell_start = i + 1; - scanner->row_start = i + 1; - continue; // this char is not part of the cell content - } else - // we are inside an open quote, which is needed to escape this char - scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED; - } else if(UNLIKELY(c == '\n')) { - if((scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) == 0) { - if(scanner_last == '\r') { // ignore; we are outside a cell and last char was rowend - scanner->cell_start = i + 1; - scanner->row_start = i + 1; - } else { - // this is a row end - scanner->scanned_length = i; - enum zsv_status stat = cell_and_row_dl(scanner, buff + scanner->cell_start, i - scanner->cell_start); - if(VERY_UNLIKELY(stat)) - return stat; - scanner->cell_start = i + 1; - scanner->row_start = i + 1; - } - continue; // this char is not part of the cell content - } else - // we are inside an open quote, which is needed to escape this char - scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED; - } else if(LIKELY(c == quote)) { - if(i == scanner->cell_start) { - scanner->quoted = ZSV_PARSER_QUOTE_UNCLOSED; - scanner->quote_close_position = 0; - c = 0; - } else if(scanner->quoted & ZSV_PARSER_QUOTE_UNCLOSED) { - // the cell started with a quote that is not yet closed - if(VERY_LIKELY(i + 1 < bytes_read)) { - if(LIKELY(buff[i+1] != quote)) { - // buff[i] is the closing quote (not an escaped quote) - scanner->quoted |= ZSV_PARSER_QUOTE_CLOSED; - scanner->quoted -= ZSV_PARSER_QUOTE_UNCLOSED; - - // keep track of closing quote position to handle the edge case - // where content follows the closing quote e.g. cell content is: - // "this-cell"-did-not-need-quotes - if(LIKELY(scanner->quote_close_position == 0)) - scanner->quote_close_position = i - scanner->cell_start; - } else { - // next char is also '"' - // e.g. cell content is: "this "" is a dbl quote" - // cursor is here => ^ - // include in cell content and don't further process - scanner->quoted |= ZSV_PARSER_QUOTE_NEEDED; - scanner->quoted |= ZSV_PARSER_QUOTE_EMBEDDED; - skip_next_delim = 1; - } - } - } else { - // cell_length > 0 and cell did not start w quote, so - // we have a quote in middle of an unquoted cell - // process as a normal char - scanner->quoted |= ZSV_PARSER_QUOTE_EMBEDDED; - scanner->quote_close_position = 0; - } - } - } - scanner->scanned_length = i; - - // save bytes_read-- we will need to shift any remaining partial row - // before we read next from our input. however, we intentionally refrain - // from doing this until the next parse_more() call, so that the entirety - // of all rows parsed thus far are still available until that next call - scanner->old_bytes_read = bytes_read; - return zsv_status_ok; -} -*/ - #ifdef ZSV_SUPPORT_PULL_PARSER #undef ZSV_SUPPORT_PULL_PARSER #endif diff --git a/src/zsv_scan_delim.c b/src/zsv_scan_delim.c index 8c798bbf..caef8a02 100644 --- a/src/zsv_scan_delim.c +++ b/src/zsv_scan_delim.c @@ -15,17 +15,8 @@ zsv_internal_save_reg(mask_total_offset); \ zsv_internal_save_reg(mask); \ zsv_internal_save_reg(mask_last_start); \ - memcpy(&scanner->pull.regs->delim.v, &v, \ - sizeof(v)); \ } while(0) -/* - zsv_internal_save_reg(v.dl); \ - zsv_internal_save_reg(v.nl); \ - zsv_internal_save_reg(v.cr); \ - zsv_internal_save_reg(v.qt); \ -*/ - #define zsv_internal_restore_reg(x) x = scanner->pull.regs->delim.x #define zsv_internal_restore_regs() do { \ buff = scanner->pull.buff; \ @@ -40,16 +31,11 @@ zsv_internal_restore_reg(mask_total_offset); \ zsv_internal_restore_reg(mask); \ zsv_internal_restore_reg(mask_last_start); \ - memcpy(&v, &scanner->pull.regs->delim.v, \ - sizeof(v)); \ + memset(&v.dl, scanner->opts.delimiter, sizeof(zsv_uc_vector)); \ + memset(&v.nl, '\n', sizeof(zsv_uc_vector)); \ + memset(&v.cr, '\r', sizeof(zsv_uc_vector)); \ + memset(&v.qt, scanner->opts.no_quotes > 0 ? 0 : '"', sizeof(v.qt)); \ } while(0) -/* - - zsv_internal_restore_reg(v.dl); \ - zsv_internal_restore_reg(v.nl); \ - zsv_internal_restore_reg(v.cr); \ - zsv_internal_restore_reg(v.qt); \ -*/ #endif static enum zsv_status ZSV_SCAN_DELIM(struct zsv_scanner *scanner,