Skip to content

Commit

Permalink
CPU features detection fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyakurdyukov authored May 17, 2020
1 parent 02c3e54 commit 57cfb77
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 13 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ Without multithreading and SIMD optimizations it runs slower than native code.
`-t, --threads n` Set the number of CPU threads to use
`-o, --optimize` Option for libjpeg to produce smaller output file
`-v, --verbose n` Print libjpeg debug output
`-i, --info n` Print quantsmooth debug output: 0 - silent, 8 - processing time, 15 - all (default)
`-i, --info n` Print quantsmooth debug output (default is 15)
Use the sum of flags: 0 - silent, 1/2/4 - various information, 8 - processing time, 16 - SIMD type.
`-p, --cpu n` Use to lower the SIMD type if CPU detection fails:
0 - auto, 1 - scalar, 2 - SSE2, 3 - AVX2, 4 - AVX512.
(`x86` build selects between modes 1-3, `x86_64` from 2-4)

- The processing time includes only the smoothing algorithm, jpeg reading and writing time is not included.
- More iterations can make the result look like CG art and can make photos look unnatural.
Expand Down
6 changes: 4 additions & 2 deletions libjpegqs.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,19 @@ JPEGQS_ATTR int do_quantsmooth QS_ARGS {
get_cpuid(0, 0, cpuid); m = cpuid[0];
if (m < 1) break;
get_cpuid(1, 0, cpuid);
if (!(cpuid[3] & (1 << 25))) break; // SSE
if (!(cpuid[3] & (1 << 26))) break; // SSE2
type = 2;
// VirtualBox clears FMA, even if AVX2 is set
// if (!(cpuid[2] & (1 << 12))) break; // FMA
if (!(cpuid[2] & (1 << 27))) break; // OSXSAVE
if (!(cpuid[2] & (1 << 28))) break; // AVX
xcr0 = ~xgetbv(0);
if (m < 7) break;
get_cpuid(7, 0, cpuid);
if (!(cpuid[1] & (1 << 5)) && xcr0 & 6) break; // AVX2
if (!(cpuid[1] & (1 << 5)) || xcr0 & 6) break; // AVX2
type = 3;
if (!(cpuid[1] & (1 << 16)) && xcr0 & 0xe6) break; // AVX512F
if (!(cpuid[1] & (1 << 16)) || xcr0 & 0xe6) break; // AVX512F
type = 4;
} while (0);

Expand Down
2 changes: 1 addition & 1 deletion libjpegqs.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ enum {
#define JPEGQS_ATTR
#endif

#define JPEGQS_VERSION "2020-03-23"
#define JPEGQS_VERSION "2020-05-17"
#define JPEGQS_COPYRIGHT "Copyright (C) 2020 Ilya Kurdyukov"

typedef struct {
Expand Down
9 changes: 7 additions & 2 deletions quantsmooth.c
Original file line number Diff line number Diff line change
Expand Up @@ -436,8 +436,13 @@ int main(int argc, char **argv) {
" -t, --threads n Set the number of CPU threads to use\n"
" -o, --optimize Option for libjpeg to produce smaller output file\n"
" -v, --verbose n Print libjpeg debug output\n"
" -i, --info n Print quantsmooth debug output:\n"
" 0 - silent, 8 - processing time, 15 - all (default)\n"
" -i, --info n Print quantsmooth debug output (default is 15)\n"
" Use the sum of flags: 0 - silent,\n"
" 1/2/4 - various information,\n"
" 8 - processing time, 16 - SIMD type.\n"
" -p, --cpu n Use to lower the SIMD type if CPU detection fails:\n"
" 0 - auto, 1 - scalar, 2 - SSE2, 3 - AVX2, 4 - AVX512.\n"
" (x86 build selects between modes 1-3, x86_64 from 2-4)\n"
"\n", progname);
return 1;
}
Expand Down
7 changes: 0 additions & 7 deletions quantsmooth.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@
#define omp_get_thread_num() 0
#endif

#if defined(__clang__) && defined(_OPENMP)
// Clang (9.0.0) shows this warning about OpenMP loop counter variables,
// but there are no warnings if OpenMP is disabled,
// so it looks like a bug in the compiler.
#pragma GCC diagnostic ignored "-Wuninitialized"
#endif

#if !defined(TRANSCODE_ONLY) && !defined(JPEG_INTERNALS)
// declarations needed from jpegint.h

Expand Down

0 comments on commit 57cfb77

Please sign in to comment.