Skip to content

Commit

Permalink
Merge pull request #299 from intel/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
chuckyount authored Jul 18, 2024
2 parents a3a2105 + 4d71d70 commit 312567d
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 67 deletions.
2 changes: 1 addition & 1 deletion src/common/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespace yask {
// https://semver.org/.

// Format: "major.minor.patch[-alpha|-beta]".
const string version = "4.05.03";
const string version = "4.05.04";

string yask_get_version_string() {
return version;
Expand Down
63 changes: 35 additions & 28 deletions src/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ ifeq ($(cxx_is_llvm_intel),1)
-fimf-precision=low -fp-model fast -fimf-domain-exclusion=none -fma
YK_CXXWARN2 += -Wno-unknown-pragmas -Wno-unused-variable -Wno-unused-but-set-variable \
-Wno-unused-const-variable -fno-color-diagnostics
OMPFLAG := -fiopenmp
OMPFLAG := -qopenmp
SWIG_CXXFLAGS += -Wno-deprecated-declarations
MACROS += INTEL_OMP
VEC_MACROS += NO_PRAGMA_VEC2
Expand Down Expand Up @@ -881,7 +881,6 @@ help:
echo " $(MAKE) clean; $(MAKE) -j stencil=ssg YK_CXXOPT='-O2' # Use O2 optimization"; \
echo " $(MAKE) clean; $(MAKE) -j stencil=ssg YK_CXX=icpc # Use classic Intel compiler"; \
echo " $(MAKE) clean; $(MAKE) -j stencil=ssg YK_CXX=g++ # Use gnu compiler"; \
echo " $(MAKE) clean; $(MAKE) -j stencil=ssg MPI_CXX=mpiCC # Specify MPI compiler"; \
echo " "
@echo "Example builds of kernel API for C++ and Python apps:"; \
echo " $(MAKE) clean; $(MAKE) -j stencil=iso3dfd yk-api"; \
Expand Down Expand Up @@ -910,21 +909,29 @@ help:
first_test := 0
last_test := 999

# Default regex for stencil to run.
test_regex := .

# Make arguments applied to every test build.
TEST_MAKE_ARGS := real_bytes=8 use_rcp=0 allow_new_var_types=0 trace=1

# Recursively expanded ('=') on purpose: TEST_MAKE_ARGS is appended to
# further down in this file (e.g., 'check=1' for non-offload testing).
# A simply-expanded ':=' here would snapshot TEST_MAKE_ARGS before those
# additions, and they would never reach the test command line.
TEST_MAKE = $(MAKE) $(TEST_MAKE_ARGS)

# Define makefile functions for folding.
# Set default threads.
# Disable folding and checking for offload testing.
# Enable checking for CPU testing.
ifeq ($(offload),1)
FOLD =
outer_threads := 2
inner_threads := 2
else
TEST_MAKE_ARGS += check=1
FOLD = fold=$(subst $(space),$(comma),$(1))
outer_threads := 8
inner_threads := 2
TEST_MAKE_ARGS += check=1
endif

# Define makefile functions for folding.
# Disable folding for non-vectorized arch.
ifeq ($(arch),intel64)
FOLD =
else
FOLD = fold=$(subst $(space),$(comma),$(1))
endif

### Unit tests.
Expand Down Expand Up @@ -1041,7 +1048,7 @@ test_args10 := $(DEF_MPI_TEST_ARGS) -l 64 -b 24 -mb 16 -bt 2 -no-use_shm -overla
test_args11 := $(DEF_MPI_TEST_ARGS) -l 64 -b 24 -mb 16 -bt 2 -use_shm -no-overlap_comms $(EXTRA_TEST_ARGS)
endif

# Run the kernel binary using several combos of sizes and ranks.
# Run the kernel binary using the test args defined above.
yk-tests:
if (( $(first_test) <= 0 && $(last_test) >= 0 )); then $(YK_SCRIPT) $(test_args0); fi
if (( $(first_test) <= 1 && $(last_test) >= 1 )); then $(YK_SCRIPT) $(test_args1); fi
Expand All @@ -1057,15 +1064,19 @@ yk-mpi-tests:
# Run the default YASK compiler and kernel.
# First run on 1 rank, then multiple ranks if ranks>1.
# This is the primary target for building and running stencil tests.
yc-and-yk-test: $(YK_EXEC) $(YK_SCRIPT)
$(MAKE) ranks=1 yk-tests
if (( $(ranks) > 1 )); then $(MAKE) yk-tests yk-mpi-tests; fi
yc-and-yk-test: $(YK_SCRIPT)
@ echo "Running tests that match regex '$(test_regex)' numbered from $(first_test) to $(last_test)..."
if [[ $(stencil) =~ $(test_regex) ]]; then \
$(MAKE) $(YK_EXEC) && \
$(MAKE) ranks=1 yk-tests && \
if (( $(ranks) > 1 )); then $(MAKE) yk-tests yk-mpi-tests; fi; \
fi
STENCIL_TEST := $(TEST_MAKE) yc-and-yk-test

# Run the YASK kernel test without implicitly using the YASK compiler.
# Prerequisites: 'kernel-no-yc' and the run script $(YK_SCRIPT).
# Runs 'yk-tests' on 1 rank first; then, only if ranks > 1, runs 'yk-tests'
# and 'yk-mpi-tests' with the requested number of ranks.
# The multi-rank step uses an explicit 'if ... fi' rather than
# '(( ... )) && cmd': the latter exits with non-zero status whenever
# ranks <= 1, which make would report as a failed recipe line.
yk-test-no-yc: kernel-no-yc $(YK_SCRIPT)
	$(MAKE) ranks=1 yk-tests
	if (( $(ranks) > 1 )); then $(MAKE) yk-tests yk-mpi-tests; fi

# Run the kernel API tests for C++ and Python with and w/o expected exceptions.
api-tests:
Expand Down Expand Up @@ -1159,20 +1170,6 @@ single-stencil-tests:
4d-tests:
$(MAKE) clean; $(STENCIL_TEST) stencil=test_4d $(call FOLD,w=2 x=2)

# Selected collections from above for testing specific features.
scratch-tests:
$(MAKE) clean; $(STENCIL_TEST) stencil=test_scratch_1d $(call FOLD,x=4)
$(MAKE) clean; $(STENCIL_TEST) stencil=test_scratch_stages_1d $(call FOLD,x=4)
$(MAKE) clean; $(STENCIL_TEST) stencil=test_scratch_boundary_1d $(call FOLD,x=4)
$(MAKE) clean; $(STENCIL_TEST) stencil=test_scratch_2d $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=test_scratch_3d $(call FOLD,x=2 z=2) inner_loop_dim=x

boundary-tests:
$(MAKE) clean; $(STENCIL_TEST) stencil=test_boundary_1d $(call FOLD,x=4)
$(MAKE) clean; $(STENCIL_TEST) stencil=test_scratch_boundary_1d $(call FOLD,x=4)
$(MAKE) clean; $(STENCIL_TEST) stencil=test_boundary_2d $(call FOLD,x=2 y=2)
$(MAKE) clean; $(STENCIL_TEST) stencil=test_boundary_3d $(call FOLD,x=2 y=2) inner_loop_dim=1

# The standard set of stencils to test.
stencil-tests:
$(MAKE) 1d-tests
Expand All @@ -1184,6 +1181,16 @@ stencil-tests:
if (( $(offload) == 0 )); then $(MAKE) 3d-tests4; fi
$(MAKE) 4d-tests

# Pre-defined feature tests.
# Each '<feature>-tests' target listed here re-runs the standard
# 'stencil-tests' collection with 'test_regex' set to '<feature>', i.e.,
# the stem matched by '%' in the static pattern below.
scratch-tests boundary-tests stages-tests: %-tests:
	$(MAKE) stencil-tests test_regex=$*

unit-tests:
$(MAKE) clean; $(MAKE) cxx-yk-omp-test
$(MAKE) clean; $(MAKE) cxx-yk-var-test stencil=test_3d $(call FOLD,x=2 y=2)
Expand All @@ -1193,9 +1200,9 @@ all-tests:
$(MAKE) api-tests
$(MAKE) stencil-tests

# Install the script.
# Install the scripts.
# Then, build and run all the tests.
all:
$(MAKE) script
$(MAKE) scripts
$(MAKE) all-tests

3 changes: 2 additions & 1 deletion src/kernel/lib/settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,8 @@ namespace yask {
parser.add_option(make_shared<command_line_parser::bool_option>
("allow_addl_padding",
"[Advanced] Allow automatic extension of padding"
" beyond minimal vector alignment on any or all YASK vars.",
" beyond minimal vector alignment on any or all YASK vars"
" based on internal heuristics.",
_allow_addl_pad));
#ifdef USE_MPI
_add_domain_option(parser, "nr", "Num ranks", _num_ranks);
Expand Down
2 changes: 1 addition & 1 deletion src/kernel/lib/settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ namespace yask {

// Var behavior, including allocation.
bool _step_wrap = false; // Allow invalid step indices to alias to valid ones (set via APIs only).
bool _allow_addl_pad = true; // Allow extending padding beyond what's needed for alignment.
bool _allow_addl_pad = false;
#ifdef USE_OFFLOAD
bool _bundle_allocs = false;
#else
Expand Down
5 changes: 5 additions & 0 deletions src/kernel/lib/setup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ namespace yask {
#ifdef USE_OFFLOAD
_omp_hostn = omp_get_initial_device();
_omp_devn = omp_get_default_device();

// Heuristic to assign GPU n to rank n on this node.
// Assumes shm is local to a node.
if (my_rank > 0 && omp_get_num_devices() > my_shm_rank)
_omp_devn = my_shm_rank;
#endif

#else
Expand Down
8 changes: 7 additions & 1 deletion src/kernel/lib/soln_apis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,14 @@ namespace yask {
reset_auto_tuner(actl_opts->_do_auto_tune, false);

// Report ranks.
#ifdef USE_MPI
DEBUG_MSG("\nNum MPI ranks: " << env->get_num_ranks() <<
"\nThis MPI rank index: " << env->get_rank_index());
"\nThis MPI rank index: " << env->get_rank_index() <<
"\nNum shm-group MPI ranks: " << env->num_shm_ranks <<
"\nThis shm-group MPI rank: " << env->my_shm_rank);
#else
DEBUG_MSG("\nMPI not supported in this binary");
#endif

// report threads.
{
Expand Down
11 changes: 6 additions & 5 deletions src/kernel/lib/stencil_calc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ namespace yask {
// Full rectilinear polytope of aligned vecs.
else {
TRACE_MSG("calculating vecs within "
"normalized local indices " <<
"*normalized* local indices " <<
norm_fvidxs.make_range_str(true) <<
" via outer thread " << outer_thread_idx <<
" and inner thread " << inner_thread_idx);
Expand All @@ -712,15 +712,16 @@ namespace yask {
sb_fvidxs.make_range_str(true) <<
" via outer thread " << outer_thread_idx <<
" and inner thread " << inner_thread_idx);
#if VPTS == 1
#if VLEN == 1
THROW_YASK_EXCEPTION("(internal fault) vector border-code not expected with vec-size==1");
#else

// Normalized vector indices.
auto norm_ovidxs = normalize_indices(sb_ovidxs);

// Need to find range in each border part.
// 2D example w/4 edges and 4 corners:
// Need to find range in each border part. 2D example w/4
// edges and 4 corners:
//
// +---+------+---+
// | lx| |rx |
// | ly| ly |ly |
Expand Down Expand Up @@ -839,7 +840,7 @@ namespace yask {
if (pv_needed) {
TRACE_MSG("calculating partial vectors with mask 0x" <<
std::hex << pv_mask << std::dec << " for " << descr <<
" within normalized local indices " <<
" within *normalized* local indices " <<
pv_part.make_range_str(true) <<
" via outer thread " << outer_thread_idx <<
" and inner thread " << inner_thread_idx);
Expand Down
47 changes: 28 additions & 19 deletions src/kernel/lib/yk_var.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,10 @@ namespace yask {
// Adjust padding only for domain dims.
if (_domain_dim_mask & mbit) {

// Rounding should use soln vec lengths in case
// this var is not vectorized.
// Use soln vec len for rounding to allow reading a non-vec
// var in this dim while calculating a vec var. (The var
// vec-len is always 1 or the same as the soln vec-len in a
// given dim.)
auto svl = _corep->_soln_vec_lens[i];

// Add more padding requested by options or APIs.
Expand All @@ -265,31 +267,38 @@ namespace yask {
new_left_pads[i] = max(new_left_pads[i], _corep->_req_left_pads[i]);
new_right_pads[i] = max(new_right_pads[i], _corep->_req_right_pads[i]);

// Round left pad up to vec len.
// Round left pad up to soln vec len.
new_left_pads[i] = ROUND_UP(new_left_pads[i], svl);

// Round domain + right pad up to soln vec len by extending right pad.
// Using soln vec len to allow reading a non-vec var in this dim
// while calculating a vec var. (The var vec-len is always 1 or the same
// as the soln vec-len in a given dim.)
idx_t dprp = ROUND_UP(_corep->_domains[i] + new_right_pads[i], svl);

// Calculate pads from overall domain + right pad.
new_right_pads[i] = dprp - _corep->_domains[i];

// Add yet another vec to both sides. This allows full-vector reads;
// only writes are masked.
// Sum of rounded-up domain and rounded right pad.
idx_t rdpp = ROUND_UP(_corep->_domains[i] + new_right_pads[i], svl);

// Subtract domain size back out to get desired right pad.
new_right_pads[i] = rdpp - _corep->_domains[i];

// When vec len > 1, add extra vecs to accommodate
// mis-alignment and extra calculations
//
// Example:
// ... +-------+-+ Last full vec and partial vec domain,
// ... +-------+-+---+ so minimal halo is within 1-vec pad.
// ... +-------+-------+ But full vecs actually calc'd,
// ... +-------+-------+---+ so halo reads are needed beyond that.
// ... +-------+-------+---+---+ Rounded up for alloc.
#if VLEN > 1
new_left_pads[i] += svl;
new_right_pads[i] += svl;
#endif

// Make inner dim an odd number of vecs.
// Make inner dim an odd number of vecs when allowed.
// This reportedly helps avoid some uarch aliasing.
auto na = new_left_pads[i] + _corep->_domains[i] + new_right_pads[i];
// Only add this optional vector if not already allocated.
if (!p &&
actl_opts->_allow_addl_pad &&
get_dim_name(i) == inner_layout_dim &&
(na / svl) % 2 == 0) {
new_right_pads[i] += svl;
get_dim_name(i) == inner_layout_dim) {
auto na = new_left_pads[i] + _corep->_domains[i] + new_right_pads[i];
if ((na / svl) % 2 == 0)
new_right_pads[i] += svl;
}

// If storage is allocated, get max of existing pad & new
Expand Down
24 changes: 15 additions & 9 deletions src/kernel/yask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ while true; do
echo " Run YASK executable as an argument to <command>, e.g., 'numactl -N 0'."
echo " -mpi_cmd <command>"
echo " Run YASK executable as an argument to <command>, e.g., 'mpiexec.hydra -n 4'."
echo " If -mpi_cmd and -exe_prefix are both specified, this one is used first."
echo " If -mpi_cmd and -exe_prefix are both specified, this one is applied first."
echo " The default command is based on the number of nodes and ranks (see below)."
echo " -force_mpi"
echo " Generate a default 'mpirun' prefix even if there is only 1 rank to run."
Expand All @@ -186,7 +186,6 @@ while true; do
echo " This value, along with the number of nodes, <N>, is used to set these defaults:"
echo " - Number of MPI ranks per node to <R>/<N>."
echo " - Number of OpenMP threads per rank based on core count (for CPU kernels only)."
echo " - Default MPI command to 'mpirun -np <R> -ppn <R>/<N>'."
echo " If a different MPI command is needed, use -mpi_cmd <command> explicitly."
echo " If the env var SLURM_NTASKS is set AND if it greater than the number of nodes,"
echo " the default is its value."
Expand Down Expand Up @@ -389,15 +388,22 @@ fi
# Set MPI command default.
ppn=$(( $nranks / $nnodes ))
if [[ $nranks > 1 || $force_mpi == 1 ]]; then
: ${mpi_cmd="mpirun -np $nranks -ppn $ppn"}

# Add default Intel MPI settings.
envs+=" I_MPI_PRINT_VERSION=1 I_MPI_DEBUG=5"
if [[ $arch_offload =~ "nv" ]]; then
: ${mpi_cmd="mpirun -np $nranks --oversubscribe"}

# Add NUMA pinning if number of discovered NUMA nodes
# equals what is being used.
if [[ -n "$nnumas" && $nnumas == $ppn ]]; then
envs+=" I_MPI_PIN_DOMAIN=numa"
else
: ${mpi_cmd="mpirun -np $nranks -ppn $ppn"}

# Add default Intel MPI settings.
# These will be ignored if Intel MPI isn't used.
envs+=" I_MPI_PRINT_VERSION=1 I_MPI_DEBUG=5"

# Add NUMA pinning if number of discovered NUMA nodes
# equals what is being used.
if [[ -n "$nnumas" && $nnumas == $ppn ]]; then
envs+=" I_MPI_PIN_DOMAIN=numa"
fi
fi

# Check whether HBM policy setting is allowed.
Expand Down
9 changes: 7 additions & 2 deletions utils/bin/yask_log_to_csv.pl
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,20 @@

# Header.
YaskUtils::printCsvHeader($outFH);
print $outFH ",log file\n";
print $outFH ",date & time,log file\n";

# Values from files.
for my $arg (@ARGV) {
for my $fn (glob $arg) {
my %results;
YaskUtils::getResultsFromFile(\%results, $fn);

my $datestr = "";
if ($fn =~ /(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})/) {
$datestr = "$2/$3/$1 $4:$5:$6"; # format for Excel.
}

YaskUtils::printCsvValues(\%results, $outFH);
print $outFH ",\"$fn\"\n";
print $outFH ",\"$datestr\",\"$fn\"\n";
}
}

0 comments on commit 312567d

Please sign in to comment.