Merge pull request #157 from intel/develop

Develop
intel · Sep 10, 2018 · 1384d01 · 1384d01
2 parents 7a5ffd7 + 86cd38f
commit 1384d01
Show file tree

Hide file tree

Showing 31 changed files with 1,718 additions and 641 deletions.
diff --git a/Makefile b/Makefile
@@ -77,7 +77,7 @@ include $(YASK_BASE)/src/common/common.mk
 # - vars starting with 'YK_' apply to the YASK stencil kernel.
 # - vars starting with 'YC_' apply to the YASK stencil compiler.
 
-YASK_MFLAGS	:=	--max-load 5
+YASK_MFLAGS	:=	--max-load 16
 #YASK_MFLAGS	+=	--output-sync --output-sync=line
 YK_MAKE		:=	$(MAKE) $(YASK_MFLAGS) -C src/kernel YASK_OUTPUT_DIR=$(YASK_OUT_BASE)
 YC_MAKE		:=	$(MAKE) $(YASK_MFLAGS) -C src/compiler YASK_OUTPUT_DIR=$(YASK_OUT_BASE)

diff --git a/README.md b/README.md
@@ -1,4 +1,5 @@
 #### Backward-compatibility notices:
+* Version 2.13.02 required some changes in perf statistics due to step (temporal) conditions. Both the text output and the `yk_stats` APIs are affected.
 * Version 2.12.00 removed the long-deprecated '==' operator for asserting equality between a grid point and an equation. Use 'EQUALS' instead.
 * Version 2.11.01 changed the plain-text format of some of the performance data in the test-utility output. Specifically, some leading spaces were added, SI multipliers for values < 1 were added, and the phrase "time in" no longer appears before each time breakdown. This may affect some user programs that parse the output to collect stats.
 * Version 2.10.00 changed the location of temporary files created during the build process. This will not affect most users, although you may need to manually remove old `src/compiler/gen` and `src/kernel/gen` directories.

diff --git a/docs/YASK-intro.pdf b/docs/YASK-intro.pdf
diff --git a/include/yc_node_api.hpp b/include/yc_node_api.hpp
@@ -81,6 +81,10 @@ namespace yask {
     /// Shared pointer to \ref yc_divide_node
     typedef std::shared_ptr<yc_divide_node> yc_divide_node_ptr;
 
+    class yc_mod_node;
+    /// Shared pointer to \ref yc_mod_node
+    typedef std::shared_ptr<yc_mod_node> yc_mod_node_ptr;
+
     class yc_not_node;
     /// Shared pointer to \ref yc_not_node
     typedef std::shared_ptr<yc_not_node> yc_not_node_ptr;
@@ -173,17 +177,22 @@ namespace yask {
             the yc_solution that contains the grid that is on the
             LHS.
 
-            An optional condition may be provided to define the sub-domain
+            An optional domain condition may be provided to define the sub-domain
             to which this equation applies. See new_first_domain_index()
             for more information and an example.
             Conditions are always evaluated with respect to the overall
             problem domain, i.e., independent of any specific
             MPI domain decomposition that might occur at run-time.
             If a condition is not provided, the equation applies to the
             entire problem domain.
-            A condition can be added to an equation after its creation
+            A domain condition can be added to an equation after its creation
             via yc_equation_node.set_cond().
 
+            A step-index condition is similar to a domain condition, but
+            applies to the current step (usually time).
+            A step-index condition can be added to an equation after its creation
+            via yc_equation_node.set_step_cond().
+
             @returns Pointer to new \ref yc_equation_node object.
         */
         virtual yc_equation_node_ptr
@@ -265,6 +274,17 @@ namespace yask {
         new_divide_node(yc_number_node_ptr lhs /**< [in] Expression before `/` sign. */,
                         yc_number_node_ptr rhs /**< [in] Expression after `/` sign. */ );
 
+        /// Create a modulo node.
+        /**
+            New modulo nodes can also be created via the overloaded `%` operator.
+            The modulo operator converts both operands to integers before performing
+            the operation.
+           @returns Pointer to new \ref yc_mod_node object.
+        */
+        virtual yc_number_node_ptr
+        new_mod_node(yc_number_node_ptr lhs /**< [in] Expression before `%` sign. */,
+                     yc_number_node_ptr rhs /**< [in] Expression after `%` sign. */ );
+
         /// Create a symbol for the first index value in a given dimension.
         /**
            Create an expression that indicates the first value in the overall problem
@@ -452,6 +472,11 @@ namespace yask {
                               /**< [in] Boolean expression describing the sub-domain
                                  or `nullptr` to remove the condition. */ ) =0;
 
+        /// Set the condition describing the valid step indices.
+        virtual void set_step_cond(yc_bool_node_ptr step_cond
+                                   /**< [in] Boolean expression describing a valid step
+                                      or `nullptr` to remove the condition. */ ) =0;
+
         /// Create a deep copy of AST starting with this node.
         virtual yc_equation_node_ptr clone_ast() const =0;
     };
@@ -594,6 +619,10 @@ namespace yask {
     /** Created via yc_node_factory::new_divide_node(). */
     class yc_divide_node : public virtual yc_binary_number_node { };
 
+    /// A modulo node.
+    /** Created via yc_node_factory::new_mod_node(). */
+    class yc_mod_node : public virtual yc_binary_number_node { };
+
     /// A boolean inversion operator.
     /** Example: used to implement `!(a || b)`.
         Created via yc_node_factory::new_not_node().
@@ -709,6 +738,13 @@ namespace yask {
     yc_number_node_ptr operator/(yc_number_node_ptr lhs, double rhs);
     //@}
 
+    //@{
+    /// Operator version of yc_node_factory::new_mod_node().
+    yc_number_node_ptr operator%(yc_number_node_ptr lhs, yc_number_node_ptr rhs);
+    yc_number_node_ptr operator%(double lhs, yc_number_node_ptr rhs);
+    yc_number_node_ptr operator%(yc_number_node_ptr lhs, double rhs);
+    //@}
+
     //@{
     /// Operator version of yc_node_factory::new_multiplication_node().
     yc_number_node_ptr operator*(yc_number_node_ptr lhs, yc_number_node_ptr rhs);

diff --git a/include/yk_solution_api.hpp b/include/yk_solution_api.hpp
@@ -189,7 +189,8 @@ namespace yask {
         virtual void
         set_block_size(const std::string& dim
                        /**< [in] Name of dimension to set.  Must be one of
-                          the names from get_domain_dim_names(). */,
+                          the names from get_step_dim_name() or
+                          get_domain_dim_names(). */,
                        idx_t size
                        /**< [in] Elements in a block in this `dim`. */ ) =0;
 
@@ -202,7 +203,8 @@ namespace yask {
         virtual idx_t
         get_block_size(const std::string& dim
                         /**< [in] Name of dimension to get.  Must be one of
-                           the names from get_domain_dim_names(). */) const =0;
+                           the names from get_step_dim_name() or
+                           get_domain_dim_names(). */) const =0;
 
         /// Set the number of MPI ranks in the given dimension.
         /**
@@ -445,11 +447,10 @@ namespace yask {
         virtual void
         end_solution() =0;
 
-
         /// Get performance statistics associated with preceding calls to run_solution().
         /**
-           Side effect: resets all statistics, so a subsequent call will
-           measure performance after the current call.
+           @note Side effect: resets all statistics, so each call
+           returns only the elapsed time and counts since the previous call.
            @returns Pointer to statistics object.
         */
         virtual yk_stats_ptr
@@ -824,43 +825,47 @@ namespace yask {
 
         /// Get the number of elements in the overall domain.
         /**
-           @returns Product of all the overal domain sizes across all domain dimensions.
+           @returns Product, over all domain dimensions, of the overall domain size in each dimension (spanning all ranks).
+           Multiply this value by get_num_steps_done() to determine the number
+           of points processed.
+           Then, divide by get_elapsed_secs() to determine the throughput.
         */
         virtual idx_t
         get_num_elements() =0;
 
-        /// Get the number of elements written in each step.
+        /// Get the number of steps executed via run_solution().
         /**
-           @returns Number of elements written to each output grid.
-           This is the same value as get_num_elements() if there is only one output grid.
+           @returns A positive number, regardless of whether run_solution() steps were executed
+           forward or backward.
         */
         virtual idx_t
-        get_num_writes() =0;
+        get_num_steps_done() =0;
 
-        /// Get the estimated number of floating-point operations required for each step.
+        /// Get the number of elements written across all steps.
         /**
-           @returns Number of FP ops created by the stencil compiler.
-           It may be slightly more or less than the actual number of FP ops executed
-           by the CPU due to C++ compiler transformations.
+           @returns Number of elements written, summed over all output grids,
+           steps executed, and ranks.
         */
         virtual idx_t
-        get_est_fp_ops() =0;
+        get_num_writes_done() =0;
 
-        /// Get the number of steps calculated via run_solution().
+        /// Get the estimated number of floating-point operations executed across all steps.
         /**
-           @returns A positive number, regardless of whether run_solution() steps were executed
-           forward or backward.
+           @returns Number of FP ops created by the stencil compiler, summed over
+           all stencil-bundles, steps executed, and ranks.
+           It may be slightly more or less than the actual number of FP ops executed
+           by the CPU due to C++ compiler transformations.
         */
         virtual idx_t
-        get_num_steps_done() =0;
+        get_est_fp_ops_done() =0;
 
         /// Get the number of seconds elapsed during calls to run_solution().
         /**
            @returns Only the time spent in run_solution(), not in any other code in your
            application between calls.
         */
         virtual double
-        get_elapsed_run_secs() =0;
+        get_elapsed_secs() =0;
     };
 
     /** @}*/

diff --git a/src/common/common_utils.cpp b/src/common/common_utils.cpp
@@ -41,7 +41,7 @@ namespace yask {
     // for numbers above 9 (at least up to 99).
 
     // Format: "major.minor.patch".
-    const string version = "2.12.04";
+    const string version = "2.13.02";
 
     string yask_get_version_string() {
         return version;

diff --git a/src/common/idiv.hpp b/src/common/idiv.hpp
@@ -50,28 +50,33 @@ namespace yask {
 
     template<typename T>
     inline T idiv_flr(T a, T b) {
+        assert(b);
         //return (a<0 ? a-(b-1) : a) / b;
         //return (a - (a<0 ? b-1 : 0)) / b;
         return (a + (a>>(sizeof(a)*8-1)) * (b-1)) / b;
     }
 
     template<typename T>
     inline T ceil_idiv_flr(T a, T b) {
+        assert(b);
         return idiv_flr(a + b - 1, b);
     }
 
     template<typename T>
     inline T round_up_flr(T a, T b) {
+        assert(b);
         return (idiv_flr(a + b - 1, b) * b);
     }
 
     template<typename T>
     inline T round_down_flr(T a, T b) {
+        assert(b);
         return (idiv_flr(a, b) * b);
     }
 
     template<typename T>
     inline T imod_flr(T a, T b) {
+        assert(b);
         //return ((a % b) + b) % b;
         //return ((a < 0) ? ((a % b) + b) : a) % b;
         //T c = a % b; return (c < 0) ? c + b : c;