From 3e7f29ddc0738d42ea94d6e9e478ae237d481634 Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Thu, 12 Dec 2019 14:25:51 +0300
Subject: [PATCH 1/7] Add valgrind tests

---
 .circleci/config.yml            | 61 +++++++++++++++++++++++
 README.md                       | 86 +++++++++++++++++++++++++++++++++
 azure/citus-bot.sh              | 13 ++++-
 azure/create-cluster.sh         | 17 ++++++-
 azure/finalize-valgrind-test.sh | 38 +++++++++++++++
 azure/push-results.sh           | 37 ++++++++++++++
 azure/run-all-tests.sh          | 39 +++++++--------
 fabfile/config.py               | 13 +++++
 fabfile/run.py                  | 76 ++++++++++++++++++++++++++++-
 fabfile/setup.py                | 22 +++++----
 fabfile/use.py                  | 24 +++++++--
 11 files changed, 391 insertions(+), 35 deletions(-)
 create mode 100755 azure/finalize-valgrind-test.sh
 create mode 100755 azure/push-results.sh
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 52090427..ce2bfdf6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -63,6 +63,40 @@ jobs:
           name: delete the given resource group
           no_output_timeout: 10m             
 
+  valgrind-test:
+    docker:
+      - image: buildpack-deps:trusty
+
+    working_directory: /home/circleci/project  
+    steps:
+      - azure-cli/install
+      - azure-cli/login-with-service-principal
+      - checkout
+      - run:
+          command: |
+            cd ./azure
+            ./add-sshkey.sh
+            ./citus-bot.sh citusbot_valgrind_test_resource_group
+          name: install dependencies and run valgrind tests
+          no_output_timeout: 10m 
+
+  finalize-valgrind-test:  # pushes the valgrind results, then deletes the resource group
+    docker:
+      - image: buildpack-deps:trusty
+
+    working_directory: /home/circleci/project
+    steps:
+      - azure-cli/install
+      - azure-cli/login-with-service-principal
+      - checkout
+      - run:
+          command: |
+            cd ./azure
+            ./add-sshkey.sh
+            ./finalize-valgrind-test.sh
+          name: push valgrind test results and delete the resource group
+          no_output_timeout: 10m
+
 orbs:
   azure-cli: circleci/azure-cli@1.0.0      
 
@@ -97,3 +131,30 @@ workflows:
               only: 
                 - /tpch\/.*/ # match with tpch/ prefix
                 - /all_performance_test\/.*/ # match with all_performance_test/ prefix
+
+  # perform weekly valgrind test on azure every monday at 00:00
+  # https://crontab.guru/#0_0_*_*_1
+  weekly-valgrind:
+    triggers:
+      - schedule:
+          cron: "0 0 * * 1"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - valgrind-test
+
+  # Since valgrind tests really take a long time to finish, wait for 9.5 hours.
+  # Then push valgrind test results and terminate the machine.
+  # https://crontab.guru/#30_9_*_*_1
+  weekly-valgrind-finalize:
+    triggers:
+      - schedule:
+          cron: "30 9 * * 1"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - finalize-valgrind-test
diff --git a/README.md b/README.md
index cabc2a76..19dfea41 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ required for testing citus.
   * [Running PgBench Tests Against Hyperscale (Citus)](#pgbench-cloud)
   * [Running TPC-H Tests](#tpch)
   * [Running TPC-H Tests Against Hyperscale (Citus)](#tpch-cloud)
+  * [Running Valgrind Tests](#valgrind)
 * [Example fab Commands](#fab-examples)
 * [Tasks, and Ordering of Tasks](#fab-tasks)
 * [Task Namespaces](#task-namespaces)
@@ -602,6 +603,91 @@ On the coordinator node:
 fab run.tpch_automate:tpch_q1.ini,connectionURI='postgres://citus:dwVg70yBfkZ6hO1WXFyq1Q@c.fhhwxh5watzbizj3folblgbnpbu.db.citusdata.com:5432/citus?sslmode\=require'
 ```
 
+## <a name="valgrind"></a> Running Valgrind Tests
+
+TL;DR
+
+```bash
+# 1 # start valgrind test
+
+# create valgrind instance to run
+eval `ssh-agent -s`
+ssh-add
+export GIT_USERNAME=<Your github username>
+export GIT_TOKEN=<Your github token with repo, write:packages and read:packages permissions> # You can create a github token from https://github.com/settings/tokens.
+export RESOURCE_GROUP_NAME='your-valgrind-test-rg-name-here'
+export VALGRIND_TEST=1
+cd azure
+./create-cluster.sh
+
+# connect to coordinator
+./connect.sh
+
+# run fab command in coordinator in a detachable session
+sudo yum install tmux
+tmux new -d "fab use.postgres:12.3 use.enterprise:enterprise-master run.valgrind"
+
+# simply exit from coordinator after detaching
+
+# 2 # finalize valgrind test
+
+# reconnect to coordinator after 9.5 hours (if you preferred default coordinator configuration)
+export RESOURCE_GROUP_NAME='your-valgrind-test-rg-name-here'
+./connect.sh
+
+# you can first check if valgrind test is finished by attaching to tmux session
+tmux a
+# then you should detach from the session before moving forward
+Ctrl+b d
+
+# run push results script
+cd test-automation/azure
+./push-results.sh <branch name you prefer to push results>
+
+# simply exit from coordinator after pushing the results
+
+# delete resource group finally
+cd azure
+./delete-resource-group.sh
+```
+
+DETAILS:
+
+To create a valgrind instance, following the steps in [Setup Steps For Each Test](#azure-setup-steps), do the following before executing `create-cluster.sh`:
+
+```bash
+export VALGRIND_TEST=1
+```
+
+Setting `VALGRIND_TEST` makes the `numberOfWorkers` setting irrelevant, as `create-cluster.sh` overrides it with 0.
+This is because valgrind tests use our regression test structure, which creates a local cluster
+itself. Also, as we install `valgrind` only on the coordinator, if we had worker nodes we could not build
+PostgreSQL, because the build requires `valgrind` on the workers too and fails even though we do not need them.
+
+On the coordinator node:
+
+```bash
+# an example usage: Use PostgreSQL 12.1 and run valgrind test on enterprise/enterprise-master
+fab use.postgres:12.1 use.enterprise:enterprise-master run.valgrind
+```
+
+However as valgrind tests take too much time to complete, we recommend you to run valgrind tests in a detached session:
+```bash
+sudo yum install tmux
+tmux new -d "fab use.postgres:12.1 use.enterprise:enterprise-master run.valgrind"
+```
+
+After the tests are finished (takes up to 9 hours with default coordinator size), re-connect to the coordinator.
+Results can be found under the `$HOME/results` directory.
+
+To push the results to the `release-test-results` repository, run the below command on the coordinator node:
+
+```bash
+sh $HOME/test-automation/azure/push-results.sh <branch_name_to_push>
+```
+
+Finally, delete your resource group.
+
 ## <a name="fab-examples"></a> Example fab Commands
 
 Use `fab --list` to see all the tasks you can run! This is just a few examples.
diff --git a/azure/citus-bot.sh b/azure/citus-bot.sh
index 09116889..ae4f473b 100755
--- a/azure/citus-bot.sh
+++ b/azure/citus-bot.sh
@@ -40,8 +40,19 @@ trap cleanup EXIT
 
 rg=$1
 export RESOURCE_GROUP_NAME=${rg}
-./create-cluster.sh
 
+if [ "$rg" == "citusbot_valgrind_test_resource_group" ]; then
+    # If running valgrind tests, do not run cleanup function
+    # This is because, as valgrind tests requires too much time to run,
+    # we start valgrind tests via nohup in ci. Hence ssh session
+    # will immediately be closed just after the fabric command is run
+    trap - EXIT
+    # If running valgrind tests, export VALGRIND_TEST to be 1 to ensure
+    # only coordinator instance is created in create-cluster script
+    export VALGRIND_TEST=1
+fi
+
+./create-cluster.sh
 
 public_ip=$(az group deployment show -g ${rg} -n azuredeploy --query properties.outputs.publicIP.value)
 # remove the quotes 
diff --git a/azure/create-cluster.sh b/azure/create-cluster.sh
index 5b0b9302..d9d93721 100755
--- a/azure/create-cluster.sh
+++ b/azure/create-cluster.sh
@@ -34,7 +34,22 @@ echo "waiting a long time to create cluster, this might take up to 30 mins depen
 # so that $HOME, $PATH are set to the target users $HOME and $PATH.
 export BRANCH=${CIRCLE_BRANCH:=master}
 
-az group deployment create -g ${rg} --template-file azuredeploy.json --parameters @azuredeploy.parameters.json --parameters sshPublicKey="${public_key}" branchName="$BRANCH" 
+# below is the default create cluster command
+CREATE_CLUSTER_COMMAND=(az group deployment create -g ${rg} --template-file azuredeploy.json --parameters @azuredeploy.parameters.json --parameters sshPublicKey="${public_key}" branchName="$BRANCH")
+
+# if VALGRIND_TEST variable is not exported, set it to 0
+is_valgrind_test=${VALGRIND_TEST:=0}
+
+# if we want to run valgrind tests, lets overwrite numberOfWorkers parameter with 0
+if [[ "$is_valgrind_test" == "1" ]]; then
+    # be on the safe side, add "--parameters" before "numberOfWorkers" as the order
+    # of the parameters in CREATE_CLUSTER_COMMAND may change
+    CREATE_CLUSTER_COMMAND+=(--parameters)
+    CREATE_CLUSTER_COMMAND+=(numberOfWorkers=0)
+fi
+
+# run CREATE_CLUSTER_COMMAND
+"${CREATE_CLUSTER_COMMAND[@]}"
 
 end_time=`date +%s`
 echo execution time was `expr $end_time - $start_time` s.
diff --git a/azure/finalize-valgrind-test.sh b/azure/finalize-valgrind-test.sh
new file mode 100755
index 00000000..cc49369f
--- /dev/null
+++ b/azure/finalize-valgrind-test.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# fail if trying to reference a variable that is not set.
+set -u
+# exit immediately if a command fails
+set -e
+# echo commands
+set -x
+
+function cleanup {
+    sh ./delete-resource-group.sh
+}
+
+export RESOURCE_GROUP_NAME="citusbot_valgrind_test_resource_group"
+
+trap cleanup EXIT
+
+public_ip=$(az group deployment show -g ${RESOURCE_GROUP_NAME} -n azuredeploy --query properties.outputs.publicIP.value)
+# remove the quotes 
+public_ip=$(echo ${public_ip} | cut -d "\"" -f 2)
+
+echo ${public_ip}
+
+ssh-keyscan -H ${public_ip} >> ~/.ssh/known_hosts
+chmod 600 ~/.ssh/known_hosts
+
+sh ./delete-security-rule.sh
+
+echo "adding public ip to known hosts in remote"
+ssh -o "StrictHostKeyChecking no" -A pguser@${public_ip} "ssh-keyscan -H ${public_ip} >> /home/pguser/.ssh/known_hosts"
+echo "running tests in remote"
+
+# ssh with non-interactive mode does not source bash profile, so we will need to do it ourselves here.
+# the valgrind fab task already puts an empty success file under results dir if there are no citus-related logs,
+# so here we only push the files under results dir
+ssh -o "StrictHostKeyChecking no" -A pguser@${public_ip} \
+"source ~/.bash_profile;" \
+"sh /home/pguser/test-automation/azure/push-results.sh ${RESOURCE_GROUP_NAME}";
diff --git a/azure/push-results.sh b/azure/push-results.sh
new file mode 100755
index 00000000..3f24781f
--- /dev/null
+++ b/azure/push-results.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# this script pushes the results under the results/ directory to the release-test-results repository
+
+# args #
+# $1 -> branch name to push results
+
+# fail if trying to reference a variable that is not set.
+set -u
+# exit immediately if a command fails
+set -e
+# fail in a pipeline if any of the commands fails
+set -o pipefail
+
+branch_name=$1
+
+# add github to known hosts
+
+echo "github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==" >> ~/.ssh/known_hosts
+
+git clone git@github.com:citusdata/release-test-results.git "${HOME}"/release-test-results
+
+git config --global user.email "citus-bot@microsoft.com" 
+git config --global user.name "citus bot" 
+
+now=$(date +"%m_%d_%Y_%s")
+
+mv "${HOME}"/results "${HOME}"/release-test-results/periodic_job_results/"${now}"
+
+cd "${HOME}"/release-test-results
+
+commit_message="add test results"
+
+git checkout -b "${branch_name}/${now}"
+git add -A 
+git commit -m "$commit_message"
+git push origin "${branch_name}/${now}"
diff --git a/azure/run-all-tests.sh b/azure/run-all-tests.sh
index 3b8fe371..86fbbb7c 100755
--- a/azure/run-all-tests.sh
+++ b/azure/run-all-tests.sh
@@ -22,22 +22,23 @@ if [ "$rg_name" = "citusbot_tpch_test_resource_group" ]; then
     fab run.tpch_automate
 fi 
 
-
-# add github to known hosts
-echo "github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==" >> ~/.ssh/known_hosts
-
-git clone git@github.com:citusdata/release-test-results.git
-
-git config --global user.email "citus-bot@microsoft.com" 
-git config --global user.name "citus bot" 
-
-now=$(date +"%m_%d_%Y_%s")
-
-mv ${HOME}/results ${HOME}/release-test-results/periodic_job_results/${now}
-
-cd ${HOME}/release-test-results
-
-git checkout -b ${rg_name}/${now}
-git add -A 
-git commit -m "add test results for performance tests ${rg_name}"
-git push origin ${rg_name}/${now}
+# If running valgrind tests, do not push the results from this script.
+# This is because, as valgrind tests require too much time to run,
+# we start valgrind tests via nohup in ci. Hence the ssh session
+# will be closed shortly after the fabric command is started.
+#
+# We have a separate job to terminate the machine and push the results
+if [ "$rg_name" = "citusbot_valgrind_test_resource_group" ]; then
+    nohup fab use.postgres:13.1 use.enterprise:enterprise-master run.valgrind > /dev/null 2>&1 &
+
+    # wait for cloning to end
+    while ! test -d "$HOME/citus-enterprise";
+    do
+        echo "Wait until citus is cloned completely ...";
+        sleep 60;
+    done
+
+    echo "Citus is cloned succesfully";
+else
+    sh "${HOME}"/test-automation/azure/push-results.sh "$1";
+fi 
diff --git a/fabfile/config.py b/fabfile/config.py
index 0860da8d..f52a7f3a 100644
--- a/fabfile/config.py
+++ b/fabfile/config.py
@@ -14,6 +14,19 @@
 RESULTS_DIRECTORY = os.path.join(HOME_DIR, 'results')
 CITUS_INSTALLATION = os.path.join(HOME_DIR, 'citus-installation')
 PORT = 5432
+RELATIVE_REGRESS_PATH = 'src/test/regress'
+
+# keys to access settings dictionary
+REPO_PATH = 'repo_path'
+BUILD_CITUS_FUNC = 'build_citus_func'
+
+# valgrind test variables
+VALGRIND_TEST_OUT_FILE = 'valgrind_test_out.txt'
+VALGRIND_LOGS_FILE = 'valgrind_test_log.txt'
+REGRESSION_DIFFS_FILE = 'regression.diffs'
+CITUS_RELATED_VALGRIND_LOG_FILE = 'valgrind_test_log_citus.txt'
+VALGRIND_REQUIRED_PACKAGES = ['valgrind', 'valgrind-devel.x86_64', 'openssl-devel.x86_64', 'libicu-devel.x86_64']
+VALGRIND_SUCCESS_FNAME = 'valgrind_success'
 
 PG_VERSION = '9.6.1'
 PG_CONFIGURE_FLAGS = ['--with-openssl']
diff --git a/fabfile/run.py b/fabfile/run.py
index 00d15ee6..add0a03f 100644
--- a/fabfile/run.py
+++ b/fabfile/run.py
@@ -1,4 +1,5 @@
-from fabric.api import task, run, cd, runs_once, roles, execute
+from fabric.api import task, run, cd, runs_once, roles, execute, abort
+from fabric.context_managers import settings
 
 import config
 import use
@@ -12,7 +13,7 @@
 import ConfigParser
 import time
 
-__all__ = ['jdbc', 'regression', 'pgbench_tests', 'tpch_automate']
+__all__ = ['jdbc', 'regression', 'pgbench_tests', 'tpch_automate', 'valgrind', 'valgrind_filter_put_results']
 
 
 @task
@@ -205,3 +206,74 @@ def tpch_queries(query_info, connectionURI, pg_version, citus_version, config_fi
         out_val = run(run_string)
         results_file.write(out_val)
         results_file.write('\n')
+
+# If no citus-related valgrind logs exist in the results directory, then simply put the
+# valgrind_success file under the results directory.
+def valgrind_filter_put_results():
+    'Filter valgrind test outputs, put success file if no citus related valgrind output'
+
+    repo_path = config.settings[config.REPO_PATH]
+
+    regression_test_path = os.path.join(repo_path, config.RELATIVE_REGRESS_PATH)
+    
+    regression_diffs_path = os.path.join(regression_test_path, config.REGRESSION_DIFFS_FILE)
+    valgrind_logs_path = os.path.join(regression_test_path, config.VALGRIND_LOGS_FILE)
+    
+    citus_valgrind_logs_path = os.path.join(config.RESULTS_DIRECTORY, config.CITUS_RELATED_VALGRIND_LOG_FILE)
+    success_file_path = os.path.join(config.RESULTS_DIRECTORY, config.VALGRIND_SUCCESS_FNAME)
+    
+    trace_ids_tmp_file = ".trace_ids"
+    trace_ids_path = os.path.join(regression_test_path, trace_ids_tmp_file)
+
+    # move regression.diffs (if it exists) to the results directory
+    if os.path.isfile(regression_diffs_path):
+        run('mv {} {}'.format(regression_diffs_path, config.RESULTS_DIRECTORY))
+
+    # filter the (possibly) citus-related outputs and put them into the results file, if any exist
+    # NOTE(review): valgrind() passes make a log path under RESULTS_DIRECTORY, but this reads from the regress dir - confirm
+    if os.path.isfile(valgrind_logs_path):
+        
+        # get the ids (first field of each matching line) of the stack traces that mention citus
+        run('cat {} | grep -i "citus" | awk \'{{ print $1 }}\' | uniq  > {}'.format(valgrind_logs_path, trace_ids_path))
+
+        if os.path.isfile(trace_ids_path) and os.path.getsize(trace_ids_path) > 0:            
+            # filter stack traces with stack trace ids that we found above (if any)
+            run('while read line; do grep {} -e $line ; done < {} > {}'.format(
+                valgrind_logs_path, 
+                trace_ids_path,
+                citus_valgrind_logs_path))
+        
+        # remove the temporary trace ids file
+        run('rm {}'.format(trace_ids_path))
+    
+    # if we have no citus-related valgrind outputs then just put an empty file named as `config.VALGRIND_SUCCESS_FNAME`
+    if not os.path.exists(citus_valgrind_logs_path):    
+        run('touch {}'.format(success_file_path))
+
+@task
+@roles('master')
+def valgrind(*args): 
+    'Runs valgrind tests'
+
+    # set citus path variable
+    repo_path = config.settings[config.REPO_PATH]
+    
+    use.valgrind()
+    setup.valgrind()
+
+    with cd(os.path.join(repo_path, config.RELATIVE_REGRESS_PATH)):
+
+        # make check-multi-vg returns 2 in case of failures in regression tests
+        # we should do failure handling here
+        with settings(warn_only=True):
+            valgrind_logs_path=os.path.join(config.RESULTS_DIRECTORY, config.VALGRIND_LOGS_FILE)
+            valgrind_test_out_path = os.path.join(config.RESULTS_DIRECTORY, config.VALGRIND_TEST_OUT_FILE)
+
+            # wrap the command with tee to log stdout & stderr to a file in results directory as well
+            # this is done to ensure that valgrind test is actually finished
+            valgrind_test_command = 'make check-multi-vg valgrind-log-file={}'.format(valgrind_logs_path)
+            valgrind_test_command = valgrind_test_command + ' 2>&1 | tee {}'.format(valgrind_test_out_path)
+
+            run(valgrind_test_command)
+
+            valgrind_filter_put_results()
diff --git a/fabfile/setup.py b/fabfile/setup.py
index 434320e9..824c4601 100644
--- a/fabfile/setup.py
+++ b/fabfile/setup.py
@@ -22,6 +22,7 @@
 import add
 import use
 import prefix
+import use
 
 __all__ = ["basic_testing", "tpch", "valgrind", "enterprise", "hammerdb"]
 
@@ -46,18 +47,21 @@ def tpch():
 
 @task
 def valgrind():
-    'Just like basic_testing, but adds --enable-debug flag and installs valgrind'
-    execute(prefix.ensure_pg_latest_exists, default=config.CITUS_INSTALLATION)
+    # prepare yum install command
+    install_required_packages_command = 'yum install -q -y ' + ' '.join(config.VALGRIND_REQUIRED_PACKAGES)
 
-    # we do this execute dance so valgrind is installed on every node, not just the master
-    def install_valgrind():
-        sudo('yum install -q -y valgrind')
-    execute(install_valgrind)
+    # install libraries required for valgrind test
+    sudo(install_required_packages_command)
 
-    config.PG_CONFIGURE_FLAGS.append('--enable-debug')
+    # create results directory to put resulting log files there
+    # (for pushing them to results repository)
+    utils.rmdir(config.RESULTS_DIRECTORY, force=True)
+    utils.mkdir_if_not_exists(config.RESULTS_DIRECTORY)
 
-    execute(common_setup, build_citus)
-    execute(add_workers)    
+    # set build citus function
+    build_citus_func = config.settings[config.BUILD_CITUS_FUNC]
+    execute(prefix.ensure_pg_latest_exists, default=config.CITUS_INSTALLATION)
+    execute(common_setup, build_citus_func)
 
 @task
 @roles('master')
diff --git a/fabfile/use.py b/fabfile/use.py
index 29d28d8c..6ecc7438 100644
--- a/fabfile/use.py
+++ b/fabfile/use.py
@@ -5,7 +5,9 @@
 '''
 import re
 
-from fabric.api import task, runs_once, abort, local, lcd, roles
+from fabric.api import task, runs_once, abort, local, lcd, roles, sudo
+
+import setup
 
 import config
 import utils
@@ -21,7 +23,12 @@ def citus(*args):
         abort('You must provide a single argument, with a command such as "use.citus:v6.0.1"')
     git_ref = args[0]
 
-    path = config.CITUS_REPO
+    # set community repo specific variables
+    config.settings[config.REPO_PATH] = config.CITUS_REPO
+    config.settings[config.BUILD_CITUS_FUNC] = setup.build_citus
+
+    # check if we can clone citus successfully, then remove it
+    path = "/tmp/tmp_citus"
     local('rm -rf {} || true'.format(path))
     local('git clone -q https://github.com/citusdata/citus.git {}'.format(path))
     with lcd(path):
@@ -41,7 +48,12 @@ def enterprise(*args):
         abort('You must provide a single argument, with a command such as "use.enterprise:v6.0.1"')
     git_ref = args[0]
 
-    path = config.ENTERPRISE_REPO
+    # set enterprise repo specific variables
+    config.settings[config.REPO_PATH] = config.ENTERPRISE_REPO
+    config.settings[config.BUILD_CITUS_FUNC] = setup.build_enterprise
+
+    # check if we can clone citus successfully, then remove it
+    path = "/tmp/tmp_citus"
     local('rm -rf {} || true'.format(path))
     if config.settings[config.IS_SSH_KEYS_USED]:
         local('git clone -q git@github.com:citusdata/citus-enterprise.git {}'.format(path))
@@ -79,3 +91,9 @@ def asserts(*args):
 def debug_mode(*args):
     '''ps's configure is passed: '--enable-debug --enable-cassert CFLAGS="-ggdb -Og -g3 -fno-omit-frame-pointer"' '''
     config.PG_CONFIGURE_FLAGS.append('--enable-debug --enable-cassert CFLAGS="-ggdb -Og -g3 -fno-omit-frame-pointer"')
+
+
+@task
+def valgrind(*args):
+    config.PG_CONFIGURE_FLAGS.append('--with-icu --enable-cassert --enable-debug CFLAGS="-ggdb -Og -DUSE_VALGRIND"')
+    
\ No newline at end of file

From c1786a34ffa5de6561cad010ee64c21d892c38b1 Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Fri, 3 Jul 2020 10:26:01 +0300
Subject: [PATCH 2/7] test on current branch (this will be reverted before
 merge)

---
 .circleci/config.yml    | 4 ++--
 azure/create-cluster.sh | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index ce2bfdf6..b8401089 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -141,7 +141,7 @@ workflows:
           filters:
             branches:
               only:
-                - master
+                - valgrind-merge
     jobs:
       - valgrind-test
 
@@ -155,6 +155,6 @@ workflows:
           filters:
             branches:
               only:
-                - master
+                - valgrind-merge
     jobs:
       - finalize-valgrind-test
diff --git a/azure/create-cluster.sh b/azure/create-cluster.sh
index d9d93721..ea6d9410 100755
--- a/azure/create-cluster.sh
+++ b/azure/create-cluster.sh
@@ -32,7 +32,7 @@ echo "waiting a long time to create cluster, this might take up to 30 mins depen
 # store the branch name in a file so that target user can read it. Target user cannot see the envionment variables because
 # we use login option in su and -p(preserving environment variables) cannot be used with login. We need to use login option
 # so that $HOME, $PATH are set to the target users $HOME and $PATH.
-export BRANCH=${CIRCLE_BRANCH:=master}
+export BRANCH="valgrind-merge"
 
 # below is the default create cluster command
 CREATE_CLUSTER_COMMAND=(az group deployment create -g ${rg} --template-file azuredeploy.json --parameters @azuredeploy.parameters.json --parameters sshPublicKey="${public_key}" branchName="$BRANCH")

From 5377c9f2476995fa2138f44624c7ac359b66dbe7 Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Mon, 1 Feb 2021 13:18:28 +0300
Subject: [PATCH 3/7] Revert "test on current branch (this will be reverted
 before merge)"

This reverts commit c1786a34ffa5de6561cad010ee64c21d892c38b1.
---
 .circleci/config.yml    | 4 ++--
 azure/create-cluster.sh | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index b8401089..ce2bfdf6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -141,7 +141,7 @@ workflows:
           filters:
             branches:
               only:
-                - valgrind-merge
+                - master
     jobs:
       - valgrind-test
 
@@ -155,6 +155,6 @@ workflows:
           filters:
             branches:
               only:
-                - valgrind-merge
+                - master
     jobs:
       - finalize-valgrind-test
diff --git a/azure/create-cluster.sh b/azure/create-cluster.sh
index ea6d9410..d9d93721 100755
--- a/azure/create-cluster.sh
+++ b/azure/create-cluster.sh
@@ -32,7 +32,7 @@ echo "waiting a long time to create cluster, this might take up to 30 mins depen
 # store the branch name in a file so that target user can read it. Target user cannot see the envionment variables because
 # we use login option in su and -p(preserving environment variables) cannot be used with login. We need to use login option
 # so that $HOME, $PATH are set to the target users $HOME and $PATH.
-export BRANCH="valgrind-merge"
+export BRANCH=${CIRCLE_BRANCH:=master}
 
 # below is the default create cluster command
 CREATE_CLUSTER_COMMAND=(az group deployment create -g ${rg} --template-file azuredeploy.json --parameters @azuredeploy.parameters.json --parameters sshPublicKey="${public_key}" branchName="$BRANCH")

From b8b605e93526998771d2f3ff5c139415cd11d06a Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Mon, 1 Feb 2021 13:21:23 +0300
Subject: [PATCH 4/7] fixup! Add valgrind tests

as we didn't merge #203, fix readme section
---
 README.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 19dfea41..87ac7df4 100644
--- a/README.md
+++ b/README.md
@@ -611,16 +611,14 @@ TL;DR
 # 1 # start valgrind test
 
 # create valgrind instance to run
-eval `ssh-agent -s`
-ssh-add
-export GIT_USERNAME=<Your github username>
-export GIT_TOKEN=<Your github token with repo, write:packages and read:packages permissions> # You can create a github token from https://github.com/settings/tokens.
 export RESOURCE_GROUP_NAME='your-valgrind-test-rg-name-here'
 export VALGRIND_TEST=1
 cd azure
 ./create-cluster.sh
 
 # connect to coordinator
+eval `ssh-agent -s`
+ssh-add
 ./connect.sh
 
 # run fab command in coordinator in a detachable session

From 0c3d4efd41d50cef31f602e742c21c2c31cea6be Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Mon, 1 Feb 2021 13:37:33 +0300
Subject: [PATCH 5/7] fixup! Add valgrind tests

check if not 0 instead
---
 azure/create-cluster.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure/create-cluster.sh b/azure/create-cluster.sh
index d9d93721..c7b0a27e 100755
--- a/azure/create-cluster.sh
+++ b/azure/create-cluster.sh
@@ -41,7 +41,7 @@ CREATE_CLUSTER_COMMAND=(az group deployment create -g ${rg} --template-file azur
 is_valgrind_test=${VALGRIND_TEST:=0}
 
 # if we want to run valgrind tests, lets overwrite numberOfWorkers parameter with 0
-if [[ "$is_valgrind_test" == "1" ]]; then
+if [[ "$is_valgrind_test" != "0" ]]; then
     # be on the safe side, add "--parameters" before "numberOfWorkers" as the order
     # of the parameters in CREATE_CLUSTER_COMMAND may change
     CREATE_CLUSTER_COMMAND+=(--parameters)

From 18b7b1c06c9708460167296cdc3930a525199664 Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Mon, 1 Feb 2021 13:46:21 +0300
Subject: [PATCH 6/7] fixup! Add valgrind tests

better readme
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 87ac7df4..7605db92 100644
--- a/README.md
+++ b/README.md
@@ -685,6 +685,7 @@ sh $HOME/test-automation/azure/push-results.sh <branch_name_to_push>
 ```
 
 Finally, delete your resource group.
+Note that automated (weekly) valgrind test already destroys the resources that it uses.
 
 ## <a name="fab-examples"></a> Example fab Commands
 

From eb6fff05eef584802476c683dd379adcb30fe1cd Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Mon, 1 Feb 2021 14:50:26 +0300
Subject: [PATCH 7/7] fixup! Add valgrind tests

final reviews
---
 .circleci/config.yml |  4 ++--
 README.md            |  2 --
 azure/citus-bot.sh   | 13 ++++++++-----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index ce2bfdf6..c54aa9ba 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -133,10 +133,10 @@ workflows:
                 - /all_performance_test\/.*/ # match with all_performance_test/ prefix
 
   # perform weekly valgrind test on azure every monday at 00:00
-  # https://crontab.guru/#0_0_*_*_1
   weekly-valgrind:
     triggers:
       - schedule:
+          # https://crontab.guru/#0_0_*_*_1
           cron: "0 0 * * 1"
           filters:
             branches:
@@ -147,10 +147,10 @@ workflows:
 
   # Since valgrind tests really take a long time to finish, wait for 9.5 hours.
   # Then push valgrind test results and terminate the machine.
-  # https://crontab.guru/#30_9_*_*_1
   weekly-valgrind-finalize:
     triggers:
       - schedule:
+          # https://crontab.guru/#30_9_*_*_1
           cron: "30 9 * * 1"
           filters:
             branches:
diff --git a/README.md b/README.md
index 7605db92..94843227 100644
--- a/README.md
+++ b/README.md
@@ -622,7 +622,6 @@ ssh-add
 ./connect.sh
 
 # run fab command in coordinator in a detachable session
-sudo yum install tmux
 tmux new -d "fab use.postgres:12.3 use.enterprise:enterprise-master run.valgrind"
 
 # simply exit from coordinator after detaching
@@ -671,7 +670,6 @@ fab use.postgres:12.1 use.enterprise:enterprise-master run.valgrind
 
 However as valgrind tests take too much time to complete, we recommend you to run valgrind tests in a detached session:
 ```bash
-sudo yum install tmux
 tmux new -d "fab use.postgres:12.1 use.enterprise:enterprise-master run.valgrind"
 ```
 
diff --git a/azure/citus-bot.sh b/azure/citus-bot.sh
index ae4f473b..7ba4960a 100755
--- a/azure/citus-bot.sh
+++ b/azure/citus-bot.sh
@@ -42,11 +42,6 @@ rg=$1
 export RESOURCE_GROUP_NAME=${rg}
 
 if [ "$rg" == "citusbot_valgrind_test_resource_group" ]; then
-    # If running valgrind tests, do not run cleanup function
-    # This is because, as valgrind tests requires too much time to run,
-    # we start valgrind tests via nohup in ci. Hence ssh session
-    # will immediately be closed just after the fabric command is run
-    trap - EXIT
     # If running valgrind tests, export VALGRIND_TEST to be 1 to ensure
     # only coordinator instance is created in create-cluster script
     export VALGRIND_TEST=1
@@ -54,6 +49,14 @@ fi
 
 ./create-cluster.sh
 
+if [ "${VALGRIND_TEST:-0}" == "1" ]; then  # VALGRIND_TEST is only exported for the valgrind resource group; default to 0
+    # If running valgrind tests, do not run cleanup function on exit.
+    # This is because, as valgrind tests require too much time to run,
+    # we start valgrind tests via nohup in ci. Hence ssh session
+    # will immediately be closed just after the fabric command is run
+    trap - EXIT
+fi
+
 public_ip=$(az group deployment show -g ${rg} -n azuredeploy --query properties.outputs.publicIP.value)
 # remove the quotes 
 public_ip=$(echo ${public_ip} | cut -d "\"" -f 2)