besu/.github/workflows/splitTestsByTime.sh

#!/bin/bash
##
## Copyright contributors to Besu.
##
## Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
## the License. You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
## an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
## specific language governing permissions and limitations under the License.
##
## SPDX-License-Identifier: Apache-2.0
##

# Positional arguments:
#   $1 - directory that is searched for JUnit XML reports
#   $2 - path prefix: the module dir is what follows "<prefix>/" in a report path
#   $3 - path suffix: the module dir ends right before "/<suffix>" in a report path
#   $4 - number of groups the tests are split into
#   $5 - zero-based index of the group whose tests are printed
REPORTS_DIR=${1}
REPORT_STRIP_PREFIX=${2}
REPORT_STRIP_SUFFIX=${3}
SPLIT_COUNT=${4}
SPLIT_INDEX=${5}

# Extract the test times from the JUnit XML reports found below REPORTS_DIR and
# store them in tmp/timing.tsv.
# For every report, xmlstarlet prints "<sum of all testcase times> <testsuite name>"
# and sed then prints the report path with the part matching
# "${REPORT_STRIP_PREFIX}/<module dir>/${REPORT_STRIP_SUFFIX}..." replaced by
# " <module dir>", which is intended to complete the line with the module dir.
# The -name pattern is quoted so that it reaches find unchanged instead of
# being expanded by the shell against files in the current directory.
find "$REPORTS_DIR" -type f -name 'TEST-*.xml' |
  xargs -I{} bash -c "xmlstarlet sel -t -v 'concat(sum(//testcase/@time), \" \", //testsuite/@name)' '{}'; echo '{}' | sed \"s#${REPORT_STRIP_PREFIX}/\(.*\)/${REPORT_STRIP_SUFFIX}.*# \1#\"" > tmp/timing.tsv

# Sort times in descending order and load them into the "sorted" array, one
# element per line; blank lines are skipped.
# mapfile stores every line verbatim, so entries are never subjected to
# pathname expansion and IFS does not have to be changed and restored.
mapfile -t sorted < <(sort -nr tmp/timing.tsv | grep -v '^$')

sums=()
tests=()

# Initialize sums
for ((i=0; i<SPLIT_COUNT; i++))
do
	sums[$i]=0
done

echo -n '' > tmp/processedTests.list

# Greedy balancing: walk the timing entries in their sorted order and hand each
# test to the group whose accumulated execution time is currently the smallest.
for timing_entry in "${sorted[@]}"; do
  # entry layout: "<time> <test name> <module dir>"
  fields=( $timing_entry )
  test_time=$(bc <<< "${fields[0]} * 1000 / 1")  # convert to millis without decimals
  test_with_module="${fields[1]} ${fields[2]}"

  # Deduplication: a test that was already assigned must not be executed twice
  if grep -F -x -q "$test_with_module" tmp/processedTests.list; then
    continue
  fi

  # Tests that are not part of the current test list no longer exist: skip them
  grep -F -x -q "$test_with_module" tmp/currentTests.list || continue

  # Locate the group with the smallest sum; on ties the lowest index wins
  lightest=0
  for ((g = 1; g < SPLIT_COUNT; g++)); do
    if (( sums[g] < sums[lightest] )); then
      lightest=$g
    fi
  done

  # Assign the test to that group and account for its execution time
  tests[lightest]+="${test_with_module},"
  (( sums[lightest] += test_time ))

  # Remember the test so that later duplicates are skipped
  echo "$test_with_module" >> tmp/processedTests.list
done

# Tests listed in tmp/currentTests.list but missing from
# tmp/processedTests.list are new: spread them over the groups round-robin.
grep -F -x -v -f tmp/processedTests.list tmp/currentTests.list > tmp/newTests.list
new_test_count=0
while read -r new_test_with_module; do
  target_group=$(( new_test_count % SPLIT_COUNT ))
  tests[target_group]+="${new_test_with_module},"
  (( new_test_count += 1 ))
done < tmp/newTests.list

# Drop the trailing comma from the list of every group
for ((g = 0; g < SPLIT_COUNT; g++)); do
  tests[g]=${tests[g]%,}
done


# Collect the distinct module dirs (second field of every entry) that occur in
# the group selected by SPLIT_INDEX
module_list=( $( echo "${tests[$SPLIT_INDEX]}" | tr ',' '\n' | awk '{ print $2 }' | sort -u ) )

# Map every module dir to the space-terminated names of its tests
declare -A group_by_module
for module in "${module_list[@]}"; do
  group_by_module[$module]=""
done

# Break the selected group into its "<test name> <module dir>" entries
IFS=','
test_list=( ${tests[$SPLIT_INDEX]} )
unset IFS

# Append every test name to the list of the module it belongs to
for entry in "${test_list[@]}"; do
  entry_fields=( $entry )
  group_by_module[${entry_fields[1]}]+="${entry_fields[0]} "
done

#######################################
# Prints the test task of one module followed by a "--tests <name>" argument
# for each of its tests.
# Arguments:
#   $1 - module dir; every "/" becomes ":" in the task name
#   $2 - test names separated by single spaces, optionally with a trailing space
# Outputs:
#   ":<task>:test  --tests <name1> --tests <name2> ..." on stdout
#######################################
print_module_test_args() {
  local task=":${1//\//:}:test"
  local names=${2% }  # drop the trailing space left by the grouping step
  # Built with parameter expansion rather than `sed 's/^\| / --tests /g'`: the
  # `\|` alternation is a GNU extension to basic regular expressions, and this
  # also avoids spawning a process per module.
  echo "$task  --tests ${names// / --tests }"
}

# Print one line per module of the selected group
for module_dir in "${module_list[@]}"; do
  print_module_test_args "$module_dir" "${group_by_module[$module_dir]}"
done
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`#!/bin/bash`
build - Apply spotless license header checks to shell scripts (#7048) * build - apply license to shell scripts * build - shell script license text * fix - Add missing shebang line in evmtool benchmark shell scripts * build - update shell license text and spotless regexp * build - apply license header to shell scripts * fix - Update shebang in splitList to use bash --------- Signed-off-by: Usman Saleem <usman@usmans.info> 7 months ago			`##`
Copyright Besu (#7768) * update copyright for new files only if not matching existing patterns * scripts too * scripts all use Besu copyright Signed-off-by: Sally MacFarlane <macfarla.github@gmail.com> --------- Signed-off-by: Sally MacFarlane <macfarla.github@gmail.com> 1 month ago			`## Copyright contributors to Besu.`
build - Apply spotless license header checks to shell scripts (#7048) * build - apply license to shell scripts * build - shell script license text * fix - Add missing shebang line in evmtool benchmark shell scripts * build - update shell license text and spotless regexp * build - apply license header to shell scripts * fix - Update shebang in splitList to use bash --------- Signed-off-by: Usman Saleem <usman@usmans.info> 7 months ago			`##`
			`## Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with`
			`## the License. You may obtain a copy of the License at`
			`##`
			`## http://www.apache.org/licenses/LICENSE-2.0`
			`##`
			`## Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on`
			`## an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the`
			`## specific language governing permissions and limitations under the License.`
			`##`
			`## SPDX-License-Identifier: Apache-2.0`
			`##`
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago
			`REPORTS_DIR="$1"`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`REPORT_STRIP_PREFIX="$2"`
			`REPORT_STRIP_SUFFIX="$3"`
			`SPLIT_COUNT=$4`
			`SPLIT_INDEX=$5`
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago
			`# extract tests time from Junit XML reports`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`find "$REPORTS_DIR" -type f -name TEST-.xml \| xargs -I{} bash -c "xmlstarlet sel -t -v 'concat(sum(//testcase/@time), \" \", //testsuite/@name)' '{}'; echo '{}' \| sed \"s#${REPORT_STRIP_PREFIX}/\(.\)/${REPORT_STRIP_SUFFIX}.*# \1#\"" > tmp/timing.tsv`
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago
			`# Sort times in descending order`
			`IFS=$'\n' sorted=($(sort -nr tmp/timing.tsv))`
			`unset IFS`

			`sums=()`
			`tests=()`

			`# Initialize sums`
			`for ((i=0; i<SPLIT_COUNT; i++))`
			`do`
			`sums[$i]=0`
			`done`

Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`echo -n '' > tmp/processedTests.list`

Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`# add tests to groups trying to balance the sum of execution time of each group`
			`for line in "${sorted[@]}"; do`
			`line_parts=( $line )`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`test_time=$( echo "${line_parts[0]} * 1000 / 1" \| bc ) # convert to millis without decimals`
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`test_name=${line_parts[1]}`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`module_dir=${line_parts[2]}`
			`test_with_module="$test_name $module_dir"`
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago
Cleanup transition steps used to update test reports on main (#7158) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 6 months ago			`# deduplication check to avoid executing a test multiple time`
Fix acceptance reports path and how to get the latest merged PR (#7147) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 6 months ago			`if grep -F -q --line-regexp "$test_with_module" tmp/processedTests.list`
			`then`
			`continue`
			`fi`

Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`# Does the test still exists?`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`if grep -F -q --line-regexp "$test_with_module" tmp/currentTests.list`
Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`then`
			`# Find index of min sum`
			`idx_min_sum=0`
			`min_sum=${sums[0]}`
			`for ((i=0; i<SPLIT_COUNT; i++))`
			`do`
			`if [[ ${sums[$i]} -lt $min_sum ]]`
			`then`
			`idx_min_sum=$i`
			`min_sum=${sums[$i]}`
			`fi`
			`done`

			`# Add the test to the min sum list`
			`min_sum_tests=${tests[$idx_min_sum]}`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`tests[$idx_min_sum]="${min_sum_tests}${test_with_module},"`
Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago
			`# Update the sums`
			`((sums[idx_min_sum]+=test_time))`

Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`echo "$test_with_module" >> tmp/processedTests.list`
Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`fi`
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`done`

Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`# Any new test?`
			`grep -F --line-regexp -v -f tmp/processedTests.list tmp/currentTests.list > tmp/newTests.list`
			`idx_new_test=0`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`while read -r new_test_with_module`
Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`do`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`idx_group=$(( idx_new_test % SPLIT_COUNT ))`
			`group=${tests[$idx_group]}`
			`tests[$idx_group]="${group}${new_test_with_module},"`
			`idx_new_test=$(( idx_new_test + 1 ))`
Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`done < tmp/newTests.list`

Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`# remove last comma`
			`for ((i=0; i<SPLIT_COUNT; i++))`
			`do`
			`test_list=${tests[$i]%,}`
			`tests[$i]="$test_list"`
			`done`


			`# group tests by module`
			`module_list=( $( echo "${tests[$SPLIT_INDEX]}" \| tr "," "\n" \| awk '{print $2}' \| sort -u ) )`

			`declare -A group_by_module`
			`for module_dir in "${module_list[@]}"`
			`do`
			`group_by_module[$module_dir]=""`
			`done`

			`IFS="," test_list=( ${tests[$SPLIT_INDEX]} )`
			`unset IFS`

			`for line in "${test_list[@]}"`
			`do`
			`line_parts=( $line )`
			`test_name=${line_parts[0]}`
			`module_dir=${line_parts[1]}`

			`module_group=${group_by_module[$module_dir]}`
			`group_by_module[$module_dir]="$module_group$test_name "`
			`done`
Support removed and added test when splitting by time (#6990) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago
Split acceptance tests by time (#6953) Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> 7 months ago			`# return the requests index, without quotes to drop the last trailing space`
Split unit tests by time (#7079) * Split unit tests by time Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Parallelize compile and unit tests since there is not shared cache Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * fix Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Reduce ATs runnes to 10 Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> * Apply suggestions from code review Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> --------- Signed-off-by: Fabio Di Fabio <fabio.difabio@consensys.net> Co-authored-by: Sally MacFarlane <macfarla.github@gmail.com> 6 months ago			`for module_dir in "${module_list[@]}"`
			`do`
			`module_test_task=":${module_dir//\//:}:test"`
			`module_tests=$( echo "${group_by_module[$module_dir]% }" \| sed -e 's/^\\| / --tests /g' )`
			`echo "$module_test_task $module_tests"`
			`done`