Diff of the two buildlogs: -- --- b1/build.log 2024-01-13 17:22:23.311829694 +0000 +++ b2/build.log 2024-01-13 18:22:33.698170180 +0000 @@ -1,6 +1,6 @@ I: pbuilder: network access will be disabled during build -I: Current time: Sat Jan 13 04:20:04 -12 2024 -I: pbuilder-time-stamp: 1705162804 +I: Current time: Sat Feb 15 13:45:35 +14 2025 +I: pbuilder-time-stamp: 1739576735 I: Building the build Environment I: extracting base tarball [/var/cache/pbuilder/bookworm-reproducible-base.tgz] I: copying local configuration @@ -28,49 +28,81 @@ dpkg-source: info: applying auto-gitignore I: Not using root during the build. I: Installing the build-deps -I: user script /srv/workspace/pbuilder/2338948/tmp/hooks/D02_print_environment starting +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/D01_modify_environment starting +debug: Running on ionos15-amd64. +I: Changing host+domainname to test build reproducibility +I: Adding a custom variable just for the fun of it... +I: Changing /bin/sh to bash +'/bin/sh' -> '/bin/bash' +lrwxrwxrwx 1 root root 9 Feb 14 23:45 /bin/sh -> /bin/bash +I: Setting pbuilder2's login shell to /bin/bash +I: Setting pbuilder2's GECOS to second user,second room,second work-phone,second home-phone,second other +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/D01_modify_environment finished +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/D02_print_environment starting I: set - BUILDDIR='/build/reproducible-path' - BUILDUSERGECOS='first user,first room,first work-phone,first home-phone,first other' - BUILDUSERNAME='pbuilder1' - BUILD_ARCH='amd64' - DEBIAN_FRONTEND='noninteractive' - DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=15 ' - DISTRIBUTION='bookworm' - HOME='/root' - HOST_ARCH='amd64' + BASH=/bin/sh + BASHOPTS=checkwinsize:cmdhist:complete_fullquote:extquote:force_fignore:globasciiranges:globskipdots:hostcomplete:interactive_comments:patsub_replacement:progcomp:promptvars:sourcepath + BASH_ALIASES=() + BASH_ARGC=() + BASH_ARGV=() + BASH_CMDS=() + BASH_LINENO=([0]="12" [1]="0") + BASH_LOADABLES_PATH=/usr/local/lib/bash:/usr/lib/bash:/opt/local/lib/bash:/usr/pkg/lib/bash:/opt/pkg/lib/bash:. + BASH_SOURCE=([0]="/tmp/hooks/D02_print_environment" [1]="/tmp/hooks/D02_print_environment") + BASH_VERSINFO=([0]="5" [1]="2" [2]="15" [3]="1" [4]="release" [5]="x86_64-pc-linux-gnu") + BASH_VERSION='5.2.15(1)-release' + BUILDDIR=/build/reproducible-path + BUILDUSERGECOS='second user,second room,second work-phone,second home-phone,second other' + BUILDUSERNAME=pbuilder2 + BUILD_ARCH=amd64 + DEBIAN_FRONTEND=noninteractive + DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=16 ' + DIRSTACK=() + DISTRIBUTION=bookworm + EUID=0 + FUNCNAME=([0]="Echo" [1]="main") + GROUPS=() + HOME=/root + HOSTNAME=i-capture-the-hostname + HOSTTYPE=x86_64 + HOST_ARCH=amd64 IFS=' ' - INVOCATION_ID='eff629f2648349b5a16472f8aa62a800' - LANG='C' - LANGUAGE='en_US:en' - LC_ALL='C' - MAIL='/var/mail/root' - OPTIND='1' - PATH='/usr/sbin:/usr/bin:/sbin:/bin:/usr/games' - PBCURRENTCOMMANDLINEOPERATION='build' - PBUILDER_OPERATION='build' - PBUILDER_PKGDATADIR='/usr/share/pbuilder' - PBUILDER_PKGLIBDIR='/usr/lib/pbuilder' - PBUILDER_SYSCONFDIR='/etc' - PPID='2338948' - PS1='# ' - PS2='> ' + INVOCATION_ID=01fdad5637dd4b91bfb0a97c8b64a631 + LANG=C + LANGUAGE=et_EE:et + LC_ALL=C + MACHTYPE=x86_64-pc-linux-gnu + MAIL=/var/mail/root + OPTERR=1 + OPTIND=1 + OSTYPE=linux-gnu + PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path + PBCURRENTCOMMANDLINEOPERATION=build + PBUILDER_OPERATION=build + PBUILDER_PKGDATADIR=/usr/share/pbuilder + PBUILDER_PKGLIBDIR=/usr/lib/pbuilder + PBUILDER_SYSCONFDIR=/etc + PIPESTATUS=([0]="0") + POSIXLY_CORRECT=y + PPID=3907119 PS4='+ ' - PWD='/' - SHELL='/bin/bash' - SHLVL='2' - SUDO_COMMAND='/usr/bin/timeout -k 18.1h 18h /usr/bin/ionice -c 3 /usr/bin/nice /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.7BFLl8qJ/pbuilderrc_9ook --distribution bookworm --hookdir /etc/pbuilder/first-build-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/bookworm-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.7BFLl8qJ/b1 --logfile b1/build.log librsb_1.3.0.2+dfsg-4.dsc' - SUDO_GID='111' - SUDO_UID='106' - SUDO_USER='jenkins' - TERM='unknown' - TZ='/usr/share/zoneinfo/Etc/GMT+12' - USER='root' - _='/usr/bin/systemd-run' - http_proxy='http://78.137.99.97:3128' + PWD=/ + SHELL=/bin/bash + SHELLOPTS=braceexpand:errexit:hashall:interactive-comments:posix + SHLVL=3 + SUDO_COMMAND='/usr/bin/timeout -k 24.1h 24h /usr/bin/ionice -c 3 /usr/bin/nice -n 11 /usr/bin/unshare --uts -- /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.7BFLl8qJ/pbuilderrc_zI7r --distribution bookworm --hookdir /etc/pbuilder/rebuild-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/bookworm-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.7BFLl8qJ/b2 --logfile b2/build.log librsb_1.3.0.2+dfsg-4.dsc' + SUDO_GID=111 + SUDO_UID=106 + SUDO_USER=jenkins + TERM=unknown + TZ=/usr/share/zoneinfo/Etc/GMT-14 + UID=0 + USER=root + _='I: set' + http_proxy=http://85.184.249.68:3128 I: uname -a - Linux ionos11-amd64 6.1.0-17-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1 (2023-12-30) x86_64 GNU/Linux + Linux i-capture-the-hostname 6.5.0-0.deb12.4-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.5.10-1~bpo12+1 (2023-11-23) x86_64 GNU/Linux I: ls -l /bin total 5632 -rwxr-xr-x 1 root root 1265648 Apr 23 2023 bash @@ -128,15 +160,15 @@ -rwxr-xr-x 1 root root 52112 Sep 20 2022 readlink -rwxr-xr-x 1 root root 72752 Sep 20 2022 rm -rwxr-xr-x 1 root root 56240 Sep 20 2022 rmdir - -rwxr-xr-x 1 root root 27560 Jul 28 23:46 run-parts + -rwxr-xr-x 1 root root 27560 Jul 28 2023 run-parts -rwxr-xr-x 1 root root 126424 Jan 5 2023 sed - lrwxrwxrwx 1 root root 4 Jan 5 2023 sh -> dash + lrwxrwxrwx 1 root root 9 Feb 14 23:45 sh -> /bin/bash -rwxr-xr-x 1 root root 43888 Sep 20 2022 sleep -rwxr-xr-x 1 root root 85008 Sep 20 2022 stty -rwsr-xr-x 1 root root 72000 Mar 23 2023 su -rwxr-xr-x 1 root root 39824 Sep 20 2022 sync -rwxr-xr-x 1 root root 531984 Apr 6 2023 tar - -rwxr-xr-x 1 root root 14520 Jul 28 23:46 tempfile + -rwxr-xr-x 1 root root 14520 Jul 28 2023 tempfile -rwxr-xr-x 1 root root 109616 Sep 20 2022 touch -rwxr-xr-x 1 root root 35664 Sep 20 2022 true -rwxr-xr-x 1 root root 14568 Mar 23 2023 ulockmgr_server @@ -156,7 +188,7 @@ -rwxr-xr-x 1 root root 2206 Apr 10 2022 zless -rwxr-xr-x 1 root root 1842 Apr 10 2022 zmore -rwxr-xr-x 1 root root 4577 Apr 10 2022 znew -I: user script /srv/workspace/pbuilder/2338948/tmp/hooks/D02_print_environment finished +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/D02_print_environment finished -> Attempting to satisfy build-dependencies -> Creating pbuilder-satisfydepends-dummy package Package: pbuilder-satisfydepends-dummy @@ -357,7 +389,7 @@ Get: 153 http://deb.debian.org/debian bookworm/main amd64 libltdl-dev amd64 2.4.7-5 [164 kB] Get: 154 http://deb.debian.org/debian bookworm/main amd64 libhwloc-dev amd64 2.9.0-1 [241 kB] Get: 155 http://deb.debian.org/debian bookworm/main amd64 zlib1g-dev amd64 1:1.2.13.dfsg-1 [916 kB] -Fetched 343 MB in 1min 33s (3701 kB/s) +Fetched 343 MB in 9s (36.9 MB/s) debconf: delaying package configuration, since apt-utils is not installed Selecting previously unselected package liblocale-gettext-perl. (Reading database ... (Reading database ... 5% (Reading database ... 10% (Reading database ... 15% (Reading database ... 20% (Reading database ... 25% (Reading database ... 30% (Reading database ... 35% (Reading database ... 40% (Reading database ... 45% (Reading database ... 50% (Reading database ... 55% (Reading database ... 60% (Reading database ... 65% (Reading database ... 70% (Reading database ... 75% (Reading database ... 80% (Reading database ... 85% (Reading database ... 90% (Reading database ... 95% (Reading database ... 100% (Reading database ... 18148 files and directories currently installed.) @@ -1015,7 +1047,11 @@ Building tag database... -> Finished parsing the build-deps I: Building the package -I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-4_source.changes +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/A99_set_merged_usr starting +Not re-configuring usrmerge for bookworm +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/A99_set_merged_usr finished +hostname: Name or service not known +I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-4_source.changes dpkg-buildpackage: info: source package librsb dpkg-buildpackage: info: source version 1.3.0.2+dfsg-4 dpkg-buildpackage: info: source distribution unstable @@ -1035,62 +1071,62 @@ dh binary --parallel dh_update_autotools_config dh_autoreconf -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found libtoolize: putting auxiliary files in '.'. libtoolize: copying file './ltmain.sh' libtoolize: putting macros in AC_CONFIG_MACRO_DIRS, 'm4'. @@ -1132,90 +1168,90 @@ libtoolize: copying file 'm4/ltsugar.m4' libtoolize: copying file 'm4/ltversion.m4' libtoolize: copying file 'm4/lt~obsolete.m4' -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found configure.ac:66: installing './compile' configure.ac:65: installing './missing' Makefile.am: installing './depcomp' @@ -1398,7 +1434,7 @@ checking for m4... m4 checking for gmake... gmake checking for ggrep... /bin/grep -checking for bash... /bin/bash +checking for bash... /bin/sh checking for gsed... /bin/sed checking for cmp... cmp checking for basename... basename @@ -1515,7 +1551,7 @@ configure: It appears that Fortran programs can be linked without using the Fortran linker. configure: Using OPENMP_CFLAGS ok for linking an OpenMP program: adding it to LIBS. checking if your have a usable getrusage() ... 1 -checking for /bin/bash... /bin/bash +checking for /bin/sh... /bin/sh configure: Will not use Google Test. configure: You seem to not have GNU Octave or have disabled 'int' type. Part of the test suite will not be generated. If you want more testing capabilities, you should enable the 'int' type as well. checking that generated files are newer than configure... done @@ -1537,7 +1573,7 @@ config.status: executing depfiles commands config.status: executing libtool commands === configuring in librsbpp (/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for a race-free mkdir -p... /bin/mkdir -p @@ -1659,7 +1695,7 @@ configure: Will not use Google Test. checking whether you have std::thread... yes checking whether you have std::mutex... yes -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1668,7 +1704,7 @@ config.status: executing libtool commands configure: Successfully created a Makefile. === configuring in rsblib (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for a race-free mkdir -p... /bin/mkdir -p @@ -1777,7 +1813,7 @@ checking for filesystem... yes checking for main in -lstdc++fs... yes configure: Assuming you are yet to build librsb.la. (set LIBS= -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la) -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1787,7 +1823,7 @@ config.status: executing libtool commands configure: Created a Makefile. === configuring in rsbtest (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=x86_64-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/x86_64-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=x86_64-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for a race-free mkdir -p... /bin/mkdir -p @@ -1898,7 +1934,7 @@ checking for filesystem... yes checking for rsb_lib_init... no checking for dd... yes -/bin/bash +/bin/sh /usr/bin/timeout checking that generated files are newer than configure... done configure: creating ./config.status @@ -1930,7 +1966,7 @@ ARFLAGS : cru M4 : m4 MAKE : gmake - BASH : /bin/bash + BASH : /bin/sh OCTAVE : false DOXYGEN : doxygen HELP2MAN : help2man @@ -1974,31 +2010,31 @@ debian/rules override_dh_auto_build make[1]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' dh_auto_build - make -j15 + make -j16 make[2]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake all-recursive gmake[3]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' Making all in librsbpp gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp.o rsbpp.cpp -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbtt.o rsbtt.cpp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -c -o rsbct.o rsbct.c libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -fPIC -DPIC -o .libs/rsbpp_coo.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -fPIC -DPIC -o .libs/rsbpp_csr.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -o rsbpp_coo.o >/dev/null 2>&1 libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -o rsbpp_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs libtool: link: ar cr .libs/librsbpp.a .libs/rsbpp_coo.o .libs/rsbpp_csr.o libtool: link: ranlib .libs/librsbpp.a libtool: link: ( cd ".libs" && rm -f "librsbpp.la" && ln -s "../librsbpp.la" "librsbpp.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbpp rsbpp.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp -libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbct rsbct.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbtt rsbtt.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp +libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbct rsbct.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp gmake all-am gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' @@ -2016,140 +2052,175 @@ gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsbench-rsb_libspblas_tests.o `test -f 'rsb_libspblas_tests.c' || echo './'`rsb_libspblas_tests.c g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -c -o rsb_dummy.o rsb_dummy.cpp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o ch2icfb-ch2icfb.o `test -f 'ch2icfb.c' || echo './'`ch2icfb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_unroll.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_vb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_strmif.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_stropts.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_strmif.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_vb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_lb.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -o librsb_nounroll_la-rsb_unroll.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -o librsb_nounroll_la-rsb_krnl_vb.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_bench.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -o librsb_nounroll_la-rsb_krnl_lb.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mergesort.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_mergesort.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -o librsb_nounroll_la-rsb_unroll.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -o librsb_nounroll_la-rsb_krnl_vb.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -o librsb_nounroll_la-rsb_stropts.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_permute.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -o librsb_nounroll_la-rsb_krnl_lb.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -o librsb_nounroll_la-rsb_strmif.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mergesort.c -o librsb_nounroll_la-rsb_mergesort.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -o librsb_nounroll_la-rsb_strmif.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mergesort.c -o librsb_nounroll_la-rsb_mergesort.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -o librsb_nounroll_la-rsb_krnl_bcss_l.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -o librsb_nounroll_la-rsb_krnl_bcss_l.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_u.c -o librsb_nounroll_la-rsb_krnl_bcss_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spmv.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spmv.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_merge.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_ompio.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_ompio.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_util.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss.c -o librsb_nounroll_la-rsb_krnl_bcss.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_util.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_ompio.c -o librsb_nounroll_la-rsb_ompio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spsum_misc.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spgemm_csr.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spmv.c -o librsb_nounroll_la-rsb_spmv.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -o librsb_nounroll_la-rsb_bench.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spsum_misc.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -o librsb_nounroll_la-rsb_merge.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_prec.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -o librsb_nounroll_la-rsb_bench.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -o librsb_nounroll_la-rsb_permute.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -o librsb_nounroll_la-rsb_prec.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -o librsb_nounroll_la-rsb_spgemm_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -o librsb_nounroll_la-rsb_spsum_misc.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_prec.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_is.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mio.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -o librsb_nounroll_la-rsb_prec.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -o librsb_base_la-rsb_is.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_op.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_op.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -o librsb_nounroll_la-rsb_spsum_misc.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_op.c -o librsb_base_la-rsb_op.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_bio.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_get.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_get.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -o librsb_base_la-rsb_is.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_set.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bio.c -o librsb_base_la-rsb_bio.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -o librsb_base_la-rsb_mio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -o librsb_base_la-rsb_set.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -o librsb_base_la-rsb_mio.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_check.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_check.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_symm.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -o librsb_base_la-rsb_set.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_idx.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr.c -o librsb_base_la-rsb_csr.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srt.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_check.c -o librsb_base_la-rsb_coo_check.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -o librsb_base_la-rsb_coo_symm.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -o librsb_base_la-rsb_coo.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_idx.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srt.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -o librsb_base_la-rsb_coo_symm.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srtp.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srtp.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_src.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_get.c -o librsb_base_la-rsb_get.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_src.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_test_accuracy.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_test_accuracy.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -o librsb_base_la-rsb_src.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -o librsb_base_la-rsb_srt.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_clone.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_clone.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -o librsb_base_la-rsb_src.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -o librsb_base_la-rsb_idx.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srtp.c -o librsb_base_la-rsb_srtp.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_test_accuracy.c -o librsb_base_la-rsb_test_accuracy.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -o librsb_base_la-rsb_srt.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -o librsb_base_la-rsb_idx.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_eps.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_render.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_eps.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_clone.c -o librsb_base_la-rsb_clone.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -o librsb_base_la-rsb_render.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_msort_up.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_msort_up.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_sys.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -o librsb_base_la-rsb_render.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_blas_stuff.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_blas_stuff.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_msort_up.c -o librsb_base_la-rsb_msort_up.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -o librsb_base_la-rsb_sys.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_gen.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -o librsb_base_la-rsb_rec.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_blas_stuff.c -o librsb_base_la-rsb_blas_stuff.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -o librsb_base_la-rsb_eps.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -o librsb_base_la-rsb_rec.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_perf.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_perf.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -o librsb_base_la-rsb_sys.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rsb.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_err.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_tune.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_perf.c -o librsb_base_la-rsb_perf.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -o librsb_base_la-rsb_gen.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -o librsb_base_la-rsb_err.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -o librsb_base_la-rsb_eps.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -o librsb_base_la-rsb_rsb.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_do.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_internals.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_garbage.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mmio.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_partition.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -o librsb_base_la-rsb_garbage.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -o librsb_base_la-rsb_mmio.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -o librsb_base_la-rsb_partition.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -o librsb_base_la-rsb_do.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mbw.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_limiter.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_fpb.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spgemm.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsum.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -o librsb_base_la-rsb_limiter.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c rsb_test_matops.c: In function 'rsb__main_block_partitioned_spmv_sxsa': rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", @@ -2168,113 +2239,80 @@ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | __va_arg_pack ()); | ~~~~~~~~~~~~~~~~~ -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rsb.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_perf.c -o librsb_base_la-rsb_perf.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_err.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -o librsb_base_la-rsb_gen.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -o librsb_base_la-rsb_rsb.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -o librsb_base_la-rsb_err.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_tune.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_do.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_internals.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_garbage.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mmio.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_partition.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mbw.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_limiter.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -o librsb_base_la-rsb_partition.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -o librsb_base_la-rsb_garbage.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -o librsb_base_la-rsb_mmio.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -o librsb_base_la-rsb_do.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -o librsb_base_la-rsb_limiter.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_fpb.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spgemm.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsum.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -o librsb_base_la-rsb_tune.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -o librsb_base_la-rsb_internals.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsv.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -o librsb_base_la-rsb_mbw.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -o librsb_base_la-rsb_spsum.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -o librsb_base_la-rsb_fpb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_lock.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -o librsb_base_la-rsb_spgemm.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -o librsb_base_la-rsb_internals.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -o librsb_base_la-rsb_spsv.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_lock.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -o librsb_base_la-rsb_mbw.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_swt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_swt.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -o librsb_base_la-rsb_spsv.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -o librsb_base_la-rsb_lock.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_init.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_init.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_dump.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_swt.c -o librsb_base_la-rsb_swt.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_cpmv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_cpmv.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_dump.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_asm.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_user.o +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -o librsb_base_la-rsb_dump.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_init.c -o librsb_base_la-rsb_init.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_asm.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_cpmv.c -o librsb_base_la-rsb_cpmv.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -o librsb_base_la-rsb_asm.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_user.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo2rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo2rec.o -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -o librsb_base_la-rsb_dump.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -o librsb_base_la-rsb_user.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -o librsb_base_la-rsb_asm.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2coo.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2csr.o -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr2coo.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -fPIC -DPIC -o .libs/rsb_libspblas_handle.o libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb_blas_sparse.F90 -fPIC -o .libs/rsb_blas_sparse.o -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2csr.c -o librsb_base_la-rsb_rec2csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas.c -fPIC -DPIC -o .libs/rsb_libspblas.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -o librsb_base_la-rsb_user.o >/dev/null 2>&1 gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb_blas_sparse.o rsb_blas_sparse.F90 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2coo.c -o librsb_base_la-rsb_rec2coo.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -fPIC -DPIC -o .libs/rsb_libspblas_handle.o +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2csr.c -o librsb_base_la-rsb_rec2csr.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr2coo.c -o librsb_base_la-rsb_csr2coo.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas.c -fPIC -DPIC -o .libs/rsb_libspblas.o -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info' is ignored for programs libtool: link: gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -Wl,-z -Wl,relro -o ch2icfb ch2icfb-ch2icfb.o -fopenmp -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp if test -f ./rsb_types.h -a ! -f ./rsb_types.h ; then cp -pv ./rsb_types.h ./rsb_types.h ; fi # out-of-dir behaviour varies between installations -if test -f ch2icfb ; then if SED=/bin/sed GREP=/bin/grep /bin/bash ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb.lo rsb.F90 +if test -f ch2icfb ; then if SED=/bin/sed GREP=/bin/grep /bin/sh ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb.lo rsb.F90 gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb.o rsb.F90 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo2rec.c -o librsb_base_la-rsb_coo2rec.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -o rsb_libspblas_handle.o >/dev/null 2>&1 libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb.F90 -fPIC -o .libs/rsb.o libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb.F90 -o rsb.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -o rsb_libspblas_handle.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas.c -o rsb_libspblas.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -o librsb_nounroll_la-rsb_util.o >/dev/null 2>&1 libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb_blas_sparse.F90 -o rsb_blas_sparse.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -o librsb_nounroll_la-rsb_util.o >/dev/null 2>&1 +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_spblas.a .libs/rsb_libspblas_handle.o .libs/rsb_libspblas.o libtool: link: ranlib .libs/librsb_spblas.a libtool: link: ( cd ".libs" && rm -f "librsb_spblas.la" && ln -s "../librsb_spblas.la" "librsb_spblas.la" ) -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_base.a .libs/librsb_base_la-rsb_is.o .libs/librsb_base_la-rsb_mio.o .libs/librsb_base_la-rsb_op.o .libs/librsb_base_la-rsb_bio.o .libs/librsb_base_la-rsb_get.o .libs/librsb_base_la-rsb_set.o .libs/librsb_base_la-rsb_coo.o .libs/librsb_base_la-rsb_csr.o .libs/librsb_base_la-rsb_coo_check.o .libs/librsb_base_la-rsb_coo_symm.o .libs/librsb_base_la-rsb_idx.o .libs/librsb_base_la-rsb_srt.o .libs/librsb_base_la-rsb_srtp.o .libs/librsb_base_la-rsb_src.o .libs/librsb_base_la-rsb_test_accuracy.o .libs/librsb_base_la-rsb_clone.o .libs/librsb_base_la-rsb_rec.o .libs/librsb_base_la-rsb_render.o .libs/librsb_base_la-rsb_eps.o .libs/librsb_base_la-rsb_msort_up.o .libs/librsb_base_la-rsb_sys.o .libs/librsb_base_la-rsb_blas_stuff.o .libs/librsb_base_la-rsb_gen.o .libs/librsb_base_la-rsb_perf.o .libs/librsb_base_la-rsb_rsb.o .libs/librsb_base_la-rsb_err.o .libs/librsb_base_la-rsb_tune.o .libs/librsb_base_la-rsb_do.o .libs/librsb_base_la-rsb_internals.o .libs/librsb_base_la-rsb_garbage.o .libs/librsb_base_la-rsb_mmio.o .libs/librsb_base_la-rsb_partition.o .libs/librsb_base_la-rsb_mbw.o .libs/librsb_base_la-rsb_limiter.o .libs/librsb_base_la-rsb_fpb.o .libs/librsb_base_la-rsb_spgemm.o .libs/librsb_base_la-rsb_spsum.o .libs/librsb_base_la-rsb_spsv.o .libs/librsb_base_la-rsb_lock.o .libs/librsb_base_la-rsb_swt.o .libs/librsb_base_la-rsb_init.o .libs/librsb_base_la-rsb_dump.o .libs/librsb_base_la-rsb_cpmv.o .libs/librsb_base_la-rsb_asm.o .libs/librsb_base_la-rsb_user.o .libs/librsb_base_la-rsb_coo2rec.o .libs/librsb_base_la-rsb_rec2coo.o .libs/librsb_base_la-rsb_rec2csr.o .libs/librsb_base_la-rsb_csr2coo.o .libs/rsb_blas_sparse.o libtool: link: ranlib .libs/librsb_base.a libtool: link: ( cd ".libs" && rm -f "librsb_base.la" && ln -s "../librsb_base.la" "librsb_base.la" ) +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.o >/dev/null 2>&1 +libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -o librsb_nounroll_la-rsb_krnl.o >/dev/null 2>&1 rsb_test_matops.c: In function 'rsb__main_block_partitioned_spsv_sxsx': rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", @@ -2289,8 +2327,6 @@ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | __va_arg_pack ()); | ~~~~~~~~~~~~~~~~~ -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -o librsb_nounroll_la-rsb_krnl.o >/dev/null 2>&1 -libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.o >/dev/null 2>&1 rsb_test_matops.c: In function 'rsb__main_block_partitioned_mat_stats': rsb_test_matops.c:214:41: warning: '%s' directive writing up to 2047 bytes into a region of size 1011 [-Wformat-overflow=] 214 | rsb__sprintf(dst,"%s%s_%s_%.0lf_%s%s" "%s%s%s" "%s%s" "%s",pp?pp:"",h,rsb__getenv_nnr("HOSTNAME"),rsb_time(),buf,ap?ap:"", @@ -2307,12 +2343,12 @@ | ~~~~~~~~~~~~~~~~~ libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o >/dev/null 2>&1 libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_nounroll.a .libs/librsb_nounroll_la-rsb_stropts.o .libs/librsb_nounroll_la-rsb_strmif.o .libs/librsb_nounroll_la-rsb_unroll.o .libs/librsb_nounroll_la-rsb_krnl_vb.o .libs/librsb_nounroll_la-rsb_krnl_lb.o .libs/librsb_nounroll_la-rsb_krnl.o .libs/librsb_nounroll_la-rsb_bench.o .libs/librsb_nounroll_la-rsb_mergesort.o .libs/librsb_nounroll_la-rsb_permute.o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss.o .libs/librsb_nounroll_la-rsb_spmv.o .libs/librsb_nounroll_la-rsb_merge.o .libs/librsb_nounroll_la-rsb_ompio.o .libs/librsb_nounroll_la-rsb_util.o .libs/librsb_nounroll_la-rsb_spgemm_csr.o .libs/librsb_nounroll_la-rsb_spsum_misc.o .libs/librsb_nounroll_la-rsb_prec.o libtool: link: ranlib .libs/librsb_nounroll.a libtool: link: ( cd ".libs" && rm -f "librsb_nounroll.la" && ln -s "../librsb_nounroll.la" "librsb_nounroll.la" ) -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/x86_64-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/x86_64-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: gcc -shared -fPIC -DPIC .libs/rsb.o -Wl,--whole-archive ./.libs/librsb_nounroll.a ./.libs/librsb_base.a ./.libs/librsb_spblas.a /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/.libs/librsbpp.a -Wl,--no-whole-archive -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lpthread -lstdc++fs -lstdc++ -lhwloc -lz -lgfortran -lm -lquadmath -g -O2 -fstack-protector-strong -O3 -Wl,-z -Wl,relro -fopenmp -fopenmp -Wl,-soname -Wl,librsb.so.0 -o .libs/librsb.so.0.0.0 libtool: link: (cd ".libs" && rm -f "librsb.so.0" && ln -s "librsb.so.0.0.0" "librsb.so.0") libtool: link: (cd ".libs" && rm -f "librsb.so" && ln -s "librsb.so.0.0.0" "librsb.so") @@ -2324,7 +2360,7 @@ libtool: link: ranlib .libs/librsb.a libtool: link: rm -fr .libs/librsb.lax libtool: link: ( cd ".libs" && rm -f "librsb.la" && ln -s "../librsb.la" "librsb.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info' is ignored for programs libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o -fopenmp ./.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' @@ -2341,27 +2377,27 @@ gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o fortran.o fortran.F90 gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o fortran_rsb_fi.o fortran_rsb_fi.F90 g++ -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -c -o cplusplus.o cplusplus.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello-spblas hello-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp -libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/io-spblas io-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp -libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/transpose transpose.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello hello.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/power power.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp +libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/io-spblas io-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/backsolve backsolve.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/transpose transpose.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/fortran_rsb_fi fortran_rsb_fi.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp +libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello-spblas hello-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/autotune autotune.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp -libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/fortran fortran.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/snippets snippets.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/fortran fortran.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/cplusplus cplusplus.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/x86_64-linux-gnu/12 -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../x86_64-linux-gnu -L/usr/lib/gcc/x86_64-linux-gnu/12/../../../../lib -L/lib/x86_64-linux-gnu -L/lib/../lib -L/usr/lib/x86_64-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/x86_64-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' Making all in scripts @@ -2381,9 +2417,9 @@ /bin/mkdir -p man /bin/mkdir -p man gmake makedox -SOURCE_DATE_EPOCH=1704070861 \ +SOURCE_DATE_EPOCH=1735693261 \ help2man --name="benchmark and test for librsb" --no-info ../rsbench | /bin/sed 's/January //g' > man/rsbench.1 -SOURCE_DATE_EPOCH=1704070861 \ +SOURCE_DATE_EPOCH=1735693261 \ help2man --name="provide configuration information for librsb" --no-info /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb-config | /bin/sed 's/January //g' > man/librsb-config.1 gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/doc' DOXYGEN_PROJECT_NUMBER=1.3.0.2 doxygen Doxyfile || echo "are you sure you have doxygen installed ?" @@ -3026,13 +3062,13 @@ g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt_ne-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt_rv-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp libtool: link: g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' Making all in examples @@ -3047,25 +3083,25 @@ g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o mtx2bin.o mtx2bin.cpp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o render.o render.cpp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o span.o span.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib/examples' gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' @@ -3075,7 +3111,7 @@ gmake all-am gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' g++ -DHAVE_CONFIG_H -I. -I/build/reproducible-path/librsb-1.3.0.2+dfsg -Wdate-time -D_FORTIFY_SOURCE=2 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rsbtest.o rsbtest.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lstdc++fs -fopenmp gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' @@ -3151,7 +3187,7 @@ gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake mtests -C . gmake[3]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash -ex ./scripts/readme-tests.sh +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh -ex ./scripts/readme-tests.sh + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x + ./rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # --bench option implies -qH -R --write-performance-record --want-mkl-autotune --mkl-benchmark --types : --split-experimental 6 --merge-experimental 6 --also-transpose --sort-filenames-list --want-memory-benchmark @@ -3161,15 +3197,15 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1705165324 +# beginning run at 1739579122 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# average timer granularity: 5.96e-08 s -# Will write a final performance record to file rsbench_pr__1705165324_gcc-12.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1705165324_gcc-12.2-1,4th.rpr.tmp +# average timer granularity: 6.18e-08 s +# Will write a final performance record to file rsbench_pr__1739579122_gcc-12.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1739579122_gcc-12.2-1,4th.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -3208,61 +3244,61 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos11-amd64 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.682s +# Memory benchmark took 6.368s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 8 samples (2240 bytes). -# Cache block size total 524288 bytes, per-thread 34952 bytes -# so far, program took 5.685s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 524288 bytes, per-thread 32768 bytes +# so far, program took 6.371s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type Z... -# file input of A.mtx took 0.00 s (6 nnz, 43767 nnz/s ) (1.34 MB/s ) -#pre-sorting (6 elements) took 0.025672 s -#weeding duplicates (to 6 elements) took 4.05312e-06 s (and check, 3.09944e-06 s ) +# file input of A.mtx took 0.00 s (6 nnz, 35296 nnz/s ) (1.08 MB/s ) +#pre-sorting (6 elements) took 0.0486271 s +#weeding duplicates (to 6 elements) took 4.05312e-06 s (and check, 2.14577e-06 s ) # multi-nrhs benchmarking (1,2) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.092s): (3 x 3)[0x55afcfc25060]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.124s): (3 x 3)[0x5588997453f0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 0.04799s; avg 0.016s ( +/- 0.07/ 0.10 %); best 0.01599s; worst 0.01601s; std dev. 1.153e-05 (taking best). -Reference operation time is 0.015986 s (0.006005 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 4.387e-05s, ~1.502e-05s of computing time (of which 9.537e-07s sorting, 7.868e-06s analysis) -3 iterations (1 th.) took 5.293e-05s; avg 1.764e-05s ( +/- 94.59/189.19 %); best 9.537e-07s; worst 5.102e-05s; std dev. 2.36e-05 (taking best). -Reference operation time is 9.53674e-07 s (100.7 Mflops) with 1 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 16762.500x: 0.01599s -> 9.537e-07s, so taking this instance. +3 iterations (1 th.) took 0.07997s; avg 0.02666s ( +/- 40.00/ 20.05 %); best 0.016s; worst 0.032s; std dev. 0.007539 (taking best). +Reference operation time is 0.015995 s (0.006002 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 4.601e-05s, ~1.502e-05s of computing time (of which 1.907e-06s sorting, 9.06e-06s analysis) +3 iterations (1 th.) took 5.293e-05s; avg 1.764e-05s ( +/- 99.65/194.59 %); best 6.109e-08s; worst 5.198e-05s; std dev. 2.428e-05 (taking best). +Reference operation time is 6.10948e-08 s (1571 Mflops) with 1 threads. +After merge step 1: tpop: 6.109e-08 s ~Mflops: 1571.329 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 261806.829x: 0.016s -> 6.109e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04799s (of which 5.007e-05s partitioning, 0s I/O); computing times: 1.502e-05s in par. loops, 9.537e-07s sorting, 7.868e-06s analyzing) -Total merge + benchmarking process took 0.04799s, equivalent to 50321.2/3.0 new/old ops (0.09574s for 2 clones -- as 100394.8/6.0 ops, or 50197.4/3.0 ops per clone), SPEEDUP of 16762.500x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 16762.500x (0.01599s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1676150.0% (1.676e+04x) (from 0.006005 to 100.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.064s (of which 5.221e-05s partitioning, 0s I/O); computing times: 1.502e-05s in par. loops, 1.907e-06s sorting, 9.06e-06s analyzing) +Total merge + benchmarking process took 0.064s, equivalent to 1047602.0/4.0 new/old ops (0.1437s for 2 clones -- as 2352655.6/9.0 ops, or 1176327.8/4.5 ops per clone), SPEEDUP of 261806.829x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 261806.829x (0.016s -> 6.109e-08s), will amortize in 4.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.22s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 26180582.9% (2.618e+05x) (from 0.006002 to 1571 Mflops). #pr: updating sample at index 1 (0^th of 8), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.144034 s (1.599e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.223979 s (1.600e-02 s -> 6.109e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.795902 s and estimated a speedup of 1.000000 x (6.205e-08 s -> 6.205e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 1.25592 s and estimated a speedup of 1.000000 x (6.109e-08 s -> 6.109e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.027704 0.031971 0.059675 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.059675 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.027704 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.031971 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.023696 0.047987 0.071683 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.071683 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.023696 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.047987 %:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.059675 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.071683 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 @@ -3277,47 +3313,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.100s): (3 x 3)[0x55afcfc29760]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.276s): (3 x 3)[0x55889974a140]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 0.048s; avg 0.016s ( +/- 0.03/ 0.03 %); best 0.01599s; worst 0.016s; std dev. 3.407e-06 (taking best). -Reference operation time is 0.0159948 s (0.006002 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 2.885e-05s, ~6.914e-06s of computing time (of which 1.907e-06s sorting, 6.914e-06s analysis) -3 iterations (4 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 97.95/160.53 %); best 6.205e-08s; worst 7.868e-06s; std dev. 3.462e-06 (taking best). -Reference operation time is 6.20484e-08 s (1547 Mflops) with 4 threads. -After merge step 1: tpop: 6.205e-08 s ~Mflops: 1547.178 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 257779.059x: 0.01599s -> 6.205e-08s, so taking this instance. +3 iterations (4 th.) took 0.06401s; avg 0.02134s ( +/- 43.76/ 31.34 %); best 0.012s; worst 0.02802s; std dev. 0.006804 (taking best). +Reference operation time is 0.0119989 s (0.008001 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.012 Mflops: 0.008) +Merge (3 -> 1 leaves) took w.c.t. of 2.599e-05s, ~8.106e-06s of computing time (of which 9.537e-07s sorting, 5.96e-06s analysis) +3 iterations (4 th.) took 7.868e-06s; avg 2.623e-06s ( +/- 97.67/163.64 %); best 6.109e-08s; worst 6.914e-06s; std dev. 3.059e-06 (taking best). +Reference operation time is 6.10948e-08 s (1571 Mflops) with 4 threads. +After merge step 1: tpop: 6.109e-08 s ~Mflops: 1571.329 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 196398.049x: 0.012s -> 6.109e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 3.386e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 1.907e-06s sorting, 6.914e-06s analyzing) -Total merge + benchmarking process took 0.04798s, equivalent to 773333.3/3.0 new/old ops (0.09585s for 2 clones -- as 1544776.2/6.0 ops, or 772388.1/3.0 ops per clone), SPEEDUP of 257779.059x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 257779.059x (0.01599s -> 6.205e-08s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 25777805.9% (2.578e+05x) (from 0.006002 to 1547 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.08798s (of which 3.004e-05s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 9.537e-07s sorting, 5.96e-06s analyzing) +Total merge + benchmarking process took 0.08798s, equivalent to 1439992.2/7.3 new/old ops (0.1399s for 2 clones -- as 2289131.7/11.7 ops, or 1144565.9/5.8 ops per clone), SPEEDUP of 196398.049x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 196398.049x (0.012s -> 6.109e-08s), will amortize in 7.3 ops by saving 0.012s per op. +In 1 tuning rounds (tot. 0.2s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 19639704.9% (1.964e+05x) (from 0.008001 to 1571 Mflops). #pr: updating sample at index 5 (1^th of 8), 0^th touch for (0,1,0,0,0,0,0). -First run of RSB Autotuner took 0.144022 s (1.599e-02 s -> 6.205e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.204028 s (1.200e-02 s -> 6.109e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.807932 s and estimated a speedup of 1.000000 x (6.205e-08 s -> 6.205e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 1.13995 s and estimated a speedup of 1.000000 x (6.109e-08 s -> 6.109e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.031729 0.036000 0.067729 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.067729 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.031729 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.036000 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.047527 0.175983 0.223510 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.223510 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.047527 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.175983 %:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.067729 +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.223510 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.88 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.88 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.87 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.89 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.87 0.89 0.88 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.32 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.32 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.50 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.27 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.50 0.27 0.32 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3326,50 +3362,50 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 0.091687 1e+09 1e+09 +%operation:A.mtx 0.123725 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 0 0.027704 0 0.031971 +%constructor:A.mtx 0 0.0236962 0 0.047987 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,2) -- now using nrhs 2. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.080s): (3 x 3)[0x55afcfc2ead0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.128s): (3 x 3)[0x55889974a140]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 0.04801s; avg 0.016s ( +/- 0.19/ 0.21 %); best 0.01597s; worst 0.01604s; std dev. 2.661e-05 (taking best). -Reference operation time is 0.0159731 s (0.01202 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01597 Mflops: 0.012) -Merge (3 -> 1 leaves) took w.c.t. of 2.503e-05s, ~6.914e-06s of computing time (of which 9.537e-07s sorting, 5.96e-06s analysis) -3 iterations (1 th.) took 5.96e-06s; avg 1.987e-06s ( +/- 96.88/152.00 %); best 6.205e-08s; worst 5.007e-06s; std dev. 2.171e-06 (taking best). -Reference operation time is 6.20484e-08 s (3094 Mflops) with 1 threads. -After merge step 1: tpop: 6.205e-08 s ~Mflops: 3094.357 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 257429.395x: 0.01597s -> 6.205e-08s, so taking this instance. +3 iterations (1 th.) took 0.06398s; avg 0.02133s ( +/- 24.99/ 12.50 %); best 0.016s; worst 0.02399s; std dev. 0.00377 (taking best). +Reference operation time is 0.0159972 s (0.012 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.012) +Merge (3 -> 1 leaves) took w.c.t. of 2.503e-05s, ~8.106e-06s of computing time (of which 9.537e-07s sorting, 5.007e-06s analysis) +3 iterations (1 th.) took 8.821e-06s; avg 2.94e-06s ( +/- 97.92/167.57 %); best 6.109e-08s; worst 7.868e-06s; std dev. 3.506e-06 (taking best). +Reference operation time is 6.10948e-08 s (3143 Mflops) with 1 threads. +After merge step 1: tpop: 6.109e-08 s ~Mflops: 3142.659 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 261841.951x: 0.016s -> 6.109e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04799s (of which 2.789e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 9.537e-07s sorting, 5.96e-06s analyzing) -Total merge + benchmarking process took 0.04799s, equivalent to 773394.8/3.0 new/old ops (0.09586s for 2 clones -- as 1544991.4/6.0 ops, or 772495.7/3.0 ops per clone), SPEEDUP of 257429.395x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 257429.395x (0.01597s -> 6.205e-08s), will amortize in 3.0 ops by saving 0.01597s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 25742839.5% (2.574e+05x) (from 0.01202 to 3094 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06398s (of which 3.004e-05s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 9.537e-07s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.06398s, equivalent to 1047192.2/4.0 new/old ops (0.1309s for 2 clones -- as 2141853.7/8.2 ops, or 1070926.8/4.1 ops per clone), SPEEDUP of 261841.951x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 261841.951x (0.016s -> 6.109e-08s), will amortize in 4.0 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.19s, 0.13s for constructor, 2 clones) obtained a SPEEDUP of 26184095.1% (2.618e+05x) (from 0.012 to 3143 Mflops). #pr: updating sample at index 3 (2^th of 8), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.144027 s (1.597e-02 s -> 6.205e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.195003 s (1.600e-02 s -> 6.109e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.799945 s and estimated a speedup of 1.000000 x (6.205e-08 s -> 6.205e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 1.07197 s and estimated a speedup of 1.000000 x (6.109e-08 s -> 6.109e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.015708 0.031979 0.047687 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.047687 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.015708 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.031979 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.035483 0.043967 0.079450 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.079450 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.035483 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.043967 %:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.047687 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.079450 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 @@ -3384,47 +3420,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.115s): (3 x 3)[0x55afcfc2ead0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.139s): (3 x 3)[0x55889974ec50]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 0.07199s; avg 0.024s ( +/- 33.33/ 33.34 %); best 0.016s; worst 0.032s; std dev. 0.006531 (taking best). -Reference operation time is 0.0159991 s (0.012 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.012) -Merge (3 -> 1 leaves) took w.c.t. of 2.503e-05s, ~7.153e-06s of computing time (of which 1.907e-06s sorting, 5.007e-06s analysis) -3 iterations (4 th.) took 8.106e-06s; avg 2.702e-06s ( +/- 97.70/164.71 %); best 6.205e-08s; worst 7.153e-06s; std dev. 3.171e-06 (taking best). -Reference operation time is 6.20484e-08 s (3094 Mflops) with 4 threads. -After merge step 1: tpop: 6.205e-08 s ~Mflops: 3094.357 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 257848.223x: 0.016s -> 6.205e-08s, so taking this instance. +3 iterations (4 th.) took 0.06003s; avg 0.02001s ( +/- 0.23/ 0.33 %); best 0.01996s; worst 0.02008s; std dev. 4.773e-05 (taking best). +Reference operation time is 0.019964 s (0.009617 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01996 Mflops: 0.010) +Merge (3 -> 1 leaves) took w.c.t. of 2.408e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 5.007e-06s analysis) +3 iterations (4 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 97.98/168.42 %); best 6.109e-08s; worst 8.106e-06s; std dev. 3.618e-06 (taking best). +Reference operation time is 6.10948e-08 s (3143 Mflops) with 4 threads. +After merge step 1: tpop: 6.109e-08 s ~Mflops: 3142.659 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 326770.732x: 0.01996s -> 6.109e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.048s (of which 2.909e-05s partitioning, 0s I/O); computing times: 7.153e-06s in par. loops, 1.907e-06s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.048s, equivalent to 773556.2/3.0 new/old ops (0.09242s for 2 clones -- as 1489513.9/5.8 ops, or 744757.0/2.9 ops per clone), SPEEDUP of 257848.223x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 257848.223x (0.016s -> 6.205e-08s), will amortize in 3.0 ops by saving 0.016s per op. -In 1 tuning rounds (tot. 0.16s, 0.092s for constructor, 2 clones) obtained a SPEEDUP of 25784722.3% (2.578e+05x) (from 0.012 to 3094 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05999s (of which 2.813e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 2.146e-06s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.05999s, equivalent to 981853.7/3.0 new/old ops (0.1198s for 2 clones -- as 1961295.6/6.0 ops, or 980647.8/3.0 ops per clone), SPEEDUP of 326770.732x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 326770.732x (0.01996s -> 6.109e-08s), will amortize in 3.0 ops by saving 0.01996s per op. +In 1 tuning rounds (tot. 0.18s, 0.12s for constructor, 2 clones) obtained a SPEEDUP of 32676973.2% (3.268e+05x) (from 0.009617 to 3143 Mflops). #pr: updating sample at index 7 (3^th of 8), 0^th touch for (0,1,0,0,1,0,0). -First run of RSB Autotuner took 0.164593 s (1.600e-02 s -> 6.205e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.180009 s (1.996e-02 s -> 6.109e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.823931 s and estimated a speedup of 1.000000 x (6.205e-08 s -> 6.205e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 1.03997 s and estimated a speedup of 1.000000 x (6.109e-08 s -> 6.109e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.035535 0.048006 0.083541 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.083541 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.035535 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.048006 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.035645 0.068688 0.104333 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.104333 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.035645 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.068688 %:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.083541 +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.104333 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.57 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.57 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.44 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.67 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.44 0.67 0.57 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.76 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.76 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.64 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 1.00 0.64 0.76 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3433,185 +3469,190 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 0.0797012 1e+09 1e+09 +%operation:A.mtx 0.12846 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 0 0.015708 0 0.0319791 +%constructor:A.mtx 0 0.0354831 0 0.043967 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 10.244s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.824s/0.000s . +# so far, program took 12.860s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 5.311s/0.000s . getrusage() stats: ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.03507s (system CPU time used) -ru_utime : 47.8s (user CPU time used) +ru_stime : 0.01179s (system CPU time used) +ru_utime : 44.36s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1547.18 1.599e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 4.32e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1571.33 1.600e-02 0.000e+00 6.109e-08 0.000e+00 2.240e-01 4.39e+00 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1571.33 1.200e-02 0.000e+00 6.109e-08 0.000e+00 2.040e-01 4.39e+00 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 13726977.9 % faster, avg. sp. ratio 137270.779x, max sp. ratio 257779.059x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1236076.2/151030.5/2321122.0/2472152.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 18.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 22910143.9 % faster, avg. sp. ratio 229102.439x, max sp. ratio 261806.829x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3502813.7/3339535.6/3666091.7/7005627.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 15.5/ 14.0/ 17.0/ 31.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 15.5, min. 14.0, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.888/ 0.231/ 3.546,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 4.600/ 0.281/ 4.319,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.601/ 3.601/ 3.601,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.773/ 4.387/ 4.387,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 8.239e+02, min 1.007e+02, max 1.547e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 6.004e-03, min 6.002e-03, max 6.005e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.079e-07s, min 6.205e-08s, max 9.537e-07s, tot 1.016e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.599e-02s, tot 3.198e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.159e+00 3.319e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.826e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.21 s, min 0.20 s, max 0.22 s, tot 0.43 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.21 s, min 0.20 s, max 0.22 s, tot 0.43 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.571e+03, min 1.571e+03, max 1.571e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 7.001e-03, min 6.002e-03, max 8.001e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 6.109e-08s, min 6.109e-08s, max 6.109e-08s, tot 1.222e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.400e-02s, min 1.200e-02s, max 1.600e-02s, tot 2.799e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 8.670e-01 8.670e-01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.225e+00 #pr: ======== Limiting to nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3094.36 1.597e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 6.64e+00 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3094.36 1.600e-02 0.000e+00 6.205e-08 0.000e+00 1.646e-01 6.64e+00 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3142.66 1.600e-02 0.000e+00 6.109e-08 0.000e+00 1.950e-01 6.74e+00 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3142.66 1.996e-02 0.000e+00 6.109e-08 0.000e+00 1.800e-01 6.74e+00 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 25763780.9 % faster, avg. sp. ratio 257638.809x, max sp. ratio 257848.223x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2486928.0/2321202.7/2652653.2/4973855.9 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.7/ 9.0/ 10.3/ 19.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.7, min. 9.0, max. 10.3 ops) +#pr: (in succ. cases rsb autotuning gave avg. 29430534.1 % faster, avg. sp. ratio 294306.341x, max sp. ratio 326770.732x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3069100.5/2946388.3/3191812.7/6138201.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.6/ 9.0/ 12.2/ 21.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 10.6, min. 9.0, max. 12.2 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.093/ 5.093/ 5.093,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 13.280/ 6.640/ 6.640,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.172/ 5.172/ 5.172,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 13.487/ 6.744/ 6.744,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.31 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.31 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.094e+03, min 3.094e+03, max 3.094e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.201e-02, min 1.200e-02, max 1.202e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.205e-08s, min 6.205e-08s, max 6.205e-08s, tot 1.241e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.597e-02s, max 1.600e-02s, tot 3.197e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.503e+00 1.503e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.826e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.637e+01 x, min 2.000e+00 x, max 3.074e+01 x (2 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.19 s, min 0.18 s, max 0.20 s, tot 0.38 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.19 s, min 0.18 s, max 0.20 s, tot 0.38 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 3.143e+03, min 3.143e+03, max 3.143e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.081e-02, min 9.617e-03, max 1.200e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 6.109e-08s, min 6.109e-08s, max 6.109e-08s, tot 1.222e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.798e-02s, min 1.600e-02s, max 1.996e-02s, tot 3.596e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 6.036e-01 6.036e-01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.225e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=N: #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3094.36 1.597e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 6.64e+00 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1547.18 1.599e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 4.32e+00 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3094.36 1.600e-02 0.000e+00 6.205e-08 0.000e+00 1.646e-01 6.64e+00 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1571.33 1.600e-02 0.000e+00 6.109e-08 0.000e+00 2.240e-01 4.39e+00 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3142.66 1.600e-02 0.000e+00 6.109e-08 0.000e+00 1.950e-01 6.74e+00 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1571.33 1.200e-02 0.000e+00 6.109e-08 0.000e+00 2.040e-01 4.39e+00 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3142.66 1.996e-02 0.000e+00 6.109e-08 0.000e+00 1.800e-01 6.74e+00 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 19745379.4 % faster, avg. sp. ratio 197454.794x, max sp. ratio 257848.223x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1861502.1/151030.5/2652653.2/7446008.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 9.0/ 10.3/ 37.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.3, min. 9.0, max. 10.3 ops) +#pr: (in succ. cases rsb autotuning gave avg. 26170339.0 % faster, avg. sp. ratio 261704.390x, max sp. ratio 326770.732x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3285957.1/2946388.3/3666091.7/13143828.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13.1/ 9.0/ 17.0/ 52.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 13.1, min. 9.0, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.490/ 0.231/ 5.093,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 17.880/ 0.281/ 6.640,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.387/ 3.601/ 5.172,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 22.261/ 4.387/ 6.744,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.60 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.60 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.959e+03, min 1.007e+02, max 3.094e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 9.007e-03, min 6.002e-03, max 1.202e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.850e-07s, min 6.205e-08s, max 9.537e-07s, tot 1.140e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.597e-02s, max 1.600e-02s, tot 6.395e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.503e+00 3.319e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.826e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.637e+01 x, min 2.000e+00 x, max 3.074e+01 x (2 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.20 s, min 0.18 s, max 0.22 s, tot 0.80 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.20 s, min 0.18 s, max 0.22 s, tot 0.80 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.357e+03, min 1.571e+03, max 3.143e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 8.906e-03, min 6.002e-03, max 1.200e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 6.109e-08s, min 6.109e-08s, max 6.109e-08s, tot 2.444e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.200e-02s, max 1.996e-02s, tot 6.396e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 6.036e-01 8.670e-01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.225e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1547.18 1.599e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 4.32e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1571.33 1.600e-02 0.000e+00 6.109e-08 0.000e+00 2.240e-01 4.39e+00 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1571.33 1.200e-02 0.000e+00 6.109e-08 0.000e+00 2.040e-01 4.39e+00 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 13726977.9 % faster, avg. sp. ratio 137270.779x, max sp. ratio 257779.059x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1236076.2/151030.5/2321122.0/2472152.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 18.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 22910143.9 % faster, avg. sp. ratio 229102.439x, max sp. ratio 261806.829x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3502813.7/3339535.6/3666091.7/7005627.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 15.5/ 14.0/ 17.0/ 31.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 15.5, min. 14.0, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.888/ 0.231/ 3.546,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 4.600/ 0.281/ 4.319,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.601/ 3.601/ 3.601,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.773/ 4.387/ 4.387,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 8.239e+02, min 1.007e+02, max 1.547e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 6.004e-03, min 6.002e-03, max 6.005e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.079e-07s, min 6.205e-08s, max 9.537e-07s, tot 1.016e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.599e-02s, tot 3.198e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.159e+00 3.319e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.826e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.21 s, min 0.20 s, max 0.22 s, tot 0.43 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.21 s, min 0.20 s, max 0.22 s, tot 0.43 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.571e+03, min 1.571e+03, max 1.571e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 7.001e-03, min 6.002e-03, max 8.001e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 6.109e-08s, min 6.109e-08s, max 6.109e-08s, tot 1.222e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.400e-02s, min 1.200e-02s, max 1.600e-02s, tot 2.799e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 8.670e-01 8.670e-01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.225e+00 #pr: ======== Limiting to both transA=N and nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3094.36 1.597e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 6.64e+00 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3094.36 1.600e-02 0.000e+00 6.205e-08 0.000e+00 1.646e-01 6.64e+00 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3142.66 1.600e-02 0.000e+00 6.109e-08 0.000e+00 1.950e-01 6.74e+00 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3142.66 1.996e-02 0.000e+00 6.109e-08 0.000e+00 1.800e-01 6.74e+00 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 25763780.9 % faster, avg. sp. ratio 257638.809x, max sp. ratio 257848.223x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2486928.0/2321202.7/2652653.2/4973855.9 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.7/ 9.0/ 10.3/ 19.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.7, min. 9.0, max. 10.3 ops) +#pr: (in succ. cases rsb autotuning gave avg. 29430534.1 % faster, avg. sp. ratio 294306.341x, max sp. ratio 326770.732x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3069100.5/2946388.3/3191812.7/6138201.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.6/ 9.0/ 12.2/ 21.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 10.6, min. 9.0, max. 12.2 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.093/ 5.093/ 5.093,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 13.280/ 6.640/ 6.640,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.172/ 5.172/ 5.172,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 13.487/ 6.744/ 6.744,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.31 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.31 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.094e+03, min 3.094e+03, max 3.094e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.201e-02, min 1.200e-02, max 1.202e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.205e-08s, min 6.205e-08s, max 6.205e-08s, tot 1.241e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.597e-02s, max 1.600e-02s, tot 3.197e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.503e+00 1.503e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.826e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.637e+01 x, min 2.000e+00 x, max 3.074e+01 x (2 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.19 s, min 0.18 s, max 0.20 s, tot 0.38 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.19 s, min 0.18 s, max 0.20 s, tot 0.38 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 3.143e+03, min 3.143e+03, max 3.143e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.081e-02, min 9.617e-03, max 1.200e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 6.109e-08s, min 6.109e-08s, max 6.109e-08s, tot 1.222e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.798e-02s, min 1.600e-02s, max 1.996e-02s, tot 3.596e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 6.036e-01 6.036e-01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.225e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=T: #pr: No sample (out of 4) matched the dump criteria -- skipping dump round. #pr: ======== Limiting to both transA=T and nrhs=1: @@ -3621,44 +3662,45 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3094.36 1.597e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 6.64e+00 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1547.18 1.599e-02 0.000e+00 6.205e-08 0.000e+00 1.440e-01 4.32e+00 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3094.36 1.600e-02 0.000e+00 6.205e-08 0.000e+00 1.646e-01 6.64e+00 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1571.33 1.600e-02 0.000e+00 6.109e-08 0.000e+00 2.240e-01 4.39e+00 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3142.66 1.600e-02 0.000e+00 6.109e-08 0.000e+00 1.950e-01 6.74e+00 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1571.33 1.200e-02 0.000e+00 6.109e-08 0.000e+00 2.040e-01 4.39e+00 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3142.66 1.996e-02 0.000e+00 6.109e-08 0.000e+00 1.800e-01 6.74e+00 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 19745379.4 % faster, avg. sp. ratio 197454.794x, max sp. ratio 257848.223x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1861502.1/151030.5/2652653.2/7446008.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 9.0/ 10.3/ 37.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.3, min. 9.0, max. 10.3 ops) +#pr: (in succ. cases rsb autotuning gave avg. 26170339.0 % faster, avg. sp. ratio 261704.390x, max sp. ratio 326770.732x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3285957.1/2946388.3/3666091.7/13143828.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13.1/ 9.0/ 17.0/ 52.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 13.1, min. 9.0, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.490/ 0.231/ 5.093,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 17.880/ 0.281/ 6.640,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.387/ 3.601/ 5.172,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 22.261/ 4.387/ 6.744,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.60 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.60 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.959e+03, min 1.007e+02, max 3.094e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 9.007e-03, min 6.002e-03, max 1.202e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.850e-07s, min 6.205e-08s, max 9.537e-07s, tot 1.140e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.597e-02s, max 1.600e-02s, tot 6.395e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.503e+00 3.319e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.826e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 1.637e+01 x, min 2.000e+00 x, max 3.074e+01 x (2 samples, the non-min-nrhs ones) -#pr: Record collection took 3.65 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.20 s, min 0.18 s, max 0.22 s, tot 0.80 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.20 s, min 0.18 s, max 0.22 s, tot 0.80 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.357e+03, min 1.571e+03, max 3.143e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 8.906e-03, min 6.002e-03, max 1.200e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 6.109e-08s, min 6.109e-08s, max 6.109e-08s, tot 2.444e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.200e-02s, max 1.996e-02s, tot 6.396e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 6.036e-01 8.670e-01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.225e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) +#pr: Record collection took 5.34 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 90 environment variables in 3809 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 90 environment variables in 3863 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1705165324_gcc-12.2-1,4th.rpr -# Removing the temporary record file rsbench_pr__1705165324_gcc-12.2-1,4th.rpr.tmp. -# terminating run at 1705165334 (after 10.2s of w.c.t.) +#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1739579122_gcc-12.2-1,4th.rpr +# Removing the temporary record file rsbench_pr__1739579122_gcc-12.2-1,4th.rpr.tmp. +# terminating run at 1739579135 (after 12.9s of w.c.t.) + ./rsbench -oa -Ob --help /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench is a swiss army knife for testing the library functionality and performance. You can use it to perform sparse matrix - unitary vector multiplication, specifying the blocking parameters, the times to perform multiplication. @@ -3912,35 +3954,36 @@ Written by michelemartone_AT_users_DOT_sourceforge_DOT_net. + ./rsbench -I -cache block size : 34952 +cache block size : 32768 hwloc size of cache level 1: 65536 hwloc size of cache level 2: 524288 -detected max available cores/threads : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 -detected max OpenMP procs : 15 +detected max available cores/threads : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 +detected max OpenMP procs : 16 detected 2 levels of cache L1 size: 65536 L2 size: 524288 sysconf() : 4096 bytes per pagesize -sysconf() : 16523302 physical pages -sysconf() : 67679444992 bytes (64544 MB) of physical memory -sysconf() : 5107202 available (free) physical pages -sysconf() : 20919099392 available (free) physical memory +sysconf() : 16456967 physical pages +sysconf() : 67407736832 bytes (64285 MB) of physical memory +sysconf() : 3078852 available (free) physical pages +sysconf() : 12610977792 available (free) physical memory sysconf() , processors : 64 -sysconf() , processors online : 15 +sysconf() , processors online : 16 sysconf() : level 1 cache size 65536 sysconf() : level 1 cache associativity 2 sysconf() : level 1 cache line size 64 @@ -3967,30 +4010,30 @@ RSB_SUBM_IDX_MARKER : 2147483647 RSB_MAX_ALLOCATABLE_MEMORY_CHUNK: 18446744073709551615 timing min delta (if negative, don't complain with us) : 0 s -timing granularity : 6.52075e-08 s +timing granularity : 6.78539e-08 s CFLAGS : -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 CXXFLAGS : -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp CC : gcc memhinfo : L2:16/64/512K,L1:2/64/64K -detected free memory : 20919099392 -detected total memory : 67679444992 -for array sized 524288 elems, took 0.000598907 s for linear search and 0 s for binary search for element 524287, in 131 tries, for a total of 0.100048 s (ignore this:137363194) -for array sized 524288 elems, took 0.000286102 s for linear search and 0 s for binary search for element 262143, in 271 tries, for a total of 0.100040 s (ignore this:279444700) -for array sized 524288 elems, took 0.000145912 s for linear search and 0 s for binary search for element 131071, in 543 tries, for a total of 0.100114 s (ignore this:421787806) -for array sized 524288 elems, took 7.00951e-05 s for linear search and 0 s for binary search for element 65535, in 1080 tries, for a total of 0.100091 s (ignore this:563343406) -for array sized 524288 elems, took 3.29018e-05 s for linear search and 0 s for binary search for element 32767, in 2176 tries, for a total of 0.100036 s (ignore this:705945390) -for array sized 524288 elems, took 1.69277e-05 s for linear search and 0 s for binary search for element 16383, in 4314 tries, for a total of 0.100007 s (ignore this:847297914) -for array sized 524288 elems, took 7.86781e-06 s for linear search and 0 s for binary search for element 8191, in 8538 tries, for a total of 0.100005 s (ignore this:987167430) -for array sized 524288 elems, took 3.8147e-06 s for linear search and 0 s for binary search for element 4095, in 16606 tries, for a total of 0.100004 s (ignore this:1123170570) -for array sized 524288 elems, took 1.90735e-06 s for linear search and 0 s for binary search for element 2047, in 32255 tries, for a total of 0.100022 s (ignore this:1255222540) -for array sized 524288 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 1023, in 59749 tries, for a total of 0.100002 s (ignore this:1377468994) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 511, in 106750 tries, for a total of 0.100000 s (ignore this:1486567494) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 255, in 176005 tries, for a total of 0.100000 s (ignore this:1576330044) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 127, in 240545 tries, for a total of 0.100000 s (ignore this:1637428474) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 63, in 358675 tries, for a total of 0.100000 s (ignore this:1682621524) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 31, in 418993 tries, for a total of 0.100001 s (ignore this:1708599090) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 15, in 450568 tries, for a total of 0.100000 s (ignore this:1722116130) -for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 7, in 480108 tries, for a total of 0.100001 s (ignore this:1728837642) +detected free memory : 12610977792 +detected total memory : 67407736832 +for array sized 524288 elems, took 0.000642061 s for linear search and 0 s for binary search for element 524287, in 53 tries, for a total of 0.102560 s (ignore this:55574422) +for array sized 524288 elems, took 0.000283003 s for linear search and 0 s for binary search for element 262143, in 123 tries, for a total of 0.107567 s (ignore this:120061600) +for array sized 524288 elems, took 0.000133038 s for linear search and 0 s for binary search for element 131071, in 245 tries, for a total of 0.107798 s (ignore this:184286390) +for array sized 524288 elems, took 6.69956e-05 s for linear search and 0 s for binary search for element 65535, in 498 tries, for a total of 0.107807 s (ignore this:249559250) +for array sized 524288 elems, took 3.29018e-05 s for linear search and 0 s for binary search for element 32767, in 1002 tries, for a total of 0.107801 s (ignore this:315224318) +for array sized 524288 elems, took 1.5974e-05 s for linear search and 0 s for binary search for element 16383, in 1837 tries, for a total of 0.100001 s (ignore this:375415460) +for array sized 524288 elems, took 8.10623e-06 s for linear search and 0 s for binary search for element 8191, in 3870 tries, for a total of 0.106989 s (ignore this:438813800) +for array sized 524288 elems, took 4.05312e-06 s for linear search and 0 s for binary search for element 4095, in 7097 tries, for a total of 0.100001 s (ignore this:496938230) +for array sized 524288 elems, took 1.90735e-06 s for linear search and 0 s for binary search for element 2047, in 14962 tries, for a total of 0.100001 s (ignore this:558192658) +for array sized 524288 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 1023, in 24219 tries, for a total of 0.101282 s (ignore this:607744732) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 511, in 46646 tries, for a total of 0.105322 s (ignore this:655416944) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 255, in 74040 tries, for a total of 0.105554 s (ignore this:693177344) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 127, in 125925 tries, for a total of 0.100000 s (ignore this:725162294) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 63, in 127987 tries, for a total of 0.104185 s (ignore this:741288656) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 31, in 157511 tries, for a total of 0.106001 s (ignore this:751054338) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 15, in 172250 tries, for a total of 0.100000 s (ignore this:756221838) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 7, in 190309 tries, for a total of 0.100929 s (ignore this:758886164) + ./rsbench -C /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench version: 1.3.0 format switches:br @@ -4037,17 +4080,17 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1705165336 +# beginning run at 1739579138 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx --verbose --nrhs 1,4 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# User did not specify threads; assuming 1. Environment provides max 15 threads; this build supports max 128. -# User did not specify threads; assuming 1. Environment provides max 15 threads; this build supports max 128. -# average timer granularity: 5.97e-08 s -# Will write a final performance record to file rsbench_pr__1705165336_gcc-12.2.rpr and periodic checkpoints to rsbench_pr__1705165336_gcc-12.2.rpr.tmp +# User did not specify threads; assuming 1. Environment provides max 16 threads; this build supports max 128. +# User did not specify threads; assuming 1. Environment provides max 16 threads; this build supports max 128. +# average timer granularity: 2.71e-07 s +# Will write a final performance record to file rsbench_pr__1739579138_gcc-12.2.rpr and periodic checkpoints to rsbench_pr__1739579138_gcc-12.2.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -4086,835 +4129,846 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos11-amd64 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.378s +# Memory benchmark took 4.997s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 16 samples (4480 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 524288 bytes, per-thread 34952 bytes -# so far, program took 5.381s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 524288 bytes, per-thread 32768 bytes +# so far, program took 5.005s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type D... -# file input of A.mtx took 0.00 s (6 nnz, 47215 nnz/s ) (1.45 MB/s ) -#pre-sorting (6 elements) took 0.0222218 s -#weeding duplicates (to 6 elements) took 2.14577e-06 s (and check, 1.90735e-06 s ) +# file input of A.mtx took 0.00 s (6 nnz, 35296 nnz/s ) (1.08 MB/s ) +#pre-sorting (6 elements) took 0.0271699 s +#weeding duplicates (to 6 elements) took 3.09944e-06 s (and check, 2.86102e-06 s ) # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.100s): (3 x 3)[0x561a5fa65840]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.112s): (3 x 3)[0x561932245bb0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.04801s; avg 0.016s ( +/- 0.04/ 0.06 %); best 0.016s; worst 0.01601s; std dev. 6.78e-06 (taking best). -Reference operation time is 0.0159969 s (0.0015 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.002) -Merge (3 -> 1 leaves) took w.c.t. of 2.193e-05s, ~1.001e-05s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) -3 iterations (15 th.) took 3.004e-05s; avg 1.001e-05s ( +/- 99.36/190.48 %); best 6.45e-08s; worst 2.909e-05s; std dev. 1.349e-05 (taking best). -Reference operation time is 6.45041e-08 s (372.1 Mflops) with 15 threads. -After merge step 1: tpop: 6.45e-08 s ~Mflops: 372.069 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 247998.522x: 0.016s -> 6.45e-08s, so taking this instance. +3 iterations (16 th.) took 0.06402s; avg 0.02134s ( +/- 25.06/ 12.55 %); best 0.01599s; worst 0.02402s; std dev. 0.003781 (taking best). +Reference operation time is 0.0159919 s (0.001501 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.002) +Merge (3 -> 1 leaves) took w.c.t. of 3.195e-05s, ~1.192e-05s of computing time (of which 9.537e-07s sorting, 6.914e-06s analysis) +3 iterations (16 th.) took 2.408e-05s; avg 8.027e-06s ( +/- 99.23/188.12 %); best 6.15e-08s; worst 2.313e-05s; std dev. 1.068e-05 (taking best). +Reference operation time is 6.15001e-08 s (390.2 Mflops) with 16 threads. +After merge step 1: tpop: 6.15e-08 s ~Mflops: 390.243 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 260031.014x: 0.01599s -> 6.15e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04797s (of which 2.599e-05s partitioning, 0s I/O); computing times: 1.001e-05s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) -Total merge + benchmarking process took 0.04797s, equivalent to 743703.6/3.0 new/old ops (0.09581s for 2 clones -- as 1485363.1/6.0 ops, or 742681.6/3.0 ops per clone), SPEEDUP of 247998.522x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 247998.522x (0.016s -> 6.45e-08s), will amortize in 3.0 ops by saving 0.016s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 24799752.2% (2.48e+05x) (from 0.0015 to 372.1 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.06395s (of which 3.695e-05s partitioning, 0s I/O); computing times: 1.192e-05s in par. loops, 9.537e-07s sorting, 6.914e-06s analyzing) +Total merge + benchmarking process took 0.06395s, equivalent to 1039883.7/4.0 new/old ops (0.1357s for 2 clones -- as 2206919.9/8.5 ops, or 1103460.0/4.2 ops per clone), SPEEDUP of 260031.014x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 260031.014x (0.01599s -> 6.15e-08s), will amortize in 4.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.2s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 26003001.4% (2.6e+05x) (from 0.001501 to 390.2 Mflops). #pr: updating sample at index 1 (0^th of 16), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.143978 s (1.600e-02 s -> 6.450e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.199944 s (1.599e-02 s -> 6.150e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.799963 s and estimated a speedup of 1.000000 x (6.450e-08 s -> 6.450e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.975953 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000000 0.039464 0.027983 0.067447 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.067447 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.039464 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.027983 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.067447 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.039841 0.039869 0.079710 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.079710 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.039841 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.039869 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.079710 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.0994842 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 0 0.039464 0 0.027983 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.111768 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.0398409 0 0.0398688 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.096s): (3 x 3)[0x561a5fa68bf0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.139s): (3 x 3)[0x5619322490e0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.05/ 0.06 %); best 0.01599s; worst 0.01601s; std dev. 7.419e-06 (taking best). -Reference operation time is 0.0159891 s (0.006004 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 1.192e-05s, ~5.96e-06s of computing time (of which 1.192e-06s sorting, 1.907e-06s analysis) -3 iterations (15 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 96.14/142.86 %); best 6.45e-08s; worst 4.053e-06s; std dev. 1.73e-06 (taking best). -Reference operation time is 6.45041e-08 s (1488 Mflops) with 15 threads. -After merge step 1: tpop: 6.45e-08 s ~Mflops: 1488.276 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 247876.548x: 0.01599s -> 6.45e-08s, so taking this instance. +3 iterations (16 th.) took 0.05999s; avg 0.02s ( +/- 39.75/ 20.01 %); best 0.01205s; worst 0.024s; std dev. 0.00562 (taking best). +Reference operation time is 0.012049 s (0.007967 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01205 Mflops: 0.008) +Merge (3 -> 1 leaves) took w.c.t. of 2.003e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) +3 iterations (16 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 96.31/142.86 %); best 6.15e-08s; worst 4.053e-06s; std dev. 1.73e-06 (taking best). +Reference operation time is 6.15001e-08 s (1561 Mflops) with 16 threads. +After merge step 1: tpop: 6.15e-08 s ~Mflops: 1560.974 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 195917.814x: 0.01205s -> 6.15e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04799s (of which 1.597e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 1.192e-06s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.04799s, equivalent to 743999.3/3.0 new/old ops (0.0959s for 2 clones -- as 1486708.6/6.0 ops, or 743354.3/3.0 ops per clone), SPEEDUP of 247876.548x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 247876.548x (0.01599s -> 6.45e-08s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 24787554.8% (2.479e+05x) (from 0.006004 to 1488 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04799s (of which 2.408e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.04799s, equivalent to 780259.7/4.0 new/old ops (0.1079s for 2 clones -- as 1754615.2/9.0 ops, or 877307.6/4.5 ops per clone), SPEEDUP of 195917.814x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 195917.814x (0.01205s -> 6.15e-08s), will amortize in 4.0 ops by saving 0.01205s per op. +In 1 tuning rounds (tot. 0.17s, 0.11s for constructor, 2 clones) obtained a SPEEDUP of 19591681.4% (1.959e+05x) (from 0.007967 to 1561 Mflops). #pr: updating sample at index 9 (1^th of 16), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.143991 s (1.599e-02 s -> 6.450e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.168004 s (1.205e-02 s -> 6.150e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.799943 s and estimated a speedup of 1.000000 x (6.450e-08 s -> 6.450e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.87197 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000000 0.031555 0.032003 0.063558 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.063558 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.031555 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.032003 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.063558 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.055174 0.047999 0.103173 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.103173 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.055174 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.047999 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.103173 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.0955791 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 0 0.0315549 0 0.0320029 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.139182 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.0551739 0 0.0479991 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 7.648s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.888s/0.000s . +# so far, program took 7.681s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.216s/0.000s . getrusage() stats: -ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.2647s (system CPU time used) -ru_utime : 25.83s (user CPU time used) +ru_maxrss: 6 (maximum resident set size -- MB) +ru_stime : 0.05571s (system CPU time used) +ru_utime : 20.32s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). -# Cache block size total 524288 bytes, per-thread 34952 bytes -# so far, program took 7.648s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.888s/0.000s . +# Cache block size total 524288 bytes, per-thread 32768 bytes +# so far, program took 7.681s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.216s/0.000s . # Reusing type converted (D->S) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.104s): (3 x 3)[0x561a5fa68bf0]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.124s): (3 x 3)[0x56193224be20]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.04396s; avg 0.01465s ( +/- 18.09/ 9.05 %); best 0.012s; worst 0.01598s; std dev. 0.001875 (taking best). -Reference operation time is 0.0120029 s (0.002 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.012 Mflops: 0.002) -Merge (3 -> 1 leaves) took w.c.t. of 2.813e-05s, ~1.192e-05s of computing time (of which 1.907e-06s sorting, 5.007e-06s analysis) -3 iterations (15 th.) took 2.193e-05s; avg 7.312e-06s ( +/- 99.12/186.96 %); best 6.45e-08s; worst 2.098e-05s; std dev. 9.674e-06 (taking best). -Reference operation time is 6.45041e-08 s (372.1 Mflops) with 15 threads. -After merge step 1: tpop: 6.45e-08 s ~Mflops: 372.069 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 186080.207x: 0.012s -> 6.45e-08s, so taking this instance. +3 iterations (16 th.) took 0.07199s; avg 0.024s ( +/- 0.03/ 0.02 %); best 0.02399s; worst 0.024s; std dev. 4.415e-06 (taking best). +Reference operation time is 0.0239909 s (0.001 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.02399 Mflops: 0.001) +Merge (3 -> 1 leaves) took w.c.t. of 2.193e-05s, ~7.868e-06s of computing time (of which 9.537e-07s sorting, 4.053e-06s analysis) +3 iterations (16 th.) took 1.192e-05s; avg 3.974e-06s ( +/- 98.45/176.00 %); best 6.15e-08s; worst 1.097e-05s; std dev. 4.961e-06 (taking best). +Reference operation time is 6.15001e-08 s (390.2 Mflops) with 16 threads. +After merge step 1: tpop: 6.15e-08 s ~Mflops: 390.243 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 390094.980x: 0.02399s -> 6.15e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04797s (of which 3.195e-05s partitioning, 0s I/O); computing times: 1.192e-05s in par. loops, 1.907e-06s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.04797s, equivalent to 743625.9/4.0 new/old ops (0.0959s for 2 clones -- as 1486708.6/8.0 ops, or 743354.3/4.0 ops per clone), SPEEDUP of 186080.207x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 186080.207x (0.012s -> 6.45e-08s), will amortize in 4.0 ops by saving 0.012s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 18607920.7% (1.861e+05x) (from 0.002 to 372.1 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.07198s (of which 2.503e-05s partitioning, 0s I/O); computing times: 7.868e-06s in par. loops, 9.537e-07s sorting, 4.053e-06s analyzing) +Total merge + benchmarking process took 0.07198s, equivalent to 1170420.6/3.0 new/old ops (0.1439s for 2 clones -- as 2339360.3/6.0 ops, or 1169680.2/3.0 ops per clone), SPEEDUP of 390094.980x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 390094.980x (0.02399s -> 6.15e-08s), will amortize in 3.0 ops by saving 0.02399s per op. +In 1 tuning rounds (tot. 0.22s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 39009398.0% (3.901e+05x) (from 0.001 to 390.2 Mflops). #pr: updating sample at index 3 (2^th of 16), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.140322 s (1.200e-02 s -> 6.450e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.21627 s (2.399e-02 s -> 6.150e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.703673 s and estimated a speedup of 1.000000 x (6.450e-08 s -> 6.450e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.815677 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000000 0.039596 0.031999 0.071595 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071595 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.039596 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.031999 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071595 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.047493 0.024043 0.071536 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.071536 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.047493 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.024043 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.071536 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.103602 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 0 0.0395958 0 0.0319991 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.123497 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.0474932 0 0.0240431 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.104s): (3 x 3)[0x561a5fa68bf0]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.108s): (3 x 3)[0x56193224be20]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.048s; avg 0.016s ( +/- 0.04/ 0.04 %); best 0.016s; worst 0.01601s; std dev. 5.261e-06 (taking best). -Reference operation time is 0.015995 s (0.006002 Mflops) with 15 threads. +3 iterations (16 th.) took 0.05599s; avg 0.01866s ( +/- 14.29/ 28.35 %); best 0.016s; worst 0.02396s; std dev. 0.003742 (taking best). +Reference operation time is 0.0159969 s (0.006001 Mflops) with 16 threads. Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 1.001e-05s, ~4.053e-06s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) -3 iterations (15 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 96.14/142.86 %); best 6.45e-08s; worst 4.053e-06s; std dev. 1.73e-06 (taking best). -Reference operation time is 6.45041e-08 s (1488 Mflops) with 15 threads. -After merge step 1: tpop: 6.45e-08 s ~Mflops: 1488.276 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 247968.952x: 0.016s -> 6.45e-08s, so taking this instance. +Merge (3 -> 1 leaves) took w.c.t. of 1.287e-05s, ~5.007e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) +3 iterations (16 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 96.31/128.57 %); best 6.15e-08s; worst 3.815e-06s; std dev. 1.593e-06 (taking best). +Reference operation time is 6.15001e-08 s (1561 Mflops) with 16 threads. +After merge step 1: tpop: 6.15e-08 s ~Mflops: 1560.974 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 260112.425x: 0.016s -> 6.15e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.048s (of which 1.407e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 1.907e-06s sorting, 2.146e-06s analyzing) -Total merge + benchmarking process took 0.048s, equivalent to 744091.7/3.0 new/old ops (0.09595s for 2 clones -- as 1487440.4/6.0 ops, or 743720.2/3.0 ops per clone), SPEEDUP of 247968.952x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 247968.952x (0.016s -> 6.45e-08s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 24796795.2% (2.48e+05x) (from 0.006002 to 1488 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04834s (of which 1.597e-05s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.04834s, equivalent to 786016.7/3.0 new/old ops (0.104s for 2 clones -- as 1690827.7/6.5 ops, or 845413.8/3.3 ops per clone), SPEEDUP of 260112.425x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 260112.425x (0.016s -> 6.15e-08s), will amortize in 3.0 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.16s, 0.1s for constructor, 2 clones) obtained a SPEEDUP of 26011142.5% (2.601e+05x) (from 0.006001 to 1561 Mflops). #pr: updating sample at index 11 (3^th of 16), 0^th touch for (0,0,0,0,1,1,0). -First run of RSB Autotuner took 0.14429 s (1.600e-02 s -> 6.450e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.160365 s (1.600e-02 s -> 6.150e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.703687 s and estimated a speedup of 1.000000 x (6.450e-08 s -> 6.450e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.807666 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000000 0.039692 0.032000 0.071692 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071692 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.039692 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.032000 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071692 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.035898 0.039962 0.075860 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.075860 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.035898 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.039962 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.075860 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.103696 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 0 0.0396922 0 0.0319998 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.107866 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.035898 0 0.0399621 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 9.700s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.580s/0.000s . +# so far, program took 10.084s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.216s/0.000s . getrusage() stats: ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.4422s (system CPU time used) -ru_utime : 46.09s (user CPU time used) +ru_stime : 0.05575s (system CPU time used) +ru_utime : 37.45s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was S). -# Cache block size total 524288 bytes, per-thread 34952 bytes -# so far, program took 9.700s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.580s/0.000s . +# Cache block size total 524288 bytes, per-thread 32768 bytes +# so far, program took 10.084s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.216s/0.000s . # Reusing type converted (S->C) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.104s): (3 x 3)[0x561a5fa69b80]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.116s): (3 x 3)[0x56193224be20]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.02/ 0.01 %); best 0.01599s; worst 0.016s; std dev. 2.245e-06 (taking best). -Reference operation time is 0.0159919 s (0.006003 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 1.001e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (15 th.) took 1.979e-05s; avg 6.596e-06s ( +/- 99.02/185.54 %); best 6.45e-08s; worst 1.884e-05s; std dev. 8.663e-06 (taking best). -Reference operation time is 6.45041e-08 s (1488 Mflops) with 15 threads. -After merge step 1: tpop: 6.45e-08 s ~Mflops: 1488.276 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 247920.902x: 0.01599s -> 6.45e-08s, so taking this instance. +3 iterations (16 th.) took 0.06397s; avg 0.02132s ( +/- 24.97/ 49.89 %); best 0.016s; worst 0.03196s; std dev. 0.007522 (taking best). +Reference operation time is 0.0159981 s (0.006001 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 2.623e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 5.007e-06s analysis) +3 iterations (16 th.) took 3.29e-05s; avg 1.097e-05s ( +/- 99.44/191.30 %); best 6.15e-08s; worst 3.195e-05s; std dev. 1.484e-05 (taking best). +Reference operation time is 6.15001e-08 s (1561 Mflops) with 16 threads. +After merge step 1: tpop: 6.15e-08 s ~Mflops: 1560.974 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 260131.808x: 0.016s -> 6.15e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.048s (of which 1.502e-05s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.048s, equivalent to 744062.1/3.0 new/old ops (0.0957s for 2 clones -- as 1483614.9/6.0 ops, or 741807.4/3.0 ops per clone), SPEEDUP of 247920.902x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 247920.902x (0.01599s -> 6.45e-08s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 24791990.2% (2.479e+05x) (from 0.006003 to 1488 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05201s (of which 2.909e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 2.146e-06s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.05201s, equivalent to 845625.1/3.3 new/old ops (0.1118s for 2 clones -- as 1817898.8/7.0 ops, or 908949.4/3.5 ops per clone), SPEEDUP of 260131.808x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 260131.808x (0.016s -> 6.15e-08s), will amortize in 3.3 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.18s, 0.11s for constructor, 2 clones) obtained a SPEEDUP of 26013080.8% (2.601e+05x) (from 0.006001 to 1561 Mflops). #pr: updating sample at index 5 (4^th of 16), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.144008 s (1.599e-02 s -> 6.450e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.17594 s (1.600e-02 s -> 6.150e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.799967 s and estimated a speedup of 1.000000 x (6.450e-08 s -> 6.450e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.83996 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000000 0.039792 0.031974 0.071766 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071766 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.039792 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.031974 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071766 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.047786 0.032030 0.079816 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.079816 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.047786 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.032030 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.079816 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.10377 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 0 0.0397921 0 0.0319741 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.115873 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.047786 0 0.0320299 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.112s): (3 x 3)[0x561a5fa69b80]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.112s): (3 x 3)[0x56193224be20]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.05/ 0.03 %); best 0.01599s; worst 0.016s; std dev. 5.188e-06 (taking best). -Reference operation time is 0.0159891 s (0.02402 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.024) -Merge (3 -> 1 leaves) took w.c.t. of 2.098e-05s, ~7.153e-06s of computing time (of which 9.537e-07s sorting, 3.815e-06s analysis) -3 iterations (15 th.) took 6.914e-06s; avg 2.305e-06s ( +/- 58.62/117.24 %); best 9.537e-07s; worst 5.007e-06s; std dev. 1.911e-06 (taking best). -Reference operation time is 9.53674e-07 s (402.7 Mflops) with 15 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 402.653 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 16765.750x: 0.01599s -> 9.537e-07s, so taking this instance. +3 iterations (16 th.) took 0.05597s; avg 0.01866s ( +/- 14.37/ 28.45 %); best 0.01598s; worst 0.02396s; std dev. 0.003753 (taking best). +Reference operation time is 0.015976 s (0.02404 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01598 Mflops: 0.024) +Merge (3 -> 1 leaves) took w.c.t. of 2.313e-05s, ~6.914e-06s of computing time (of which 9.537e-07s sorting, 5.007e-06s analysis) +3 iterations (16 th.) took 1.693e-05s; avg 5.643e-06s ( +/- 83.10/166.20 %); best 9.537e-07s; worst 1.502e-05s; std dev. 6.631e-06 (taking best). +Reference operation time is 9.53674e-07 s (402.7 Mflops) with 16 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 402.653 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 16752.000x: 0.01598s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 2.599e-05s partitioning, 0s I/O); computing times: 7.153e-06s in par. loops, 9.537e-07s sorting, 3.815e-06s analyzing) -Total merge + benchmarking process took 0.04798s, equivalent to 50305.5/3.0 new/old ops (0.09587s for 2 clones -- as 100526.2/6.0 ops, or 50263.1/3.0 ops per clone), SPEEDUP of 16765.750x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 16765.750x (0.01599s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1676475.0% (1.677e+04x) (from 0.02402 to 402.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04802s (of which 2.599e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 9.537e-07s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.04802s, equivalent to 50354.8/3.0 new/old ops (0.0959s for 2 clones -- as 100559.5/6.0 ops, or 50279.8/3.0 ops per clone), SPEEDUP of 16752.000x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 16752.000x (0.01598s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01597s per op. +In 1 tuning rounds (tot. 0.15s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1675100.0% (1.675e+04x) (from 0.02404 to 402.7 Mflops). #pr: updating sample at index 13 (5^th of 16), 0^th touch for (0,0,0,0,1,2,0). -First run of RSB Autotuner took 0.143991 s (1.599e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.151999 s (1.598e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.795982 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.819967 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000000 0.039488 0.031989 0.071477 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071477 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.039488 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.031989 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071477 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.047640 0.032008 0.079648 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.079648 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.047640 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.032008 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.079648 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.111502 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 0 0.0394881 0 0.0319891 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.111669 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.0476398 0 0.0320079 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 11.960s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 5.464s/0.000s . +# so far, program took 12.476s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 6.204s/0.000s . getrusage() stats: ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.46s (system CPU time used) -ru_utime : 69.29s (user CPU time used) +ru_stime : 0.05575s (system CPU time used) +ru_utime : 55.21s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was C). -# Cache block size total 524288 bytes, per-thread 34952 bytes -# so far, program took 11.960s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 5.464s/0.000s . +# Cache block size total 524288 bytes, per-thread 32768 bytes +# so far, program took 12.476s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 6.204s/0.000s . # Reusing type converted (C->Z) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.104s): (3 x 3)[0x561a5fa69b80]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.120s): (3 x 3)[0x56193224be20]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.16/ 0.16 %); best 0.01597s; worst 0.01602s; std dev. 2.122e-05 (taking best). -Reference operation time is 0.01597 s (0.006011 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01597 Mflops: 0.006) -Merge (3 -> 1 leaves) took w.c.t. of 2.217e-05s, ~7.153e-06s of computing time (of which 1.907e-06s sorting, 3.815e-06s analysis) -3 iterations (15 th.) took 2.909e-05s; avg 9.696e-06s ( +/- 99.33/190.16 %); best 6.45e-08s; worst 2.813e-05s; std dev. 1.304e-05 (taking best). -Reference operation time is 6.45041e-08 s (1488 Mflops) with 15 threads. -After merge step 1: tpop: 6.45e-08 s ~Mflops: 1488.276 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 247580.854x: 0.01597s -> 6.45e-08s, so taking this instance. +3 iterations (16 th.) took 0.03999s; avg 0.01333s ( +/- 39.79/ 20.10 %); best 0.008027s; worst 0.01601s; std dev. 0.003751 (taking best). +Reference operation time is 0.00802708 s (0.01196 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.008027 Mflops: 0.012) +Merge (3 -> 1 leaves) took w.c.t. of 2.408e-05s, ~8.106e-06s of computing time (of which 9.537e-07s sorting, 4.053e-06s analysis) +3 iterations (16 th.) took 2.599e-05s; avg 8.663e-06s ( +/- 99.29/188.99 %); best 6.15e-08s; worst 2.503e-05s; std dev. 1.158e-05 (taking best). +Reference operation time is 6.15001e-08 s (1561 Mflops) with 16 threads. +After merge step 1: tpop: 6.15e-08 s ~Mflops: 1560.974 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 130521.419x: 0.008027s -> 6.15e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.048s (of which 2.503e-05s partitioning, 0s I/O); computing times: 7.153e-06s in par. loops, 1.907e-06s sorting, 3.815e-06s analyzing) -Total merge + benchmarking process took 0.048s, equivalent to 744091.7/3.0 new/old ops (0.09587s for 2 clones -- as 1486324.2/6.0 ops, or 743162.1/3.0 ops per clone), SPEEDUP of 247580.854x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 247580.854x (0.01597s -> 6.45e-08s), will amortize in 3.0 ops by saving 0.01597s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 24757985.4% (2.476e+05x) (from 0.006011 to 1488 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04796s (of which 2.885e-05s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 9.537e-07s sorting, 4.053e-06s analyzing) +Total merge + benchmarking process took 0.04796s, equivalent to 779786.8/6.0 new/old ops (0.09585s for 2 clones -- as 1558585.0/11.9 ops, or 779292.5/6.0 ops per clone), SPEEDUP of 130521.419x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 130521.419x (0.008027s -> 6.15e-08s), will amortize in 6.0 ops by saving 0.008027s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 13052041.9% (1.305e+05x) (from 0.01196 to 1561 Mflops). #pr: updating sample at index 7 (6^th of 16), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.144035 s (1.597e-02 s -> 6.450e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.136014 s (8.027e-03 s -> 6.150e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.79995 s and estimated a speedup of 1.000000 x (6.450e-08 s -> 6.450e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.811942 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000000 0.039778 0.032006 0.071784 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071784 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.039778 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.032006 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.071784 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.035769 0.047996 0.083765 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.083765 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.035769 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.047996 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.083765 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.10378 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 0 0.039778 0 0.032006 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.119779 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.035769 0 0.047996 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 15 threads -# Constructed matrix (took 0.112s): (3 x 3)[0x561a5fa6b6a0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 16 threads +# Constructed matrix (took 0.108s): (3 x 3)[0x56193224d2a0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.06/ 0.04 %); best 0.01599s; worst 0.016s; std dev. 6.365e-06 (taking best). -Reference operation time is 0.0159872 s (0.02402 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.024) -Merge (3 -> 1 leaves) took w.c.t. of 2.193e-05s, ~5.96e-06s of computing time (of which 9.537e-07s sorting, 5.007e-06s analysis) -3 iterations (15 th.) took 6.914e-06s; avg 2.305e-06s ( +/- 97.20/158.62 %); best 6.45e-08s; worst 5.96e-06s; std dev. 2.614e-06 (taking best). -Reference operation time is 6.45041e-08 s (5953 Mflops) with 15 threads. -After merge step 1: tpop: 6.45e-08 s ~Mflops: 5953.106 nsubm:1 otn:15 -Applying merge (3 -> 1 leaves, 15 th.) yielded SPEEDUP of 247846.978x: 0.01599s -> 6.45e-08s, so taking this instance. +3 iterations (16 th.) took 0.03998s; avg 0.01333s ( +/- 40.08/ 20.04 %); best 0.007985s; worst 0.016s; std dev. 0.003776 (taking best). +Reference operation time is 0.00798512 s (0.04809 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.007985 Mflops: 0.048) +Merge (3 -> 1 leaves) took w.c.t. of 2.599e-05s, ~7.868e-06s of computing time (of which 1.907e-06s sorting, 5.96e-06s analysis) +3 iterations (16 th.) took 1.001e-05s; avg 3.338e-06s ( +/- 98.16/171.43 %); best 6.15e-08s; worst 9.06e-06s; std dev. 4.065e-06 (taking best). +Reference operation time is 6.15001e-08 s (6244 Mflops) with 16 threads. +After merge step 1: tpop: 6.15e-08 s ~Mflops: 6243.895 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 129839.116x: 0.007985s -> 6.15e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.048s (of which 2.599e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 9.537e-07s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.048s, equivalent to 744091.7/3.0 new/old ops (0.09587s for 2 clones -- as 1486276.1/6.0 ops, or 743138.1/3.0 ops per clone), SPEEDUP of 247846.978x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 247846.978x (0.01599s -> 6.45e-08s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 24784597.8% (2.478e+05x) (from 0.02402 to 5953 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.048s (of which 3.004e-05s partitioning, 0s I/O); computing times: 7.868e-06s in par. loops, 1.907e-06s sorting, 5.96e-06s analyzing) +Total merge + benchmarking process took 0.048s, equivalent to 780453.6/6.0 new/old ops (0.09587s for 2 clones -- as 1558875.8/12.0 ops, or 779437.9/6.0 ops per clone), SPEEDUP of 129839.116x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 129839.116x (0.007985s -> 6.15e-08s), will amortize in 6.0 ops by saving 0.007985s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 12983811.6% (1.298e+05x) (from 0.04809 to 6244 Mflops). #pr: updating sample at index 15 (7^th of 16), 0^th touch for (0,0,0,0,1,3,0). -First run of RSB Autotuner took 0.144013 s (1.599e-02 s -> 6.450e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.136036 s (7.985e-03 s -> 6.150e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.795607 s and estimated a speedup of 1.000000 x (6.450e-08 s -> 6.450e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.803693 s and estimated a speedup of 1.000000 x (6.150e-08 s -> 6.150e-08 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 15 3 3 6 0.000001 0.035474 0.031998 0.067472 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.067473 -%:RSB_SUBDIVISION_TIME:A.mtx S N 15 3 3 6 0.035474 -%:RSB_SHUFFLE_TIME:A.mtx S N 15 3 3 6 0.031998 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 15 3 3 6 0.000001 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 15 3 3 6 1.000 -%:SORTEDCOO2RSB_TIME:A.mtx S N 15 3 3 6 0.067472 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 15 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 15 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 15 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 15 3 3 6 1.00 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 16 3 3 6 0.000000 0.035855 0.035985 0.071840 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.071840 +%:RSB_SUBDIVISION_TIME:A.mtx S N 16 3 3 6 0.035855 +%:RSB_SHUFFLE_TIME:A.mtx S N 16 3 3 6 0.035985 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 16 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 16 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 16 3 3 6 0.071840 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 16 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 16 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 16 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 16 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 15 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 15 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 15 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 15 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 15 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 16 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 16 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 16 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 16 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 16 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[15] SPMV[15] SPMV[15] -%operation:A.mtx 0.111492 1e+09 1e+09 -%constructor:matrix SORT[15] SCAN[15] SHUFFLE[15] INSERT[15] -%constructor:A.mtx 1.19209e-06 0.0354741 0 0.0319979 +%operation:matrix CONSTRUCTOR[16] SPMV[16] SPMV[16] +%operation:A.mtx 0.10784 1e+09 1e+09 +%constructor:matrix SORT[16] SCAN[16] SHUFFLE[16] INSERT[16] +%constructor:A.mtx 0 0.0358548 0 0.035985 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 14.220s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 7.347s/0.000s . +# so far, program took 14.760s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 8.091s/0.000s . getrusage() stats: ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.5196s (system CPU time used) -ru_utime : 93.02s (user CPU time used) +ru_stime : 0.2864s (system CPU time used) +ru_utime : 72.13s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to type D: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 15 15 0 4.0000 4.6667 3 1 372.07 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 5.17e+00 1 2.40e-05 -pr: 9:R_R A 3 3 6 4 D S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 5.64e+00 2.79e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 16 16 0 4.0000 4.6667 3 1 390.24 1.599e-02 0.000e+00 6.150e-08 0.000e+00 1.999e-01 2.41e+00 5.17e+00 1 2.40e-05 +pr: 9:R_R A 3 3 6 4 D S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.205e-02 0.000e+00 6.150e-08 0.000e+00 1.680e-01 5.92e+00 2.79e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 24793653.5 % faster, avg. sp. ratio 247937.535x, max sp. ratio 247998.522x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2232175.2/2232075.4/2232275.0/4464350.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 18.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 22797341.4 % faster, avg. sp. ratio 227974.414x, max sp. ratio 260031.014x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2991444.1/2731769.7/3251118.4/5982888.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13.2/ 12.5/ 13.9/ 26.4 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 13.2, min. 12.5, max. 13.9 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.039/ 1.922/ 4.155,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 7.937/ 2.294/ 5.643,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.187/ 2.016/ 4.358,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.325/ 2.407/ 5.919,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 3.979/ 2.792/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.302e+02, min 3.721e+02, max 1.488e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.752e-03, min 1.500e-03, max 6.004e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.450e-08s, min 6.450e-08s, max 6.450e-08s, tot 1.290e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.600e-02s, tot 3.199e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.636e+00 5.696e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.17 s, max 0.20 s, tot 0.37 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.17 s, max 0.20 s, tot 0.37 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 9.756e+02, min 3.902e+02, max 1.561e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 4.734e-03, min 1.501e-03, max 7.967e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 6.150e-08s, min 6.150e-08s, max 6.150e-08s, tot 1.230e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.402e-02s, min 1.205e-02s, max 1.599e-02s, tot 2.804e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 8.184e-01 1.769e+00 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type S: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 1 S S N 15 15 0 4.0000 4.6667 3 1 372.07 1.200e-02 0.000e+00 6.450e-08 0.000e+00 1.403e-01 1.36e+00 3.17e+00 1 2.40e-05 -pr: 11:R_R A 3 3 6 4 S S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.443e-01 3.04e+00 1.54e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 1 S S N 16 16 0 4.0000 4.6667 3 1 390.24 2.399e-02 0.000e+00 6.150e-08 0.000e+00 2.163e-01 1.43e+00 3.17e+00 1 2.40e-05 +pr: 11:R_R A 3 3 6 4 S S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.604e-01 3.19e+00 1.54e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 21702358.0 % faster, avg. sp. ratio 217024.580x, max sp. ratio 247968.952x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2206154.1/2175398.3/2236910.0/4412308.3 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.4/ 9.0/ 11.7/ 20.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 10.4, min. 9.0, max. 11.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 32510270.2 % faster, avg. sp. ratio 325103.702x, max sp. ratio 390094.980x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3062068.2/2607555.7/3516580.7/6124136.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.5/ 9.0/ 10.0/ 19.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.5, min. 9.0, max. 10.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 8/ 8/ 8) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 24/ 24/ 24) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.736/ 1.178/ 2.294,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 4.403/ 1.364/ 3.039,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.821/ 1.236/ 2.407,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 4.618/ 1.431/ 3.187,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.354/ 1.542/ 3.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.28 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.28 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.302e+02, min 3.721e+02, max 1.488e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 4.001e-03, min 2.000e-03, max 6.002e-03 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.450e-08s, min 6.450e-08s, max 6.450e-08s, tot 1.290e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.400e-02s, min 1.200e-02s, max 1.600e-02s, tot 2.800e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.773e+00 9.294e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.19 s, min 0.16 s, max 0.22 s, tot 0.38 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.19 s, min 0.16 s, max 0.22 s, tot 0.38 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 9.756e+02, min 3.902e+02, max 1.561e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.501e-03, min 1.000e-03, max 6.001e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 6.150e-08s, min 6.150e-08s, max 6.150e-08s, tot 1.230e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.999e-02s, min 1.600e-02s, max 2.399e-02s, tot 3.999e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.482e+00 2.886e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type C: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 5:R_R A 3 3 6 1 C S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 1.29e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 15 15 0 4.0000 4.6667 3 1 402.65 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 3.82e-01 6.98e-01 1 3.84e-04 +pr: 5:R_R A 3 3 6 1 C S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.759e-01 2.41e+00 1.29e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 16 16 0 4.0000 4.6667 3 1 402.65 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 3.82e-01 6.98e-01 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 13234232.6 % faster, avg. sp. ratio 132343.326x, max sp. ratio 247920.902x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1191763.3/150985.5/2232541.1/2383526.6 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 18.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 13844090.4 % faster, avg. sp. ratio 138441.904x, max sp. ratio 260131.808x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1510096.5/159382.8/2860810.2/3020193.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.3/ 9.5/ 11.0/ 20.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 10.3, min. 9.5, max. 11.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.102/ 0.281/ 1.922,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 2.676/ 0.382/ 2.294,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.149/ 0.281/ 2.016,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 2.788/ 0.382/ 2.407,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 0.995/ 0.698/ 1.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.455e+02, min 4.027e+02, max 1.488e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.501e-02, min 6.003e-03, max 2.402e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.091e-07s, min 6.450e-08s, max 9.537e-07s, tot 1.018e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.599e-02s, tot 3.198e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.696e+00 3.897e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.706e-01 x, min 2.706e-01 x, max 2.706e-01 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.16 s, min 0.15 s, max 0.18 s, tot 0.33 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.16 s, min 0.15 s, max 0.18 s, tot 0.33 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 9.818e+02, min 4.027e+02, max 1.561e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.502e-02, min 6.001e-03, max 2.404e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 5.076e-07s, min 6.150e-08s, max 9.537e-07s, tot 1.015e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.598e-02s, max 1.600e-02s, tot 3.197e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.769e+00 1.269e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.580e-01 x, min 2.580e-01 x, max 2.580e-01 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type Z: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 7:R_R A 3 3 6 1 Z S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.597e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 4.15e+00 2.29e+00 1 9.60e-05 -pr: 15:R_R A 3 3 6 4 Z S N 15 15 0 4.0000 4.6667 3 1 5953.11 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 1.09e+01 1.32e+00 1 3.84e-04 +pr: 7:R_R A 3 3 6 1 Z S N 16 16 0 4.0000 4.6667 3 1 1560.97 8.027e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 4.36e+00 2.29e+00 1 9.60e-05 +pr: 15:R_R A 3 3 6 4 Z S N 16 16 0 4.0000 4.6667 3 1 6243.90 7.985e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 1.14e+01 1.32e+00 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 24771291.6 % faster, avg. sp. ratio 247713.916x, max sp. ratio 247846.978x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2232786.9/2232615.0/2232958.8/4465573.8 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 18.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 13017926.7 % faster, avg. sp. ratio 130180.267x, max sp. ratio 130521.419x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2211785.2/2211606.9/2211963.6/4423570.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 17.0/ 16.9/ 17.0/ 34.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 17.0, min. 16.9, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.643/ 3.411/ 7.875,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.007/ 4.155/ 10.852,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.919/ 3.577/ 8.260,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.740/ 4.358/ 11.382,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.807/ 1.323/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.721e+03, min 1.488e+03, max 5.953e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.502e-02, min 6.011e-03, max 2.402e-02 (2 samples) -#pr: best tun. rsb operation time was: on avg. 6.450e-08s, min 6.450e-08s, max 6.450e-08s, tot 1.290e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.598e-02s, min 1.597e-02s, max 1.599e-02s, tot 3.196e-02s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.390e+00 3.211e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.27 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.27 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 3.902e+03, min 1.561e+03, max 6.244e+03 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.002e-02, min 1.196e-02, max 4.809e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 6.150e-08s, min 6.150e-08s, max 6.150e-08s, tot 1.230e-07s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 8.006e-03s, min 7.985e-03s, max 8.027e-03s, tot 1.601e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.317e-01 9.969e-01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 +#pr: min / max ratio of in-cache MEMSET bandwidth to extrapolated read bandwidth ratio: 7.095e-01 1.638e+00 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds cache bandwidth! #pr: ======== Limiting to nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 15 15 0 4.0000 4.6667 3 1 372.07 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 15 15 0 4.0000 4.6667 3 1 372.07 1.200e-02 0.000e+00 6.450e-08 0.000e+00 1.403e-01 1.36e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.597e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 4.15e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 16 16 0 4.0000 4.6667 3 1 390.24 1.599e-02 0.000e+00 6.150e-08 0.000e+00 1.999e-01 2.41e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 16 16 0 4.0000 4.6667 3 1 390.24 2.399e-02 0.000e+00 6.150e-08 0.000e+00 2.163e-01 1.43e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.759e-01 2.41e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 16 16 0 4.0000 4.6667 3 1 1560.97 8.027e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 4.36e+00 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 23239412.1 % faster, avg. sp. ratio 232395.121x, max sp. ratio 247998.522x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2218243.4/2175398.3/2232958.8/8872973.6 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.7/ 9.0/ 11.7/ 38.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.7, min. 9.0, max. 11.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 26019380.5 % faster, avg. sp. ratio 260194.805x, max sp. ratio 390094.980x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2960029.1/2211606.9/3516580.7/11840116.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.4/ 9.0/ 16.9/ 49.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 12.4, min. 9.0, max. 16.9 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.108/ 1.178/ 3.411,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.108/ 1.364/ 4.155,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.211/ 1.236/ 3.577,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.602/ 1.431/ 4.358,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.57 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.57 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.302e+02, min 3.721e+02, max 1.488e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.879e-03, min 1.500e-03, max 6.011e-03 (4 samples) -#pr: best tun. rsb operation time was: on avg. 6.450e-08s, min 6.450e-08s, max 6.450e-08s, tot 2.580e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.499e-02s, min 1.200e-02s, max 1.600e-02s, tot 5.996e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.211e+00 9.294e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.14 s, max 0.22 s, tot 0.73 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.14 s, max 0.22 s, tot 0.73 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 9.756e+02, min 3.902e+02, max 1.561e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.115e-03, min 1.000e-03, max 1.196e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 6.150e-08s, min 6.150e-08s, max 6.150e-08s, tot 2.460e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.600e-02s, min 8.027e-03s, max 2.399e-02s, tot 6.401e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 9.969e-01 2.886e+00 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 #pr: ======== Limiting to nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 5.64e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.443e-01 3.04e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 15 15 0 4.0000 4.6667 3 1 402.65 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 15 15 0 4.0000 4.6667 3 1 5953.11 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 1.09e+01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.205e-02 0.000e+00 6.150e-08 0.000e+00 1.680e-01 5.92e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.604e-01 3.19e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 16 16 0 4.0000 4.6667 3 1 402.65 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 3.82e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 16 16 0 4.0000 4.6667 3 1 6243.90 7.985e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 1.14e+01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 19011355.7 % faster, avg. sp. ratio 190114.557x, max sp. ratio 247968.952x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1713196.4/150985.5/2236910.0/6852785.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 36.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 15065433.9 % faster, avg. sp. ratio 150655.339x, max sp. ratio 260112.425x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1927667.9/159382.8/2731769.7/7710671.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.6/ 9.5/ 17.0/ 50.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 12.6, min. 9.5, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.651/ 0.281/ 7.875,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 19.915/ 0.382/ 10.852,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.826/ 0.281/ 8.260,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 20.869/ 0.382/ 11.382,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.333e+03, min 4.027e+02, max 5.953e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.501e-02, min 6.002e-03, max 2.402e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.868e-07s, min 6.450e-08s, max 9.537e-07s, tot 1.147e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.600e-02s, tot 6.396e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.390e+00 3.897e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.068e+00 x, min 2.706e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.17 s, tot 0.62 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.17 s, tot 0.62 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.442e+03, min 4.027e+02, max 6.244e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 2.152e-02, min 6.001e-03, max 4.809e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 2.845e-07s, min 6.150e-08s, max 9.537e-07s, tot 1.138e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.300e-02s, min 7.985e-03s, max 1.600e-02s, tot 5.201e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.317e-01 1.269e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 +#pr: min / max ratio of in-cache MEMSET bandwidth to extrapolated read bandwidth ratio: 7.095e-01 2.086e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds cache bandwidth! #pr: ======== Limiting to transA=N: #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 15 15 0 4.0000 4.6667 3 1 372.07 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 15 15 0 4.0000 4.6667 3 1 372.07 1.200e-02 0.000e+00 6.450e-08 0.000e+00 1.403e-01 1.36e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.597e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 4.15e+00 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 5.64e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.443e-01 3.04e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 15 15 0 4.0000 4.6667 3 1 402.65 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 15 15 0 4.0000 4.6667 3 1 5953.11 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 1.09e+01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 16 16 0 4.0000 4.6667 3 1 390.24 1.599e-02 0.000e+00 6.150e-08 0.000e+00 1.999e-01 2.41e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 16 16 0 4.0000 4.6667 3 1 390.24 2.399e-02 0.000e+00 6.150e-08 0.000e+00 2.163e-01 1.43e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.759e-01 2.41e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 16 16 0 4.0000 4.6667 3 1 1560.97 8.027e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 4.36e+00 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.205e-02 0.000e+00 6.150e-08 0.000e+00 1.680e-01 5.92e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.604e-01 3.19e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 16 16 0 4.0000 4.6667 3 1 402.65 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 3.82e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 16 16 0 4.0000 4.6667 3 1 6243.90 7.985e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 1.14e+01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 21125383.9 % faster, avg. sp. ratio 211254.839x, max sp. ratio 247998.522x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1965719.9/150985.5/2236910.0/15725759.1 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 9.0/ 11.7/ 74.8 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.3, min. 9.0, max. 11.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 20542407.2 % faster, avg. sp. ratio 205425.072x, max sp. ratio 390094.980x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2443848.5/159382.8/3516580.7/19550788.1 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.5/ 9.0/ 17.0/100.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 12.5, min. 9.0, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.880/ 0.281/ 7.875,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 30.023/ 0.382/ 10.852,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.019/ 0.281/ 8.260,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 31.471/ 0.382/ 11.382,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 1.15 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 1.15 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.632e+03, min 3.721e+02, max 5.953e+03 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 9.444e-03, min 1.500e-03, max 2.402e-02 (8 samples) -#pr: best tun. rsb operation time was: on avg. 1.757e-07s, min 6.450e-08s, max 9.537e-07s, tot 1.405e-06s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.549e-02s, min 1.200e-02s, max 1.600e-02s, tot 1.239e-01s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.390e+00 3.897e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.068e+00 x, min 2.706e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.17 s, min 0.14 s, max 0.22 s, tot 1.34 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.17 s, min 0.14 s, max 0.22 s, tot 1.34 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.709e+03, min 3.902e+02, max 6.244e+03 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.332e-02, min 1.000e-03, max 4.809e-02 (8 samples) +#pr: best tun. rsb operation time was: on avg. 1.730e-07s, min 6.150e-08s, max 9.537e-07s, tot 1.384e-06s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.450e-02s, min 7.985e-03s, max 2.399e-02s, tot 1.160e-01s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.317e-01 1.269e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 +#pr: min / max ratio of in-cache MEMSET bandwidth to extrapolated read bandwidth ratio: 7.095e-01 2.086e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds cache bandwidth! #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 15 15 0 4.0000 4.6667 3 1 372.07 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 15 15 0 4.0000 4.6667 3 1 372.07 1.200e-02 0.000e+00 6.450e-08 0.000e+00 1.403e-01 1.36e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.597e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 4.15e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 16 16 0 4.0000 4.6667 3 1 390.24 1.599e-02 0.000e+00 6.150e-08 0.000e+00 1.999e-01 2.41e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 16 16 0 4.0000 4.6667 3 1 390.24 2.399e-02 0.000e+00 6.150e-08 0.000e+00 2.163e-01 1.43e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.759e-01 2.41e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 16 16 0 4.0000 4.6667 3 1 1560.97 8.027e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 4.36e+00 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 23239412.1 % faster, avg. sp. ratio 232395.121x, max sp. ratio 247998.522x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2218243.4/2175398.3/2232958.8/8872973.6 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.7/ 9.0/ 11.7/ 38.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.7, min. 9.0, max. 11.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 26019380.5 % faster, avg. sp. ratio 260194.805x, max sp. ratio 390094.980x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2960029.1/2211606.9/3516580.7/11840116.3 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.4/ 9.0/ 16.9/ 49.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 12.4, min. 9.0, max. 16.9 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.108/ 1.178/ 3.411,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.108/ 1.364/ 4.155,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.211/ 1.236/ 3.577,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.602/ 1.431/ 4.358,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.57 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.57 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.302e+02, min 3.721e+02, max 1.488e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.879e-03, min 1.500e-03, max 6.011e-03 (4 samples) -#pr: best tun. rsb operation time was: on avg. 6.450e-08s, min 6.450e-08s, max 6.450e-08s, tot 2.580e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.499e-02s, min 1.200e-02s, max 1.600e-02s, tot 5.996e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.211e+00 9.294e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.14 s, max 0.22 s, tot 0.73 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.14 s, max 0.22 s, tot 0.73 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 9.756e+02, min 3.902e+02, max 1.561e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.115e-03, min 1.000e-03, max 1.196e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 6.150e-08s, min 6.150e-08s, max 6.150e-08s, tot 2.460e-07s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.600e-02s, min 8.027e-03s, max 2.399e-02s, tot 6.401e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 9.969e-01 2.886e+00 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 #pr: ======== Limiting to both transA=N and nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 5.64e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.443e-01 3.04e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 15 15 0 4.0000 4.6667 3 1 402.65 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 15 15 0 4.0000 4.6667 3 1 5953.11 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 1.09e+01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.205e-02 0.000e+00 6.150e-08 0.000e+00 1.680e-01 5.92e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.604e-01 3.19e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 16 16 0 4.0000 4.6667 3 1 402.65 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 3.82e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 16 16 0 4.0000 4.6667 3 1 6243.90 7.985e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 1.14e+01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 19011355.7 % faster, avg. sp. ratio 190114.557x, max sp. ratio 247968.952x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1713196.4/150985.5/2236910.0/6852785.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 36.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 15065433.9 % faster, avg. sp. ratio 150655.339x, max sp. ratio 260112.425x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1927667.9/159382.8/2731769.7/7710671.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.6/ 9.5/ 17.0/ 50.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 12.6, min. 9.5, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.651/ 0.281/ 7.875,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 19.915/ 0.382/ 10.852,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.826/ 0.281/ 8.260,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 20.869/ 0.382/ 11.382,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.333e+03, min 4.027e+02, max 5.953e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.501e-02, min 6.002e-03, max 2.402e-02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.868e-07s, min 6.450e-08s, max 9.537e-07s, tot 1.147e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.600e-02s, tot 6.396e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.390e+00 3.897e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.068e+00 x, min 2.706e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.17 s, tot 0.62 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.17 s, tot 0.62 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.442e+03, min 4.027e+02, max 6.244e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 2.152e-02, min 6.001e-03, max 4.809e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 2.845e-07s, min 6.150e-08s, max 9.537e-07s, tot 1.138e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.300e-02s, min 7.985e-03s, max 1.600e-02s, tot 5.201e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.317e-01 1.269e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 +#pr: min / max ratio of in-cache MEMSET bandwidth to extrapolated read bandwidth ratio: 7.095e-01 2.086e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds cache bandwidth! #pr: ======== Limiting to transA=T: #pr: No sample (out of 8) matched the dump criteria -- skipping dump round. #pr: ======== Limiting to both transA=T and nrhs=1: @@ -4924,53 +4978,55 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 15 15 0 4.0000 4.6667 3 1 372.07 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 15 15 0 4.0000 4.6667 3 1 372.07 1.200e-02 0.000e+00 6.450e-08 0.000e+00 1.403e-01 1.36e+00 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 2.29e+00 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.597e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 4.15e+00 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 5.64e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 15 15 0 4.0000 4.6667 3 1 1488.28 1.600e-02 0.000e+00 6.450e-08 0.000e+00 1.443e-01 3.04e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 15 15 0 4.0000 4.6667 3 1 402.65 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 3.82e-01 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 15 15 0 4.0000 4.6667 3 1 5953.11 1.599e-02 0.000e+00 6.450e-08 0.000e+00 1.440e-01 1.09e+01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 16 16 0 4.0000 4.6667 3 1 390.24 1.599e-02 0.000e+00 6.150e-08 0.000e+00 1.999e-01 2.41e+00 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 16 16 0 4.0000 4.6667 3 1 390.24 2.399e-02 0.000e+00 6.150e-08 0.000e+00 2.163e-01 1.43e+00 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.759e-01 2.41e+00 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 16 16 0 4.0000 4.6667 3 1 1560.97 8.027e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 4.36e+00 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.205e-02 0.000e+00 6.150e-08 0.000e+00 1.680e-01 5.92e+00 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 16 16 0 4.0000 4.6667 3 1 1560.97 1.600e-02 0.000e+00 6.150e-08 0.000e+00 1.604e-01 3.19e+00 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 16 16 0 4.0000 4.6667 3 1 402.65 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.520e-01 3.82e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 16 16 0 4.0000 4.6667 3 1 6243.90 7.985e-03 0.000e+00 6.150e-08 0.000e+00 1.360e-01 1.14e+01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 21125383.9 % faster, avg. sp. ratio 211254.839x, max sp. ratio 247998.522x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1965719.9/150985.5/2236910.0/15725759.1 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.3/ 9.0/ 11.7/ 74.8 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 9.3, min. 9.0, max. 11.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 20542407.2 % faster, avg. sp. ratio 205425.072x, max sp. ratio 390094.980x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2443848.5/159382.8/3516580.7/19550788.1 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 12.5/ 9.0/ 17.0/100.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 12.5, min. 9.0, max. 17.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.880/ 0.281/ 7.875,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 30.023/ 0.382/ 10.852,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.019/ 0.281/ 8.260,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 31.471/ 0.382/ 11.382,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 1.15 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 1.15 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.632e+03, min 3.721e+02, max 5.953e+03 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 9.444e-03, min 1.500e-03, max 2.402e-02 (8 samples) -#pr: best tun. rsb operation time was: on avg. 1.757e-07s, min 6.450e-08s, max 9.537e-07s, tot 1.405e-06s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.549e-02s, min 1.200e-02s, max 1.600e-02s, tot 1.239e-01s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.390e+00 3.897e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.370e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.068e+00 x, min 2.706e-01 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) -#pr: Record collection took 7.96 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.17 s, min 0.14 s, max 0.22 s, tot 1.34 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.17 s, min 0.14 s, max 0.22 s, tot 1.34 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.709e+03, min 3.902e+02, max 6.244e+03 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.332e-02, min 1.000e-03, max 4.809e-02 (8 samples) +#pr: best tun. rsb operation time was: on avg. 1.730e-07s, min 6.150e-08s, max 9.537e-07s, tot 1.384e-06s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.450e-02s, min 7.985e-03s, max 2.399e-02s, tot 1.160e-01s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.317e-01 1.269e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds memory bandwidth --- is this a tiny matrix ? +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.643e+00 +#pr: min / max ratio of in-cache MEMSET bandwidth to extrapolated read bandwidth ratio: 7.095e-01 2.086e+01 +#pr: # Warning: extrapolated memory I/O bandwidth exceeds cache bandwidth! +#pr: Record collection took 8.88 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 90 environment variables in 3809 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 90 environment variables in 3863 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1705165336_gcc-12.2.rpr -# Removing the temporary record file rsbench_pr__1705165336_gcc-12.2.rpr.tmp. -# terminating run at 1705165350 (after 14.2s of w.c.t.) -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash ./scripts/doc-tests.sh +#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1739579138_gcc-12.2.rpr +# Removing the temporary record file rsbench_pr__1739579138_gcc-12.2.rpr.tmp. +# terminating run at 1739579153 (after 14.8s of w.c.t.) +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh ./scripts/doc-tests.sh + set -o pipefail + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x -+ cat /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/autotune.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/io-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/power.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/snippets.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/transpose.c + grep '^.\{71,\}' ++ cat /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/autotune.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/io-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/power.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/snippets.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/transpose.c + true + cat /build/reproducible-path/librsb-1.3.0.2+dfsg/README + grep '^[^ ].\{80,\}' @@ -5020,7 +5076,7 @@ type char codes:D S C Z gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' if test /build/reproducible-path/librsb-1.3.0.2+dfsg != /build/reproducible-path/librsb-1.3.0.2+dfsg ; then cp /build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/vf.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/examples ; fi -( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) +( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) + which rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench + BRF=test.rpr @@ -5030,15 +5086,15 @@ Will invoke autotuning for ~10.000000 s x 1 rounds, specifying verbosity=0 and threads=0. (>0 means no structure tuning; 0 means only structure tuning, <0 means tuning of both with (negated) thread count suggestion). # Requested no transposition. # performance record file set to: test.rpr -# beginning run at 1705165351 +# beginning run at 1739579153 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench --lower 100 --as-symmetric --types : -n 1 --notranspose --compare-competitors --verbose --verbose --write-performance-record=test.rpr # compiled with: CC=gcc CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# average timer granularity: 5.97e-08 s +# average timer granularity: 6.61e-08 s # Will write a final performance record to file test.rpr and periodic checkpoints to test.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -5077,15 +5133,15 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos11-amd64 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 2 (each --verbose occurrence counts +1) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5097,29 +5153,29 @@ # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 5.528s +# Memory benchmark took 5.790s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 4 samples (1120 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 524288 bytes, per-thread 34952 bytes -# so far, program took 5.530s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 524288 bytes, per-thread 32768 bytes +# so far, program took 5.793s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.156e-01 s (100.00 %) - analyzed arrays in 3.638e-02 s (31.46 %) - cleaned-up arrays in 1.907e-05 s (0.02 %) + converted COO to RSB in 1.291e-01 s (100.00 %) + analyzed arrays in 3.368e-02 s (26.08 %) + cleaned-up arrays in 1.693e-05 s (0.01 %) deduplicated arrays in 1.502e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.830e-02 s (50.41 %) - memory allocations took 6.485e-05 s (0.06 %) + shuffled partitions in 6.339e-02 s (49.08 %) + memory allocations took 5.317e-05 s (0.04 %) leafs setup took 9.060e-06 s (0.01 %) - halfword conversion took 2.085e-02 s (18.03 %) -Built (100 x 100)[0x5585f4dba030]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.116s): (100 x 100)[0x5585f4dba030]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + halfword conversion took 3.198e-02 s (24.76 %) +Built (100 x 100)[0x55ea309ea030]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.129s): (100 x 100)[0x55ea309ea030]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. @@ -5130,7 +5186,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5145,7 +5201,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5160,7 +5216,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5170,16 +5226,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.03949s; avg 0.01316s ( +/- 32.17/ 21.74 %); best 0.008928s; worst 0.01603s; std dev. 0.003056 (taking best). -Reference operation time is 0.00892806 s (2.263 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.008928 Mflops: 2.263) -Merge (3 -> 1 leaves) took w.c.t. of 0.000206s, ~0.0001829s of computing time (of which 8.893e-05s sorting, 7.153e-06s analysis) +3 iterations (1 th.) took 0.05028s; avg 0.01676s ( +/- 4.74/ 9.32 %); best 0.01597s; worst 0.01832s; std dev. 0.001105 (taking best). +Reference operation time is 0.0159669 s (1.265 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01597 Mflops: 1.265) +Merge (3 -> 1 leaves) took w.c.t. of 0.0002551s, ~0.0001879s of computing time (of which 9.513e-05s sorting, 8.106e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5194,7 +5250,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5204,14 +5260,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.886e-05s; avg 1.295e-05s ( +/- 7.98/ 15.95 %); best 1.192e-05s; worst 1.502e-05s; std dev. 1.461e-06 (taking best). +3 iterations (1 th.) took 4.506e-05s; avg 1.502e-05s ( +/- 20.63/ 39.68 %); best 1.192e-05s; worst 2.098e-05s; std dev. 4.216e-06 (taking best). Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5222,15 +5278,15 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success After merge step 1: tpop: 1.192e-05 s ~Mflops: 1694.499 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 748.940x: 0.008928s -> 1.192e-05s, so taking this instance. +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1339.400x: 0.01597s -> 1.192e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1441s (of which 0.0002129s partitioning, 0.09609s I/O); computing times: 0.0001829s in par. loops, 8.893e-05s sorting, 7.153e-06s analyzing) -Total merge + benchmarking process took 0.1441s, equivalent to 12084.0/16.1 new/old ops (0.07193s for 2 clones -- as 6033.6/8.1 ops, or 3016.8/4.0 ops per clone), SPEEDUP of 748.940x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 748.940x (0.008928s -> 1.192e-05s), will amortize in 16.2 ops by saving 0.008916s per op. -In 1 tuning rounds (tot. 0.21s, 0.072s for constructor, 2 clones) obtained a SPEEDUP of 74794.0% (748.9x) (from 2.263 to 1694 Mflops). Employed 0.087s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1417s (of which 0.0002608s partitioning, 0.09604s I/O); computing times: 0.0001879s in par. loops, 9.513e-05s sorting, 8.106e-06s analyzing) +Total merge + benchmarking process took 0.1417s, equivalent to 11890.2/8.9 new/old ops (0.09315s for 2 clones -- as 7813.7/5.8 ops, or 3906.8/2.9 ops per clone), SPEEDUP of 1339.400x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1339.400x (0.01597s -> 1.192e-05s), will amortize in 8.9 ops by saving 0.01595s per op. +In 1 tuning rounds (tot. 0.24s, 0.093s for constructor, 2 clones) obtained a SPEEDUP of 133840.0% (1339x) (from 1.265 to 1694 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 1 (0^th of 4), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.295204 s (8.928e-03 s -> 1.192e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.336078 s (1.597e-02 s -> 1.192e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 @@ -5239,7 +5295,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5254,7 +5310,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5265,13 +5321,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 15. +Starting with requested 0 threads ; current default 1 ; at most 16. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5286,7 +5342,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5296,27 +5352,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.911e-05s; avg 1.637e-05s ( +/- 27.18/ 52.91 %); best 1.192e-05s; worst 2.503e-05s; std dev. 6.126e-06 (taking best). -Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. +3 iterations (1 th.) took 5.317e-05s; avg 1.772e-05s ( +/- 27.35/ 53.36 %); best 1.287e-05s; worst 2.718e-05s; std dev. 6.688e-06 (taking best). +Reference operation time is 1.28746e-05 s (1569 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 6.331e-02 s (100.00 %) - analyzed arrays in 3.127e-02 s (49.39 %) - cleaned-up arrays in 1.502e-05 s (0.02 %) - deduplicated arrays in 1.311e-05 s (0.02 %) + converted COO to RSB in 5.532e-02 s (100.00 %) + analyzed arrays in 2.327e-02 s (42.07 %) + cleaned-up arrays in 1.597e-05 s (0.03 %) + deduplicated arrays in 1.502e-05 s (0.03 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.199e-02 s (50.53 %) + shuffled partitions in 3.199e-02 s (57.83 %) memory allocations took 4.768e-06 s (0.01 %) - leafs setup took 2.861e-06 s (0.00 %) - halfword conversion took 1.097e-05 s (0.02 %) -Built (100 x 100)[0x5585f4dbf010]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 15). + leafs setup took 3.815e-06 s (0.01 %) + halfword conversion took 1.502e-05 s (0.03 %) +Built (100 x 100)[0x55ea309f9740]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 16). # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5327,12 +5383,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5343,22 +5399,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.988e-02 s (100.00 %) - analyzed arrays in 1.591e-02 s (19.92 %) - cleaned-up arrays in 1.502e-05 s (0.02 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 8.787e-02 s (100.00 %) + analyzed arrays in 2.385e-02 s (27.14 %) + cleaned-up arrays in 1.597e-05 s (0.02 %) + deduplicated arrays in 1.407e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.195e-02 s (40.00 %) - memory allocations took 9.298e-06 s (0.01 %) + shuffled partitions in 3.200e-02 s (36.42 %) + memory allocations took 7.868e-06 s (0.01 %) leafs setup took 5.007e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (40.03 %) -Built (100 x 100)[0x5585f4dcecc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' + halfword conversion took 3.198e-02 s (36.39 %) +Built (100 x 100)[0x55ea309ff340]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5373,7 +5429,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5388,7 +5444,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5398,16 +5454,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04763s; avg 0.01588s ( +/- 1.51/ 0.77 %); best 0.01564s; worst 0.016s; std dev. 0.0001697 (taking best). -Reference operation time is 0.015636 s (1.292 Mflops) with 1 threads. -Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.015636 s/0 threads (speedup 0.000762404 x), same?n. +3 iterations (1 th.) took 0.05163s; avg 0.01721s ( +/- 9.02/ 16.09 %); best 0.01566s; worst 0.01998s; std dev. 0.001963 (taking best). +Reference operation time is 0.0156569 s (1.29 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.0156569 s/0 threads (speedup 0.000822293 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5418,12 +5474,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5434,22 +5490,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.119e-01 s (100.00 %) - analyzed arrays in 4.781e-02 s (42.75 %) - cleaned-up arrays in 1.693e-05 s (0.02 %) - deduplicated arrays in 1.407e-05 s (0.01 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.201e-02 s (28.61 %) - memory allocations took 1.287e-05 s (0.01 %) - leafs setup took 7.868e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (28.59 %) -Built (100 x 100)[0x5585f4dba030]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' + converted COO to RSB in 9.988e-02 s (100.00 %) + analyzed arrays in 3.584e-02 s (35.88 %) + cleaned-up arrays in 1.502e-05 s (0.02 %) + deduplicated arrays in 1.311e-05 s (0.01 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.202e-02 s (32.05 %) + memory allocations took 1.097e-05 s (0.01 %) + leafs setup took 7.153e-06 s (0.01 %) + halfword conversion took 3.198e-02 s (32.02 %) +Built (100 x 100)[0x55ea309ea080]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5464,7 +5520,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5479,7 +5535,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5489,16 +5545,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04762s; avg 0.01587s ( +/- 1.60/ 0.84 %); best 0.01562s; worst 0.01601s; std dev. 0.0001792 (taking best). -Reference operation time is 0.015619 s (1.293 Mflops) with 1 threads. -Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.015619 s/0 threads (speedup 0.000763231 x), same?n. +3 iterations (1 th.) took 0.04761s; avg 0.01587s ( +/- 1.57/ 0.84 %); best 0.01562s; worst 0.016s; std dev. 0.0001765 (taking best). +Reference operation time is 0.0156209 s (1.293 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.0156209 s/0 threads (speedup 0.000824188 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5509,12 +5565,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5525,22 +5581,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 9.588e-02 s (100.00 %) - analyzed arrays in 3.183e-02 s (33.20 %) - cleaned-up arrays in 1.502e-05 s (0.02 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 1.039e-01 s (100.00 %) + analyzed arrays in 3.985e-02 s (38.36 %) + cleaned-up arrays in 1.407e-05 s (0.01 %) + deduplicated arrays in 1.192e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.202e-02 s (33.40 %) - memory allocations took 1.001e-05 s (0.01 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (33.35 %) -Built (100 x 100)[0x5585f4da8cc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 26, symflags:'LS' + shuffled partitions in 3.203e-02 s (30.82 %) + memory allocations took 1.311e-05 s (0.01 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.197e-02 s (30.77 %) +Built (100 x 100)[0x55ea309d8cc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 26, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5555,7 +5611,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5570,7 +5626,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5580,16 +5636,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04766s; avg 0.01589s ( +/- 1.38/ 0.77 %); best 0.01567s; worst 0.01601s; std dev. 0.0001552 (taking best). -Reference operation time is 0.015667 s (1.289 Mflops) with 1 threads. -Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 1, 26 leaves, 2.317 bytes/nz, 0.015667 s/0 threads (speedup 0.000760896 x), same?n. +3 iterations (1 th.) took 0.04766s; avg 0.01589s ( +/- 1.44/ 0.73 %); best 0.01566s; worst 0.016s; std dev. 0.0001622 (taking best). +Reference operation time is 0.0156579 s (1.29 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/1 threads) with: subdivision 1, 26 leaves, 2.317 bytes/nz, 0.0156579 s/0 threads (speedup 0.000822243 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 36 subms, 26 lsubms, 2.3168 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5600,12 +5656,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5616,22 +5672,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.986e-02 s (100.00 %) - analyzed arrays in 2.776e-02 s (34.77 %) - cleaned-up arrays in 1.812e-05 s (0.02 %) - deduplicated arrays in 1.502e-05 s (0.02 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.399e-02 s (30.04 %) - memory allocations took 7.486e-05 s (0.09 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 2.799e-02 s (35.05 %) -Built (100 x 100)[0x5585f4da8cc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' + converted COO to RSB in 1.039e-01 s (100.00 %) + analyzed arrays in 3.981e-02 s (38.31 %) + cleaned-up arrays in 1.597e-05 s (0.02 %) + deduplicated arrays in 1.383e-05 s (0.01 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 3.202e-02 s (30.82 %) + memory allocations took 7.391e-05 s (0.07 %) + leafs setup took 5.007e-06 s (0.00 %) + halfword conversion took 3.197e-02 s (30.76 %) +Built (100 x 100)[0x55ea309d8cc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5646,7 +5702,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5661,7 +5717,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5671,16 +5727,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.02767s; avg 0.009224s ( +/- 13.37/ 26.57 %); best 0.007991s; worst 0.01168s; std dev. 0.001733 (taking best). -Reference operation time is 0.00799084 s (2.528 Mflops) with 1 threads. -Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 2, 54 leaves, 2.459 bytes/nz, 0.00799084 s/0 threads (speedup 0.00149182 x), same?n. +3 iterations (1 th.) took 0.04786s; avg 0.01595s ( +/- 0.60/ 0.33 %); best 0.01586s; worst 0.01601s; std dev. 6.782e-05 (taking best). +Reference operation time is 0.0158589 s (1.274 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/1 threads) with: subdivision 2, 54 leaves, 2.459 bytes/nz, 0.0158589 s/0 threads (speedup 0.000811823 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 74 subms, 54 lsubms, 2.4594 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5691,12 +5747,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5707,22 +5763,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.189e-02 s (100.00 %) - analyzed arrays in 2.385e-02 s (33.18 %) - cleaned-up arrays in 1.502e-05 s (0.02 %) - deduplicated arrays in 1.287e-05 s (0.02 %) + converted COO to RSB in 1.039e-01 s (100.00 %) + analyzed arrays in 3.989e-02 s (38.38 %) + cleaned-up arrays in 1.597e-05 s (0.02 %) + deduplicated arrays in 1.407e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.401e-02 s (33.39 %) - memory allocations took 6.223e-05 s (0.09 %) - leafs setup took 9.775e-06 s (0.01 %) - halfword conversion took 2.393e-02 s (33.29 %) -Built (100 x 100)[0x5585f4d701d0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' + shuffled partitions in 3.197e-02 s (30.76 %) + memory allocations took 4.911e-05 s (0.05 %) + leafs setup took 4.053e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.78 %) +Built (100 x 100)[0x55ea309a01d0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5737,7 +5793,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5752,7 +5808,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5762,17 +5818,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0317s; avg 0.01057s ( +/- 26.86/ 51.04 %); best 0.007729s; worst 0.01596s; std dev. 0.003816 (taking best). -Reference operation time is 0.00772905 s (2.614 Mflops) with 1 threads. -Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 4, 54 leaves, 2.459 bytes/nz, 0.00772905 s/0 threads (speedup 0.00154235 x), same?n. +3 iterations (1 th.) took 0.05179s; avg 0.01726s ( +/- 8.44/ 15.76 %); best 0.01581s; worst 0.01999s; std dev. 0.001926 (taking best). +Reference operation time is 0.0158069 s (1.278 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/1 threads) with: subdivision 4, 54 leaves, 2.459 bytes/nz, 0.0158069 s/0 threads (speedup 0.000814492 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 74 subms, 54 lsubms, 2.4594 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1694.5 Mflops. +Best sparse multiply performance with subdivision multiplier of 1: 1568.98 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5782,23 +5838,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.70781 s (eq. to 6e+04/ 6e+04 old/new op.times), gained local/global speedup 1 x (1.19209e-05 : 1.19209e-05) / 1 x (1.19209e-05 : 1.19209e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.803774 s (eq. to 6e+04/ 6e+04 old/new op.times), gained local/global speedup 1 x (1.28746e-05 : 1.28746e-05) / 1 x (1.28746e-05 : 1.28746e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.71s, 0.5s for constructor, 0 clones) obtained NO speedup (best stays 1694 Mflops). -Second run of RSB Autotuner took 0.707889 s and estimated a speedup of 1.000000 x (1.192e-05 s -> 1.192e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.8s, 0.56s for constructor, 0 clones) obtained NO speedup (best stays 1569 Mflops). +Second run of RSB Autotuner took 0.803847 s and estimated a speedup of 1.000000 x (1.287e-05 s -> 1.287e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.036382 0.058297 0.094679 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.094679 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.036382 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.058297 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.033676 0.063391 0.097067 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.097067 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.033676 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063391 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.094679 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.097067 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -5813,34 +5869,34 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.115641 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.129148 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.036382 0 0.0582969 -# so far, program took 6.698s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.003s/0.000s . +%constructor:lower-100x100-5050nz 0 0.0336761 0 0.063391 +# so far, program took 7.139s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.140s/0.000s . getrusage() stats: ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.09054s (system CPU time used) -ru_utime : 15.67s (user CPU time used) +ru_stime : 0.1074s (system CPU time used) +ru_utime : 20.03s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). # Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 6.698s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.003s/0.000s . +# so far, program took 7.139s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.140s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.937e-02 s (100.00 %) - analyzed arrays in 1.536e-02 s (19.36 %) - cleaned-up arrays in 1.407e-05 s (0.02 %) - deduplicated arrays in 1.192e-05 s (0.02 %) - sorted arrays in 9.537e-07 s (0.00 %) - shuffled partitions in 4.398e-02 s (55.41 %) - memory allocations took 6.199e-06 s (0.01 %) - leafs setup took 4.053e-06 s (0.01 %) - halfword conversion took 1.999e-02 s (25.19 %) -Built (100 x 100)[0x5585f4dba030]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.079s): (100 x 100)[0x5585f4dba030]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + converted COO to RSB in 1.195e-01 s (100.00 %) + analyzed arrays in 2.353e-02 s (19.68 %) + cleaned-up arrays in 1.407e-05 s (0.01 %) + deduplicated arrays in 1.192e-05 s (0.01 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 6.398e-02 s (53.52 %) + memory allocations took 5.007e-06 s (0.00 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 3.201e-02 s (26.77 %) +Built (100 x 100)[0x55ea309ea720]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.120s): (100 x 100)[0x55ea309ea720]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. @@ -5851,7 +5907,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5866,7 +5922,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5881,7 +5937,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5891,16 +5947,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.03197s; avg 0.01066s ( +/- 25.04/ 12.66 %); best 0.007989s; worst 0.01201s; std dev. 0.001887 (taking best). -Reference operation time is 0.00798917 s (2.528 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.007989 Mflops: 2.528) -Merge (3 -> 1 leaves) took w.c.t. of 7.486e-05s, ~6.104e-05s of computing time (of which 2.408e-05s sorting, 5.007e-06s analysis) +3 iterations (1 th.) took 0.04799s; avg 0.016s ( +/- 0.10/ 0.06 %); best 0.01598s; worst 0.01601s; std dev. 1.097e-05 (taking best). +Reference operation time is 0.015981 s (1.264 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01598 Mflops: 1.264) +Merge (3 -> 1 leaves) took w.c.t. of 5.603e-05s, ~4.911e-05s of computing time (of which 2.098e-05s sorting, 2.146e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5915,7 +5971,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5925,14 +5981,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.886e-05s; avg 1.295e-05s ( +/- 0.61/ 1.23 %); best 1.287e-05s; worst 1.311e-05s; std dev. 1.124e-07 (taking best). -Reference operation time is 1.28746e-05 s (1569 Mflops) with 1 threads. +3 iterations (1 th.) took 3.815e-05s; avg 1.272e-05s ( +/- 6.25/ 3.12 %); best 1.192e-05s; worst 1.311e-05s; std dev. 5.62e-07 (taking best). +Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5942,16 +5998,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 1.287e-05 s ~Mflops: 1568.980 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 620.537x: 0.007989s -> 1.287e-05s, so taking this instance. +After merge step 1: tpop: 1.192e-05 s ~Mflops: 1694.499 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1340.580x: 0.01598s -> 1.192e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.09632s (of which 7.987e-05s partitioning, 0.06405s I/O); computing times: 6.104e-05s in par. loops, 2.408e-05s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.09632s, equivalent to 7481.6/12.1 new/old ops (0.06343s for 2 clones -- as 4926.8/7.9 ops, or 2463.4/4.0 ops per clone), SPEEDUP of 620.537x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 620.537x (0.007989s -> 1.287e-05s), will amortize in 12.1 ops by saving 0.007976s per op. -In 1 tuning rounds (tot. 0.16s, 0.063s for constructor, 2 clones) obtained a SPEEDUP of 61953.7% (620.5x) (from 2.528 to 1569 Mflops). Employed 0.064s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1442s (of which 5.913e-05s partitioning, 0.09605s I/O); computing times: 4.911e-05s in par. loops, 2.098e-05s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 0.1442s, equivalent to 12095.5/9.0 new/old ops (0.09561s for 2 clones -- as 8020.0/6.0 ops, or 4010.0/3.0 ops per clone), SPEEDUP of 1340.580x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1340.580x (0.01598s -> 1.192e-05s), will amortize in 9.0 ops by saving 0.01597s per op. +In 1 tuning rounds (tot. 0.24s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 133958.0% (1341x) (from 1.264 to 1694 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 2 (1^th of 4), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.22432 s (7.989e-03 s -> 1.287e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.336167 s (1.598e-02 s -> 1.192e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 @@ -5960,7 +6016,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5975,7 +6031,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5986,13 +6042,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 15. +Starting with requested 0 threads ; current default 1 ; at most 16. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6007,7 +6063,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6017,27 +6073,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.196e-05s; avg 1.399e-05s ( +/- 21.59/ 36.36 %); best 1.097e-05s; worst 1.907e-05s; std dev. 3.618e-06 (taking best). -Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. +3 iterations (1 th.) took 3.886e-05s; avg 1.295e-05s ( +/- 7.98/ 8.59 %); best 1.192e-05s; worst 1.407e-05s; std dev. 8.778e-07 (taking best). +Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 4.749e-02 s (100.00 %) - analyzed arrays in 1.545e-02 s (32.53 %) - cleaned-up arrays in 1.502e-05 s (0.03 %) - deduplicated arrays in 1.192e-05 s (0.03 %) + converted COO to RSB in 5.561e-02 s (100.00 %) + analyzed arrays in 2.358e-02 s (42.40 %) + cleaned-up arrays in 1.383e-05 s (0.02 %) + deduplicated arrays in 1.216e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.200e-02 s (67.39 %) + shuffled partitions in 3.200e-02 s (57.54 %) memory allocations took 5.007e-06 s (0.01 %) - leafs setup took 1.907e-06 s (0.00 %) - halfword conversion took 5.007e-06 s (0.01 %) -Built (100 x 100)[0x5585f4dbf010]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 15). + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 4.053e-06 s (0.01 %) +Built (100 x 100)[0x55ea309f9740]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 16). # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6048,12 +6104,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6064,22 +6120,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.191e-02 s (100.00 %) - analyzed arrays in 2.363e-02 s (74.03 %) - cleaned-up arrays in 1.502e-05 s (0.05 %) - deduplicated arrays in 1.216e-05 s (0.04 %) + converted COO to RSB in 3.984e-02 s (100.00 %) + analyzed arrays in 2.382e-02 s (59.79 %) + cleaned-up arrays in 1.407e-05 s (0.04 %) + deduplicated arrays in 1.192e-05 s (0.03 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 8.247e-03 s (25.84 %) - memory allocations took 3.815e-06 s (0.01 %) - leafs setup took 1.192e-06 s (0.00 %) - halfword conversion took 5.960e-06 s (0.02 %) -Built (100 x 100)[0x5585f4dba030]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' + shuffled partitions in 1.599e-02 s (40.13 %) + memory allocations took 1.907e-06 s (0.00 %) + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 4.053e-06 s (0.01 %) +Built (100 x 100)[0x55ea309eadc0]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6094,7 +6150,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6109,7 +6165,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6119,16 +6175,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 4.411e-05s; avg 1.47e-05s ( +/- 18.92/ 36.22 %); best 1.192e-05s; worst 2.003e-05s; std dev. 3.766e-06 (taking best). +3 iterations (1 th.) took 3.886e-05s; avg 1.295e-05s ( +/- 7.98/ 6.75 %); best 1.192e-05s; worst 1.383e-05s; std dev. 7.867e-07 (taking best). Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.25, 1 leaves, 2.08 bytes/nz, 1.19209e-05 s/0 threads (speedup 0.92 x), same?n. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.25, 1 leaves, 2.08 bytes/nz, 1.19209e-05 s/0 threads (speedup 1 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6139,12 +6195,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6155,22 +6211,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.938e-02 s (100.00 %) - analyzed arrays in 2.340e-02 s (29.48 %) + converted COO to RSB in 8.776e-02 s (100.00 %) + analyzed arrays in 2.380e-02 s (27.12 %) cleaned-up arrays in 1.407e-05 s (0.02 %) - deduplicated arrays in 1.311e-05 s (0.02 %) + deduplicated arrays in 1.192e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.197e-02 s (40.28 %) - memory allocations took 4.768e-06 s (0.01 %) - leafs setup took 5.007e-06 s (0.01 %) - halfword conversion took 2.397e-02 s (30.19 %) -Built (100 x 100)[0x5585f4dba800]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 8, symflags:'LS' + shuffled partitions in 3.196e-02 s (36.42 %) + memory allocations took 2.861e-06 s (0.00 %) + leafs setup took 2.146e-06 s (0.00 %) + halfword conversion took 3.197e-02 s (36.43 %) +Built (100 x 100)[0x55ea309c5100]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 8, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6185,7 +6241,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6200,7 +6256,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6210,16 +6266,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.03966s; avg 0.01322s ( +/- 11.90/ 21.03 %); best 0.01165s; worst 0.016s; std dev. 0.001972 (taking best). -Reference operation time is 0.0116458 s (1.735 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.5, 8 leaves, 2.185 bytes/nz, 0.0116458 s/0 threads (speedup 0.000941735 x), same?n. +3 iterations (1 th.) took 0.04786s; avg 0.01595s ( +/- 0.60/ 0.37 %); best 0.01586s; worst 0.01601s; std dev. 6.82e-05 (taking best). +Reference operation time is 0.0158579 s (1.274 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 0.5, 8 leaves, 2.185 bytes/nz, 0.0158579 s/0 threads (speedup 0.000751733 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 11 subms, 8 lsubms, 2.1846 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6230,12 +6286,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6246,22 +6302,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 6.790e-02 s (100.00 %) - analyzed arrays in 2.388e-02 s (35.17 %) - cleaned-up arrays in 1.407e-05 s (0.02 %) - deduplicated arrays in 1.287e-05 s (0.02 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.400e-02 s (35.35 %) - memory allocations took 8.821e-06 s (0.01 %) - leafs setup took 5.960e-06 s (0.01 %) - halfword conversion took 1.997e-02 s (29.41 %) -Built (100 x 100)[0x5585f4d95100]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 19, symflags:'LS' + converted COO to RSB in 1.039e-01 s (100.00 %) + analyzed arrays in 4.788e-02 s (46.06 %) + cleaned-up arrays in 1.597e-05 s (0.02 %) + deduplicated arrays in 1.407e-05 s (0.01 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 2.404e-02 s (23.13 %) + memory allocations took 5.960e-06 s (0.01 %) + leafs setup took 1.907e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.77 %) +Built (100 x 100)[0x55ea309c5100]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 17, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6276,7 +6332,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6291,7 +6347,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6301,16 +6357,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.03568s; avg 0.01189s ( +/- 30.31/ 32.07 %); best 0.008289s; worst 0.01571s; std dev. 0.003033 (taking best). -Reference operation time is 0.00828886 s (2.437 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 1, 19 leaves, 2.272 bytes/nz, 0.00828886 s/0 threads (speedup 0.00132313 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 26 subms, 19 lsubms, 2.2725 bpnz +3 iterations (1 th.) took 0.04783s; avg 0.01594s ( +/- 0.71/ 0.43 %); best 0.01583s; worst 0.01601s; std dev. 8.09e-05 (taking best). +Reference operation time is 0.015831 s (1.276 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 1, 17 leaves, 2.25 bytes/nz, 0.015831 s/0 threads (speedup 0.000753012 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 24 subms, 17 lsubms, 2.2503 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6321,12 +6377,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6337,22 +6393,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.039e-01 s (100.00 %) - analyzed arrays in 3.989e-02 s (38.39 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.287e-05 s (0.01 %) + converted COO to RSB in 1.040e-01 s (100.00 %) + analyzed arrays in 3.993e-02 s (38.41 %) + cleaned-up arrays in 1.287e-05 s (0.01 %) + deduplicated arrays in 1.311e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.201e-02 s (30.81 %) - memory allocations took 1.812e-05 s (0.02 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 3.195e-02 s (30.75 %) -Built (100 x 100)[0x5585f4d95100]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 41, symflags:'LS' + shuffled partitions in 3.201e-02 s (30.79 %) + memory allocations took 7.153e-06 s (0.01 %) + leafs setup took 3.815e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.76 %) +Built (100 x 100)[0x55ea309c5100]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 40, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6367,7 +6423,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6382,7 +6438,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6392,16 +6448,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04797s; avg 0.01599s ( +/- 0.30/ 0.15 %); best 0.01594s; worst 0.01601s; std dev. 3.361e-05 (taking best). -Reference operation time is 0.0159411 s (1.267 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 2, 41 leaves, 2.399 bytes/nz, 0.0159411 s/0 threads (speedup 0.000687984 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 56 subms, 41 lsubms, 2.3992 bpnz +3 iterations (1 th.) took 0.04796s; avg 0.01599s ( +/- 0.13/ 0.08 %); best 0.01597s; worst 0.016s; std dev. 1.503e-05 (taking best). +Reference operation time is 0.0159669 s (1.265 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 2, 40 leaves, 2.388 bytes/nz, 0.0159669 s/0 threads (speedup 0.000746603 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 55 subms, 40 lsubms, 2.3881 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6412,12 +6468,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6428,22 +6484,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.037e-01 s (100.00 %) - analyzed arrays in 4.762e-02 s (45.94 %) - cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 1.038e-01 s (100.00 %) + analyzed arrays in 3.974e-02 s (38.29 %) + cleaned-up arrays in 1.597e-05 s (0.02 %) + deduplicated arrays in 1.407e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.201e-02 s (30.88 %) - memory allocations took 1.383e-05 s (0.01 %) - leafs setup took 6.199e-06 s (0.01 %) - halfword conversion took 2.398e-02 s (23.13 %) -Built (100 x 100)[0x5585f4d95100]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' + shuffled partitions in 3.202e-02 s (30.85 %) + memory allocations took 9.060e-06 s (0.01 %) + leafs setup took 4.053e-06 s (0.00 %) + halfword conversion took 3.199e-02 s (30.82 %) +Built (100 x 100)[0x55ea309c5100]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 51, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6458,7 +6514,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6473,7 +6529,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6483,17 +6539,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04799s; avg 0.016s ( +/- 0.06/ 0.06 %); best 0.01599s; worst 0.01601s; std dev. 7.826e-06 (taking best). -Reference operation time is 0.0159879 s (1.263 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 4, 54 leaves, 2.459 bytes/nz, 0.0159879 s/0 threads (speedup 0.000685973 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 74 subms, 54 lsubms, 2.4594 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1841.85 Mflops. +3 iterations (1 th.) took 0.04795s; avg 0.01598s ( +/- 0.20/ 0.14 %); best 0.01595s; worst 0.01601s; std dev. 2.338e-05 (taking best). +Reference operation time is 0.0159512 s (1.266 Mflops) with 1 threads. +Challenging best inner round reference (1.19209e-05 s/1 threads) with: subdivision 4, 51 leaves, 2.447 bytes/nz, 0.0159512 s/0 threads (speedup 0.000747339 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 70 subms, 51 lsubms, 2.4467 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1694.5 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6503,29 +6559,29 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.60764 s (eq. to 6e+04/ 6e+04 old/new op.times), gained local/global speedup 1 x (1.09673e-05 : 1.09673e-05) / 1 x (1.09673e-05 : 1.09673e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.687847 s (eq. to 6e+04/ 6e+04 old/new op.times), gained local/global speedup 1 x (1.19209e-05 : 1.19209e-05) / 1 x (1.19209e-05 : 1.19209e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.61s, 0.43s for constructor, 0 clones) obtained NO speedup (best stays 1842 Mflops). -Second run of RSB Autotuner took 0.607706 s and estimated a speedup of 1.000000 x (1.097e-05 s -> 1.097e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.69s, 0.5s for constructor, 0 clones) obtained NO speedup (best stays 1694 Mflops). +Second run of RSB Autotuner took 0.687899 s and estimated a speedup of 1.000000 x (1.192e-05 s -> 1.192e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000001 0.015364 0.043977 0.059341 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.059342 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.015364 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.043977 -%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000001 -%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.000 -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.059341 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.023526 0.063985 0.087511 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087511 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.023526 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063985 +%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 +%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087511 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SUBDIVISION_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SHUFFLE_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 -%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 1.00 1.00 1.00 +%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:lower-100x100-5050nz S N 1 100 100 5050 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:lower-100x100-5050nz S N 1 100 100 5050 10504 40400 20600 @@ -6534,34 +6590,34 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.0793731 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.11955 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 9.53674e-07 0.0153639 0 0.043977 -# so far, program took 7.690s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.835s/0.000s . +%constructor:lower-100x100-5050nz 0 0.023526 0 0.0639849 +# so far, program took 8.363s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.164s/0.000s . getrusage() stats: ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.1778s (system CPU time used) -ru_utime : 26.29s (user CPU time used) +ru_stime : 0.1551s (system CPU time used) +ru_utime : 34.15s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was D). # Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 7.690s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.835s/0.000s . +# so far, program took 8.363s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.164s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.275e-01 s (100.00 %) - analyzed arrays in 3.147e-02 s (24.69 %) - cleaned-up arrays in 1.383e-05 s (0.01 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + converted COO to RSB in 1.196e-01 s (100.00 %) + analyzed arrays in 2.358e-02 s (19.72 %) + cleaned-up arrays in 1.407e-05 s (0.01 %) + deduplicated arrays in 1.192e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 6.398e-02 s (50.19 %) - memory allocations took 5.245e-06 s (0.00 %) - leafs setup took 2.861e-06 s (0.00 %) - halfword conversion took 3.200e-02 s (25.10 %) -Built (100 x 100)[0x5585f4de09d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.128s): (100 x 100)[0x5585f4de09d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 6.399e-02 s (53.50 %) + memory allocations took 5.007e-06 s (0.00 %) + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 3.200e-02 s (26.75 %) +Built (100 x 100)[0x55ea30a10a10]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.120s): (100 x 100)[0x55ea30a10a10]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. @@ -6572,7 +6628,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6587,7 +6643,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6602,7 +6658,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6612,16 +6668,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04796s; avg 0.01599s ( +/- 0.19/ 0.10 %); best 0.01595s; worst 0.016s; std dev. 2.198e-05 (taking best). -Reference operation time is 0.015955 s (5.064 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01595 Mflops: 5.064) -Merge (3 -> 1 leaves) took w.c.t. of 8.297e-05s, ~6.795e-05s of computing time (of which 3.195e-05s sorting, 5.007e-06s analysis) +3 iterations (1 th.) took 0.04799s; avg 0.016s ( +/- 0.07/ 0.05 %); best 0.01599s; worst 0.016s; std dev. 8.074e-06 (taking best). +Reference operation time is 0.015985 s (5.055 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01599 Mflops: 5.055) +Merge (3 -> 1 leaves) took w.c.t. of 7.105e-05s, ~6.199e-05s of computing time (of which 2.384e-05s sorting, 1.907e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6636,7 +6692,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6646,14 +6702,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001559s; avg 5.198e-05s ( +/- 1.83/ 3.67 %); best 5.102e-05s; worst 5.388e-05s; std dev. 1.349e-06 (taking best). -Reference operation time is 5.10216e-05 s (1584 Mflops) with 1 threads. +3 iterations (1 th.) took 0.000118s; avg 3.934e-05s ( +/- 3.03/ 4.24 %); best 3.815e-05s; worst 4.101e-05s; std dev. 1.216e-06 (taking best). +Reference operation time is 3.8147e-05 s (2118 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6663,16 +6719,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 5.102e-05 s ~Mflops: 1583.644 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 312.710x: 0.01595s -> 5.102e-05s, so taking this instance. +After merge step 1: tpop: 3.815e-05 s ~Mflops: 2118.124 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 419.038x: 0.01599s -> 3.815e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1441s (of which 8.702e-05s partitioning, 0.09607s I/O); computing times: 6.795e-05s in par. loops, 3.195e-05s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.1441s, equivalent to 2823.6/9.0 new/old ops (0.09529s for 2 clones -- as 1867.7/6.0 ops, or 933.9/3.0 ops per clone), SPEEDUP of 312.710x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 312.710x (0.01595s -> 5.102e-05s), will amortize in 9.1 ops by saving 0.0159s per op. -In 1 tuning rounds (tot. 0.24s, 0.095s for constructor, 2 clones) obtained a SPEEDUP of 31171.0% (312.7x) (from 5.064 to 1584 Mflops). Employed 0.096s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.144s (of which 7.486e-05s partitioning, 0.09604s I/O); computing times: 6.199e-05s in par. loops, 2.384e-05s sorting, 1.907e-06s analyzing) +Total merge + benchmarking process took 0.144s, equivalent to 3775.7/9.0 new/old ops (0.0955s for 2 clones -- as 2503.5/6.0 ops, or 1251.8/3.0 ops per clone), SPEEDUP of 419.038x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 419.038x (0.01599s -> 3.815e-05s), will amortize in 9.0 ops by saving 0.01595s per op. +In 1 tuning rounds (tot. 0.24s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 41803.8% (419x) (from 5.055 to 2118 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 3 (2^th of 4), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.336075 s (1.595e-02 s -> 5.102e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.33601 s (1.599e-02 s -> 3.815e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 @@ -6681,7 +6737,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6696,7 +6752,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6707,13 +6763,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 15. +Starting with requested 0 threads ; current default 1 ; at most 16. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6728,7 +6784,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6738,27 +6794,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001121s; avg 3.735e-05s ( +/- 9.36/ 18.09 %); best 3.386e-05s; worst 4.411e-05s; std dev. 4.778e-06 (taking best). +3 iterations (1 th.) took 0.0001051s; avg 3.505e-05s ( +/- 3.40/ 6.12 %); best 3.386e-05s; worst 3.719e-05s; std dev. 1.52e-06 (taking best). Reference operation time is 3.38554e-05 s (2387 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 5.537e-02 s (100.00 %) - analyzed arrays in 1.933e-02 s (34.92 %) - cleaned-up arrays in 1.383e-05 s (0.02 %) - deduplicated arrays in 1.311e-05 s (0.02 %) + converted COO to RSB in 5.556e-02 s (100.00 %) + analyzed arrays in 2.354e-02 s (42.37 %) + cleaned-up arrays in 1.407e-05 s (0.03 %) + deduplicated arrays in 1.192e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.599e-02 s (65.00 %) - memory allocations took 5.007e-06 s (0.01 %) - leafs setup took 2.861e-06 s (0.01 %) - halfword conversion took 8.106e-06 s (0.01 %) -Built (100 x 100)[0x5585f4dbf010]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 15). + shuffled partitions in 3.198e-02 s (57.57 %) + memory allocations took 3.338e-06 s (0.01 %) + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 4.053e-06 s (0.01 %) +Built (100 x 100)[0x55ea309f9740]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 16). # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6769,12 +6825,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6785,22 +6841,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.991e-02 s (100.00 %) - analyzed arrays in 1.590e-02 s (19.90 %) - cleaned-up arrays in 1.502e-05 s (0.02 %) - deduplicated arrays in 1.192e-05 s (0.01 %) + converted COO to RSB in 8.792e-02 s (100.00 %) + analyzed arrays in 2.392e-02 s (27.20 %) + cleaned-up arrays in 1.597e-05 s (0.02 %) + deduplicated arrays in 1.383e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.199e-02 s (40.03 %) - memory allocations took 7.153e-06 s (0.01 %) - leafs setup took 1.907e-06 s (0.00 %) - halfword conversion took 3.199e-02 s (40.03 %) -Built (100 x 100)[0x5585f4dba800]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' + shuffled partitions in 3.198e-02 s (36.37 %) + memory allocations took 4.292e-06 s (0.00 %) + leafs setup took 0.000e+00 s (0.00 %) + halfword conversion took 3.199e-02 s (36.39 %) +Built (100 x 100)[0x55ea30a10a10]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6815,7 +6871,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6830,7 +6886,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6840,16 +6896,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.05164s; avg 0.01721s ( +/- 7.06/ 14.10 %); best 0.016s; worst 0.01964s; std dev. 0.001716 (taking best). -Reference operation time is 0.0159969 s (5.051 Mflops) with 1 threads. -Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.0159969 s/0 threads (speedup 0.00211637 x), same?n. +3 iterations (1 th.) took 0.04783s; avg 0.01594s ( +/- 0.71/ 0.36 %); best 0.01583s; worst 0.016s; std dev. 8.014e-05 (taking best). +Reference operation time is 0.01583 s (5.104 Mflops) with 1 threads. +Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.01583 s/0 threads (speedup 0.00213868 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6860,12 +6916,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6876,22 +6932,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.078e-01 s (100.00 %) - analyzed arrays in 3.980e-02 s (36.91 %) - cleaned-up arrays in 1.478e-05 s (0.01 %) + converted COO to RSB in 1.039e-01 s (100.00 %) + analyzed arrays in 3.990e-02 s (38.40 %) + cleaned-up arrays in 1.407e-05 s (0.01 %) deduplicated arrays in 1.192e-05 s (0.01 %) - sorted arrays in 1.192e-06 s (0.00 %) - shuffled partitions in 3.201e-02 s (29.68 %) - memory allocations took 8.106e-06 s (0.01 %) - leafs setup took 3.099e-06 s (0.00 %) - halfword conversion took 3.598e-02 s (33.37 %) -Built (100 x 100)[0x5585f4dba800]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 3.201e-02 s (30.80 %) + memory allocations took 4.053e-06 s (0.00 %) + leafs setup took 1.907e-06 s (0.00 %) + halfword conversion took 3.199e-02 s (30.78 %) +Built (100 x 100)[0x55ea30a10a10]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6906,7 +6962,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6921,7 +6977,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6931,16 +6987,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04366s; avg 0.01455s ( +/- 19.89/ 9.97 %); best 0.01166s; worst 0.016s; std dev. 0.002047 (taking best). -Reference operation time is 0.011657 s (6.931 Mflops) with 1 threads. -Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.011657 s/0 threads (speedup 0.0029043 x), same?n. +3 iterations (1 th.) took 0.04785s; avg 0.01595s ( +/- 0.65/ 0.35 %); best 0.01585s; worst 0.016s; std dev. 7.289e-05 (taking best). +Reference operation time is 0.015846 s (5.099 Mflops) with 1 threads. +Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.015846 s/0 threads (speedup 0.00213653 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6951,12 +7007,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6967,22 +7023,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 9.589e-02 s (100.00 %) - analyzed arrays in 3.985e-02 s (41.56 %) + converted COO to RSB in 1.039e-01 s (100.00 %) + analyzed arrays in 3.990e-02 s (38.39 %) cleaned-up arrays in 1.407e-05 s (0.01 %) deduplicated arrays in 1.216e-05 s (0.01 %) - sorted arrays in 9.537e-07 s (0.00 %) - shuffled partitions in 2.401e-02 s (25.04 %) - memory allocations took 9.775e-06 s (0.01 %) - leafs setup took 5.007e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (33.35 %) -Built (100 x 100)[0x5585f4d701d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 26, symflags:'LS' + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 3.201e-02 s (30.80 %) + memory allocations took 4.768e-06 s (0.00 %) + leafs setup took 3.099e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.78 %) +Built (100 x 100)[0x55ea309a01d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 26, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6997,7 +7053,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7012,7 +7068,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7022,16 +7078,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04772s; avg 0.01591s ( +/- 1.02/ 0.67 %); best 0.01575s; worst 0.01601s; std dev. 0.0001163 (taking best). -Reference operation time is 0.0157459 s (5.132 Mflops) with 1 threads. -Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 1, 26 leaves, 2.317 bytes/nz, 0.0157459 s/0 threads (speedup 0.00215011 x), same?n. +3 iterations (1 th.) took 0.04785s; avg 0.01595s ( +/- 0.72/ 0.42 %); best 0.01584s; worst 0.01602s; std dev. 8.187e-05 (taking best). +Reference operation time is 0.015835 s (5.103 Mflops) with 1 threads. +Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 1, 26 leaves, 2.317 bytes/nz, 0.015835 s/0 threads (speedup 0.00213801 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 36 subms, 26 lsubms, 2.3168 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7042,12 +7098,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7059,21 +7115,21 @@ Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 converted COO to RSB in 1.039e-01 s (100.00 %) - analyzed arrays in 3.983e-02 s (38.35 %) - cleaned-up arrays in 1.717e-05 s (0.02 %) - deduplicated arrays in 1.407e-05 s (0.01 %) + analyzed arrays in 3.987e-02 s (38.37 %) + cleaned-up arrays in 1.407e-05 s (0.01 %) + deduplicated arrays in 1.311e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.202e-02 s (30.82 %) - memory allocations took 1.073e-05 s (0.01 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (30.78 %) -Built (100 x 100)[0x5585f4d701d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' + shuffled partitions in 3.205e-02 s (30.84 %) + memory allocations took 5.960e-06 s (0.01 %) + leafs setup took 7.153e-06 s (0.01 %) + halfword conversion took 3.194e-02 s (30.74 %) +Built (100 x 100)[0x55ea309a01d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7088,7 +7144,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7103,7 +7159,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7113,16 +7169,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0476s; avg 0.01587s ( +/- 1.69/ 0.85 %); best 0.0156s; worst 0.016s; std dev. 0.0001892 (taking best). -Reference operation time is 0.0155981 s (5.18 Mflops) with 1 threads. -Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 2, 54 leaves, 2.459 bytes/nz, 0.0155981 s/0 threads (speedup 0.00217049 x), same?n. +3 iterations (1 th.) took 0.04772s; avg 0.01591s ( +/- 0.97/ 0.59 %); best 0.01575s; worst 0.016s; std dev. 0.0001102 (taking best). +Reference operation time is 0.0157518 s (5.13 Mflops) with 1 threads. +Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 2, 54 leaves, 2.459 bytes/nz, 0.0157518 s/0 threads (speedup 0.0021493 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 74 subms, 54 lsubms, 2.4594 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7133,12 +7189,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7149,22 +7205,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.039e-01 s (100.00 %) - analyzed arrays in 3.987e-02 s (38.37 %) - cleaned-up arrays in 1.407e-05 s (0.01 %) - deduplicated arrays in 1.192e-05 s (0.01 %) - sorted arrays in 9.537e-07 s (0.00 %) - shuffled partitions in 3.201e-02 s (30.81 %) - memory allocations took 1.717e-05 s (0.02 %) - leafs setup took 8.106e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (30.77 %) -Built (100 x 100)[0x5585f4d701d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' + converted COO to RSB in 1.079e-01 s (100.00 %) + analyzed arrays in 3.984e-02 s (36.92 %) + cleaned-up arrays in 1.717e-05 s (0.02 %) + deduplicated arrays in 1.478e-05 s (0.01 %) + sorted arrays in 0.000e+00 s (0.00 %) + shuffled partitions in 3.202e-02 s (29.67 %) + memory allocations took 1.216e-05 s (0.01 %) + leafs setup took 5.007e-06 s (0.00 %) + halfword conversion took 3.600e-02 s (33.36 %) +Built (100 x 100)[0x55ea309a01d0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7179,7 +7235,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7194,7 +7250,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7204,9 +7260,9 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0477s; avg 0.0159s ( +/- 1.81/ 1.23 %); best 0.01561s; worst 0.0161s; std dev. 0.0002079 (taking best). -Reference operation time is 0.0156121 s (5.175 Mflops) with 1 threads. -Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 4, 54 leaves, 2.459 bytes/nz, 0.0156121 s/0 threads (speedup 0.00216853 x), same?n. +3 iterations (1 th.) took 0.04781s; avg 0.01594s ( +/- 0.82/ 0.43 %); best 0.01581s; worst 0.016s; std dev. 9.285e-05 (taking best). +Reference operation time is 0.015805 s (5.112 Mflops) with 1 threads. +Challenging best inner round reference (3.38554e-05 s/1 threads) with: subdivision 4, 54 leaves, 2.459 bytes/nz, 0.015805 s/0 threads (speedup 0.00214207 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 74 subms, 54 lsubms, 2.4594 bpnz Best sparse multiply performance with subdivision multiplier of 1: 2386.62 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing @@ -7214,7 +7270,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7224,23 +7280,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.787967 s (eq. to 2e+04/ 2e+04 old/new op.times), gained local/global speedup 1 x (3.38554e-05 : 3.38554e-05) / 1 x (3.38554e-05 : 3.38554e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.803918 s (eq. to 2e+04/ 2e+04 old/new op.times), gained local/global speedup 1 x (3.38554e-05 : 3.38554e-05) / 1 x (3.38554e-05 : 3.38554e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.79s, 0.55s for constructor, 0 clones) obtained NO speedup (best stays 2387 Mflops). -Second run of RSB Autotuner took 0.788032 s and estimated a speedup of 1.000000 x (3.386e-05 s -> 3.386e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.8s, 0.56s for constructor, 0 clones) obtained NO speedup (best stays 2387 Mflops). +Second run of RSB Autotuner took 0.803973 s and estimated a speedup of 1.000000 x (3.386e-05 s -> 3.386e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.031475 0.063984 0.095459 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.095459 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.031475 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063984 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.023584 0.063987 0.087571 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087571 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.023584 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063987 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.095459 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087571 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -7255,34 +7311,34 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.127493 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.119599 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.0314748 0 0.0639839 -# so far, program took 9.022s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.959s/0.000s . +%constructor:lower-100x100-5050nz 0 0.0235839 0 0.063987 +# so far, program took 9.703s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.304s/0.000s . getrusage() stats: ru_maxrss: 7 (maximum resident set size -- MB) -ru_stime : 0.3328s (system CPU time used) -ru_utime : 39.31s (user CPU time used) +ru_stime : 0.203s (system CPU time used) +ru_utime : 49.22s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was D). # Cache block size total 524288 bytes, per-thread 524288 bytes -# so far, program took 9.022s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.959s/0.000s . +# so far, program took 9.703s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.304s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.154e-01 s (100.00 %) - analyzed arrays in 2.341e-02 s (20.28 %) - cleaned-up arrays in 1.693e-05 s (0.01 %) - deduplicated arrays in 1.407e-05 s (0.01 %) + converted COO to RSB in 1.194e-01 s (100.00 %) + analyzed arrays in 2.341e-02 s (19.60 %) + cleaned-up arrays in 1.407e-05 s (0.01 %) + deduplicated arrays in 1.287e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.998e-02 s (51.97 %) + shuffled partitions in 6.399e-02 s (53.57 %) memory allocations took 6.914e-06 s (0.01 %) - leafs setup took 4.053e-06 s (0.00 %) - halfword conversion took 3.199e-02 s (27.72 %) -Built (100 x 100)[0x5585f4dea7a0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.116s): (100 x 100)[0x5585f4dea7a0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + leafs setup took 9.537e-07 s (0.00 %) + halfword conversion took 3.200e-02 s (26.79 %) +Built (100 x 100)[0x55ea30a1a7e0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.119s): (100 x 100)[0x55ea30a1a7e0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. @@ -7293,7 +7349,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7308,7 +7364,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7323,7 +7379,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7333,16 +7389,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04797s; avg 0.01599s ( +/- 0.16/ 0.12 %); best 0.01596s; worst 0.01601s; std dev. 1.871e-05 (taking best). -Reference operation time is 0.0159628 s (5.062 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01596 Mflops: 5.062) -Merge (3 -> 1 leaves) took w.c.t. of 8.798e-05s, ~7.105e-05s of computing time (of which 3.91e-05s sorting, 5.007e-06s analysis) +3 iterations (1 th.) took 0.04796s; avg 0.01599s ( +/- 0.08/ 0.05 %); best 0.01597s; worst 0.016s; std dev. 9.087e-06 (taking best). +Reference operation time is 0.015975 s (5.058 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01597 Mflops: 5.058) +Merge (3 -> 1 leaves) took w.c.t. of 7.51e-05s, ~6.795e-05s of computing time (of which 3.6e-05s sorting, 2.146e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7357,7 +7413,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7367,14 +7423,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000104s; avg 3.465e-05s ( +/- 2.29/ 3.90 %); best 3.386e-05s; worst 3.6e-05s; std dev. 9.603e-07 (taking best). -Reference operation time is 3.38554e-05 s (2387 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001061s; avg 3.537e-05s ( +/- 0.90/ 1.80 %); best 3.505e-05s; worst 3.6e-05s; std dev. 4.496e-07 (taking best). +Reference operation time is 3.50475e-05 s (2305 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7384,16 +7440,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 3.386e-05 s ~Mflops: 2386.618 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 471.500x: 0.01596s -> 3.386e-05s, so taking this instance. +After merge step 1: tpop: 3.505e-05 s ~Mflops: 2305.441 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 455.810x: 0.01597s -> 3.505e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1443s (of which 9.203e-05s partitioning, 0.09605s I/O); computing times: 7.105e-05s in par. loops, 3.91e-05s sorting, 5.007e-06s analyzing) -Total merge + benchmarking process took 0.1443s, equivalent to 4263.0/9.0 new/old ops (0.09523s for 2 clones -- as 2812.8/6.0 ops, or 1406.4/3.0 ops per clone), SPEEDUP of 471.500x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 471.500x (0.01596s -> 3.386e-05s), will amortize in 9.1 ops by saving 0.01593s per op. -In 1 tuning rounds (tot. 0.24s, 0.095s for constructor, 2 clones) obtained a SPEEDUP of 47050.0% (471.5x) (from 5.062 to 2387 Mflops). Employed 0.096s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1476s (of which 7.987e-05s partitioning, 0.09941s I/O); computing times: 6.795e-05s in par. loops, 3.6e-05s sorting, 2.146e-06s analyzing) +Total merge + benchmarking process took 0.1476s, equivalent to 4211.7/9.2 new/old ops (0.09554s for 2 clones -- as 2725.9/6.0 ops, or 1362.9/3.0 ops per clone), SPEEDUP of 455.810x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 455.810x (0.01597s -> 3.505e-05s), will amortize in 9.3 ops by saving 0.01594s per op. +In 1 tuning rounds (tot. 0.24s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 45481.0% (455.8x) (from 5.058 to 2305 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 4 (3^th of 4), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.336311 s (1.596e-02 s -> 3.386e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.339604 s (1.597e-02 s -> 3.505e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 @@ -7402,7 +7458,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7417,7 +7473,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7428,13 +7484,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 15. +Starting with requested 0 threads ; current default 1 ; at most 16. # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7449,7 +7505,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7459,27 +7515,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001161s; avg 3.87e-05s ( +/- 9.45/ 18.89 %); best 3.505e-05s; worst 4.601e-05s; std dev. 5.17e-06 (taking best). +3 iterations (1 th.) took 0.0001152s; avg 3.839e-05s ( +/- 8.70/ 14.91 %); best 3.505e-05s; worst 4.411e-05s; std dev. 4.065e-06 (taking best). Reference operation time is 3.50475e-05 s (2305 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 5.532e-02 s (100.00 %) - analyzed arrays in 2.329e-02 s (42.10 %) - cleaned-up arrays in 1.383e-05 s (0.02 %) - deduplicated arrays in 1.216e-05 s (0.02 %) + converted COO to RSB in 5.599e-02 s (100.00 %) + analyzed arrays in 2.000e-02 s (35.72 %) + cleaned-up arrays in 1.407e-05 s (0.03 %) + deduplicated arrays in 1.192e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.199e-02 s (57.83 %) - memory allocations took 4.053e-06 s (0.01 %) - leafs setup took 3.099e-06 s (0.01 %) + shuffled partitions in 3.595e-02 s (64.21 %) + memory allocations took 4.768e-06 s (0.01 %) + leafs setup took 1.907e-06 s (0.00 %) halfword conversion took 5.960e-06 s (0.01 %) -Built (100 x 100)[0x5585f4dbf010]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 15). +Built (100 x 100)[0x55ea309f9740]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 16). # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7490,12 +7546,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7506,22 +7562,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 8.760e-02 s (100.00 %) - analyzed arrays in 2.360e-02 s (26.93 %) + converted COO to RSB in 9.172e-02 s (100.00 %) + analyzed arrays in 2.373e-02 s (25.87 %) cleaned-up arrays in 1.407e-05 s (0.02 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + deduplicated arrays in 1.287e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.195e-02 s (36.48 %) - memory allocations took 5.722e-06 s (0.01 %) - leafs setup took 5.960e-06 s (0.01 %) - halfword conversion took 3.201e-02 s (36.54 %) -Built (100 x 100)[0x5585f4db5140]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' + shuffled partitions in 2.799e-02 s (30.52 %) + memory allocations took 4.768e-06 s (0.01 %) + leafs setup took 4.053e-06 s (0.00 %) + halfword conversion took 3.996e-02 s (43.57 %) +Built (100 x 100)[0x55ea30a1a7e0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7536,7 +7592,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7551,7 +7607,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7561,16 +7617,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0476s; avg 0.01587s ( +/- 1.63/ 0.83 %); best 0.01561s; worst 0.016s; std dev. 0.0001833 (taking best). -Reference operation time is 0.0156062 s (5.177 Mflops) with 1 threads. -Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 0.25, 6 leaves, 2.163 bytes/nz, 0.0156062 s/0 threads (speedup 0.00224575 x), same?n. +3 iterations (1 th.) took 0.03973s; avg 0.01324s ( +/- 41.60/ 20.82 %); best 0.007734s; worst 0.016s; std dev. 0.003896 (taking best). +Reference operation time is 0.00773406 s (10.45 Mflops) with 1 threads. +Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 0.25, 6 leaves, 2.163 bytes/nz, 0.00773406 s/0 threads (speedup 0.00453158 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 8 subms, 6 lsubms, 2.1632 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7581,12 +7637,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7598,21 +7654,21 @@ Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 converted COO to RSB in 1.038e-01 s (100.00 %) - analyzed arrays in 3.978e-02 s (38.32 %) - cleaned-up arrays in 1.693e-05 s (0.02 %) - deduplicated arrays in 1.311e-05 s (0.01 %) + analyzed arrays in 3.981e-02 s (38.33 %) + cleaned-up arrays in 1.502e-05 s (0.01 %) + deduplicated arrays in 1.192e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.201e-02 s (30.83 %) - memory allocations took 9.060e-06 s (0.01 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 3.198e-02 s (30.80 %) -Built (100 x 100)[0x5585f4db5140]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 18, symflags:'LS' + shuffled partitions in 3.202e-02 s (30.84 %) + memory allocations took 1.001e-05 s (0.01 %) + leafs setup took 4.053e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.79 %) +Built (100 x 100)[0x55ea30a1a7e0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 18, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7627,7 +7683,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7642,7 +7698,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7652,16 +7708,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.03966s; avg 0.01322s ( +/- 10.95/ 20.99 %); best 0.01177s; worst 0.016s; std dev. 0.001963 (taking best). -Reference operation time is 0.0117741 s (6.863 Mflops) with 1 threads. -Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 0.5, 18 leaves, 2.261 bytes/nz, 0.0117741 s/0 threads (speedup 0.00297667 x), same?n. +3 iterations (1 th.) took 0.04737s; avg 0.01579s ( +/- 2.68/ 1.44 %); best 0.01537s; worst 0.01602s; std dev. 0.0002994 (taking best). +Reference operation time is 0.015367 s (5.258 Mflops) with 1 threads. +Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 0.5, 18 leaves, 2.261 bytes/nz, 0.015367 s/0 threads (speedup 0.0022807 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 25 subms, 18 lsubms, 2.2614 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7672,12 +7728,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7691,19 +7747,19 @@ converted COO to RSB in 1.038e-01 s (100.00 %) analyzed arrays in 3.980e-02 s (38.33 %) cleaned-up arrays in 1.597e-05 s (0.02 %) - deduplicated arrays in 1.407e-05 s (0.01 %) + deduplicated arrays in 1.502e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) shuffled partitions in 3.201e-02 s (30.83 %) - memory allocations took 1.192e-05 s (0.01 %) - leafs setup took 8.821e-06 s (0.01 %) - halfword conversion took 3.197e-02 s (30.79 %) -Built (100 x 100)[0x5585f4db5140]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 39, symflags:'LS' + memory allocations took 9.060e-06 s (0.01 %) + leafs setup took 5.007e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.80 %) +Built (100 x 100)[0x55ea309b3d80]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 39, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7718,7 +7774,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7733,7 +7789,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7743,16 +7799,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04794s; avg 0.01598s ( +/- 0.28/ 0.15 %); best 0.01593s; worst 0.016s; std dev. 3.165e-05 (taking best). -Reference operation time is 0.015934 s (5.071 Mflops) with 1 threads. -Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 1, 39 leaves, 2.387 bytes/nz, 0.015934 s/0 threads (speedup 0.00219955 x), same?n. +3 iterations (1 th.) took 0.04795s; avg 0.01598s ( +/- 0.22/ 0.13 %); best 0.01595s; worst 0.016s; std dev. 2.519e-05 (taking best). +Reference operation time is 0.0159469 s (5.067 Mflops) with 1 threads. +Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 1, 39 leaves, 2.387 bytes/nz, 0.0159469 s/0 threads (speedup 0.00219777 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 54 subms, 39 lsubms, 2.3865 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7763,12 +7819,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7779,22 +7835,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.030e-01 s (100.00 %) - analyzed arrays in 3.891e-02 s (37.79 %) + converted COO to RSB in 1.036e-01 s (100.00 %) + analyzed arrays in 3.960e-02 s (38.21 %) cleaned-up arrays in 1.502e-05 s (0.01 %) - deduplicated arrays in 1.287e-05 s (0.01 %) + deduplicated arrays in 1.216e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.205e-02 s (31.12 %) - memory allocations took 1.717e-05 s (0.02 %) - leafs setup took 9.060e-06 s (0.01 %) - halfword conversion took 3.196e-02 s (31.03 %) -Built (100 x 100)[0x5585f4d83d80]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' + shuffled partitions in 3.201e-02 s (30.89 %) + memory allocations took 1.287e-05 s (0.01 %) + leafs setup took 5.007e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.86 %) +Built (100 x 100)[0x55ea309b3d80]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7809,7 +7865,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7824,7 +7880,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7834,16 +7890,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04789s; avg 0.01596s ( +/- 0.17/ 0.20 %); best 0.01594s; worst 0.016s; std dev. 2.404e-05 (taking best). -Reference operation time is 0.0159369 s (5.07 Mflops) with 1 threads. -Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 2, 54 leaves, 2.459 bytes/nz, 0.0159369 s/0 threads (speedup 0.00219915 x), same?n. +3 iterations (1 th.) took 0.04797s; avg 0.01599s ( +/- 0.29/ 0.22 %); best 0.01594s; worst 0.01603s; std dev. 3.389e-05 (taking best). +Reference operation time is 0.015945 s (5.067 Mflops) with 1 threads. +Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 2, 54 leaves, 2.459 bytes/nz, 0.015945 s/0 threads (speedup 0.00219803 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 74 subms, 54 lsubms, 2.4594 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7854,12 +7910,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 524288 bytes, per-thread 34952 bytes +# Cache block size total 524288 bytes, per-thread 32768 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 -# RSB_IO_WANT_EXECUTING_THREADS: 15 +# rsb_g_threads: 16 +# RSB_IO_WANT_EXECUTING_THREADS: 16 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7870,22 +7926,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.035e-01 s (100.00 %) - analyzed arrays in 3.945e-02 s (38.12 %) - cleaned-up arrays in 1.812e-05 s (0.02 %) - deduplicated arrays in 1.407e-05 s (0.01 %) + converted COO to RSB in 1.032e-01 s (100.00 %) + analyzed arrays in 3.909e-02 s (37.89 %) + cleaned-up arrays in 3.314e-05 s (0.03 %) + deduplicated arrays in 2.193e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.201e-02 s (30.93 %) - memory allocations took 1.192e-05 s (0.01 %) - leafs setup took 1.287e-05 s (0.01 %) - halfword conversion took 3.197e-02 s (30.89 %) -Built (100 x 100)[0x5585f4d30150]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' + shuffled partitions in 3.202e-02 s (31.03 %) + memory allocations took 2.074e-05 s (0.02 %) + leafs setup took 8.821e-06 s (0.01 %) + halfword conversion took 3.198e-02 s (31.00 %) +Built (100 x 100)[0x55ea30960150]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 54, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing # Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7900,7 +7956,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7915,7 +7971,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7925,9 +7981,9 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.04792s; avg 0.01597s ( +/- 0.34/ 0.19 %); best 0.01592s; worst 0.016s; std dev. 3.83e-05 (taking best). -Reference operation time is 0.015919 s (5.076 Mflops) with 1 threads. -Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 4, 54 leaves, 2.459 bytes/nz, 0.015919 s/0 threads (speedup 0.00220162 x), same?n. +3 iterations (1 th.) took 0.04789s; avg 0.01596s ( +/- 0.38/ 0.24 %); best 0.0159s; worst 0.016s; std dev. 4.377e-05 (taking best). +Reference operation time is 0.015902 s (5.081 Mflops) with 1 threads. +Challenging best inner round reference (3.50475e-05 s/1 threads) with: subdivision 4, 54 leaves, 2.459 bytes/nz, 0.015902 s/0 threads (speedup 0.00220396 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 74 subms, 54 lsubms, 2.4594 bpnz Best sparse multiply performance with subdivision multiplier of 1: 2305.44 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing @@ -7935,7 +7991,7 @@ # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset # min_leaf_matrix_bytes : 65536 # avg_leaf_matrix_bytes : 1048576 -# rsb_g_threads: 15 +# rsb_g_threads: 16 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7945,23 +8001,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.791902 s (eq. to 2e+04/ 2e+04 old/new op.times), gained local/global speedup 1 x (3.50475e-05 : 3.50475e-05) / 1 x (3.50475e-05 : 3.50475e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.79658 s (eq. to 2e+04/ 2e+04 old/new op.times), gained local/global speedup 1 x (3.50475e-05 : 3.50475e-05) / 1 x (3.50475e-05 : 3.50475e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.79s, 0.56s for constructor, 0 clones) obtained NO speedup (best stays 2305 Mflops). -Second run of RSB Autotuner took 0.791968 s and estimated a speedup of 1.000000 x (3.505e-05 s -> 3.505e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.8s, 0.56s for constructor, 0 clones) obtained NO speedup (best stays 2305 Mflops). +Second run of RSB Autotuner took 0.796653 s and estimated a speedup of 1.000000 x (3.505e-05 s -> 3.505e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.023407 0.059983 0.083390 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.083390 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.023407 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.059983 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.023414 0.063991 0.087405 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087405 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.023414 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063991 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.083390 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.087405 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -7976,68 +8032,68 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.115427 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.119444 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.023407 0 0.059983 -# so far, program took 10.346s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.088s/0.000s . +%constructor:lower-100x100-5050nz 0 0.0234139 0 0.0639911 +# so far, program took 11.039s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.440s/0.000s . getrusage() stats: ru_maxrss: 8 (maximum resident set size -- MB) -ru_stime : 0.4943s (system CPU time used) -ru_utime : 52.47s (user CPU time used) +ru_stime : 0.2747s (system CPU time used) +ru_utime : 64.69s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 4) ordered by (1,1,1,1,1,4,1) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 1694.50 8.928e-03 0.000e+00 1.192e-05 0.000e+00 2.952e-01 4.47e+00 2.60e+00 1 2.02e-02 -pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 1568.98 7.989e-03 0.000e+00 1.287e-05 0.000e+00 2.243e-01 2.48e+00 1.56e+00 1 2.02e-02 -pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 1583.64 1.595e-02 0.000e+00 5.102e-05 0.000e+00 3.361e-01 1.04e+00 6.50e-01 1 8.08e-02 -pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 2386.62 1.596e-02 0.000e+00 3.386e-05 0.000e+00 3.363e-01 2.84e+00 1.17e+00 1 8.08e-02 +pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 1694.50 1.597e-02 0.000e+00 1.192e-05 0.000e+00 3.361e-01 4.47e+00 2.60e+00 1 2.02e-02 +pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 1694.50 1.598e-02 0.000e+00 1.192e-05 0.000e+00 3.362e-01 2.68e+00 1.56e+00 1 2.02e-02 +pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 2118.12 1.599e-02 0.000e+00 3.815e-05 0.000e+00 3.360e-01 1.40e+00 6.50e-01 1 8.08e-02 +pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 2305.44 1.597e-02 0.000e+00 3.505e-05 0.000e+00 3.396e-01 2.74e+00 1.17e+00 1 8.08e-02 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 53742.2 % faster, avg. sp. ratio 538.422x, max sp. ratio 748.940x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 14676.9/6586.9/24763.5/58707.6 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 25.8/ 21.1/ 33.1/103.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 25.9, min. 21.1, max. 33.1 ops) +#pr: (in succ. cases rsb autotuning gave avg. 88770.7 % faster, avg. sp. ratio 888.707x, max sp. ratio 1340.580x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 18722.5/8808.3/28199.7/74890.1 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 21.1/ 21.0/ 21.3/ 84.4 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 21.1, min. 21.1, max. 21.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 1683/ 1683/ 1683) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 5050/ 5050/ 5050) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 15150/ 6733/ 26933) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 45450/ 20200/ 80800) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 2.121/ 2.121/ 2.121) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.668/ 1.029/ 4.404,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.833/ 1.045/ 4.471,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.780/ 1.376/ 4.404,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 11.287/ 1.397/ 4.471,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.495/ 0.650/ 2.599) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 2.080/ 2.080/ 2.080) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.30 s, min 0.22 s, max 0.34 s, tot 1.19 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.30 s, min 0.22 s, max 0.34 s, tot 1.19 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.808e+03, min 1.569e+03, max 2.387e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 3.729e+00, min 2.263e+00, max 5.064e+00 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.742e-05s, min 1.192e-05s, max 5.102e-05s, tot 1.097e-04s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 1.221e-02s, min 7.989e-03s, max 1.596e-02s, tot 4.884e-02s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.814e+00 7.762e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.678e+00 -#pr: Record collection took 3.94 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.34 s, min 0.34 s, max 0.34 s, tot 1.35 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.34 s, min 0.34 s, max 0.34 s, tot 1.35 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.953e+03, min 1.694e+03, max 2.305e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.160e+00, min 1.264e+00, max 5.058e+00 (4 samples) +#pr: best tun. rsb operation time was: on avg. 2.426e-05s, min 1.192e-05s, max 3.815e-05s, tot 9.704e-05s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.598e-02s, min 1.597e-02s, max 1.599e-02s, tot 6.391e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.781e+00 5.700e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.677e+00 +#pr: Record collection took 4.37 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 89 environment variables in 3855 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 89 environment variables in 3909 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. #pr: ======== Saved a performance record of 4 samples to test.rpr # Removing the temporary record file test.rpr.tmp. -# terminating run at 1705165361 (after 10.3s of w.c.t.) +# terminating run at 1739579164 (after 11.0s of w.c.t.) + ls -ltr test-tuning-lower-100x100-5050nz--C-N-1--base.eps test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--D-N-1--base.eps test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--S-N-1--base.eps test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--Z-N-1--base.eps test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85631 Jan 13 17:02 test-tuning-lower-100x100-5050nz--D-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84556 Jan 13 17:02 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85634 Jan 13 17:02 test-tuning-lower-100x100-5050nz--S-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84555 Jan 13 17:02 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85634 Jan 13 17:02 test-tuning-lower-100x100-5050nz--C-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84555 Jan 13 17:02 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85634 Jan 13 17:02 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84555 Jan 13 17:02 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85643 Feb 15 00:25 test-tuning-lower-100x100-5050nz--D-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84564 Feb 15 00:25 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85643 Feb 15 00:26 test-tuning-lower-100x100-5050nz--S-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84563 Feb 15 00:26 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85643 Feb 15 00:26 test-tuning-lower-100x100-5050nz--C-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84564 Feb 15 00:26 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85642 Feb 15 00:26 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84565 Feb 15 00:26 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps + rsbench --read-performance-record test.rpr + ls -ltr test.txt --rw-r--r-- 1 pbuilder1 pbuilder1 4098 Jan 13 17:02 test.txt +-rw-r--r-- 1 pbuilder2 pbuilder2 4099 Feb 15 00:26 test.txt + RSB_PR_WLTC=2 + RSB_PR_SR=0 + rsbench --read-performance-record test.rpr @@ -8047,29 +8103,29 @@ /usr/bin/kpsepath ++ kpsepath tex ++ sed 's/!!//g;s/:/\n/g;' -+ find . /nonexistent/first-build/.texlive2022/texmf-config/tex/kpsewhich// /nonexistent/first-build/.texlive2022/texmf-var/tex/kpsewhich// /nonexistent/first-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/first-build/.texlive2022/texmf-config/tex/generic// /nonexistent/first-build/.texlive2022/texmf-var/tex/generic// /nonexistent/first-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/first-build/.texlive2022/texmf-config/tex/latex// /nonexistent/first-build/.texlive2022/texmf-var/tex/latex// /nonexistent/first-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/first-build/.texlive2022/texmf-config/tex/// /nonexistent/first-build/.texlive2022/texmf-var/tex/// /nonexistent/first-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/kpsewhich//': No such file or directory ++ find . /nonexistent/second-build/.texlive2022/texmf-config/tex/kpsewhich// /nonexistent/second-build/.texlive2022/texmf-var/tex/kpsewhich// /nonexistent/second-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/second-build/.texlive2022/texmf-config/tex/generic// /nonexistent/second-build/.texlive2022/texmf-var/tex/generic// /nonexistent/second-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/second-build/.texlive2022/texmf-config/tex/latex// /nonexistent/second-build/.texlive2022/texmf-var/tex/latex// /nonexistent/second-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/second-build/.texlive2022/texmf-config/tex/// /nonexistent/second-build/.texlive2022/texmf-var/tex/// /nonexistent/second-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/kpsewhich//': No such file or directory find: '/usr/local/share/texmf/tex/kpsewhich//': No such file or directory find: '/etc/texmf/tex/kpsewhich//': No such file or directory find: '/var/lib/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texlive/texmf-dist/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex/generic//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex/generic//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/generic//': No such file or directory find: '/usr/local/share/texmf/tex/generic//': No such file or directory find: '/usr/share/texmf/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex/latex//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex/latex//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/latex//': No such file or directory find: '/usr/local/share/texmf/tex/latex//': No such file or directory find: '/etc/texmf/tex/latex//': No such file or directory find: '/var/lib/texmf/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex///': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex///': No such file or directory -find: '/nonexistent/first-build/texmf/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex///': No such file or directory +find: '/nonexistent/second-build/texmf/tex///': No such file or directory find: '/usr/local/share/texmf/tex///': No such file or directory + exit 0 for mf in pd.mtx vf.mtx ; do if test -f /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; then true; else cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/$mf /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; fi; done @@ -8084,7 +8140,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x561e19688030]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0x55a5769e4030]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8099,7 +8155,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x56258b48a030]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0x55a04daa8030]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8163,51 +8219,51 @@ Done. Building a matrix with 5 nnz, 5 x 5 Duplicates check: 5 - 0 = 5 - converted COO to RSB in 1.265e-01 s (100.00 %) - analyzed arrays in 4.800e-02 s (37.95 %) - cleaned-up arrays in 0.000e+00 s (0.00 %) + converted COO to RSB in 1.183e-01 s (100.00 %) + analyzed arrays in 3.999e-02 s (33.81 %) + cleaned-up arrays in 9.537e-07 s (0.00 %) deduplicated arrays in 0.000e+00 s (0.00 %) - sorted arrays in 1.447e-02 s (11.44 %) - shuffled partitions in 3.200e-02 s (25.30 %) - memory allocations took 5.245e-06 s (0.00 %) - leafs setup took 9.537e-07 s (0.00 %) - halfword conversion took 3.200e-02 s (25.30 %) -Built (5 x 5)[0x56258b48d070]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' + sorted arrays in 1.428e-02 s (12.08 %) + shuffled partitions in 3.199e-02 s (27.05 %) + memory allocations took 1.669e-05 s (0.01 %) + leafs setup took 5.007e-06 s (0.00 %) + halfword conversion took 3.199e-02 s (27.05 %) +Built (5 x 5)[0x55a04daab2b0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' Allocated matrix of 5 nonzeroes: -(5 x 5)[0x56258b48d070]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(5 x 5)[0x55a04daab2b0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -Before auto-tuning, 100 multiplications took 1.591975s. +Before auto-tuning, 100 multiplications took 1.595942s. Threads autotuning (may take more than 1.500000s)... Will use autotuning routine to sample matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.79953e-08), 15 suggested as starting thread count(default). -3 iterations (15 th.) took 0.04794s; avg 0.01598s ( +/- 0.25/ 0.13 %); best 0.01594s; worst 0.016s; std dev. 2.783e-05 (taking best). -Reference operation time is 0.01594 s (0.001255 Mflops) with 15 threads. -3 iterations (15 th.) took 0.05199s; avg 0.01733s ( +/- 7.67/ 15.31 %); best 0.016s; worst 0.01998s; std dev. 0.001877 (taking best). -Reference operation time is 0.016 s (0.00125 Mflops) with 15 threads. -After 0.100002s, autotuning routine did not find a better threads count configuration. -(5 x 5)[0x56258b48d070]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -After threads auto-tuning, 100 multiplications took 1.591995s -- effective speedup of 0.999987 x -Matrix autotuning (may take more than 1.500000s; using 15 threads )... +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.9998e-08), 16 suggested as starting thread count(default). +3 iterations (16 th.) took 0.04794s; avg 0.01598s ( +/- 0.26/ 0.17 %); best 0.01594s; worst 0.01601s; std dev. 3.025e-05 (taking best). +Reference operation time is 0.0159378 s (0.001255 Mflops) with 16 threads. +3 iterations (16 th.) took 0.04795s; avg 0.01598s ( +/- 0.22/ 0.13 %); best 0.01595s; worst 0.016s; std dev. 2.49e-05 (taking best). +Reference operation time is 0.0159481 s (0.001254 Mflops) with 16 threads. +After 0.096012s, autotuning routine did not find a better threads count configuration. +(5 x 5)[0x55a04daab2b0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +After threads auto-tuning, 100 multiplications took 1.595939s -- effective speedup of 1 x +Matrix autotuning (may take more than 1.500000s; using 16 threads )... Will autotune matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.79953e-08), 15 suggested as starting thread count. -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.02/ 0.02 %); best 0.01599s; worst 0.016s; std dev. 3.213e-06 (taking best). -Reference operation time is 0.0159931 s (0.001251 Mflops) with 15 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.001) -Merge (2 -> 1 leaves) took w.c.t. of 2.217e-05s, ~4.053e-06s of computing time (of which 0s sorting, 3.815e-06s analysis) -3 iterations (15 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.88/200.00 %); best 5.8e-08s; worst 1.907e-06s; std dev. 8.991e-07 (taking best). -Reference operation time is 5.79953e-08 s (344.9 Mflops) with 15 threads. -After merge step 1: tpop: 5.8e-08 s ~Mflops: 344.855 nsubm:1 otn:15 -Applying merge (2 -> 1 leaves, 15 th.) yielded SPEEDUP of 275765.673x: 0.01599s -> 5.8e-08s, so taking this instance. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.9998e-08), 16 suggested as starting thread count. +3 iterations (16 th.) took 0.04799s; avg 0.016s ( +/- 0.02/ 0.01 %); best 0.016s; worst 0.016s; std dev. 1.976e-06 (taking best). +Reference operation time is 0.015995 s (0.00125 Mflops) with 16 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.001) +Merge (2 -> 1 leaves) took w.c.t. of 2.909e-05s, ~5.007e-06s of computing time (of which 1.192e-06s sorting, 5.96e-06s analysis) +3 iterations (16 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 95.56/200.00 %); best 6e-08s; worst 4.053e-06s; std dev. 1.911e-06 (taking best). +Reference operation time is 5.9998e-08 s (333.3 Mflops) with 16 threads. +After merge step 1: tpop: 6e-08 s ~Mflops: 333.344 nsubm:1 otn:16 +Applying merge (2 -> 1 leaves, 16 th.) yielded SPEEDUP of 266592.490x: 0.016s -> 6e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (2 -> 1 subms) took 0.048s (of which 0.000144s partitioning, 0s I/O); computing times: 4.053e-06s in par. loops, 0s sorting, 3.815e-06s analyzing) -Total merge + benchmarking process took 0.048s, equivalent to 827634.1/3.0 new/old ops (0.09581s for 2 clones -- as 1651963.0/6.0 ops, or 825981.5/3.0 ops per clone), SPEEDUP of 275765.673x -Applying multi-merge (2 -> 1 leaves, 1 steps, 15 -> 15 th.sp.) yielded SPEEDUP of 275765.673x (0.01599s -> 5.8e-08s), will amortize in 3.0 ops by saving 0.01599s per op. -In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 27576467.3% (2.758e+05x) (from 0.001251 to 344.9 Mflops). -After 0.144020s, autotuning routine declared speedup of 275766 x, when using threads count of 15. -(5 x 5)[0x56258b490400]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' -After threads auto-tuning, 100 multiplications took 0.000024s -- further speedup of 66112 x +A total of 1 merge steps (of max 6) (2 -> 1 subms) took 0.048s (of which 0.0003428s partitioning, 0s I/O); computing times: 5.007e-06s in par. loops, 1.192e-06s sorting, 5.96e-06s analyzing) +Total merge + benchmarking process took 0.048s, equivalent to 799976.2/3.0 new/old ops (0.09558s for 2 clones -- as 1592986.3/6.0 ops, or 796493.1/3.0 ops per clone), SPEEDUP of 266592.490x +Applying multi-merge (2 -> 1 leaves, 1 steps, 16 -> 16 th.sp.) yielded SPEEDUP of 266592.490x (0.016s -> 6e-08s), will amortize in 3.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 26659149.0% (2.666e+05x) (from 0.00125 to 333.3 Mflops). +After 0.144042s, autotuning routine declared speedup of 266592 x, when using threads count of 16. +(5 x 5)[0x55a04daae640]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +After threads auto-tuning, 100 multiplications took 0.000033s -- further speedup of 48157.2 x 0/2 0 0 -> 0 1/2 1 0 -> 5 0/2 0 3 -> 0 @@ -8218,7 +8274,7 @@ Correctly initialized the library. Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x56258b48d070]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x55a04daab2b0]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8369,87 +8425,71 @@ Creating 500 x 500 matrix with 62500 nonzeroes. Building a matrix with 62500 nnz, 500 x 500 Duplicates check: 62500 - 0 = 62500 - converted COO to RSB in 2.354e-01 s (100.00 %) - analyzed arrays in 3.962e-02 s (16.83 %) - cleaned-up arrays in 1.450e-04 s (0.06 %) - deduplicated arrays in 1.638e-04 s (0.07 %) - sorted arrays in 9.538e-02 s (40.52 %) - shuffled partitions in 6.398e-02 s (27.19 %) - memory allocations took 4.935e-05 s (0.02 %) - leafs setup took 1.001e-05 s (0.00 %) - halfword conversion took 3.599e-02 s (15.29 %) -Built (500 x 500)[0x55f2d9bbce60]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' + converted COO to RSB in 2.369e-01 s (100.00 %) + analyzed arrays in 4.755e-02 s (20.07 %) + cleaned-up arrays in 1.440e-04 s (0.06 %) + deduplicated arrays in 1.791e-04 s (0.08 %) + sorted arrays in 9.298e-02 s (39.25 %) + shuffled partitions in 6.399e-02 s (27.01 %) + memory allocations took 5.031e-05 s (0.02 %) + leafs setup took 1.192e-05 s (0.01 %) + halfword conversion took 3.199e-02 s (13.50 %) +Built (500 x 500)[0x560ff6debe60]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' Allocated matrix of 62500 nonzeroes: -(500 x 500)[0x55f2d9bbce60]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' +(500 x 500)[0x560ff6debe60]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' -Before auto-tuning, 100 multiplications took 1.595658s. +Before auto-tuning, 100 multiplications took 1.599721s. Threads autotuning (may take more than 1.500000s)... Will use autotuning routine to sample matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.74946e-08), 15 suggested as starting thread count(default). -3 iterations (15 th.) took 0.04792s; avg 0.01597s ( +/- 0.31/ 0.17 %); best 0.01592s; worst 0.016s; std dev. 3.563e-05 (taking best). -Reference operation time is 0.015923 s (15.7 Mflops) with 15 threads. -3 iterations (15 th.) took 0.04796s; avg 0.01599s ( +/- 0.16/ 0.09 %); best 0.01596s; worst 0.016s; std dev. 1.786e-05 (taking best). -Reference operation time is 0.0159628 s (15.66 Mflops) with 15 threads. -After 0.096027s, autotuning routine did not find a better threads count configuration. -(500 x 500)[0x55f2d9bbce60]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' -After threads auto-tuning, 100 multiplications took 1.603980s -- effective speedup of 0.994812 x -Matrix autotuning (may take more than 1.500000s; using 15 threads )... +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.82933e-08), 16 suggested as starting thread count(default). +3 iterations (16 th.) took 0.0479s; avg 0.01597s ( +/- 0.37/ 0.19 %); best 0.01591s; worst 0.016s; std dev. 4.177e-05 (taking best). +Reference operation time is 0.015909 s (15.71 Mflops) with 16 threads. +3 iterations (16 th.) took 0.04798s; avg 0.01599s ( +/- 0.12/ 0.07 %); best 0.01597s; worst 0.016s; std dev. 1.394e-05 (taking best). +Reference operation time is 0.0159731 s (15.65 Mflops) with 16 threads. +After 0.096000s, autotuning routine did not find a better threads count configuration. +(500 x 500)[0x560ff6debe60]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' +After threads auto-tuning, 100 multiplications took 1.599958s -- effective speedup of 0.999852 x +Matrix autotuning (may take more than 1.500000s; using 16 threads )... Will autotune matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.74946e-08), 15 suggested as starting thread count. -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.05/ 0.04 %); best 0.01599s; worst 0.016s; std dev. 5.735e-06 (taking best). -Reference operation time is 0.01599 s (15.63 Mflops) with 15 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz (tpop: 0.01599 Mflops: 15.635) -Merge (64 -> 40 leaves) took w.c.t. of 0.03207s, ~0.1365s of computing time (of which 0.02484s sorting, 1.502e-05s analysis) -3 iterations (15 th.) took 0.05185s; avg 0.01728s ( +/- 7.64/ 15.23 %); best 0.01596s; worst 0.01992s; std dev. 0.001861 (taking best). -Reference operation time is 0.0159628 s (15.66 Mflops) with 15 threads. -After merge step 1: tpop: 0.01596 s ~Mflops: 15.661 nsubm:40 otn:15 -Applying merge (64 -> 40 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00170x): 0.01599s -> 0.01596s, so IGNORING this instance. -Merge (40 -> 28 leaves) took w.c.t. of 0.01418s, ~0.002475s of computing time (of which 0.0001869s sorting, 2.193e-05s analysis) -3 iterations (15 th.) took 0.05779s; avg 0.01926s ( +/- 17.04/ 33.86 %); best 0.01598s; worst 0.02579s; std dev. 0.004613 (taking best). -Reference operation time is 0.015981 s (15.64 Mflops) with 15 threads. -After merge step 2: tpop: 0.01598 s ~Mflops: 15.644 nsubm:28 otn:15 -Applying merge (40 -> 28 leaves, 15 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00057x): 0.01599s -> 0.01598s, so IGNORING this instance. -Merge (28 -> 22 leaves) took w.c.t. of 0.008146s, ~0.0002739s of computing time (of which 0.0001261s sorting, 1.407e-05s analysis) -3 iterations (15 th.) took 0.06738s; avg 0.02246s ( +/- 28.84/ 57.64 %); best 0.01598s; worst 0.03541s; std dev. 0.009154 (taking best). -Reference operation time is 0.0159819 s (15.64 Mflops) with 15 threads. -After merge step 3: tpop: 0.01598 s ~Mflops: 15.643 nsubm:22 otn:15 -Applying merge (28 -> 22 leaves, 15 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00051x): 0.01599s -> 0.01598s, so IGNORING this instance. -Merge (22 -> 16 leaves) took w.c.t. of 0.008693s, ~0.0002148s of computing time (of which 9.894e-05s sorting, 1.121e-05s analysis) -3 iterations (15 th.) took 0.05251s; avg 0.0175s ( +/- 9.16/ 11.35 %); best 0.0159s; worst 0.01949s; std dev. 0.00149 (taking best). -Reference operation time is 0.015898 s (15.73 Mflops) with 15 threads. -After merge step 4: tpop: 0.0159 s ~Mflops: 15.725 nsubm:16 otn:15 -Applying merge (22 -> 16 leaves, 15 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00579x): 0.01599s -> 0.0159s, so IGNORING this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.007498s, ~0.001259s of computing time (of which 0.0005512s sorting, 1.097e-05s analysis) -3 iterations (15 th.) took 0.09122s; avg 0.03041s ( +/- 21.12/ 30.74 %); best 0.02398s; worst 0.03975s; std dev. 0.006762 (taking best). -Reference operation time is 0.0239849 s (10.42 Mflops) with 15 threads. -After merge step 5: tpop: 0.02398 s ~Mflops: 10.423 nsubm:10 otn:15 -Applying merge (16 -> 10 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.500x: 0.01599s -> 0.02398s. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.82933e-08), 16 suggested as starting thread count. +3 iterations (16 th.) took 0.04796s; avg 0.01599s ( +/- 0.11/ 0.17 %); best 0.01597s; worst 0.01602s; std dev. 1.981e-05 (taking best). +Reference operation time is 0.01597 s (15.65 Mflops) with 16 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz (tpop: 0.01597 Mflops: 15.654) +Merge (64 -> 40 leaves) took w.c.t. of 0.01269s, ~0.03115s of computing time (of which 0.0003242s sorting, 1.097e-05s analysis) +3 iterations (16 th.) took 0.06329s; avg 0.0211s ( +/- 24.20/ 29.35 %); best 0.01599s; worst 0.02729s; std dev. 0.004675 (taking best). +Reference operation time is 0.01599 s (15.63 Mflops) with 16 threads. +After merge step 1: tpop: 0.01599 s ~Mflops: 15.635 nsubm:40 otn:16 +Applying merge (64 -> 40 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99875x): 0.01597s -> 0.01599s, so IGNORING this instance. +Merge (40 -> 28 leaves) took w.c.t. of 0.008108s, ~0.000443s of computing time (of which 0.0002265s sorting, 1.216e-05s analysis) +3 iterations (16 th.) took 0.05585s; avg 0.01862s ( +/- 14.18/ 28.18 %); best 0.01598s; worst 0.02386s; std dev. 0.00371 (taking best). +Reference operation time is 0.0159769 s (15.65 Mflops) with 16 threads. +After merge step 2: tpop: 0.01598 s ~Mflops: 15.648 nsubm:28 otn:16 +Applying merge (40 -> 28 leaves, 16 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=0.99957x): 0.01597s -> 0.01598s, so IGNORING this instance. +Merge (28 -> 22 leaves) took w.c.t. of 0.008104s, ~0.0002372s of computing time (of which 0.0001049s sorting, 1.001e-05s analysis) +3 iterations (16 th.) took 0.04359s; avg 0.01453s ( +/- 45.11/ 35.08 %); best 0.007976s; worst 0.01963s; std dev. 0.004867 (taking best). +Reference operation time is 0.00797582 s (31.34 Mflops) with 16 threads. +After merge step 3: tpop: 0.007976 s ~Mflops: 31.345 nsubm:22 otn:16 +Applying merge (28 -> 22 leaves, 16 th.) yielded SPEEDUP of 2.002x: 0.01597s -> 0.007976s, so taking this instance. +Merge (22 -> 16 leaves) took w.c.t. of 0.008125s, ~0.0002151s of computing time (of which 8.321e-05s sorting, 8.106e-06s analysis) +3 iterations (16 th.) took 0.03583s; avg 0.01194s ( +/- 33.28/ 66.44 %); best 0.007969s; worst 0.01988s; std dev. 0.005612 (taking best). +Reference operation time is 0.0079689 s (31.37 Mflops) with 16 threads. +After merge step 4: tpop: 0.007969 s ~Mflops: 31.372 nsubm:16 otn:16 +Applying merge (22 -> 16 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00087x): 0.007976s -> 0.007969s, so IGNORING this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.004399s, ~0.0008547s of computing time (of which 0.000499s sorting, 6.914e-06s analysis) +3 iterations (16 th.) took 0.05956s; avg 0.01985s ( +/- 20.09/ 39.61 %); best 0.01587s; worst 0.02772s; std dev. 0.005561 (taking best). +Reference operation time is 0.015866 s (15.76 Mflops) with 16 threads. +After merge step 5: tpop: 0.01587 s ~Mflops: 15.757 nsubm:10 otn:16 +Applying merge (16 -> 10 leaves, 16 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.989x: 0.007976s -> 0.01587s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 5 merge steps (of max 6) (64 -> 10 subms) took 0.392s (of which 0.07104s partitioning, 0s I/O); computing times: 0.1407s in par. loops, 0.02581s sorting, 7.319e-05s analyzing) -Total merge + benchmarking process took 0.392s, equivalent to 24.5/24.5 new/old ops (0.04791s for 1 clones -- as 3.0/3.0 ops, or 3.0/3.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (15 th.) took 0.04791s; avg 0.01597s ( +/- 0.32/ 0.17 %); best 0.01592s; worst 0.016s; std dev. 3.634e-05 (taking best). -Reference operation time is 0.015919 s (15.7 Mflops) with 15 threads. -Starting split (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 87 subms, 64 lsubms, 2.1321 bpnz (tpop: 0.01592 Mflops: 15.705) -Split (64 -> 160 leaves, 87 -> 215 subms) took 0.02371s (of which: 1.097e-05s analysis, -5.457e+10s mem.mgmt); compute time: 0.004655s overall, 0.0002618s searches, 0.004393s shuffle, 0.001188s switch, 0.0006261s quadrants. -3 iterations (15 th.) took 0.04784s; avg 0.01595s ( +/- 0.67/ 0.54 %); best 0.01584s; worst 0.01603s; std dev. 8.012e-05 (taking best). -Reference operation time is 0.015841 s (15.78 Mflops) with 15 threads. -After split step 1: tpop: 0.01584 s ~Mflops: 15.782 nsubm:160 otn:15 -Applying split (64 -> 160 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00492x): 0.01592s -> 0.01584s, so IGNORING this instance. -Split (160 -> 400 leaves, 215 -> 535 subms) took 0.02379s (of which: 3.6e-05s analysis, -1.364e+11s mem.mgmt); compute time: 0.005842s overall, 0.0005529s searches, 0.005289s shuffle, 0.001796s switch, 0.001232s quadrants. -3 iterations (15 th.) took 0.06806s; avg 0.02269s ( +/- 13.15/ 19.76 %); best 0.0197s; worst 0.02717s; std dev. 0.003227 (taking best). -Reference operation time is 0.0197048 s (12.69 Mflops) with 15 threads. -After split step 2: tpop: 0.0197 s ~Mflops: 12.687 nsubm:400 otn:15 -Applying split (160 -> 400 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.238x: 0.01592s -> 0.0197s. -Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 split steps (of max 6) (64 -> 400 subms) took 0.1641s (of which 0.04812s partitioning, 0s I/O); computing times: 0.0105s in par. loops, 0.0008147s sorting, 4.697e-05s analyzing) -Total split + benchmarking process took 0.1641s, equivalent to 10.3/10.3 new/old ops (0.05623s for 1 clones -- as 3.5/3.5 ops, or 3.5/3.5 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -In 1 tuning rounds (tot. 0.76s, 0.1s for constructor, 2 clones) obtained NO speedup (best stays 15.7 Mflops). -After 0.756659s, autotuning routine declared speedup of 1 x, when using threads count of 15. -(500 x 500)[0x55f2d9bbce60]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 64, symflags:'' -After threads auto-tuning, 100 multiplications took 1.599342s -- further speedup of 1.0029 x +A total of 5 merge steps (of max 6) (64 -> 10 subms) took 0.348s (of which 0.04175s partitioning, 0s I/O); computing times: 0.0329s in par. loops, 0.001238s sorting, 4.816e-05s analyzing) +Total merge + benchmarking process took 0.348s, equivalent to 43.6/21.8 new/old ops (0.09597s for 2 clones -- as 12.0/6.0 ops, or 6.0/3.0 ops per clone), SPEEDUP of 2.002x +Applying multi-merge (64 -> 22 leaves, 3 steps, 16 -> 16 th.sp.) yielded SPEEDUP of 2.002x (0.01597s -> 0.007976s), will amortize in 43.5 ops by saving 0.007994s per op. +In 1 tuning rounds (tot. 0.44s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 100.2% (2.002x) (from 15.65 to 31.34 Mflops). +After 0.444334s, autotuning routine declared speedup of 2.0023 x, when using threads count of 16. +(500 x 500)[0x560ff6df7660]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +After threads auto-tuning, 100 multiplications took 1.599621s -- further speedup of 1.00021 x librsb timer-based profiling is not supported in this build. If you wish to have it, re-configure librsb with its support. So you can safely ignore the error you might just have seen printed out on screen. /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve Hello, RSB! @@ -8457,19 +8497,19 @@ Correctly initialized the library. Building a matrix with 7 nnz, 6 x 6 Duplicates check: 1 - 0 = 1 - converted COO to RSB in 6.228e-02 s (100.00 %) - analyzed arrays in 4.623e-02 s (74.23 %) + converted COO to RSB in 6.194e-02 s (100.00 %) + analyzed arrays in 4.596e-02 s (74.20 %) cleaned-up arrays in 5.960e-06 s (0.01 %) - deduplicated arrays in 1.192e-06 s (0.00 %) - sorted arrays in 2.861e-06 s (0.00 %) - shuffled partitions in 1.600e-02 s (25.69 %) - memory allocations took 2.313e-05 s (0.04 %) - leafs setup took 4.053e-06 s (0.01 %) - halfword conversion took 1.097e-05 s (0.02 %) -Built (6 x 6)[0x55dd6937f060]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' + deduplicated arrays in 9.537e-07 s (0.00 %) + sorted arrays in 2.146e-06 s (0.00 %) + shuffled partitions in 1.594e-02 s (25.74 %) + memory allocations took 1.907e-05 s (0.03 %) + leafs setup took 2.861e-06 s (0.00 %) + halfword conversion took 7.153e-06 s (0.01 %) +Built (6 x 6)[0x5557b1546060]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x55dd6937f060]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x5557b1546060]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8495,55 +8535,55 @@ 1 1 Will autotune matrix: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:6.111e-08 -3 iterations (15 th.) took 2.384e-05s; avg 7.947e-06s ( +/- 99.23/188.00 %); best 6.111e-08s; worst 2.289e-05s; std dev. 1.057e-05 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 6.111e-08 Mflops: 32.730) +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.831e-08 +3 iterations (16 th.) took 2.193e-05s; avg 7.312e-06s ( +/- 99.20/186.96 %); best 5.831e-08s; worst 2.098e-05s; std dev. 9.674e-06 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 5.831e-08 Mflops: 34.302) Merge (1 -> 1 leaves) took w.c.t. of 0s, ~0s of computing time (of which 0s sorting, 0s analysis) -3 iterations (15 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 91.46/ 66.67 %); best 6.111e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -After merge step 1: tpop: 6.111e-08 s ~Mflops: 32.730 nsubm:1 otn:15 -Applying merge (1 -> 1 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 6.111e-08s -> 6.111e-08s, so IGNORING this instance. +3 iterations (16 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.83/ 50.00 %); best 5.831e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +After merge step 1: tpop: 5.831e-08 s ~Mflops: 34.302 nsubm:1 otn:16 +Applying merge (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 5.831e-08s -> 5.831e-08s, so IGNORING this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (1 -> 1 subms) took 1.812e-05s (of which 4.053e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) -Total merge + benchmarking process took 1.812e-05s, equivalent to 296.5/296.5 new/old ops (0.04781s for 1 clones -- as 782465.9/782465.9 ops, or 782465.9/782465.9 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +A total of 1 merge steps (of max 6) (1 -> 1 subms) took 2.003e-05s (of which 3.099e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) +Total merge + benchmarking process took 2.003e-05s, equivalent to 343.5/343.5 new/old ops (0.04785s for 1 clones -- as 820764.7/820764.7 ops, or 820764.7/820764.7 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (15 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.39/200.00 %); best 6.111e-08s; worst 1.907e-06s; std dev. 8.991e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 6.111e-08 Mflops: 32.730) -Split (1 -> 1 leaves, 1 -> 1 subms) took 1.693e-05s (of which: 2.146e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.39/ 50.00 %); best 6.111e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -After split step 1: tpop: 6.111e-08 s ~Mflops: 32.730 nsubm:1 otn:15 -Applying split (1 -> 1 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 6.111e-08s -> 6.111e-08s, so IGNORING this instance. -Split (1 -> 1 leaves, 1 -> 1 subms) took 9.06e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 91.46/ 66.67 %); best 6.111e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -After split step 2: tpop: 6.111e-08 s ~Mflops: 32.730 nsubm:1 otn:15 -Applying split (1 -> 1 leaves, 15 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00000x): 6.111e-08s -> 6.111e-08s, so IGNORING this instance. -Split (1 -> 1 leaves, 1 -> 1 subms) took 4.053e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.39/ 50.00 %); best 6.111e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -After split step 3: tpop: 6.111e-08 s ~Mflops: 32.730 nsubm:1 otn:15 -Applying split (1 -> 1 leaves, 15 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00000x): 6.111e-08s -> 6.111e-08s, so IGNORING this instance. -Split (1 -> 1 leaves, 1 -> 1 subms) took 8.106e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 80.78/200.00 %); best 6.111e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -After split step 4: tpop: 6.111e-08 s ~Mflops: 32.730 nsubm:1 otn:15 -Applying split (1 -> 1 leaves, 15 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00000x): 6.111e-08s -> 6.111e-08s, so IGNORING this instance. -Split (1 -> 1 leaves, 1 -> 1 subms) took 2.861e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 80.78/200.00 %); best 6.111e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -After split step 5: tpop: 6.111e-08 s ~Mflops: 32.730 nsubm:1 otn:15 -Applying split (1 -> 1 leaves, 15 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00000x): 6.111e-08s -> 6.111e-08s, so IGNORING this instance. +3 iterations (16 th.) took 7.868e-06s; avg 2.623e-06s ( +/- 97.78/163.64 %); best 5.831e-08s; worst 6.914e-06s; std dev. 3.059e-06 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 5.831e-08 Mflops: 34.302) +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.408e-05s (of which: 5.007e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (16 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.83/ 50.00 %); best 5.831e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +After split step 1: tpop: 5.831e-08 s ~Mflops: 34.302 nsubm:1 otn:16 +Applying split (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 5.831e-08s -> 5.831e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 3.099e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (16 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 93.89/100.00 %); best 5.831e-08s; worst 1.907e-06s; std dev. 7.787e-07 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +After split step 2: tpop: 5.831e-08 s ~Mflops: 34.302 nsubm:1 otn:16 +Applying split (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00000x): 5.831e-08s -> 5.831e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 1.097e-05s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (16 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.83/ 50.00 %); best 5.831e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +After split step 3: tpop: 5.831e-08 s ~Mflops: 34.302 nsubm:1 otn:16 +Applying split (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00000x): 5.831e-08s -> 5.831e-08s, so IGNORING this instance. Split (1 -> 1 leaves, 1 -> 1 subms) took 2.146e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 1.192e-06s; avg 3.974e-07s ( +/- 84.62/200.00 %); best 6.111e-08s; worst 1.192e-06s; std dev. 5.62e-07 (taking best). -Reference operation time is 6.11067e-08 s (32.73 Mflops) with 15 threads. -After split step 6: tpop: 6.111e-08 s ~Mflops: 32.730 nsubm:1 otn:15 -Applying split (1 -> 1 leaves, 15 th.) yielded NEGLIGIBLE change (6th in a row) (old/new=1.00000x): 6.111e-08s -> 6.111e-08s, so IGNORING this instance. -A total of 6 split steps (of max 6) (1 -> 1 subms) took 0.000422s (of which 0.000349s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 5.007e-06s analyzing) -Total split + benchmarking process took 0.000422s, equivalent to 6906.0/6906.0 new/old ops (0.04792s for 1 clones -- as 784284.0/784284.0 ops, or 784284.0/784284.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -In 1 tuning rounds (tot. 0.096s, 0.096s for constructor, 2 clones) obtained NO speedup (best stays 32.73 Mflops). +3 iterations (16 th.) took 9.537e-07s; avg 3.179e-07s ( +/- 81.66/200.00 %); best 5.831e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +After split step 4: tpop: 5.831e-08 s ~Mflops: 34.302 nsubm:1 otn:16 +Applying split (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00000x): 5.831e-08s -> 5.831e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 6.914e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (16 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 91.85/ 66.67 %); best 5.831e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +After split step 5: tpop: 5.831e-08 s ~Mflops: 34.302 nsubm:1 otn:16 +Applying split (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00000x): 5.831e-08s -> 5.831e-08s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 3.099e-06s (of which: 0s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (16 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 90.83/ 50.00 %); best 5.831e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). +Reference operation time is 5.83053e-08 s (34.3 Mflops) with 16 threads. +After split step 6: tpop: 5.831e-08 s ~Mflops: 34.302 nsubm:1 otn:16 +Applying split (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (6th in a row) (old/new=1.00000x): 5.831e-08s -> 5.831e-08s, so IGNORING this instance. +A total of 6 split steps (of max 6) (1 -> 1 subms) took 0.000267s (of which 0.0001893s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 6.914e-06s analyzing) +Total split + benchmarking process took 0.000267s, equivalent to 4579.8/4579.8 new/old ops (0.04792s for 1 clones -- as 821930.1/821930.1 ops, or 821930.1/821930.1 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +In 1 tuning rounds (tot. 0.096s, 0.096s for constructor, 2 clones) obtained NO speedup (best stays 34.3 Mflops). Backsolving we should get a unitary vector: %%MatrixMarket matrix array real general @@ -8578,173 +8618,206 @@ /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran Building a matrix with 210 nnz, 20 x 20 Duplicates check: 210 - 0 = 210 - converted COO to RSB in 1.951e-01 s (100.00 %) - analyzed arrays in 5.992e-02 s (30.71 %) + converted COO to RSB in 1.270e-01 s (100.00 %) + analyzed arrays in 3.986e-02 s (31.39 %) cleaned-up arrays in 3.099e-06 s (0.00 %) - deduplicated arrays in 2.861e-06 s (0.00 %) - sorted arrays in 3.907e-02 s (20.02 %) - shuffled partitions in 4.799e-02 s (24.60 %) - memory allocations took 1.028e-04 s (0.05 %) - leafs setup took 1.192e-05 s (0.01 %) - halfword conversion took 4.798e-02 s (24.59 %) -Built (20 x 20)[0x56416ed47580]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'LS' + deduplicated arrays in 2.146e-06 s (0.00 %) + sorted arrays in 2.304e-02 s (18.15 %) + shuffled partitions in 3.200e-02 s (25.20 %) + memory allocations took 4.697e-05 s (0.04 %) + leafs setup took 5.960e-06 s (0.00 %) + halfword conversion took 3.199e-02 s (25.20 %) +Built (20 x 20)[0x55a098303580]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'LS' Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:9.05e-08 -Starting autotuning (16 x 9.05037e-08 s stages, transA=N, nrhs=1, timer gran.=9.05037e-08), 15 suggested as starting thread count(default). -3 iterations (15 th.) took 0.05999s; avg 0.02s ( +/- 20.00/ 20.00 %); best 0.016s; worst 0.024s; std dev. 0.003265 (taking best). -Reference operation time is 0.0159991 s (0.0525 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.016 Mflops: 0.053) -Merge (22 -> 16 leaves) took w.c.t. of 0.004519s, ~0.0008547s of computing time (of which 5.007e-06s sorting, 1.097e-05s analysis) -3 iterations (15 th.) took 0.05941s; avg 0.0198s ( +/- 19.43/ 38.56 %); best 0.01595s; worst 0.02744s; std dev. 0.0054 (taking best). -Reference operation time is 0.015954 s (0.05265 Mflops) with 15 threads. -After merge step 1: tpop: 0.01595 s ~Mflops: 0.053 nsubm:16 otn:15 -Applying merge (22 -> 16 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00282x): 0.016s -> 0.01595s, so IGNORING this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.01223s, ~3.123e-05s of computing time (of which 3.099e-06s sorting, 1.216e-05s analysis) -3 iterations (15 th.) took 0.09172s; avg 0.03057s ( +/- 21.77/ 43.25 %); best 0.02392s; worst 0.0438s; std dev. 0.00935 (taking best). -Reference operation time is 0.023917 s (0.03512 Mflops) with 15 threads. -After merge step 2: tpop: 0.02392 s ~Mflops: 0.035 nsubm:10 otn:15 -Applying merge (16 -> 10 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.495x: 0.016s -> 0.02392s. -Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 merge steps (of max 6) (22 -> 10 subms) took 0.168s (of which 0.01678s partitioning, 0s I/O); computing times: 0.000886s in par. loops, 8.106e-06s sorting, 2.313e-05s analyzing) -Total merge + benchmarking process took 0.168s, equivalent to 10.5/10.5 new/old ops (0.07186s for 1 clones -- as 4.5/4.5 ops, or 4.5/4.5 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) -Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (15 th.) took 0.05997s; avg 0.01999s ( +/- 20.00/ 39.96 %); best 0.01599s; worst 0.02798s; std dev. 0.005648 (taking best). -Reference operation time is 0.0159919 s (0.05253 Mflops) with 15 threads. -Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.01599 Mflops: 0.053) -Split (22 -> 51 leaves, 30 -> 70 subms) took 0.03193s (of which: 9.06e-06s analysis, -1.876e+10s mem.mgmt); compute time: 0.1834s overall, 1.287e-05s searches, 0.1834s shuffle, 0.182s switch, 0.0002592s quadrants. -3 iterations (15 th.) took 0.05591s; avg 0.01864s ( +/- 14.16/ 28.29 %); best 0.016s; worst 0.02391s; std dev. 0.003728 (taking best). -Reference operation time is 0.0159972 s (0.05251 Mflops) with 15 threads. -After split step 1: tpop: 0.016 s ~Mflops: 0.053 nsubm:51 otn:15 -Applying split (22 -> 51 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99967x): 0.01599s -> 0.016s, so IGNORING this instance. -Split (51 -> 122 leaves, 70 -> 166 subms) took 0.01958s (of which: 1.502e-05s analysis, -4.263e+10s mem.mgmt); compute time: 0.002699s overall, 1.24e-05s searches, 0.002686s shuffle, 0.001189s switch, 0.0003273s quadrants. -3 iterations (15 th.) took 0.04798s; avg 0.01599s ( +/- 0.31/ 0.26 %); best 0.01594s; worst 0.01604s; std dev. 3.799e-05 (taking best). -Reference operation time is 0.015944 s (0.05268 Mflops) with 15 threads. -After split step 2: tpop: 0.01594 s ~Mflops: 0.053 nsubm:122 otn:15 -Applying split (51 -> 122 leaves, 15 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00301x): 0.01599s -> 0.01594s, so IGNORING this instance. -Split (122 -> 146 leaves, 166 -> 198 subms) took 0.02778s (of which: 2.694e-05s analysis, -1.364e+10s mem.mgmt); compute time: 0.001025s overall, 1.025e-05s searches, 0.001015s shuffle, 0.0003989s switch, 0.0001338s quadrants. -3 iterations (15 th.) took 0.04791s; avg 0.01597s ( +/- 0.40/ 0.22 %); best 0.01591s; worst 0.01601s; std dev. 4.575e-05 (taking best). -Reference operation time is 0.015907 s (0.05281 Mflops) with 15 threads. -After split step 3: tpop: 0.01591 s ~Mflops: 0.053 nsubm:146 otn:15 -Applying split (122 -> 146 leaves, 15 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00534x): 0.01599s -> 0.01591s, so IGNORING this instance. -Split (146 -> 146 leaves, 198 -> 198 subms) took 0.01959s (of which: 3.505e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 0.04813s; avg 0.01604s ( +/- 0.12/ 0.10 %); best 0.01602s; worst 0.01606s; std dev. 1.464e-05 (taking best). -Reference operation time is 0.016022 s (0.05243 Mflops) with 15 threads. -After split step 4: tpop: 0.01602 s ~Mflops: 0.052 nsubm:146 otn:15 -Applying split (146 -> 146 leaves, 15 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=0.99813x): 0.01599s -> 0.01602s, so IGNORING this instance. -Split (146 -> 146 leaves, 198 -> 198 subms) took 0.02773s (of which: 2.813e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 0.0479s; avg 0.01597s ( +/- 0.39/ 0.20 %); best 0.01591s; worst 0.016s; std dev. 4.423e-05 (taking best). -Reference operation time is 0.0159051 s (0.05281 Mflops) with 15 threads. -After split step 5: tpop: 0.01591 s ~Mflops: 0.053 nsubm:146 otn:15 -Applying split (146 -> 146 leaves, 15 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00546x): 0.01599s -> 0.01591s, so IGNORING this instance. -Split (146 -> 146 leaves, 198 -> 198 subms) took 0.02041s (of which: 3.004e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 0.04772s; avg 0.01591s ( +/- 1.54/ 1.16 %); best 0.01566s; worst 0.01609s; std dev. 0.0001803 (taking best). -Reference operation time is 0.0156622 s (0.05363 Mflops) with 15 threads. -After split step 6: tpop: 0.01566 s ~Mflops: 0.054 nsubm:146 otn:15 -Applying split (146 -> 146 leaves, 15 th.) yielded SPEEDUP of 1.021x: 0.01599s -> 0.01566s, so taking this instance. -A total of 6 split steps (of max 6) (22 -> 146 subms) took 0.492s (of which 0.1481s partitioning, 0s I/O); computing times: 0.1872s in par. loops, 3.552e-05s sorting, 0.0001442s analyzing) -Total split + benchmarking process took 0.492s, equivalent to 31.4/30.8 new/old ops (0.1197s for 2 clones -- as 7.6/7.5 ops, or 3.8/3.7 ops per clone), SPEEDUP of 1.021x -Applying multi-split (22 -> 146 leaves, 6 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 1.021x (0.01599s -> 0.01566s), will amortize in 1492.2 ops by saving 0.0003297s per op. -In 1 tuning rounds (tot. 0.92s, 0.19s for constructor, 3 clones) obtained a SPEEDUP of 2.1% (1.021x) (from 0.05253 to 0.05363 Mflops). - autotuner chose 15 threads -Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 198 subms, 146 lsubms, 4.2286 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:9.05e-08 -Starting autotuning (16 x 9.05037e-08 s stages, transA=N, nrhs=1, timer gran.=9.05037e-08), 15 suggested as starting thread count(default). -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 0.06/ 0.05 %); best 0.01599s; worst 0.016s; std dev. 7.456e-06 (taking best). -~ 15 threads: 0.01599s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 0.04798s; avg 0.01599s ( +/- 0.21/ 0.16 %); best 0.01596s; worst 0.01602s; std dev. 2.473e-05 (taking best). - 14 threads: 0.01596s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (13 th.) took 0.04795s; avg 0.01598s ( +/- 0.18/ 0.16 %); best 0.01595s; worst 0.01601s; std dev. 2.245e-05 (taking best). - 13 threads: 0.01595s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 0.04402s; avg 0.01467s ( +/- 17.91/ 9.00 %); best 0.01204s; worst 0.01599s; std dev. 0.001858 (taking best). - 12 threads: 0.01204s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (11 th.) took 0.04796s; avg 0.01599s ( +/- 0.14/ 0.13 %); best 0.01596s; worst 0.01601s; std dev. 1.756e-05 (taking best). - 11 threads: 0.01596s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (10 th.) took 0.04796s; avg 0.01599s ( +/- 0.12/ 0.13 %); best 0.01597s; worst 0.01601s; std dev. 1.631e-05 (taking best). - 10 threads: 0.01597s (0.053 Mflops) (2/2 degradations so far) - -Best threads choice is 12; starting threads were 15; max speed gap is 1.3x; search took 0.28s. -Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 198 subms, 146 lsubms, 4.2286 bpnz (tpop: 0.01204 Mflops: 0.070) -Merge (146 -> 99 leaves) took w.c.t. of 0.01596s, ~0.0003295s of computing time (of which 2.933e-05s sorting, 2.193e-05s analysis) -3 iterations (15 th.) took 0.04798s; avg 0.01599s ( +/- 0.16/ 0.20 %); best 0.01597s; worst 0.01603s; std dev. 2.404e-05 (taking best). -~ 15 threads: 0.01597s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 0.04792s; avg 0.01597s ( +/- 0.36/ 0.24 %); best 0.01592s; worst 0.01601s; std dev. 4.131e-05 (taking best). - 14 threads: 0.01592s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (13 th.) took 0.05194s; avg 0.01731s ( +/- 7.93/ 15.47 %); best 0.01594s; worst 0.01999s; std dev. 0.001894 (taking best). - 13 threads: 0.01594s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (12 th.) took 0.04795s; avg 0.01598s ( +/- 0.22/ 0.12 %); best 0.01595s; worst 0.016s; std dev. 2.508e-05 (taking best). - 12 threads: 0.01595s (0.053 Mflops) (2/2 degradations so far) - -Best threads choice is 14; starting threads were 15; max speed gap is 1x; search took 0.2s. -After merge step 1: tpop: 0.01592 s ~Mflops: 0.053 nsubm:99 otn:14 -Applying merge (146 -> 99 leaves, 14 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.321x: 0.01204s -> 0.01592s. -Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 1 merge steps (of max 6) (146 -> 99 subms) took 0.2119s (of which 0.01598s partitioning, 0s I/O); computing times: 0.0003295s in par. loops, 2.933e-05s sorting, 2.193e-05s analyzing) -Total merge + benchmarking process took 0.2119s, equivalent to 17.6/17.6 new/old ops (0.04792s for 1 clones -- as 4.0/4.0 ops, or 4.0/4.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:7.33e-08 +Starting autotuning (16 x 7.33018e-08 s stages, transA=N, nrhs=1, timer gran.=7.33018e-08), 16 suggested as starting thread count(default). +3 iterations (16 th.) took 0.04799s; avg 0.016s ( +/- 0.01/ 0.02 %); best 0.01599s; worst 0.016s; std dev. 2.245e-06 (taking best). +Reference operation time is 0.0159938 s (0.05252 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.01599 Mflops: 0.053) +Merge (22 -> 16 leaves) took w.c.t. of 0.008456s, ~0.008896s of computing time (of which 4.292e-06s sorting, 5.007e-06s analysis) +3 iterations (16 th.) took 0.03553s; avg 0.01184s ( +/- 32.68/ 64.96 %); best 0.007974s; worst 0.01954s; std dev. 0.005441 (taking best). +Reference operation time is 0.00797415 s (0.1053 Mflops) with 16 threads. +After merge step 1: tpop: 0.007974 s ~Mflops: 0.105 nsubm:16 otn:16 +Applying merge (22 -> 16 leaves, 16 th.) yielded SPEEDUP of 2.006x: 0.01599s -> 0.007974s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.005212s, ~8.106e-06s of computing time (of which 2.384e-06s sorting, 4.053e-06s analysis) +3 iterations (16 th.) took 0.04274s; avg 0.01425s ( +/- 44.08/ 59.62 %); best 0.007966s; worst 0.02274s; std dev. 0.006231 (taking best). +Reference operation time is 0.00796604 s (0.1054 Mflops) with 16 threads. +After merge step 2: tpop: 0.007966 s ~Mflops: 0.105 nsubm:10 otn:16 +Applying merge (16 -> 10 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00102x): 0.007974s -> 0.007966s, so IGNORING this instance. +Merge (10 -> 8 leaves) took w.c.t. of 2.694e-05s, ~1.001e-05s of computing time (of which 2.146e-06s sorting, 7.868e-06s analysis) +3 iterations (16 th.) took 0.02393s; avg 0.007975s ( +/- 0.45/ 0.44 %); best 0.007939s; worst 0.00801s; std dev. 2.892e-05 (taking best). +Reference operation time is 0.0079391 s (0.1058 Mflops) with 16 threads. +After merge step 3: tpop: 0.007939 s ~Mflops: 0.106 nsubm:8 otn:16 +Applying merge (10 -> 8 leaves, 16 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00441x): 0.007974s -> 0.007939s, so IGNORING this instance. +Merge (8 -> 6 leaves) took w.c.t. of 1.097e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (16 th.) took 0.02397s; avg 0.007989s ( +/- 0.24/ 0.14 %); best 0.00797s; worst 0.008s; std dev. 1.38e-05 (taking best). +Reference operation time is 0.00796986 s (0.1054 Mflops) with 16 threads. +After merge step 4: tpop: 0.00797 s ~Mflops: 0.105 nsubm:6 otn:16 +Applying merge (8 -> 6 leaves, 16 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00054x): 0.007974s -> 0.00797s, so IGNORING this instance. +Merge (6 -> 3 leaves) took w.c.t. of 1.001e-05s, ~5.96e-06s of computing time (of which 1.907e-06s sorting, 9.537e-07s analysis) +3 iterations (16 th.) took 0.02398s; avg 0.007993s ( +/- 15.17/ 15.09 %); best 0.00678s; worst 0.009199s; std dev. 0.0009877 (taking best). +Reference operation time is 0.00677991 s (0.1239 Mflops) with 16 threads. +After merge step 5: tpop: 0.00678 s ~Mflops: 0.124 nsubm:3 otn:16 +Applying merge (6 -> 3 leaves, 16 th.) yielded SPEEDUP of 1.176x: 0.007974s -> 0.00678s, so taking this instance. +Merge (3 -> 1 leaves) took w.c.t. of 1.311e-05s, ~6.914e-06s of computing time (of which 1.907e-06s sorting, 9.537e-07s analysis) +3 iterations (16 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 42.86/ 71.43 %); best 9.537e-07s; worst 2.861e-06s; std dev. 8.485e-07 (taking best). +Reference operation time is 9.53674e-07 s (880.8 Mflops) with 16 threads. +After merge step 6: tpop: 9.537e-07 s ~Mflops: 880.804 nsubm:1 otn:16 +Applying merge (3 -> 1 leaves, 16 th.) yielded SPEEDUP of 7109.250x: 0.00678s -> 9.537e-07s, so taking this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 6 merge steps (of max 6) (22 -> 1 subms) took 0.244s (of which 0.01377s partitioning, 0s I/O); computing times: 0.008931s in par. loops, 1.359e-05s sorting, 1.979e-05s analyzing) +Total merge + benchmarking process took 0.244s, equivalent to 255883.0/15.3 new/old ops (0.1277s for 4 clones -- as 133943.5/8.0 ops, or 33485.9/2.0 ops per clone), SPEEDUP of 16770.750x +Applying multi-merge (22 -> 1 leaves, 6 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 16770.750x (0.01599s -> 9.537e-07s), will amortize in 15.3 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.34s, 0.13s for constructor, 4 clones) obtained a SPEEDUP of 1676975.0% (1.677e+04x) (from 0.05252 to 880.8 Mflops). + autotuner chose 16 threads +Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 1 subms, 1 lsubms, 2.4000 bpnz. +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:7.33e-08 +Starting autotuning (16 x 7.33018e-08 s stages, transA=N, nrhs=1, timer gran.=7.33018e-08), 16 suggested as starting thread count(default). +3 iterations (16 th.) took 1.192e-05s; avg 3.974e-06s ( +/- 70.00/122.00 %); best 1.192e-06s; worst 8.821e-06s; std dev. 3.44e-06 (taking best). +~ 16 threads: 1.192e-06s (7e+02 Mflops) (0/2 degradations so far) - +3 iterations (15 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 15 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (14 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 14 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (13 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 13 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (12 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 12 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (11 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 11 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (10 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 10 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 9 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 8 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 7 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (6 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 6 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (5 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 5 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (4 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 4 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (3 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 3 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (2 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 2 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (1 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 1 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +Best threads choice is 15; starting threads were 16; max speed gap is 1.2x; search took 0.0002s. +Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 1 subms, 1 lsubms, 2.4000 bpnz (tpop: 9.537e-07 Mflops: 880.804) +Merge (1 -> 1 leaves) took w.c.t. of 9.537e-07s, ~0s of computing time (of which 0s sorting, 0s analysis) +3 iterations (16 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 42.86/ 85.71 %); best 9.537e-07s; worst 3.099e-06s; std dev. 1.012e-06 (taking best). +~ 16 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (15 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 15 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (14 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 14 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (13 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 13 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (12 th.) took 3.815e-06s; avg 1.272e-06s ( +/- 25.00/ 50.00 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.496e-07 (taking best). + 12 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (11 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 11 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (10 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 41.18 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.052e-07 (taking best). + 10 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 9 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 8 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 7 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (6 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 6 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (5 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 5 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (4 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 4 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (3 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 3 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (2 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 2 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (1 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 1 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +Best threads choice is 16; starting threads were 16; max speed gap is 1x; search took 0.00018s. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 880.804 nsubm:1 otn:16 +Applying merge (1 -> 1 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. +Merged all the matrix leaves: no reason to continue merging. +A total of 1 merge steps (of max 6) (1 -> 1 subms) took 0.0002899s (of which 0.000103s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) +Total merge + benchmarking process took 0.0002899s, equivalent to 304.0/304.0 new/old ops (0.02771s for 1 clones -- as 29051.8/29051.8 ops, or 29051.8/29051.8 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... -3 iterations (15 th.) took 0.04802s; avg 0.01601s ( +/- 0.08/ 0.12 %); best 0.01599s; worst 0.01602s; std dev. 1.367e-05 (taking best). -~ 15 threads: 0.01599s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 0.04793s; avg 0.01598s ( +/- 0.24/ 0.12 %); best 0.01594s; worst 0.016s; std dev. 2.731e-05 (taking best). - 14 threads: 0.01594s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (13 th.) took 0.05199s; avg 0.01733s ( +/- 7.78/ 15.41 %); best 0.01598s; worst 0.02s; std dev. 0.001888 (taking best). - 13 threads: 0.01598s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (12 th.) took 0.04761s; avg 0.01587s ( +/- 1.75/ 0.96 %); best 0.01559s; worst 0.01602s; std dev. 0.0001964 (taking best). - 12 threads: 0.01559s (0.054 Mflops) (0/2 degradations so far) - -3 iterations (11 th.) took 0.04794s; avg 0.01598s ( +/- 0.12/ 0.13 %); best 0.01596s; worst 0.016s; std dev. 1.636e-05 (taking best). - 11 threads: 0.01596s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (10 th.) took 0.04798s; avg 0.01599s ( +/- 0.08/ 0.07 %); best 0.01598s; worst 0.016s; std dev. 9.919e-06 (taking best). - 10 threads: 0.01598s (0.053 Mflops) (2/2 degradations so far) - -Best threads choice is 12; starting threads were 15; max speed gap is 1x; search took 0.29s. -Starting split (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 198 subms, 146 lsubms, 4.2286 bpnz (tpop: 0.01559 Mflops: 0.054) -Split (146 -> 146 leaves, 198 -> 198 subms) took 0.02375s (of which: 3.004e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 0.04788s; avg 0.01596s ( +/- 0.39/ 0.27 %); best 0.0159s; worst 0.016s; std dev. 4.493e-05 (taking best). -~ 15 threads: 0.0159s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 0.04798s; avg 0.01599s ( +/- 0.03/ 0.02 %); best 0.01599s; worst 0.016s; std dev. 3.252e-06 (taking best). - 14 threads: 0.01599s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (13 th.) took 0.04397s; avg 0.01466s ( +/- 18.14/ 9.18 %); best 0.012s; worst 0.016s; std dev. 0.00188 (taking best). - 13 threads: 0.012s (0.07 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 0.04797s; avg 0.01599s ( +/- 0.15/ 0.09 %); best 0.01596s; worst 0.016s; std dev. 1.704e-05 (taking best). - 12 threads: 0.01596s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (11 th.) took 0.04799s; avg 0.016s ( +/- 0.18/ 0.10 %); best 0.01597s; worst 0.01601s; std dev. 2.042e-05 (taking best). - 11 threads: 0.01597s (0.053 Mflops) (2/2 degradations so far) - -Best threads choice is 13; starting threads were 15; max speed gap is 1.3x; search took 0.24s. -After split step 1: tpop: 0.012 s ~Mflops: 0.070 nsubm:146 otn:13 -Applying split (146 -> 146 leaves, 13 th.) yielded SPEEDUP of 1.300x: 0.01559s -> 0.012s, so taking this instance. -Split (146 -> 146 leaves, 198 -> 198 subms) took 0.02781s (of which: 2.503e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. -3 iterations (15 th.) took 0.04791s; avg 0.01597s ( +/- 0.19/ 0.22 %); best 0.01594s; worst 0.016s; std dev. 2.716e-05 (taking best). -~ 15 threads: 0.01594s (0.053 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 0.04796s; avg 0.01599s ( +/- 0.27/ 0.22 %); best 0.01594s; worst 0.01602s; std dev. 3.268e-05 (taking best). - 14 threads: 0.01594s (0.053 Mflops) (1/2 degradations so far) - -3 iterations (13 th.) took 0.04796s; avg 0.01599s ( +/- 0.17/ 0.13 %); best 0.01596s; worst 0.01601s; std dev. 1.992e-05 (taking best). - 13 threads: 0.01596s (0.053 Mflops) (2/2 degradations so far) - -Best threads choice is 15; starting threads were 15; max speed gap is 1x; search took 0.14s. -After split step 2: tpop: 0.01594 s ~Mflops: 0.053 nsubm:146 otn:15 -Applying split (146 -> 146 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.329x: 0.012s -> 0.01594s. +3 iterations (16 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 41.18 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.052e-07 (taking best). +~ 16 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (15 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 15 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (14 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 14 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (13 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 13 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (12 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 12 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (11 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 11 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (10 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 10 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 9 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 8 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 2.146e-06s; avg 7.153e-07s ( +/- 89.75/ 66.67 %); best 7.33e-08s; worst 1.192e-06s; std dev. 5.15e-07 (taking best). + 7 threads: 7.33e-08s (1.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (6 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 6 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (5 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 88.47/ 50.00 %); best 7.33e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 5 threads: 7.33e-08s (1.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (4 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 88.47/ 50.00 %); best 7.33e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). + 4 threads: 7.33e-08s (1.1e+04 Mflops) (0/2 degradations so far) - +3 iterations (3 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). + 3 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - +3 iterations (2 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). + 2 threads: 9.537e-07s (8.8e+02 Mflops) (2/2 degradations so far) - +Best threads choice is 7; starting threads were 16; max speed gap is 13x; search took 0.00015s. +Starting split (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 1 subms, 1 lsubms, 2.4000 bpnz (tpop: 7.33e-08 Mflops: 11459.474) +Split (1 -> 3 leaves, 1 -> 4 subms) took 3.91e-05s (of which: 2.146e-06s analysis, -1.74e+09s mem.mgmt); compute time: 1.693e-05s overall, 4.053e-06s searches, 1.287e-05s shuffle, 4.053e-06s switch, 9.537e-07s quadrants. +3 iterations (16 th.) took 0.03178s; avg 0.01059s ( +/- 24.52/ 13.26 %); best 0.007997s; worst 0.012s; std dev. 0.001839 (taking best). +~ 16 threads: 0.007997s (0.11 Mflops) (0/2 degradations so far) - +3 iterations (15 th.) took 0.03173s; avg 0.01058s ( +/- 27.02/ 13.56 %); best 0.007718s; worst 0.01201s; std dev. 0.002021 (taking best). + 15 threads: 0.007718s (0.11 Mflops) (0/2 degradations so far) - +3 iterations (14 th.) took 0.02797s; avg 0.009324s ( +/- 14.41/ 28.64 %); best 0.007981s; worst 0.01199s; std dev. 0.001888 (taking best). + 14 threads: 0.007981s (0.11 Mflops) (1/2 degradations so far) - +3 iterations (13 th.) took 0.03599s; avg 0.012s ( +/- 0.01/ 0.02 %); best 0.012s; worst 0.012s; std dev. 1.461e-06 (taking best). + 13 threads: 0.012s (0.07 Mflops) (2/2 degradations so far) - +Best threads choice is 15; starting threads were 16; max speed gap is 1.6x; search took 0.13s. +After split step 1: tpop: 0.007718 s ~Mflops: 0.109 nsubm:3 otn:15 +Applying split (1 -> 3 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 105288.665x: 7.33e-08s -> 0.007718s. Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 2 split steps (of max 6) (146 -> 146 subms) took 0.48s (of which 0.05213s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 5.507e-05s analyzing) -Total split + benchmarking process took 0.48s, equivalent to 40.0/30.8 new/old ops (0.09584s for 2 clones -- as 8.0/6.1 ops, or 4.0/3.1 ops per clone), SPEEDUP of 1.300x -Applying multi-split (146 -> 146 leaves, 1 steps, 12 -> 13 th.sp.) yielded SPEEDUP of 1.300x (0.01559s -> 0.012s), will amortize in 133.4 ops by saving 0.003597s per op. -In 1 tuning rounds (tot. 1.4s, 0.14s for constructor, 3 clones) obtained a SPEEDUP of 30.0% (1.3x) (from 0.05387 to 0.07002 Mflops). +A total of 1 split steps (of max 6) (1 -> 3 subms) took 0.1279s (of which 5.388e-05s partitioning, 0s I/O); computing times: 1.693e-05s in par. loops, 4.053e-06s sorting, 2.146e-06s analyzing) +Total split + benchmarking process took 0.1279s, equivalent to 1744322.7/1744322.7 new/old ops (0.03143s for 1 clones -- as 428749.4/428749.4 ops, or 428749.4/428749.4 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +In 1 tuning rounds (tot. 0.19s, 0.059s for constructor, 2 clones) obtained NO speedup (best stays 1.146e+04 Mflops). check results are ok Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 5.130e-02 s (100.00 %) - analyzed arrays in 1.996e-02 s (38.91 %) - cleaned-up arrays in 1.907e-06 s (0.00 %) - deduplicated arrays in 1.907e-06 s (0.00 %) - sorted arrays in 1.529e-02 s (29.81 %) - shuffled partitions in 1.601e-02 s (31.21 %) - memory allocations took 1.931e-05 s (0.04 %) - leafs setup took 8.106e-06 s (0.02 %) - halfword conversion took 2.861e-06 s (0.01 %) -Built (6 x 6)[0x56416ed4bb80]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' + converted COO to RSB in 3.147e-02 s (100.00 %) + analyzed arrays in 1.599e-02 s (50.82 %) + cleaned-up arrays in 9.537e-07 s (0.00 %) + deduplicated arrays in 9.537e-07 s (0.00 %) + sorted arrays in 7.470e-03 s (23.74 %) + shuffled partitions in 7.998e-03 s (25.41 %) + memory allocations took 4.768e-06 s (0.02 %) + leafs setup took 1.192e-06 s (0.00 %) + halfword conversion took 9.537e-07 s (0.00 %) +Built (6 x 6)[0x55a09830d950]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' Read matrix pd.mtx 6 x 6 : 36 Matrix has no symmetry Using NRHS=4 -Repeated USMV took 0.4697E-04 s -A single USMM took 0.2313E-04 s -USMM-to-USMV speed ratio is is 2.031 x +Repeated USMV took 0.2694E-04 s +A single USMM took 0.1693E-04 s +USMM-to-USMV speed ratio is is 1.592 x Call auto-tuning routine.. Repeat measurement. Tuned USMM took 0.3815E-05 s -Tuned-to-untuned speed ratio is is 6.062 x +Tuned-to-untuned speed ratio is is 4.438 x FAILED: 0 PASSED: 2 /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran_rsb_fi @@ -8793,222 +8866,222 @@ Loading matrix from file "/build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx". Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.831e-01 s (100.00 %) - analyzed arrays in 5.594e-02 s (30.55 %) - cleaned-up arrays in 9.060e-06 s (0.00 %) - deduplicated arrays in 1.907e-06 s (0.00 %) - sorted arrays in 3.111e-02 s (16.99 %) - shuffled partitions in 4.799e-02 s (26.21 %) - memory allocations took 2.694e-05 s (0.01 %) - leafs setup took 1.192e-05 s (0.01 %) - halfword conversion took 4.799e-02 s (26.21 %) -Built (6 x 6)[0x5619470245b0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + converted COO to RSB in 1.262e-01 s (100.00 %) + analyzed arrays in 3.998e-02 s (31.67 %) + cleaned-up arrays in 1.907e-06 s (0.00 %) + deduplicated arrays in 1.192e-06 s (0.00 %) + sorted arrays in 2.222e-02 s (17.60 %) + shuffled partitions in 3.200e-02 s (25.36 %) + memory allocations took 1.073e-05 s (0.01 %) + leafs setup took 5.007e-06 s (0.00 %) + halfword conversion took 3.199e-02 s (25.34 %) +Built (6 x 6)[0x55bb9f24b5b0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Considering D clone. Base matrix: -(6 x 6)[0x561947029000]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x55bb9f250230]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' -Will use autotuning routine to sample matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. +Will use autotuning routine to sample matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.2778 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.04796s; avg 0.01599s ( +/- 0.31/ 0.33 %); best 0.01594s; worst 0.01604s; std dev. 4.213e-05 (taking best). -Reference operation time is 0.015938 s (0.009035 Mflops) with 15 threads. -After 0.048067s, autotuning routine did not find a better threads count configuration. +3 iterations (16 th.) took 0.04796s; avg 0.01599s ( +/- 0.20/ 0.11 %); best 0.01595s; worst 0.016s; std dev. 2.22e-05 (taking best). +Reference operation time is 0.015954 s (0.009026 Mflops) with 16 threads. +After 0.048001s, autotuning routine did not find a better threads count configuration. -Will autotune matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. +Will autotune matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.2778 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.04801s; avg 0.016s ( +/- 0.28/ 0.24 %); best 0.01596s; worst 0.01604s; std dev. 3.38e-05 (taking best). -Reference operation time is 0.0159581 s (0.009024 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.01596 Mflops: 0.009) -Merge (22 -> 16 leaves) took w.c.t. of 0.008511s, ~0.000366s of computing time (of which 3.815e-06s sorting, 1.121e-05s analysis) -3 iterations (15 th.) took 0.0714s; avg 0.0238s ( +/- 47.46/ 48.80 %); best 0.01251s; worst 0.03542s; std dev. 0.009357 (taking best). -Reference operation time is 0.0125051 s (0.01152 Mflops) with 15 threads. -After merge step 1: tpop: 0.01251 s ~Mflops: 0.012 nsubm:16 otn:15 -Applying merge (22 -> 16 leaves, 15 th.) yielded SPEEDUP of 1.276x: 0.01596s -> 0.01251s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.004007s, ~5.817e-05s of computing time (of which 1.407e-05s sorting, 1.001e-05s analysis) -3 iterations (15 th.) took 0.05996s; avg 0.01999s ( +/- 32.31/ 52.46 %); best 0.01353s; worst 0.03047s; std dev. 0.00748 (taking best). -Reference operation time is 0.01353 s (0.01064 Mflops) with 15 threads. -After merge step 2: tpop: 0.01353 s ~Mflops: 0.011 nsubm:10 otn:15 -Applying merge (16 -> 10 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.082x: 0.01251s -> 0.01353s. -Merge (10 -> 7 leaves) took w.c.t. of 3.099e-05s, ~1.001e-05s of computing time (of which 9.537e-07s sorting, 9.06e-06s analysis) -3 iterations (15 th.) took 0.04789s; avg 0.01596s ( +/- 0.47/ 0.26 %); best 0.01589s; worst 0.016s; std dev. 5.293e-05 (taking best). -Reference operation time is 0.015887 s (0.009064 Mflops) with 15 threads. -After merge step 3: tpop: 0.01589 s ~Mflops: 0.009 nsubm:7 otn:15 -Applying merge (10 -> 7 leaves, 15 th.) yielded SLOWDOWN (2th of 3 tolerable) of 1.270x: 0.01251s -> 0.01589s. -Skipping further merge based tests after 2 definite performance degradations in a row (and last exceeding limit). -A total of 3 merge steps (of max 6) (22 -> 7 subms) took 0.24s (of which 0.01259s partitioning, 0s I/O); computing times: 0.0004342s in par. loops, 1.884e-05s sorting, 3.028e-05s analyzing) -Total merge + benchmarking process took 0.24s, equivalent to 19.2/15.0 new/old ops (0.09585s for 2 clones -- as 7.7/6.0 ops, or 3.8/3.0 ops per clone), SPEEDUP of 1.276x -Applying multi-merge (22 -> 16 leaves, 1 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 1.276x (0.01596s -> 0.01251s), will amortize in 69.5 ops by saving 0.003453s per op. -In 1 tuning rounds (tot. 0.34s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 27.6% (1.276x) (from 0.009024 to 0.01152 Mflops). -After 0.336067s, global autotuning declared speedup of 1.27613 x, when using threads count of 15 and a new matrix: -(6 x 6)[0x56194702b1b0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' +3 iterations (16 th.) took 0.048s; avg 0.016s ( +/- 0.01/ 0.01 %); best 0.016s; worst 0.016s; std dev. 8.778e-07 (taking best). +Reference operation time is 0.0159979 s (0.009001 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.2778 bpnz (tpop: 0.016 Mflops: 0.009) +Merge (22 -> 19 leaves) took w.c.t. of 2.408e-05s, ~1.001e-05s of computing time (of which 1.907e-06s sorting, 4.053e-06s analysis) +3 iterations (16 th.) took 0.04796s; avg 0.01599s ( +/- 0.16/ 0.09 %); best 0.01596s; worst 0.016s; std dev. 1.8e-05 (taking best). +Reference operation time is 0.01596 s (0.009023 Mflops) with 16 threads. +After merge step 1: tpop: 0.01596 s ~Mflops: 0.009 nsubm:19 otn:16 +Applying merge (22 -> 19 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00238x): 0.016s -> 0.01596s, so IGNORING this instance. +Merge (19 -> 16 leaves) took w.c.t. of 1.597e-05s, ~3.099e-06s of computing time (of which 0s sorting, 4.053e-06s analysis) +3 iterations (16 th.) took 0.04797s; avg 0.01599s ( +/- 0.12/ 0.06 %); best 0.01597s; worst 0.016s; std dev. 1.366e-05 (taking best). +Reference operation time is 0.01597 s (0.009017 Mflops) with 16 threads. +After merge step 2: tpop: 0.01597 s ~Mflops: 0.009 nsubm:16 otn:16 +Applying merge (19 -> 16 leaves, 16 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00175x): 0.016s -> 0.01597s, so IGNORING this instance. +Merge (16 -> 13 leaves) took w.c.t. of 1.097e-05s, ~2.861e-06s of computing time (of which 0s sorting, 2.861e-06s analysis) +3 iterations (16 th.) took 0.04797s; avg 0.01599s ( +/- 0.11/ 0.06 %); best 0.01597s; worst 0.016s; std dev. 1.208e-05 (taking best). +Reference operation time is 0.015974 s (0.009015 Mflops) with 16 threads. +After merge step 3: tpop: 0.01597 s ~Mflops: 0.009 nsubm:13 otn:16 +Applying merge (16 -> 13 leaves, 16 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00149x): 0.016s -> 0.01597s, so IGNORING this instance. +Merge (13 -> 10 leaves) took w.c.t. of 1.192e-05s, ~5.007e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) +3 iterations (16 th.) took 0.04798s; avg 0.01599s ( +/- 0.09/ 0.07 %); best 0.01598s; worst 0.016s; std dev. 1.077e-05 (taking best). +Reference operation time is 0.0159771 s (0.009013 Mflops) with 16 threads. +After merge step 4: tpop: 0.01598 s ~Mflops: 0.009 nsubm:10 otn:16 +Applying merge (13 -> 10 leaves, 16 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00130x): 0.016s -> 0.01598s, so IGNORING this instance. +Merge (10 -> 7 leaves) took w.c.t. of 1.812e-05s, ~5.007e-06s of computing time (of which 9.537e-07s sorting, 4.053e-06s analysis) +3 iterations (16 th.) took 0.04786s; avg 0.01595s ( +/- 0.57/ 0.29 %); best 0.01586s; worst 0.016s; std dev. 6.418e-05 (taking best). +Reference operation time is 0.0158639 s (0.009077 Mflops) with 16 threads. +After merge step 5: tpop: 0.01586 s ~Mflops: 0.009 nsubm:7 otn:16 +Applying merge (10 -> 7 leaves, 16 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00845x): 0.016s -> 0.01586s, so IGNORING this instance. +Merge (7 -> 4 leaves) took w.c.t. of 9.06e-06s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) +3 iterations (16 th.) took 0.03998s; avg 0.01333s ( +/- 39.51/ 19.88 %); best 0.008062s; worst 0.01598s; std dev. 0.003724 (taking best). +Reference operation time is 0.00806212 s (0.01786 Mflops) with 16 threads. +After merge step 6: tpop: 0.008062 s ~Mflops: 0.018 nsubm:4 otn:16 +Applying merge (7 -> 4 leaves, 16 th.) yielded SPEEDUP of 1.984x: 0.016s -> 0.008062s, so taking this instance. +A total of 6 merge steps (of max 6) (22 -> 4 subms) took 0.328s (of which 0.0001113s partitioning, 0s I/O); computing times: 3.004e-05s in par. loops, 5.96e-06s sorting, 1.907e-05s analyzing) +Total merge + benchmarking process took 0.328s, equivalent to 40.7/20.5 new/old ops (0.09597s for 2 clones -- as 11.9/6.0 ops, or 6.0/3.0 ops per clone), SPEEDUP of 1.984x +Applying multi-merge (22 -> 4 leaves, 6 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 1.984x (0.016s -> 0.008062s), will amortize in 41.3 ops by saving 0.007936s per op. +In 1 tuning rounds (tot. 0.42s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 98.4% (1.984x) (from 0.009001 to 0.01786 Mflops). +After 0.424019s, global autotuning declared speedup of 1.98433 x, when using threads count of 16 and a new matrix: +(6 x 6)[0x55bb9f2523e0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 4, symflags:'' Considering S clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.003e-01 s (100.00 %) - analyzed arrays in 3.977e-02 s (39.65 %) + converted COO to RSB in 1.039e-01 s (100.00 %) + analyzed arrays in 3.994e-02 s (38.42 %) cleaned-up arrays in 9.537e-07 s (0.00 %) - deduplicated arrays in 9.537e-07 s (0.00 %) + deduplicated arrays in 0.000e+00 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.201e-02 s (31.91 %) - memory allocations took 1.812e-05 s (0.02 %) - leafs setup took 7.153e-06 s (0.01 %) - halfword conversion took 2.849e-02 s (28.41 %) -Built (6 x 6)[0x56194702b1b0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' + shuffled partitions in 3.201e-02 s (30.80 %) + memory allocations took 5.245e-06 s (0.01 %) + leafs setup took 2.146e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (30.77 %) +Built (6 x 6)[0x55bb9f2523e0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Base matrix: -(6 x 6)[0x56194702b1b0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' +(6 x 6)[0x55bb9f2523e0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' -Will use autotuning routine to sample matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. +Will use autotuning routine to sample matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.5000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.0474s; avg 0.0158s ( +/- 2.52/ 1.36 %); best 0.0154s; worst 0.01602s; std dev. 0.0002823 (taking best). -Reference operation time is 0.0154021 s (0.009349 Mflops) with 15 threads. -After 0.047463s, autotuning routine did not find a better threads count configuration. +3 iterations (16 th.) took 0.05597s; avg 0.01866s ( +/- 14.24/ 28.47 %); best 0.016s; worst 0.02397s; std dev. 0.003756 (taking best). +Reference operation time is 0.0159991 s (0.009001 Mflops) with 16 threads. +After 0.055985s, autotuning routine did not find a better threads count configuration. -Will autotune matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. +Will autotune matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.5000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.04794s; avg 0.01598s ( +/- 2.00/ 1.87 %); best 0.01566s; worst 0.01628s; std dev. 0.0002527 (taking best). -Reference operation time is 0.015661 s (0.009195 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 0.01566 Mflops: 0.009) -Merge (16 -> 10 leaves) took w.c.t. of 0.008295s, ~3.099e-05s of computing time (of which 1.907e-06s sorting, 6.914e-06s analysis) -3 iterations (15 th.) took 0.05938s; avg 0.01979s ( +/- 19.20/ 38.36 %); best 0.01599s; worst 0.02739s; std dev. 0.005369 (taking best). -Reference operation time is 0.0159929 s (0.009004 Mflops) with 15 threads. -After merge step 1: tpop: 0.01599 s ~Mflops: 0.009 nsubm:10 otn:15 -Applying merge (16 -> 10 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.021x: 0.01566s -> 0.01599s. -Merge (10 -> 7 leaves) took w.c.t. of 3.6e-05s, ~8.106e-06s of computing time (of which 9.537e-07s sorting, 7.868e-06s analysis) -3 iterations (15 th.) took 0.04844s; avg 0.01615s ( +/- 1.45/ 2.33 %); best 0.01591s; worst 0.01652s; std dev. 0.0002688 (taking best). -Reference operation time is 0.0159109 s (0.00905 Mflops) with 15 threads. -After merge step 2: tpop: 0.01591 s ~Mflops: 0.009 nsubm:7 otn:15 -Applying merge (10 -> 7 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.98430x): 0.01566s -> 0.01591s, so IGNORING this instance. -Merge (7 -> 4 leaves) took w.c.t. of 3.505e-05s, ~1.097e-05s of computing time (of which 9.537e-07s sorting, 7.153e-06s analysis) -3 iterations (15 th.) took 0.0474s; avg 0.0158s ( +/- 2.53/ 1.50 %); best 0.0154s; worst 0.01604s; std dev. 0.000284 (taking best). -Reference operation time is 0.0154011 s (0.00935 Mflops) with 15 threads. -After merge step 3: tpop: 0.0154 s ~Mflops: 0.009 nsubm:4 otn:15 -Applying merge (7 -> 4 leaves, 15 th.) yielded SPEEDUP of 1.017x: 0.01566s -> 0.0154s, so taking this instance. -Merge (4 -> 1 leaves) took w.c.t. of 3.099e-05s, ~1.097e-05s of computing time (of which 2.146e-06s sorting, 5.96e-06s analysis) -3 iterations (15 th.) took 1.097e-05s; avg 3.656e-06s ( +/- 98.20/173.91 %); best 6.57e-08s; worst 1.001e-05s; std dev. 4.512e-06 (taking best). -Reference operation time is 6.56962e-08 s (2192 Mflops) with 15 threads. -After merge step 4: tpop: 6.57e-08 s ~Mflops: 2191.906 nsubm:1 otn:15 -Applying merge (4 -> 1 leaves, 15 th.) yielded SPEEDUP of 234429.323x: 0.0154s -> 6.57e-08s, so taking this instance. +3 iterations (16 th.) took 0.048s; avg 0.016s ( +/- 0.03/ 0.02 %); best 0.016s; worst 0.016s; std dev. 3.059e-06 (taking best). +Reference operation time is 0.015995 s (0.009003 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.5000 bpnz (tpop: 0.016 Mflops: 0.009) +Merge (16 -> 13 leaves) took w.c.t. of 1.311e-05s, ~3.099e-06s of computing time (of which 1.192e-06s sorting, 2.146e-06s analysis) +3 iterations (16 th.) took 0.04791s; avg 0.01597s ( +/- 0.37/ 0.20 %); best 0.01591s; worst 0.016s; std dev. 4.222e-05 (taking best). +Reference operation time is 0.015909 s (0.009052 Mflops) with 16 threads. +After merge step 1: tpop: 0.01591 s ~Mflops: 0.009 nsubm:13 otn:16 +Applying merge (16 -> 13 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00541x): 0.016s -> 0.01591s, so IGNORING this instance. +Merge (13 -> 10 leaves) took w.c.t. of 2.503e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) +3 iterations (16 th.) took 0.04796s; avg 0.01599s ( +/- 0.16/ 0.10 %); best 0.01596s; worst 0.016s; std dev. 1.789e-05 (taking best). +Reference operation time is 0.0159631 s (0.009021 Mflops) with 16 threads. +After merge step 2: tpop: 0.01596 s ~Mflops: 0.009 nsubm:10 otn:16 +Applying merge (13 -> 10 leaves, 16 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00200x): 0.016s -> 0.01596s, so IGNORING this instance. +Merge (10 -> 7 leaves) took w.c.t. of 1.001e-05s, ~3.099e-06s of computing time (of which 1.192e-06s sorting, 1.192e-06s analysis) +3 iterations (16 th.) took 0.04797s; avg 0.01599s ( +/- 0.11/ 0.06 %); best 0.01597s; worst 0.016s; std dev. 1.298e-05 (taking best). +Reference operation time is 0.0159709 s (0.009016 Mflops) with 16 threads. +After merge step 3: tpop: 0.01597 s ~Mflops: 0.009 nsubm:7 otn:16 +Applying merge (10 -> 7 leaves, 16 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00151x): 0.016s -> 0.01597s, so IGNORING this instance. +Merge (7 -> 4 leaves) took w.c.t. of 8.821e-06s, ~3.099e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) +3 iterations (16 th.) took 0.04798s; avg 0.01599s ( +/- 0.09/ 0.05 %); best 0.01598s; worst 0.016s; std dev. 1.077e-05 (taking best). +Reference operation time is 0.0159771 s (0.009013 Mflops) with 16 threads. +After merge step 4: tpop: 0.01598 s ~Mflops: 0.009 nsubm:4 otn:16 +Applying merge (7 -> 4 leaves, 16 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00112x): 0.016s -> 0.01598s, so IGNORING this instance. +Merge (4 -> 1 leaves) took w.c.t. of 2.789e-05s, ~5.007e-06s of computing time (of which 1.192e-06s sorting, 9.537e-07s analysis) +3 iterations (16 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 93.47/ 84.62 %); best 6.745e-08s; worst 1.907e-06s; std dev. 7.867e-07 (taking best). +Reference operation time is 6.74486e-08 s (2135 Mflops) with 16 threads. +After merge step 5: tpop: 6.745e-08 s ~Mflops: 2134.959 nsubm:1 otn:16 +Applying merge (4 -> 1 leaves, 16 th.) yielded SPEEDUP of 237143.867x: 0.016s -> 6.745e-08s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 4 merge steps (of max 6) (16 -> 1 subms) took 0.2601s (of which 0.008557s partitioning, 0s I/O); computing times: 6.104e-05s in par. loops, 5.96e-06s sorting, 2.789e-05s analyzing) -Total merge + benchmarking process took 0.2601s, equivalent to 3959085.5/16.6 new/old ops (0.1404s for 3 clones -- as 2137125.7/9.0 ops, or 712375.2/3.0 ops per clone), SPEEDUP of 238385.048x -Applying multi-merge (16 -> 1 leaves, 4 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 238385.048x (0.01566s -> 6.57e-08s), will amortize in 16.6 ops by saving 0.01566s per op. -In 1 tuning rounds (tot. 0.35s, 0.14s for constructor, 3 clones) obtained a SPEEDUP of 23838404.8% (2.384e+05x) (from 0.009195 to 2192 Mflops). -After 0.352400s, global autotuning declared speedup of 238385 x, when using threads count of 15 and a new matrix: -(6 x 6)[0x56194702dd80]{S} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +A total of 5 merge steps (of max 6) (16 -> 1 subms) took 0.24s (of which 0.000159s partitioning, 0s I/O); computing times: 1.836e-05s in par. loops, 5.484e-06s sorting, 8.106e-06s analyzing) +Total merge + benchmarking process took 0.24s, equivalent to 3558115.9/15.0 new/old ops (0.09593s for 2 clones -- as 1422205.7/6.0 ops, or 711102.9/3.0 ops per clone), SPEEDUP of 237143.867x +Applying multi-merge (16 -> 1 leaves, 5 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 237143.867x (0.016s -> 6.745e-08s), will amortize in 15.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.34s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 23714286.7% (2.371e+05x) (from 0.009003 to 2135 Mflops). +After 0.336019s, global autotuning declared speedup of 237144 x, when using threads count of 16 and a new matrix: +(6 x 6)[0x55bb9f250280]{S} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' Considering C clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.035e-01 s (100.00 %) - analyzed arrays in 3.950e-02 s (38.16 %) - cleaned-up arrays in 9.537e-07 s (0.00 %) - deduplicated arrays in 1.192e-06 s (0.00 %) + converted COO to RSB in 1.040e-01 s (100.00 %) + analyzed arrays in 3.994e-02 s (38.42 %) + cleaned-up arrays in 1.192e-06 s (0.00 %) + deduplicated arrays in 9.537e-07 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.200e-02 s (30.91 %) - memory allocations took 2.289e-05 s (0.02 %) - leafs setup took 6.914e-06 s (0.01 %) - halfword conversion took 3.200e-02 s (30.90 %) -Built (6 x 6)[0x561947032100]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + shuffled partitions in 3.200e-02 s (30.79 %) + memory allocations took 1.097e-05 s (0.01 %) + leafs setup took 3.099e-06 s (0.00 %) + halfword conversion took 3.199e-02 s (30.78 %) +Built (6 x 6)[0x55bb9f258400]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Base matrix: -(6 x 6)[0x561947032100]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x55bb9f258400]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' -Will use autotuning routine to sample matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.2778 bpnz. +Will use autotuning routine to sample matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.04397s; avg 0.01466s ( +/- 17.56/ 8.93 %); best 0.01208s; worst 0.01596s; std dev. 0.00182 (taking best). -Reference operation time is 0.0120831 s (0.04767 Mflops) with 15 threads. -After 0.044024s, autotuning routine did not find a better threads count configuration. +3 iterations (16 th.) took 0.04795s; avg 0.01598s ( +/- 0.22/ 0.11 %); best 0.01595s; worst 0.016s; std dev. 2.451e-05 (taking best). +Reference operation time is 0.0159481 s (0.03612 Mflops) with 16 threads. +After 0.047971s, autotuning routine did not find a better threads count configuration. -Will autotune matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.2778 bpnz. +Will autotune matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.04796s; avg 0.01599s ( +/- 0.11/ 0.14 %); best 0.01597s; worst 0.01601s; std dev. 1.626e-05 (taking best). -Reference operation time is 0.0159681 s (0.03607 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.2778 bpnz (tpop: 0.01597 Mflops: 0.036) -Merge (22 -> 19 leaves) took w.c.t. of 3.099e-05s, ~9.06e-06s of computing time (of which 1.907e-06s sorting, 7.153e-06s analysis) -3 iterations (15 th.) took 0.04792s; avg 0.01597s ( +/- 0.32/ 0.17 %); best 0.01592s; worst 0.016s; std dev. 3.655e-05 (taking best). -Reference operation time is 0.015923 s (0.03617 Mflops) with 15 threads. -After merge step 1: tpop: 0.01592 s ~Mflops: 0.036 nsubm:19 otn:15 -Applying merge (22 -> 19 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00283x): 0.01597s -> 0.01592s, so IGNORING this instance. -Merge (19 -> 16 leaves) took w.c.t. of 3.719e-05s, ~9.06e-06s of computing time (of which 2.146e-06s sorting, 1.097e-05s analysis) -3 iterations (15 th.) took 0.04793s; avg 0.01598s ( +/- 0.38/ 0.30 %); best 0.01591s; worst 0.01602s; std dev. 4.535e-05 (taking best). -Reference operation time is 0.0159149 s (0.03619 Mflops) with 15 threads. -After merge step 2: tpop: 0.01591 s ~Mflops: 0.036 nsubm:16 otn:15 -Applying merge (19 -> 16 leaves, 15 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00334x): 0.01597s -> 0.01591s, so IGNORING this instance. -Merge (16 -> 13 leaves) took w.c.t. of 3.91e-05s, ~1.001e-05s of computing time (of which 2.146e-06s sorting, 7.868e-06s analysis) -3 iterations (15 th.) took 0.04759s; avg 0.01586s ( +/- 1.52/ 0.90 %); best 0.01562s; worst 0.01601s; std dev. 0.0001712 (taking best). -Reference operation time is 0.0156219 s (0.03687 Mflops) with 15 threads. -After merge step 3: tpop: 0.01562 s ~Mflops: 0.037 nsubm:13 otn:15 -Applying merge (16 -> 13 leaves, 15 th.) yielded SPEEDUP of 1.022x: 0.01597s -> 0.01562s, so taking this instance. -Merge (13 -> 10 leaves) took w.c.t. of 3.29e-05s, ~9.06e-06s of computing time (of which 1.192e-06s sorting, 8.106e-06s analysis) -3 iterations (15 th.) took 0.04796s; avg 0.01599s ( +/- 0.33/ 0.24 %); best 0.01593s; worst 0.01602s; std dev. 3.889e-05 (taking best). -Reference operation time is 0.0159321 s (0.03615 Mflops) with 15 threads. -After merge step 4: tpop: 0.01593 s ~Mflops: 0.036 nsubm:10 otn:15 -Applying merge (13 -> 10 leaves, 15 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.98053x): 0.01562s -> 0.01593s, so IGNORING this instance. -Merge (10 -> 7 leaves) took w.c.t. of 3.695e-05s, ~1.407e-05s of computing time (of which 1.907e-06s sorting, 9.06e-06s analysis) -3 iterations (15 th.) took 0.04791s; avg 0.01597s ( +/- 3.01/ 3.35 %); best 0.01549s; worst 0.01651s; std dev. 0.0004169 (taking best). -Reference operation time is 0.0154891 s (0.03719 Mflops) with 15 threads. -After merge step 5: tpop: 0.01549 s ~Mflops: 0.037 nsubm:7 otn:15 -Applying merge (10 -> 7 leaves, 15 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00857x): 0.01562s -> 0.01549s, so IGNORING this instance. -Merge (7 -> 4 leaves) took w.c.t. of 3.099e-05s, ~9.06e-06s of computing time (of which 1.907e-06s sorting, 5.96e-06s analysis) -3 iterations (15 th.) took 0.04793s; avg 0.01598s ( +/- 0.29/ 0.17 %); best 0.01593s; worst 0.01601s; std dev. 3.328e-05 (taking best). -Reference operation time is 0.0159311 s (0.03616 Mflops) with 15 threads. -After merge step 6: tpop: 0.01593 s ~Mflops: 0.036 nsubm:4 otn:15 -Applying merge (7 -> 4 leaves, 15 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=0.98059x): 0.01562s -> 0.01593s, so IGNORING this instance. -A total of 6 merge steps (of max 6) (22 -> 4 subms) took 0.336s (of which 0.0002339s partitioning, 0s I/O); computing times: 6.032e-05s in par. loops, 1.121e-05s sorting, 4.911e-05s analyzing) -Total merge + benchmarking process took 0.336s, equivalent to 21.5/21.0 new/old ops (0.09591s for 2 clones -- as 6.1/6.0 ops, or 3.1/3.0 ops per clone), SPEEDUP of 1.022x -Applying multi-merge (22 -> 13 leaves, 3 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 1.022x (0.01597s -> 0.01562s), will amortize in 970.6 ops by saving 0.0003462s per op. -In 1 tuning rounds (tot. 0.43s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 2.2% (1.022x) (from 0.03607 to 0.03687 Mflops). -After 0.431997s, global autotuning declared speedup of 1.02216 x, when using threads count of 15 and a new matrix: -(6 x 6)[0x561947034750]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 13, symflags:'' +3 iterations (16 th.) took 0.048s; avg 0.016s ( +/- 0.00/ 0.01 %); best 0.016s; worst 0.016s; std dev. 7.867e-07 (taking best). +Reference operation time is 0.0159981 s (0.036 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.016 Mflops: 0.036) +Merge (22 -> 16 leaves) took w.c.t. of 0.004552s, ~0.000268s of computing time (of which 2.861e-06s sorting, 3.099e-06s analysis) +3 iterations (16 th.) took 0.03245s; avg 0.01082s ( +/- 25.86/ 43.09 %); best 0.008019s; worst 0.01548s; std dev. 0.003318 (taking best). +Reference operation time is 0.00801897 s (0.07183 Mflops) with 16 threads. +After merge step 1: tpop: 0.008019 s ~Mflops: 0.072 nsubm:16 otn:16 +Applying merge (22 -> 16 leaves, 16 th.) yielded SPEEDUP of 1.995x: 0.016s -> 0.008019s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.00408s, ~1.192e-05s of computing time (of which 2.146e-06s sorting, 4.053e-06s analysis) +3 iterations (16 th.) took 0.05198s; avg 0.01733s ( +/- 8.34/ 15.46 %); best 0.01588s; worst 0.02001s; std dev. 0.001896 (taking best). +Reference operation time is 0.0158811 s (0.03627 Mflops) with 16 threads. +After merge step 2: tpop: 0.01588 s ~Mflops: 0.036 nsubm:10 otn:16 +Applying merge (16 -> 10 leaves, 16 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.980x: 0.008019s -> 0.01588s. +Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 2 merge steps (of max 6) (22 -> 10 subms) took 0.1161s (of which 0.008643s partitioning, 0s I/O); computing times: 0.0002799s in par. loops, 5.007e-06s sorting, 7.153e-06s analyzing) +Total merge + benchmarking process took 0.1161s, equivalent to 14.5/7.3 new/old ops (0.07099s for 2 clones -- as 8.9/4.4 ops, or 4.4/2.2 ops per clone), SPEEDUP of 1.995x +Applying multi-merge (22 -> 16 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 1.995x (0.016s -> 0.008019s), will amortize in 14.6 ops by saving 0.007979s per op. +In 1 tuning rounds (tot. 0.21s, 0.071s for constructor, 2 clones) obtained a SPEEDUP of 99.5% (1.995x) (from 0.036 to 0.07183 Mflops). +After 0.212150s, global autotuning declared speedup of 1.99503 x, when using threads count of 16 and a new matrix: +(6 x 6)[0x55bb9f25aa50]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Considering Z clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.039e-01 s (100.00 %) - analyzed arrays in 3.987e-02 s (38.37 %) + converted COO to RSB in 9.605e-02 s (100.00 %) + analyzed arrays in 4.004e-02 s (41.69 %) cleaned-up arrays in 9.537e-07 s (0.00 %) deduplicated arrays in 9.537e-07 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.200e-02 s (30.80 %) - memory allocations took 1.407e-05 s (0.01 %) - leafs setup took 9.060e-06 s (0.01 %) - halfword conversion took 3.200e-02 s (30.80 %) -Built (6 x 6)[0x561947032100]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' + shuffled partitions in 2.390e-02 s (24.88 %) + memory allocations took 9.060e-06 s (0.01 %) + leafs setup took 6.914e-06 s (0.01 %) + halfword conversion took 3.209e-02 s (33.41 %) +Built (6 x 6)[0x55bb9f258400]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Base matrix: -(6 x 6)[0x561947032100]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +(6 x 6)[0x55bb9f258400]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.04759s; avg 0.01586s ( +/- 1.79/ 0.91 %); best 0.01558s; worst 0.01601s; std dev. 0.0002011 (taking best). -Reference operation time is 0.015579 s (0.03697 Mflops) with 15 threads. -After 0.047644s, autotuning routine did not find a better threads count configuration. +3 iterations (16 th.) took 0.04764s; avg 0.01588s ( +/- 1.53/ 0.83 %); best 0.01564s; worst 0.01601s; std dev. 0.0001717 (taking best). +Reference operation time is 0.0156362 s (0.03684 Mflops) with 16 threads. +After 0.047666s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (15 th.) took 0.04799s; avg 0.016s ( +/- 3.41/ 3.44 %); best 0.01545s; worst 0.01655s; std dev. 0.0004472 (taking best). -Reference operation time is 0.0154519 s (0.03728 Mflops) with 15 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.01545 Mflops: 0.037) -Merge (28 -> 22 leaves) took w.c.t. of 0.008159s, ~3.91e-05s of computing time (of which 3.099e-06s sorting, 7.868e-06s analysis) -3 iterations (15 th.) took 0.08778s; avg 0.02926s ( +/- 45.43/ 36.08 %); best 0.01597s; worst 0.03982s; std dev. 0.009927 (taking best). -Reference operation time is 0.0159681 s (0.03607 Mflops) with 15 threads. -After merge step 1: tpop: 0.01597 s ~Mflops: 0.036 nsubm:22 otn:15 -Applying merge (28 -> 22 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.033x: 0.01545s -> 0.01597s. -Merge (22 -> 16 leaves) took w.c.t. of 0.004085s, ~5.507e-05s of computing time (of which 1.383e-05s sorting, 1.001e-05s analysis) -3 iterations (15 th.) took 0.07194s; avg 0.02398s ( +/- 49.95/ 99.57 %); best 0.012s; worst 0.04786s; std dev. 0.01688 (taking best). -Reference operation time is 0.012002 s (0.04799 Mflops) with 15 threads. -After merge step 2: tpop: 0.012 s ~Mflops: 0.048 nsubm:16 otn:15 -Applying merge (22 -> 16 leaves, 15 th.) yielded SPEEDUP of 1.287x: 0.01545s -> 0.012s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.008169s, ~3.386e-05s of computing time (of which 4.053e-06s sorting, 8.106e-06s analysis) -3 iterations (15 th.) took 0.05582s; avg 0.01861s ( +/- 14.52/ 27.80 %); best 0.0159s; worst 0.02378s; std dev. 0.003659 (taking best). -Reference operation time is 0.0159049 s (0.03622 Mflops) with 15 threads. -After merge step 3: tpop: 0.0159 s ~Mflops: 0.036 nsubm:10 otn:15 -Applying merge (16 -> 10 leaves, 15 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.325x: 0.012s -> 0.0159s. +3 iterations (16 th.) took 0.0479s; avg 0.01597s ( +/- 0.62/ 0.74 %); best 0.01587s; worst 0.01609s; std dev. 8.942e-05 (taking best). +Reference operation time is 0.0158691 s (0.0363 Mflops) with 16 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.01587 Mflops: 0.036) +Merge (28 -> 22 leaves) took w.c.t. of 0.008283s, ~1.192e-05s of computing time (of which 2.861e-06s sorting, 5.007e-06s analysis) +3 iterations (16 th.) took 0.03956s; avg 0.01319s ( +/- 39.21/ 50.21 %); best 0.008015s; worst 0.01981s; std dev. 0.004922 (taking best). +Reference operation time is 0.00801492 s (0.07187 Mflops) with 16 threads. +After merge step 1: tpop: 0.008015 s ~Mflops: 0.072 nsubm:22 otn:16 +Applying merge (28 -> 22 leaves, 16 th.) yielded SPEEDUP of 1.980x: 0.01587s -> 0.008015s, so taking this instance. +Merge (22 -> 16 leaves) took w.c.t. of 0.008022s, ~1.001e-05s of computing time (of which 2.384e-06s sorting, 3.099e-06s analysis) +3 iterations (16 th.) took 0.04029s; avg 0.01343s ( +/- 40.57/ 80.77 %); best 0.007982s; worst 0.02428s; std dev. 0.007671 (taking best). +Reference operation time is 0.00798202 s (0.07216 Mflops) with 16 threads. +After merge step 2: tpop: 0.007982 s ~Mflops: 0.072 nsubm:16 otn:16 +Applying merge (22 -> 16 leaves, 16 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00412x): 0.008015s -> 0.007982s, so IGNORING this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.004057s, ~1.693e-05s of computing time (of which 2.384e-06s sorting, 6.914e-06s analysis) +3 iterations (16 th.) took 0.05958s; avg 0.01986s ( +/- 19.48/ 38.91 %); best 0.01599s; worst 0.02759s; std dev. 0.005465 (taking best). +Reference operation time is 0.015991 s (0.03602 Mflops) with 16 threads. +After merge step 3: tpop: 0.01599 s ~Mflops: 0.036 nsubm:10 otn:16 +Applying merge (16 -> 10 leaves, 16 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.995x: 0.008015s -> 0.01599s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 3 merge steps (of max 6) (28 -> 10 subms) took 0.284s (of which 0.02047s partitioning, 0s I/O); computing times: 0.000128s in par. loops, 2.098e-05s sorting, 2.599e-05s analyzing) -Total merge + benchmarking process took 0.284s, equivalent to 23.7/18.4 new/old ops (0.09583s for 2 clones -- as 8.0/6.2 ops, or 4.0/3.1 ops per clone), SPEEDUP of 1.287x -Applying multi-merge (28 -> 16 leaves, 2 steps, 0 -> 15 th.sp.) yielded SPEEDUP of 1.287x (0.01545s -> 0.012s), will amortize in 82.3 ops by saving 0.00345s per op. -In 1 tuning rounds (tot. 0.38s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 28.7% (1.287x) (from 0.03728 to 0.04799 Mflops). -After 0.380079s, global autotuning declared speedup of 1.28745 x, when using threads count of 15 and a new matrix: -(6 x 6)[0x561947035850]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' +A total of 3 merge steps (of max 6) (28 -> 10 subms) took 0.1879s (of which 0.02038s partitioning, 0s I/O); computing times: 3.886e-05s in par. loops, 7.629e-06s sorting, 1.502e-05s analyzing) +Total merge + benchmarking process took 0.1879s, equivalent to 23.4/11.8 new/old ops (0.06797s for 2 clones -- as 8.5/4.3 ops, or 4.2/2.1 ops per clone), SPEEDUP of 1.980x +Applying multi-merge (28 -> 22 leaves, 1 steps, 0 -> 16 th.sp.) yielded SPEEDUP of 1.980x (0.01587s -> 0.008015s), will amortize in 23.9 ops by saving 0.007854s per op. +In 1 tuning rounds (tot. 0.28s, 0.068s for constructor, 2 clones) obtained a SPEEDUP of 98.0% (1.98x) (from 0.0363 to 0.07187 Mflops). +After 0.275814s, global autotuning declared speedup of 1.97995 x, when using threads count of 16 and a new matrix: +(6 x 6)[0x55bb9f25bb50]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' @@ -9139,18 +9212,19 @@ 4 0 DIFF PRINT TEST END Beginning large binary search test. -Detected 67679444992 bytes of memory, comprehensive of 18881064960 of free memory. +Detected 67407736832 bytes of memory, comprehensive of 11301416960 of free memory. On this system, maximal array of coordinates can have 2147483137 elements and occupy 8589932548 bytes. -Will perform the test using less memory (17592186041536 MB) than on the maximal coordinate indices array (18446744070690481152) allows. -Skipping test: too little memory. -Skipping large binary search test. +Will perform the test using less memory (8083 MB) than on the maximal coordinate indices array (8476062720) allows. +(c)allocated 2119015680 nnz (8476062720 bytes) +Succeeded retrieving array last element. +Successfully performed large binary search test. BASIC SPARSE BLAS TEST: BEGIN INIT INTERFACE TEST: BEGIN got RSB_IO_WANT_EXTRA_VERBOSE_INTERFACE: -1 got RSB_IO_WANT_IS_INITIALIZED_MARKER: 1 INIT INTERFACE TEST: END (SUCCESS) DEVEL PRINT TEST: BEGIN -(4 x 4)[0x55af78b7dae0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(4 x 4)[0x56252dfbcd40]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9159,8 +9233,8 @@ RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS | RSB_FLAG_OWN_PARTITIONING_ARRAYS | RSB_FLAG_SORT_INPUT -(2 x 2)[0x55af78b7dbf0]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x55af78b7dd00]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x56252dfbce50]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x56252dfbcf60]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' #R 4 x 4, 4 nnz (16 bytes), 16 index space for bytes, 544 bytes for 2 structs (2 of which are on the diagonal) (1e+02% of nnz are on the diagonal) #N at 0 0, 4 x 4, 4 nnz ( 25%) #T at 0 0, 2 x 2, 2 nnz ( 50%) @@ -9168,9 +9242,9 @@ ( 0x2046186 = { rec:1 coo:1 css:1 hw:1 ic:1 fi:0 symflags: } ) DEVEL PRINT TEST: END PRINT TEST: BEGIN [QUIET] -(2 x 2)[0x55af78b7dbf0]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x55af78b7dd00]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(4 x 4)[0x55af78b7dae0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(2 x 2)[0x56252dfbce50]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x56252dfbcf60]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(4 x 4)[0x56252dfbcd40]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9188,8 +9262,8 @@ BASIC PRIMITIVES TEST: BEGIN BASIC PRIMITIVES TEST: END (SUCCESS) ADVANCED SPARSE BLAS TEST: BEGIN [limit 30.000000s] [QUIET] -Terminating testing earlier due to user timeout request: test took 30.000007 s, max allowed was 30.000000. - PASSED:1192 +Terminating testing earlier due to user timeout request: test took 30.000027 s, max allowed was 30.000000. + PASSED:937 FAILED:0 ADVANCED SPARSE BLAS TEST: END (SUCCESS) gmake qtests -C librsbpp @@ -9198,46 +9272,46 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' ./rsbtt -if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh +if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh ++ ./rsbpp Td,s G.mtx ++ grep Z-sort ++ wc -l + test 54 = 54 -++ ./rsbpp Td G.mtx ++ grep Z-sort +++ ./rsbpp Td G.mtx ++ wc -l + test 27 = 27 -++ ./rsbpp Td,z G.mtx ++ grep Z-sort ++ wc -l +++ ./rsbpp Td,z G.mtx + test 54 = 54 -++ ./rsbpp vTd,z G.mtx ++ grep Z-sort +++ ./rsbpp vTd,z G.mtx ++ wc -l + test 54 = 54 ++ ./rsbpp vTd,z G.mtx -++ grep Z-sort ++ wc -l +++ grep Z-sort + test 54 = 54 ++ ./rsbpp vvvTd,z G.mtx ++ grep Zorted ++ wc -l + test 8 = 8 -++ ./rsbpp vvTd,z G.mtx ++ grep Z-sort ++ wc -l +++ ./rsbpp vvTd,z G.mtx + test 54 = 54 ++ ./rsbpp vvTd,z G.mtx -++ grep Range ++ wc -l +++ grep Range + test 0 = 0 -++ ./rsbpp vvvTd,z G.mtx ++ grep Range +++ ./rsbpp vvvTd,z G.mtx ++ wc -l + test 258 -gt 0 ++ ./rsbpp vvvTd,z S.mtx -++ grep Range ++ wc -l +++ grep Range + test 0 -eq 0 ++ ./rsbpp vvvTd,z G.mtx ++ grep Range @@ -9270,26 +9344,26 @@ + test 0 = 0 ++ OMP_NUM_THREADS=1 ++ RSB_NUM_THREADS=1 -++ ./rsbpp vvvC1000m100M100I1r1vtN,TorsF ++ grep Recursing +++ ./rsbpp vvvC1000m100M100I1r1vtN,TorsF ++ wc -l + test 4 = 4 ++ OMP_NUM_THREADS=2 -++ RSB_NUM_THREADS=2 ++ grep Recursing +++ RSB_NUM_THREADS=2 ++ ./rsbpp vvvC1000m100M100I1r1vtN,TorsF ++ wc -l + test 4 = 4 ++ OMP_NUM_THREADS=1 ++ RSB_NUM_THREADS=1 -++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF ++ grep Recursing +++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF ++ wc -l + test 208 = 208 ++ OMP_NUM_THREADS=2 ++ RSB_NUM_THREADS=2 -++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF ++ grep Recursing +++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF ++ wc -l + test 410 = 410 echo "Skipping tests based on Google Test (not detected at configure time)" @@ -9337,7 +9411,7 @@ 0 0 0 0 before tuning for SPMV: -(3 x 3)[0x556fd9d1fea0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' +(3 x 3)[0x55ccf27d50c0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' ** x: 1.1 @@ -9426,18 +9500,18 @@ BEGIN Rsb_Matrix_test_multimatrix_ms_mnrhs BEGIN -(3 x 3)[0x556fd9d20ec0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.97033: -(3 x 3)[0x556fd9d57cc0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x556fd9d20ec0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.34567: -(3 x 3)[0x556fd9d55930]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x556fd9d569c0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.33304: -(3 x 3)[0x556fd9d59d70]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x556fd9d20ec0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.48995: -(3 x 3)[0x556fd9d55930]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55ccf27ff520]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.32902: +(3 x 3)[0x55ccf280e630]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55ccf27ff520]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.00045: +(3 x 3)[0x55ccf280e630]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55ccf27ff520]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.00403: +(3 x 3)[0x55ccf280e630]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x55ccf27ff520]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.00991: +(3 x 3)[0x55ccf280e630]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' END OK: terminating with no allocations registered in librsb [*] tests terminated successfully ! @@ -9454,10 +9528,10 @@ ./rsbtest --no-tune --max_t 0.01 --serial | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q Building ./rsbtest --no-tune --max_t 0.01 --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --no-trans --alpha 1 --type d --rand --serial . | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q adding ! ./rsbtest --mkl A.mkl -running on ionos11-amd64 +running on i-capture-the-hostname Built without the MKL. ( ! ./rsbtest --unrecognized-option-triggers-abort ) -running on ionos11-amd64 +running on i-capture-the-hostname /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest/.libs/rsbtest: unrecognized option '--unrecognized-option-triggers-abort' unrecognized option, aborting. ( ./rsbtest --no-tune --max_t 0.01 --skip-loading-hermitian-matrices --skip-loading-unsymmetric-matrices --tune-maxt 10 --tune-maxr 10 --verbose-tuning --extra-verbose-interface --min_t 0.01 --max_t 0.01 --mintimes 1 --maxtimes 1 --verbose --skip-loading-symmetric-matrices A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q skip ) @@ -9466,7 +9540,7 @@ ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --types all --nthreads 1,2 --maxtimes 1 -+ A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q 2.threads ) ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --render-only A.mtx > /dev/null ) ! ./rsbtest --no-tune --max_t 0.01 --quiet --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --render --no-trans --alpha 1 --type all A.mtx -running on ionos11-amd64 +running on i-capture-the-hostname Will not invoke autotuning routine. Benchmark will sample for at most 0.01 s Built without render support! @@ -9518,7 +9592,7 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' /bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu' - /bin/bash ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu' + /bin/sh ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu' libtool: install: /usr/bin/install -c .libs/librsb.so.0.0.0 /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu/librsb.so.0.0.0 libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so.0 || { rm -f librsb.so.0 && ln -s librsb.so.0.0.0 librsb.so.0; }; }) libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so || { rm -f librsb.so && ln -s librsb.so.0.0.0 librsb.so; }; }) @@ -9528,7 +9602,7 @@ libtool: install: ranlib /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/x86_64-linux-gnu/librsb.a libtool: warning: remember to run 'libtool --finish /usr/lib/x86_64-linux-gnu' /bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' - /bin/bash ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' + /bin/sh ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' libtool: warning: 'librsb.la' has not been installed in '/usr/lib/x86_64-linux-gnu' libtool: install: /usr/bin/install -c .libs/rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin/rsbench /bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' @@ -9656,11 +9730,11 @@ dh_md5sums dh_builddeb dpkg-deb: building package 'librsb-tools' in '../librsb-tools_1.3.0.2+dfsg-4_amd64.deb'. -dpkg-deb: building package 'librsb-tools-dbgsym' in '../librsb-tools-dbgsym_1.3.0.2+dfsg-4_amd64.deb'. -dpkg-deb: building package 'librsb-dev' in '../librsb-dev_1.3.0.2+dfsg-4_amd64.deb'. -dpkg-deb: building package 'librsb0' in '../librsb0_1.3.0.2+dfsg-4_amd64.deb'. dpkg-deb: building package 'librsb0-dbgsym' in '../librsb0-dbgsym_1.3.0.2+dfsg-4_amd64.deb'. +dpkg-deb: building package 'librsb-dev' in '../librsb-dev_1.3.0.2+dfsg-4_amd64.deb'. +dpkg-deb: building package 'librsb-tools-dbgsym' in '../librsb-tools-dbgsym_1.3.0.2+dfsg-4_amd64.deb'. dpkg-deb: building package 'librsb-doc' in '../librsb-doc_1.3.0.2+dfsg-4_all.deb'. +dpkg-deb: building package 'librsb0' in '../librsb0_1.3.0.2+dfsg-4_amd64.deb'. dpkg-genbuildinfo --build=binary -O../librsb_1.3.0.2+dfsg-4_amd64.buildinfo dpkg-genchanges --build=binary -O../librsb_1.3.0.2+dfsg-4_amd64.changes dpkg-genchanges: info: binary-only upload (no source code included) @@ -9669,12 +9743,14 @@ dpkg-buildpackage: info: binary-only upload (no source included) dpkg-genchanges: info: not including original source code in upload I: copying local configuration +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/B01_cleanup starting +I: user script /srv/workspace/pbuilder/3907119/tmp/hooks/B01_cleanup finished I: unmounting dev/ptmx filesystem I: unmounting dev/pts filesystem I: unmounting dev/shm filesystem I: unmounting proc filesystem I: unmounting sys filesystem I: cleaning the build env -I: removing directory /srv/workspace/pbuilder/2338948 and its subdirectories -I: Current time: Sat Jan 13 05:22:22 -12 2024 -I: pbuilder-time-stamp: 1705166542 +I: removing directory /srv/workspace/pbuilder/3907119 and its subdirectories +I: Current time: Sat Feb 15 14:45:41 +14 2025 +I: pbuilder-time-stamp: 1739580341