Diff of the two buildlogs: -- --- b1/build.log 2024-01-23 17:22:23.231359393 +0000 +++ b2/build.log 2024-01-23 18:09:42.819109433 +0000 @@ -1,6 +1,6 @@ I: pbuilder: network access will be disabled during build -I: Current time: Mon Feb 24 11:27:42 -12 2025 -I: pbuilder-time-stamp: 1740439662 +I: Current time: Wed Jan 24 07:22:28 +14 2024 +I: pbuilder-time-stamp: 1706030548 I: Building the build Environment I: extracting base tarball [/var/cache/pbuilder/bookworm-reproducible-base.tgz] I: copying local configuration @@ -29,51 +29,83 @@ dpkg-source: info: applying auto-gitignore I: Not using root during the build. I: Installing the build-deps -I: user script /srv/workspace/pbuilder/5587/tmp/hooks/D02_print_environment starting +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/D01_modify_environment starting +debug: Running on ionos2-i386. +I: Changing host+domainname to test build reproducibility +I: Adding a custom variable just for the fun of it... +I: Changing /bin/sh to bash +'/bin/sh' -> '/bin/bash' +lrwxrwxrwx 1 root root 9 Jan 23 17:22 /bin/sh -> /bin/bash +I: Setting pbuilder2's login shell to /bin/bash +I: Setting pbuilder2's GECOS to second user,second room,second work-phone,second home-phone,second other +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/D01_modify_environment finished +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/D02_print_environment starting I: set - BUILDDIR='/build/reproducible-path' - BUILDUSERGECOS='first user,first room,first work-phone,first home-phone,first other' - BUILDUSERNAME='pbuilder1' - BUILD_ARCH='i386' - DEBIAN_FRONTEND='noninteractive' - DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=16 ' - DISTRIBUTION='bookworm' - HOME='/root' - HOST_ARCH='i386' + BASH=/bin/sh + BASHOPTS=checkwinsize:cmdhist:complete_fullquote:extquote:force_fignore:globasciiranges:globskipdots:hostcomplete:interactive_comments:patsub_replacement:progcomp:promptvars:sourcepath + BASH_ALIASES=() + BASH_ARGC=() + BASH_ARGV=() + BASH_CMDS=() + BASH_LINENO=([0]="12" [1]="0") + BASH_LOADABLES_PATH=/usr/local/lib/bash:/usr/lib/bash:/opt/local/lib/bash:/usr/pkg/lib/bash:/opt/pkg/lib/bash:. + BASH_SOURCE=([0]="/tmp/hooks/D02_print_environment" [1]="/tmp/hooks/D02_print_environment") + BASH_VERSINFO=([0]="5" [1]="2" [2]="15" [3]="1" [4]="release" [5]="i686-pc-linux-gnu") + BASH_VERSION='5.2.15(1)-release' + BUILDDIR=/build/reproducible-path + BUILDUSERGECOS='second user,second room,second work-phone,second home-phone,second other' + BUILDUSERNAME=pbuilder2 + BUILD_ARCH=i386 + DEBIAN_FRONTEND=noninteractive + DEB_BUILD_OPTIONS='buildinfo=+all reproducible=+all parallel=7 ' + DIRSTACK=() + DISTRIBUTION=bookworm + EUID=0 + FUNCNAME=([0]="Echo" [1]="main") + GROUPS=() + HOME=/root + HOSTNAME=i-capture-the-hostname + HOSTTYPE=i686 + HOST_ARCH=i386 IFS=' ' - INVOCATION_ID='a7246bc3db414a40b876f3f92b0e23d8' - LANG='C' - LANGUAGE='en_US:en' - LC_ALL='C' - LD_LIBRARY_PATH='/usr/lib/libeatmydata' - LD_PRELOAD='libeatmydata.so' - MAIL='/var/mail/root' - OPTIND='1' - PATH='/usr/sbin:/usr/bin:/sbin:/bin:/usr/games' - PBCURRENTCOMMANDLINEOPERATION='build' - PBUILDER_OPERATION='build' - PBUILDER_PKGDATADIR='/usr/share/pbuilder' - PBUILDER_PKGLIBDIR='/usr/lib/pbuilder' - PBUILDER_SYSCONFDIR='/etc' - PPID='5587' - PS1='# ' - PS2='> ' + INVOCATION_ID=03f27e3c81bc4f30a60a97afe558f4a2 + LANG=C + LANGUAGE=de_CH:de + LC_ALL=C + LD_LIBRARY_PATH=/usr/lib/libeatmydata + LD_PRELOAD=libeatmydata.so + MACHTYPE=i686-pc-linux-gnu + MAIL=/var/mail/root + OPTERR=1 + OPTIND=1 + OSTYPE=linux-gnu + PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path + PBCURRENTCOMMANDLINEOPERATION=build + PBUILDER_OPERATION=build + PBUILDER_PKGDATADIR=/usr/share/pbuilder + PBUILDER_PKGLIBDIR=/usr/lib/pbuilder + PBUILDER_SYSCONFDIR=/etc + PIPESTATUS=([0]="0") + POSIXLY_CORRECT=y + PPID=27792 PS4='+ ' - PWD='/' - SHELL='/bin/bash' - SHLVL='2' - SUDO_COMMAND='/usr/bin/timeout -k 18.1h 18h /usr/bin/ionice -c 3 /usr/bin/nice /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.JbHePWEi/pbuilderrc_JRPG --distribution bookworm --hookdir /etc/pbuilder/first-build-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/bookworm-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.JbHePWEi/b1 --logfile b1/build.log librsb_1.3.0.2+dfsg-4.dsc' - SUDO_GID='112' - SUDO_UID='107' - SUDO_USER='jenkins' - TERM='unknown' - TZ='/usr/share/zoneinfo/Etc/GMT+12' - USER='root' - _='/usr/bin/systemd-run' - http_proxy='http://85.184.249.68:3128' + PWD=/ + SHELL=/bin/bash + SHELLOPTS=braceexpand:errexit:hashall:interactive-comments:posix + SHLVL=3 + SUDO_COMMAND='/usr/bin/timeout -k 24.1h 24h /usr/bin/ionice -c 3 /usr/bin/nice -n 11 /usr/bin/unshare --uts -- /usr/sbin/pbuilder --build --configfile /srv/reproducible-results/rbuild-debian/r-b-build.JbHePWEi/pbuilderrc_by2P --distribution bookworm --hookdir /etc/pbuilder/rebuild-hooks --debbuildopts -b --basetgz /var/cache/pbuilder/bookworm-reproducible-base.tgz --buildresult /srv/reproducible-results/rbuild-debian/r-b-build.JbHePWEi/b2 --logfile b2/build.log librsb_1.3.0.2+dfsg-4.dsc' + SUDO_GID=112 + SUDO_UID=107 + SUDO_USER=jenkins + TERM=unknown + TZ=/usr/share/zoneinfo/Etc/GMT-14 + UID=0 + USER=root + _='I: set' + http_proxy=http://78.137.99.97:3128 I: uname -a - Linux ionos6-i386 6.1.0-17-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1 (2023-12-30) x86_64 GNU/Linux + Linux i-capture-the-hostname 6.1.0-17-686-pae #1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1 (2023-12-30) i686 GNU/Linux I: ls -l /bin total 6036 -rwxr-xr-x 1 root root 1408088 Apr 23 2023 bash @@ -131,15 +163,15 @@ -rwxr-xr-x 1 root root 51080 Sep 20 2022 readlink -rwxr-xr-x 1 root root 75720 Sep 20 2022 rm -rwxr-xr-x 1 root root 51080 Sep 20 2022 rmdir - -rwxr-xr-x 1 root root 22308 Jul 28 2023 run-parts + -rwxr-xr-x 1 root root 22308 Jul 28 23:46 run-parts -rwxr-xr-x 1 root root 133224 Jan 5 2023 sed - lrwxrwxrwx 1 root root 4 Jan 5 2023 sh -> dash + lrwxrwxrwx 1 root root 9 Jan 23 17:22 sh -> /bin/bash -rwxr-xr-x 1 root root 38760 Sep 20 2022 sleep -rwxr-xr-x 1 root root 87976 Sep 20 2022 stty -rwsr-xr-x 1 root root 83492 Mar 23 2023 su -rwxr-xr-x 1 root root 38792 Sep 20 2022 sync -rwxr-xr-x 1 root root 598456 Apr 6 2023 tar - -rwxr-xr-x 1 root root 13860 Jul 28 2023 tempfile + -rwxr-xr-x 1 root root 13860 Jul 28 23:46 tempfile -rwxr-xr-x 1 root root 120776 Sep 20 2022 touch -rwxr-xr-x 1 root root 34664 Sep 20 2022 true -rwxr-xr-x 1 root root 17892 Mar 23 2023 ulockmgr_server @@ -159,7 +191,7 @@ -rwxr-xr-x 1 root root 2206 Apr 10 2022 zless -rwxr-xr-x 1 root root 1842 Apr 10 2022 zmore -rwxr-xr-x 1 root root 4577 Apr 10 2022 znew -I: user script /srv/workspace/pbuilder/5587/tmp/hooks/D02_print_environment finished +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/D02_print_environment finished -> Attempting to satisfy build-dependencies -> Creating pbuilder-satisfydepends-dummy package Package: pbuilder-satisfydepends-dummy @@ -359,7 +391,7 @@ Get: 152 http://deb.debian.org/debian bookworm/main i386 libltdl-dev i386 2.4.7-5 [167 kB] Get: 153 http://deb.debian.org/debian bookworm/main i386 libhwloc-dev i386 2.9.0-1 [259 kB] Get: 154 http://deb.debian.org/debian bookworm/main i386 zlib1g-dev i386 1:1.2.13.dfsg-1 [913 kB] -Fetched 348 MB in 9s (38.0 MB/s) +Fetched 348 MB in 31s (11.3 MB/s) debconf: delaying package configuration, since apt-utils is not installed Selecting previously unselected package liblocale-gettext-perl. (Reading database ... (Reading database ... 5% (Reading database ... 10% (Reading database ... 15% (Reading database ... 20% (Reading database ... 25% (Reading database ... 30% (Reading database ... 35% (Reading database ... 40% (Reading database ... 45% (Reading database ... 50% (Reading database ... 55% (Reading database ... 60% (Reading database ... 65% (Reading database ... 70% (Reading database ... 75% (Reading database ... 80% (Reading database ... 85% (Reading database ... 90% (Reading database ... 95% (Reading database ... 100% (Reading database ... 18156 files and directories currently installed.) @@ -1013,7 +1045,11 @@ Building tag database... -> Finished parsing the build-deps I: Building the package -I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" HOME="/nonexistent/first-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-4_source.changes +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/A99_set_merged_usr starting +Not re-configuring usrmerge for bookworm +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/A99_set_merged_usr finished +hostname: Name or service not known +I: Running cd /build/reproducible-path/librsb-1.3.0.2+dfsg/ && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-buildpackage -us -uc -b && env PATH="/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" HOME="/nonexistent/second-build" dpkg-genchanges -S > ../librsb_1.3.0.2+dfsg-4_source.changes dpkg-buildpackage: info: source package librsb dpkg-buildpackage: info: source version 1.3.0.2+dfsg-4 dpkg-buildpackage: info: source distribution unstable @@ -1034,62 +1070,62 @@ dh binary --no-parallel dh_update_autotools_config -O--no-parallel dh_autoreconf -O--no-parallel -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found libtoolize: putting auxiliary files in '.'. libtoolize: copying file './ltmain.sh' libtoolize: putting macros in AC_CONFIG_MACRO_DIRS, 'm4'. @@ -1131,90 +1167,90 @@ libtoolize: copying file 'm4/ltsugar.m4' libtoolize: copying file 'm4/ltversion.m4' libtoolize: copying file 'm4/lt~obsolete.m4' -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found -sh: 1: git: not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found +sh: line 1: git: command not found configure.ac:66: installing './compile' configure.ac:65: installing './missing' Makefile.am: installing './depcomp' @@ -1397,7 +1433,7 @@ checking for m4... m4 checking for gmake... gmake checking for ggrep... /bin/grep -checking for bash... /bin/bash +checking for bash... /bin/sh checking for gsed... /bin/sed checking for cmp... cmp checking for basename... basename @@ -1514,7 +1550,7 @@ configure: It appears that Fortran programs can be linked without using the Fortran linker. configure: Using OPENMP_CFLAGS ok for linking an OpenMP program: adding it to LIBS. checking if your have a usable getrusage() ... 1 -checking for /bin/bash... /bin/bash +checking for /bin/sh... /bin/sh configure: Will not use Google Test. configure: You seem to not have GNU Octave or have disabled 'int' type. Part of the test suite will not be generated. If you want more testing capabilities, you should enable the 'int' type as well. checking that generated files are newer than configure... done @@ -1536,7 +1572,7 @@ config.status: executing depfiles commands config.status: executing libtool commands === configuring in librsbpp (/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for a race-free mkdir -p... /bin/mkdir -p @@ -1658,7 +1694,7 @@ configure: Will not use Google Test. checking whether you have std::thread... yes checking whether you have std::mutex... yes -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1667,7 +1703,7 @@ config.status: executing libtool commands configure: Successfully created a Makefile. === configuring in rsblib (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for a race-free mkdir -p... /bin/mkdir -p @@ -1776,7 +1812,7 @@ checking for filesystem... yes checking for main in -lstdc++fs... yes configure: Assuming you are yet to build librsb.la. (set LIBS= -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la) -/bin/bash +/bin/sh checking that generated files are newer than configure... done configure: creating ./config.status config.status: creating Makefile @@ -1786,7 +1822,7 @@ config.status: executing libtool commands configure: Created a Makefile. === configuring in rsbtest (/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest) -configure: running /bin/bash ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/bash' --cache-file=/dev/null --srcdir=. +configure: running /bin/sh ./configure --disable-option-checking '--prefix=/usr' '--build=i686-linux-gnu' '--includedir=${prefix}/include' '--mandir=${prefix}/share/man' '--infodir=${prefix}/share/info' '--sysconfdir=/etc' '--localstatedir=/var' '--disable-silent-rules' '--libdir=${prefix}/lib/i386-linux-gnu' '--runstatedir=/run' '--disable-maintainer-mode' '--disable-dependency-tracking' '--with-hwloc' '--enable-matrix-types=blas' '--with-zlib=-lz' '--enable-doc-build' '--enable-fortran-module-install' '--enable-extra-patches' '--enable-pkg-config-install' '--disable-extra-patches' 'build_alias=i686-linux-gnu' 'CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3' 'LDFLAGS=-Wl,-z,relro' 'CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2' 'FCFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong' 'CXXFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security' 'BASH=/bin/sh' --cache-file=/dev/null --srcdir=. checking for a BSD-compatible install... /usr/bin/install -c checking whether build environment is sane... yes checking for a race-free mkdir -p... /bin/mkdir -p @@ -1897,7 +1933,7 @@ checking for filesystem... yes checking for rsb_lib_init... no checking for dd... yes -/bin/bash +/bin/sh /usr/bin/timeout checking that generated files are newer than configure... done configure: creating ./config.status @@ -1929,7 +1965,7 @@ ARFLAGS : cru M4 : m4 MAKE : gmake - BASH : /bin/bash + BASH : /bin/sh OCTAVE : false DOXYGEN : doxygen HELP2MAN : help2man @@ -1948,7 +1984,7 @@ Supported I/O functionality level : "7" vs "7" Interface Error Verbosity : "0" vs "0" Internals Error Verbosity : "0" vs "0" - Memory hierarchy info, detected : "L2:16/64/4096K,L1:8/64/32K" + Memory hierarchy info, detected : "L2:16/64/512K,L1:2/64/64K" Memory hierarchy info, selected : "" Maximum of supported threads : "128" Build Fortran examples : "yes" vs "yes" @@ -1980,23 +2016,23 @@ Making all in librsbpp gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp.o rsbpp.cpp -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_coo.lo rsbpp_coo.cpp libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -fPIC -DPIC -o .libs/rsbpp_coo.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_coo.cpp -o rsbpp_coo.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp +/bin/sh ./libtool --tag=CXX --mode=compile g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbpp_csr.lo rsbpp_csr.cpp libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -fPIC -DPIC -o .libs/rsbpp_csr.o libtool: compile: g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c rsbpp_csr.cpp -o rsbpp_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o librsbpp.la rsbpp_coo.lo rsbpp_csr.lo -lpthread -lstdc++fs libtool: link: ar cr .libs/librsbpp.a .libs/rsbpp_coo.o .libs/rsbpp_csr.o libtool: link: ranlib .libs/librsbpp.a libtool: link: ( cd ".libs" && rm -f "librsbpp.la" && ln -s "../librsbpp.la" "librsbpp.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbpp rsbpp.o librsbpp.la -lpthread -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbpp rsbpp.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -c -o rsbtt.o rsbtt.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbtt rsbtt.o librsbpp.la -lpthread -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbtt rsbtt.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBPP_HAS_RSB_H=1 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -c -o rsbct.o rsbct.c -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z,relro -o rsbct rsbct.o librsbpp.la -lpthread -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -DRSBPP_HAS_RSB_H=1 -Wl,-z -Wl,relro -o rsbct rsbct.o ./.libs/librsbpp.a -lpthread -lstdc++fs -fopenmp gmake all-am gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' @@ -2061,255 +2097,255 @@ gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsbench-rsb_libspblas_tests.o `test -f 'rsb_libspblas_tests.c' || echo './'`rsb_libspblas_tests.c g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -c -o rsb_dummy.o rsb_dummy.cpp gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o ch2icfb-ch2icfb.o `test -f 'ch2icfb.c' || echo './'`ch2icfb.c -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o ch2icfb ch2icfb-ch2icfb.o -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info' is ignored for programs libtool: link: gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -Wl,-z -Wl,relro -o ch2icfb ch2icfb-ch2icfb.o -fopenmp -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp if test -f ./rsb_types.h -a ! -f ./rsb_types.h ; then cp -pv ./rsb_types.h ./rsb_types.h ; fi # out-of-dir behaviour varies between installations -if test -f ch2icfb ; then if SED=/bin/sed GREP=/bin/grep /bin/bash ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb.lo rsb.F90 +if test -f ch2icfb ; then if SED=/bin/sed GREP=/bin/grep /bin/sh ./scripts/rsb_h_to_rsb_fi.sh . /build/reproducible-path/librsb-1.3.0.2+dfsg > rsb.F90 ; then true; else rm "rsb.F90"; echo "Failure generating rsb.F90!" ; exit ; fi ; else echo "Warning: Your system did not build ch2icfb for some reason --- skipping rebuild of "rsb.F90 ;true ; fi +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb.lo rsb.F90 libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb.F90 -fPIC -o .libs/rsb.o libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb.F90 -o rsb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_stropts.lo `test -f 'rsb_stropts.c' || echo './'`rsb_stropts.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_stropts.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_stropts.c -o librsb_nounroll_la-rsb_stropts.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_strmif.lo `test -f 'rsb_strmif.c' || echo './'`rsb_strmif.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_strmif.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_strmif.c -o librsb_nounroll_la-rsb_strmif.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_unroll.lo `test -f 'rsb_unroll.c' || echo './'`rsb_unroll.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_unroll.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_unroll.c -o librsb_nounroll_la-rsb_unroll.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_vb.lo `test -f 'rsb_krnl_vb.c' || echo './'`rsb_krnl_vb.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_vb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_vb.c -o librsb_nounroll_la-rsb_krnl_vb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_lb.lo `test -f 'rsb_krnl_lb.c' || echo './'`rsb_krnl_lb.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_lb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_lb.c -o librsb_nounroll_la-rsb_krnl_lb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl.lo `test -f 'rsb_krnl.c' || echo './'`rsb_krnl.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl.c -o librsb_nounroll_la-rsb_krnl.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_bench.lo `test -f 'rsb_bench.c' || echo './'`rsb_bench.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_bench.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bench.c -o librsb_nounroll_la-rsb_bench.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_mergesort.lo `test -f 'rsb_mergesort.c' || echo './'`rsb_mergesort.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mergesort.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_mergesort.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mergesort.c -o librsb_nounroll_la-rsb_mergesort.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_permute.lo `test -f 'rsb_permute.c' || echo './'`rsb_permute.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_permute.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_permute.c -o librsb_nounroll_la-rsb_permute.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_l.lo `test -f 'rsb_krnl_bcss_l.c' || echo './'`rsb_krnl_bcss_l.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_l.c -o librsb_nounroll_la-rsb_krnl_bcss_l.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_u.lo `test -f 'rsb_krnl_bcss_u.c' || echo './'`rsb_krnl_bcss_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_u.c -o librsb_nounroll_la-rsb_krnl_bcss_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo `test -f 'rsb_krnl_bcss_spsv_u.c' || echo './'`rsb_krnl_bcss_spsv_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spsv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo `test -f 'rsb_krnl_bcss_spmv_u.c' || echo './'`rsb_krnl_bcss_spmv_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo `test -f 'rsb_krnl_bcss_misc_u.c' || echo './'`rsb_krnl_bcss_misc_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss_misc_u.c -o librsb_nounroll_la-rsb_krnl_bcss_misc_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo `test -f 'rsb_krnl_bcoo_spmv_u.c' || echo './'`rsb_krnl_bcoo_spmv_u.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcoo_spmv_u.c -o librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_krnl_bcss.lo `test -f 'rsb_krnl_bcss.c' || echo './'`rsb_krnl_bcss.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_krnl_bcss.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_krnl_bcss.c -o librsb_nounroll_la-rsb_krnl_bcss.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spmv.lo `test -f 'rsb_spmv.c' || echo './'`rsb_spmv.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spmv.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spmv.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spmv.c -o librsb_nounroll_la-rsb_spmv.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_merge.lo `test -f 'rsb_merge.c' || echo './'`rsb_merge.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_merge.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_merge.c -o librsb_nounroll_la-rsb_merge.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_ompio.lo `test -f 'rsb_ompio.c' || echo './'`rsb_ompio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_ompio.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_ompio.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_ompio.c -o librsb_nounroll_la-rsb_ompio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_util.lo `test -f 'rsb_util.c' || echo './'`rsb_util.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_util.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_util.c -o librsb_nounroll_la-rsb_util.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spgemm_csr.lo `test -f 'rsb_spgemm_csr.c' || echo './'`rsb_spgemm_csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spgemm_csr.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm_csr.c -o librsb_nounroll_la-rsb_spgemm_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_spsum_misc.lo `test -f 'rsb_spsum_misc.c' || echo './'`rsb_spsum_misc.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_spsum_misc.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum_misc.c -o librsb_nounroll_la-rsb_spsum_misc.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_nounroll_la-rsb_prec.lo `test -f 'rsb_prec.c' || echo './'`rsb_prec.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -fPIC -DPIC -o .libs/librsb_nounroll_la-rsb_prec.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fno-unroll-loops -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_prec.c -o librsb_nounroll_la-rsb_prec.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_nounroll.la librsb_nounroll_la-rsb_stropts.lo librsb_nounroll_la-rsb_strmif.lo librsb_nounroll_la-rsb_unroll.lo librsb_nounroll_la-rsb_krnl_vb.lo librsb_nounroll_la-rsb_krnl_lb.lo librsb_nounroll_la-rsb_krnl.lo librsb_nounroll_la-rsb_bench.lo librsb_nounroll_la-rsb_mergesort.lo librsb_nounroll_la-rsb_permute.lo librsb_nounroll_la-rsb_krnl_bcss_l.lo librsb_nounroll_la-rsb_krnl_bcss_u.lo librsb_nounroll_la-rsb_krnl_bcss_spsv_u.lo librsb_nounroll_la-rsb_krnl_bcss_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss_misc_u.lo librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.lo librsb_nounroll_la-rsb_krnl_bcss.lo librsb_nounroll_la-rsb_spmv.lo librsb_nounroll_la-rsb_merge.lo librsb_nounroll_la-rsb_ompio.lo librsb_nounroll_la-rsb_util.lo librsb_nounroll_la-rsb_spgemm_csr.lo librsb_nounroll_la-rsb_spsum_misc.lo librsb_nounroll_la-rsb_prec.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_nounroll.a .libs/librsb_nounroll_la-rsb_stropts.o .libs/librsb_nounroll_la-rsb_strmif.o .libs/librsb_nounroll_la-rsb_unroll.o .libs/librsb_nounroll_la-rsb_krnl_vb.o .libs/librsb_nounroll_la-rsb_krnl_lb.o .libs/librsb_nounroll_la-rsb_krnl.o .libs/librsb_nounroll_la-rsb_bench.o .libs/librsb_nounroll_la-rsb_mergesort.o .libs/librsb_nounroll_la-rsb_permute.o .libs/librsb_nounroll_la-rsb_krnl_bcss_l.o .libs/librsb_nounroll_la-rsb_krnl_bcss_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spsv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss_misc_u.o .libs/librsb_nounroll_la-rsb_krnl_bcoo_spmv_u.o .libs/librsb_nounroll_la-rsb_krnl_bcss.o .libs/librsb_nounroll_la-rsb_spmv.o .libs/librsb_nounroll_la-rsb_merge.o .libs/librsb_nounroll_la-rsb_ompio.o .libs/librsb_nounroll_la-rsb_util.o .libs/librsb_nounroll_la-rsb_spgemm_csr.o .libs/librsb_nounroll_la-rsb_spsum_misc.o .libs/librsb_nounroll_la-rsb_prec.o libtool: link: ranlib .libs/librsb_nounroll.a libtool: link: ( cd ".libs" && rm -f "librsb_nounroll.la" && ln -s "../librsb_nounroll.la" "librsb_nounroll.la" ) -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_is.lo `test -f 'rsb_is.c' || echo './'`rsb_is.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_is.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_is.c -o librsb_base_la-rsb_is.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mio.lo `test -f 'rsb_mio.c' || echo './'`rsb_mio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mio.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mio.c -o librsb_base_la-rsb_mio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_op.lo `test -f 'rsb_op.c' || echo './'`rsb_op.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_op.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_op.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_op.c -o librsb_base_la-rsb_op.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_bio.lo `test -f 'rsb_bio.c' || echo './'`rsb_bio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_bio.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_bio.c -o librsb_base_la-rsb_bio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_get.lo `test -f 'rsb_get.c' || echo './'`rsb_get.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_get.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_get.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_get.c -o librsb_base_la-rsb_get.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_set.lo `test -f 'rsb_set.c' || echo './'`rsb_set.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_set.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_set.c -o librsb_base_la-rsb_set.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo.lo `test -f 'rsb_coo.c' || echo './'`rsb_coo.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo.c -o librsb_base_la-rsb_coo.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr.lo `test -f 'rsb_csr.c' || echo './'`rsb_csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr.c -o librsb_base_la-rsb_csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_check.lo `test -f 'rsb_coo_check.c' || echo './'`rsb_coo_check.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_check.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_check.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_check.c -o librsb_base_la-rsb_coo_check.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo_symm.lo `test -f 'rsb_coo_symm.c' || echo './'`rsb_coo_symm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo_symm.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo_symm.c -o librsb_base_la-rsb_coo_symm.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_idx.lo `test -f 'rsb_idx.c' || echo './'`rsb_idx.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_idx.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_idx.c -o librsb_base_la-rsb_idx.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srt.lo `test -f 'rsb_srt.c' || echo './'`rsb_srt.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srt.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srt.c -o librsb_base_la-rsb_srt.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_srtp.lo `test -f 'rsb_srtp.c' || echo './'`rsb_srtp.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srtp.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_srtp.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_srtp.c -o librsb_base_la-rsb_srtp.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_src.lo `test -f 'rsb_src.c' || echo './'`rsb_src.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_src.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_src.c -o librsb_base_la-rsb_src.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_test_accuracy.lo `test -f 'rsb_test_accuracy.c' || echo './'`rsb_test_accuracy.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_test_accuracy.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_test_accuracy.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_test_accuracy.c -o librsb_base_la-rsb_test_accuracy.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_clone.lo `test -f 'rsb_clone.c' || echo './'`rsb_clone.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_clone.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_clone.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_clone.c -o librsb_base_la-rsb_clone.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec.lo `test -f 'rsb_rec.c' || echo './'`rsb_rec.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec.c -o librsb_base_la-rsb_rec.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_render.lo `test -f 'rsb_render.c' || echo './'`rsb_render.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_render.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_render.c -o librsb_base_la-rsb_render.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_eps.lo `test -f 'rsb_eps.c' || echo './'`rsb_eps.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_eps.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_eps.c -o librsb_base_la-rsb_eps.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_msort_up.lo `test -f 'rsb_msort_up.c' || echo './'`rsb_msort_up.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_msort_up.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_msort_up.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_msort_up.c -o librsb_base_la-rsb_msort_up.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_sys.lo `test -f 'rsb_sys.c' || echo './'`rsb_sys.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_sys.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_sys.c -o librsb_base_la-rsb_sys.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_blas_stuff.lo `test -f 'rsb_blas_stuff.c' || echo './'`rsb_blas_stuff.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_blas_stuff.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_blas_stuff.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_blas_stuff.c -o librsb_base_la-rsb_blas_stuff.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_gen.lo `test -f 'rsb_gen.c' || echo './'`rsb_gen.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_gen.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_gen.c -o librsb_base_la-rsb_gen.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_perf.lo `test -f 'rsb_perf.c' || echo './'`rsb_perf.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_perf.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_perf.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_perf.c -o librsb_base_la-rsb_perf.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rsb.lo `test -f 'rsb_rsb.c' || echo './'`rsb_rsb.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rsb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rsb.c -o librsb_base_la-rsb_rsb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_err.lo `test -f 'rsb_err.c' || echo './'`rsb_err.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_err.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_err.c -o librsb_base_la-rsb_err.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_tune.lo `test -f 'rsb_tune.c' || echo './'`rsb_tune.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_tune.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_tune.c -o librsb_base_la-rsb_tune.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_do.lo `test -f 'rsb_do.c' || echo './'`rsb_do.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_do.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_do.c -o librsb_base_la-rsb_do.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_internals.lo `test -f 'rsb_internals.c' || echo './'`rsb_internals.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_internals.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_internals.c -o librsb_base_la-rsb_internals.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_garbage.lo `test -f 'rsb_garbage.c' || echo './'`rsb_garbage.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_garbage.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_garbage.c -o librsb_base_la-rsb_garbage.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mmio.lo `test -f 'rsb_mmio.c' || echo './'`rsb_mmio.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mmio.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mmio.c -o librsb_base_la-rsb_mmio.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_partition.lo `test -f 'rsb_partition.c' || echo './'`rsb_partition.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_partition.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_partition.c -o librsb_base_la-rsb_partition.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_mbw.lo `test -f 'rsb_mbw.c' || echo './'`rsb_mbw.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_mbw.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_mbw.c -o librsb_base_la-rsb_mbw.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_limiter.lo `test -f 'rsb_limiter.c' || echo './'`rsb_limiter.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_limiter.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_limiter.c -o librsb_base_la-rsb_limiter.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_fpb.lo `test -f 'rsb_fpb.c' || echo './'`rsb_fpb.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_fpb.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_fpb.c -o librsb_base_la-rsb_fpb.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spgemm.lo `test -f 'rsb_spgemm.c' || echo './'`rsb_spgemm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spgemm.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spgemm.c -o librsb_base_la-rsb_spgemm.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsum.lo `test -f 'rsb_spsum.c' || echo './'`rsb_spsum.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsum.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsum.c -o librsb_base_la-rsb_spsum.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_spsv.lo `test -f 'rsb_spsv.c' || echo './'`rsb_spsv.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_spsv.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_spsv.c -o librsb_base_la-rsb_spsv.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_lock.lo `test -f 'rsb_lock.c' || echo './'`rsb_lock.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_lock.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_lock.c -o librsb_base_la-rsb_lock.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_swt.lo `test -f 'rsb_swt.c' || echo './'`rsb_swt.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_swt.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_swt.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_swt.c -o librsb_base_la-rsb_swt.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_init.lo `test -f 'rsb_init.c' || echo './'`rsb_init.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_init.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_init.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_init.c -o librsb_base_la-rsb_init.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_dump.lo `test -f 'rsb_dump.c' || echo './'`rsb_dump.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_dump.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_dump.c -o librsb_base_la-rsb_dump.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_cpmv.lo `test -f 'rsb_cpmv.c' || echo './'`rsb_cpmv.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_cpmv.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_cpmv.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_cpmv.c -o librsb_base_la-rsb_cpmv.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_asm.lo `test -f 'rsb_asm.c' || echo './'`rsb_asm.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_asm.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_asm.c -o librsb_base_la-rsb_asm.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_user.lo `test -f 'rsb_user.c' || echo './'`rsb_user.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_user.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_user.c -o librsb_base_la-rsb_user.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_coo2rec.lo `test -f 'rsb_coo2rec.c' || echo './'`rsb_coo2rec.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo2rec.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_coo2rec.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_coo2rec.c -o librsb_base_la-rsb_coo2rec.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2coo.lo `test -f 'rsb_rec2coo.c' || echo './'`rsb_rec2coo.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2coo.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2coo.c -o librsb_base_la-rsb_rec2coo.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_rec2csr.lo `test -f 'rsb_rec2csr.c' || echo './'`rsb_rec2csr.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2csr.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_rec2csr.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_rec2csr.c -o librsb_base_la-rsb_rec2csr.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o librsb_base_la-rsb_csr2coo.lo `test -f 'rsb_csr2coo.c' || echo './'`rsb_csr2coo.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr2coo.c -fPIC -DPIC -o .libs/librsb_base_la-rsb_csr2coo.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_csr2coo.c -o librsb_base_la-rsb_csr2coo.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 +/bin/sh ./libtool --tag=FC --mode=compile gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o rsb_blas_sparse.lo rsb_blas_sparse.F90 libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb_blas_sparse.F90 -fPIC -o .libs/rsb_blas_sparse.o libtool: compile: gfortran -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c rsb_blas_sparse.F90 -o rsb_blas_sparse.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_base.la librsb_base_la-rsb_is.lo librsb_base_la-rsb_mio.lo librsb_base_la-rsb_op.lo librsb_base_la-rsb_bio.lo librsb_base_la-rsb_get.lo librsb_base_la-rsb_set.lo librsb_base_la-rsb_coo.lo librsb_base_la-rsb_csr.lo librsb_base_la-rsb_coo_check.lo librsb_base_la-rsb_coo_symm.lo librsb_base_la-rsb_idx.lo librsb_base_la-rsb_srt.lo librsb_base_la-rsb_srtp.lo librsb_base_la-rsb_src.lo librsb_base_la-rsb_test_accuracy.lo librsb_base_la-rsb_clone.lo librsb_base_la-rsb_rec.lo librsb_base_la-rsb_render.lo librsb_base_la-rsb_eps.lo librsb_base_la-rsb_msort_up.lo librsb_base_la-rsb_sys.lo librsb_base_la-rsb_blas_stuff.lo librsb_base_la-rsb_gen.lo librsb_base_la-rsb_perf.lo librsb_base_la-rsb_rsb.lo librsb_base_la-rsb_err.lo librsb_base_la-rsb_tune.lo librsb_base_la-rsb_do.lo librsb_base_la-rsb_internals.lo librsb_base_la-rsb_garbage.lo librsb_base_la-rsb_mmio.lo librsb_base_la-rsb_partition.lo librsb_base_la-rsb_mbw.lo librsb_base_la-rsb_limiter.lo librsb_base_la-rsb_fpb.lo librsb_base_la-rsb_spgemm.lo librsb_base_la-rsb_spsum.lo librsb_base_la-rsb_spsv.lo librsb_base_la-rsb_lock.lo librsb_base_la-rsb_swt.lo librsb_base_la-rsb_init.lo librsb_base_la-rsb_dump.lo librsb_base_la-rsb_cpmv.lo librsb_base_la-rsb_asm.lo librsb_base_la-rsb_user.lo librsb_base_la-rsb_coo2rec.lo librsb_base_la-rsb_rec2coo.lo librsb_base_la-rsb_rec2csr.lo librsb_base_la-rsb_csr2coo.lo rsb_blas_sparse.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_base.a .libs/librsb_base_la-rsb_is.o .libs/librsb_base_la-rsb_mio.o .libs/librsb_base_la-rsb_op.o .libs/librsb_base_la-rsb_bio.o .libs/librsb_base_la-rsb_get.o .libs/librsb_base_la-rsb_set.o .libs/librsb_base_la-rsb_coo.o .libs/librsb_base_la-rsb_csr.o .libs/librsb_base_la-rsb_coo_check.o .libs/librsb_base_la-rsb_coo_symm.o .libs/librsb_base_la-rsb_idx.o .libs/librsb_base_la-rsb_srt.o .libs/librsb_base_la-rsb_srtp.o .libs/librsb_base_la-rsb_src.o .libs/librsb_base_la-rsb_test_accuracy.o .libs/librsb_base_la-rsb_clone.o .libs/librsb_base_la-rsb_rec.o .libs/librsb_base_la-rsb_render.o .libs/librsb_base_la-rsb_eps.o .libs/librsb_base_la-rsb_msort_up.o .libs/librsb_base_la-rsb_sys.o .libs/librsb_base_la-rsb_blas_stuff.o .libs/librsb_base_la-rsb_gen.o .libs/librsb_base_la-rsb_perf.o .libs/librsb_base_la-rsb_rsb.o .libs/librsb_base_la-rsb_err.o .libs/librsb_base_la-rsb_tune.o .libs/librsb_base_la-rsb_do.o .libs/librsb_base_la-rsb_internals.o .libs/librsb_base_la-rsb_garbage.o .libs/librsb_base_la-rsb_mmio.o .libs/librsb_base_la-rsb_partition.o .libs/librsb_base_la-rsb_mbw.o .libs/librsb_base_la-rsb_limiter.o .libs/librsb_base_la-rsb_fpb.o .libs/librsb_base_la-rsb_spgemm.o .libs/librsb_base_la-rsb_spsum.o .libs/librsb_base_la-rsb_spsv.o .libs/librsb_base_la-rsb_lock.o .libs/librsb_base_la-rsb_swt.o .libs/librsb_base_la-rsb_init.o .libs/librsb_base_la-rsb_dump.o .libs/librsb_base_la-rsb_cpmv.o .libs/librsb_base_la-rsb_asm.o .libs/librsb_base_la-rsb_user.o .libs/librsb_base_la-rsb_coo2rec.o .libs/librsb_base_la-rsb_rec2coo.o .libs/librsb_base_la-rsb_rec2csr.o .libs/librsb_base_la-rsb_csr2coo.o .libs/rsb_blas_sparse.o libtool: link: ranlib .libs/librsb_base.a libtool: link: ( cd ".libs" && rm -f "librsb_base.la" && ln -s "../librsb_base.la" "librsb_base.la" ) -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas_handle.lo rsb_libspblas_handle.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -fPIC -DPIC -o .libs/rsb_libspblas_handle.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas_handle.c -o rsb_libspblas_handle.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c +/bin/sh ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o rsb_libspblas.lo rsb_libspblas.c libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas.c -fPIC -DPIC -o .libs/rsb_libspblas.o libtool: compile: gcc -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c rsb_libspblas.c -o rsb_libspblas.o >/dev/null 2>&1 -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb_spblas.la rsb_libspblas_handle.lo rsb_libspblas.lo -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info/-version-number' is ignored for convenience libraries libtool: link: ar cr .libs/librsb_spblas.a .libs/rsb_libspblas_handle.o .libs/rsb_libspblas.o libtool: link: ranlib .libs/librsb_spblas.a libtool: link: ( cd ".libs" && rm -f "librsb_spblas.la" && ln -s "../librsb_spblas.la" "librsb_spblas.la" ) -/bin/bash ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/i386-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CC --mode=link gcc -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -no-undefined -version-info 0:0:0 -Wl,-z,relro -o librsb.la -rpath /usr/lib/i386-linux-gnu rsb.lo librsb_nounroll.la librsb_base.la librsb_spblas.la /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/librsbpp.la -lstdc++ -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: gcc -shared -fPIC -DPIC .libs/rsb.o -Wl,--whole-archive ./.libs/librsb_nounroll.a ./.libs/librsb_base.a ./.libs/librsb_spblas.a /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/.libs/librsbpp.a -Wl,--no-whole-archive -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lpthread -lstdc++fs -lstdc++ -lhwloc -lz -lgfortran -lm -lquadmath -g -O2 -fstack-protector-strong -O3 -Wl,-z -Wl,relro -fopenmp -fopenmp -Wl,-soname -Wl,librsb.so.0 -o .libs/librsb.so.0.0.0 libtool: link: (cd ".libs" && rm -f "librsb.so.0" && ln -s "librsb.so.0.0.0" "librsb.so.0") libtool: link: (cd ".libs" && rm -f "librsb.so" && ln -s "librsb.so.0.0.0" "librsb.so") @@ -2321,44 +2357,44 @@ libtool: link: ranlib .libs/librsb.a libtool: link: rm -fr .libs/librsb.lax libtool: link: ( cd ".libs" && rm -f "librsb.la" && ln -s "../librsb.la" "librsb.la" ) -/bin/bash ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ./libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -no-undefined -version-info 0:0:0 -Wl,-z,relro -o rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: warning: '-version-info' is ignored for programs libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/rsbench rsbench-rsbench.o rsbench-rsb_test_matops.o rsbench-rsb_mkl.o rsbench-rsb_genmm.o rsbench-rsb_mmls.o rsbench-rsb_pr.o rsbench-rsb_pcnt.o rsbench-rsb_failure_tests.o rsbench-rsb_libspblas_tests.o rsb_dummy.o -fopenmp ./.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' Making all in examples gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o hello.o hello.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello hello.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello hello.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o snippets.o snippets.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o snippets snippets.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/snippets snippets.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o transpose.o transpose.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o transpose transpose.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/transpose transpose.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o power.o power.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o power power.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/power power.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o autotune.o autotune.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o autotune autotune.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/autotune autotune.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o backsolve.o backsolve.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o backsolve backsolve.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/backsolve backsolve.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o hello-spblas.o hello-spblas.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o hello-spblas hello-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/hello-spblas hello-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gcc -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -c -o io-spblas.o io-spblas.c -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o io-spblas io-spblas.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/io-spblas io-spblas.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o fortran.o fortran.F90 -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran fortran.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/fortran fortran.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gfortran -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -I.. -I.. -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -fopenmp -c -o fortran_rsb_fi.o fortran_rsb_fi.F90 -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o fortran_rsb_fi fortran_rsb_fi.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/fortran_rsb_fi fortran_rsb_fi.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp g++ -DHAVE_CONFIG_H -I. -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp -I/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -c -o cplusplus.o cplusplus.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath +/bin/sh ../libtool --tag=CXX --mode=link g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z,relro -o cplusplus cplusplus.o -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lhwloc -lm -lhwloc -lz -fopenmp -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath libtool: link: g++ -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp -Wl,-z -Wl,relro -o .libs/cplusplus cplusplus.o -fopenmp -L.. /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lhwloc -lz -L/usr/lib/gcc/i686-linux-gnu/12 -L/usr/lib/gcc/i686-linux-gnu/12/../../../i386-linux-gnu -L/usr/lib/gcc/i686-linux-gnu/12/../../../../lib -L/lib/i386-linux-gnu -L/lib/../lib -L/usr/lib/i386-linux-gnu -L/usr/lib/../lib -L/usr/lib/gcc/i686-linux-gnu/12/../../.. -lgfortran -lm -lquadmath -fopenmp gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' Making all in scripts @@ -2376,10 +2412,10 @@ Making all in doc gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/doc' /bin/mkdir -p man -SOURCE_DATE_EPOCH=1735693261 \ +SOURCE_DATE_EPOCH=1704070861 \ help2man --name="benchmark and test for librsb" --no-info ../rsbench | /bin/sed 's/January //g' > man/rsbench.1 /bin/mkdir -p man -SOURCE_DATE_EPOCH=1735693261 \ +SOURCE_DATE_EPOCH=1704070861 \ help2man --name="provide configuration information for librsb" --no-info /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb-config | /bin/sed 's/January //g' > man/librsb-config.1 gmake makedox gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/doc' @@ -3020,49 +3056,49 @@ Making all in . gmake[6]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rsb.o rsb.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rsb rsb.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt rgt-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt_ne-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -DRSBP_NOTHROW=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt_ne rgt_ne-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rgt_rv-rgt.o `test -f 'rgt.cpp' || echo './'`rgt.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ./libtool --tag=CXX --mode=link g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -DRSBP_NOTHROW=1 -DRSBP_WANT_REV=1 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rgt_rv rgt_rv-rgt.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' Making all in examples gmake[6]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib/examples' g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o assemble.o assemble.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/assemble assemble.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o autotune.o autotune.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/autotune autotune.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o bench.o bench.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/bench bench.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o build.o build.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/build build.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o misc.o misc.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/misc misc.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o twonnz.o twonnz.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/twonnz twonnz.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o example.o example.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/example example.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o mtx2bin.o mtx2bin.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/mtx2bin mtx2bin.o -lstdc++fs -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o render.o render.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/render render.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp g++ -DHAVE_CONFIG_H -I.. -Wdate-time -D_FORTIFY_SOURCE=2 -I/build/reproducible-path/librsb-1.3.0.2+dfsg -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o span.o span.cpp -/bin/bash ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la +/bin/sh ../libtool --tag=CXX --mode=link g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la libtool: link: g++ -I.. -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/span span.o -lpthread /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -fopenmp gmake[6]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib/examples' gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsblib' @@ -3072,7 +3108,7 @@ gmake all-am gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' g++ -DHAVE_CONFIG_H -I. -I/build/reproducible-path/librsb-1.3.0.2+dfsg -Wdate-time -D_FORTIFY_SOURCE=2 -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -c -o rsbtest.o rsbtest.cpp -/bin/bash ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs +/bin/sh ./libtool --tag=CXX --mode=link g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z,relro -o rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/librsb.la -lstdc++fs -lstdc++fs libtool: link: g++ -fopenmp -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -Wl,-z -Wl,relro -o .libs/rsbtest rsbtest.o /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/librsb.so -lstdc++fs -fopenmp gmake[5]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest' @@ -3148,7 +3184,7 @@ gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake mtests -C . gmake[3]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash -ex ./scripts/readme-tests.sh +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh -ex ./scripts/readme-tests.sh + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x + ./rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # --bench option implies -qH -R --write-performance-record --want-mkl-autotune --mkl-benchmark --types : --split-experimental 6 --merge-experimental 6 --also-transpose --sort-filenames-list --want-memory-benchmark @@ -3158,15 +3194,15 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1740440648 +# beginning run at 1706032571 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx -qH -R -n1,4 -T z --verbose --nrhs 1,2 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# average timer granularity: 5.15e-08 s -# Will write a final performance record to file rsbench_pr__1740440648_gcc-12.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1740440648_gcc-12.2-1,4th.rpr.tmp +# average timer granularity: 7.48e-07 s +# Will write a final performance record to file rsbench_pr__1706032571_gcc-12.2-1,4th.rpr and periodic checkpoints to rsbench_pr__1706032571_gcc-12.2-1,4th.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs:/usr/lib/libeatmydata # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -3205,61 +3241,61 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos6-i386 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 6.921s +# Memory benchmark took 5.398s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 8 samples (2016 bytes). -# Cache block size total 4194304 bytes, per-thread 233016 bytes -# so far, program took 6.923s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 524288 bytes, per-thread 52428 bytes +# so far, program took 5.428s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type Z... -# file input of A.mtx took 0.00 s (6 nnz, 46864 nnz/s ) (1.44 MB/s ) -#pre-sorting (6 elements) took 0.00125813 s -#weeding duplicates (to 6 elements) took 1.90735e-05 s (and check, 2.14577e-06 s ) +# file input of A.mtx took 0.00 s (6 nnz, 43767 nnz/s ) (1.34 MB/s ) +#pre-sorting (6 elements) took 0.0295911 s +#weeding duplicates (to 6 elements) took 3.09944e-06 s (and check, 1.90735e-06 s ) # multi-nrhs benchmarking (1,2) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x583c3ff0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.136s): (3 x 3)[0x5173a0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 8.512e-05s; avg 2.837e-05s ( +/- 71.43/142.86 %); best 8.106e-06s; worst 6.89e-05s; std dev. 2.866e-05 (taking best). -Reference operation time is 8.10623e-06 s (11.84 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 8.106e-06 Mflops: 11.843) -Merge (3 -> 1 leaves) took w.c.t. of 1.884e-05s, ~5.96e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (1 th.) took 3.004e-05s; avg 1.001e-05s ( +/- 99.48/190.48 %); best 5.22e-08s; worst 2.909e-05s; std dev. 1.349e-05 (taking best). -Reference operation time is 5.22017e-08 s (1839 Mflops) with 1 threads. -After merge step 1: tpop: 5.22e-08 s ~Mflops: 1839.019 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 155.287x: 8.106e-06s -> 5.22e-08s, so taking this instance. +3 iterations (1 th.) took 0.068s; avg 0.02267s ( +/- 11.84/ 23.62 %); best 0.01998s; worst 0.02802s; std dev. 0.003786 (taking best). +Reference operation time is 0.0199811 s (0.004805 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01998 Mflops: 0.005) +Merge (3 -> 1 leaves) took w.c.t. of 2.718e-05s, ~1.001e-05s of computing time (of which 2.146e-06s sorting, 5.96e-06s analysis) +3 iterations (1 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 92.05/174.17 %); best 9.537e-07s; worst 3.29e-05s; std dev. 1.479e-05 (taking best). +Reference operation time is 9.53674e-07 s (100.7 Mflops) with 1 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 20951.750x: 0.01998s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 9.298e-05s (of which 2.599e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 9.298e-05s, equivalent to 1781.2/11.5 new/old ops (4.792e-05s for 2 clones -- as 918.0/5.9 ops, or 459.0/3.0 ops per clone), SPEEDUP of 155.287x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 155.287x (8.106e-06s -> 5.22e-08s), will amortize in 11.5 ops by saving 8.054e-06s per op. -In 1 tuning rounds (tot. 0.00025s, 4.8e-05s for constructor, 2 clones) obtained a SPEEDUP of 15428.7% (155.3x) (from 11.84 to 1839 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.05998s (of which 3.409e-05s partitioning, 0s I/O); computing times: 1.001e-05s in par. loops, 2.146e-06s sorting, 5.96e-06s analyzing) +Total merge + benchmarking process took 0.05998s, equivalent to 62892.5/3.0 new/old ops (0.1358s for 2 clones -- as 142404.0/6.8 ops, or 71202.0/3.4 ops per clone), SPEEDUP of 20951.750x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 20951.750x (0.01998s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01998s per op. +In 1 tuning rounds (tot. 0.2s, 0.14s for constructor, 2 clones) obtained a SPEEDUP of 2095075.0% (2.095e+04x) (from 0.004805 to 100.7 Mflops). #pr: updating sample at index 1 (0^th of 8), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.000288963 s (8.106e-06 s -> 5.220e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.203987 s (1.998e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000756979 s and estimated a speedup of 1.000000 x (5.220e-08 s -> 5.220e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.799956 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000001 0.000080 0.000018 0.000098 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000099 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.000080 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.000018 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000001 0.039786 0.047965 0.087751 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.087752 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.039786 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.047965 %:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000001 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 1.000 -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000098 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.087751 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 @@ -3274,47 +3310,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.001s): (3 x 3)[0x583c7cf0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.132s): (3 x 3)[0x51b1d0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 8.321e-05s; avg 2.774e-05s ( +/- 60.46/ 98.57 %); best 1.097e-05s; worst 5.507e-05s; std dev. 1.95e-05 (taking best). -Reference operation time is 1.09673e-05 s (8.753 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.097e-05 Mflops: 8.753) -Merge (3 -> 1 leaves) took w.c.t. of 8.106e-06s, ~1.907e-06s of computing time (of which 9.537e-07s sorting, 1.192e-06s analysis) -3 iterations (4 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 91.79/ 50.00 %); best 5.22e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). -Reference operation time is 5.22017e-08 s (1839 Mflops) with 4 threads. -After merge step 1: tpop: 5.22e-08 s ~Mflops: 1839.019 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 210.094x: 1.097e-05s -> 5.22e-08s, so taking this instance. +3 iterations (4 th.) took 0.04799s; avg 0.016s ( +/- 0.02/ 0.02 %); best 0.016s; worst 0.016s; std dev. 2.485e-06 (taking best). +Reference operation time is 0.015995 s (0.006002 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 2.193e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) +3 iterations (4 th.) took 8.106e-06s; avg 2.702e-06s ( +/- 64.71/ 85.29 %); best 9.537e-07s; worst 5.007e-06s; std dev. 1.701e-06 (taking best). +Reference operation time is 9.53674e-07 s (100.7 Mflops) with 4 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 16772.000x: 0.016s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 4.911e-05s (of which 1.311e-05s partitioning, 0s I/O); computing times: 1.907e-06s in par. loops, 9.537e-07s sorting, 1.192e-06s analyzing) -Total merge + benchmarking process took 4.911e-05s, equivalent to 940.9/4.5 new/old ops (4.196e-05s for 2 clones -- as 803.8/3.8 ops, or 401.9/1.9 ops per clone), SPEEDUP of 210.094x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 210.094x (1.097e-05s -> 5.22e-08s), will amortize in 4.5 ops by saving 1.092e-05s per op. -In 1 tuning rounds (tot. 0.00018s, 4.2e-05s for constructor, 2 clones) obtained a SPEEDUP of 20909.4% (210.1x) (from 8.753 to 1839 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 2.789e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.04798s, equivalent to 50314.0/3.0 new/old ops (0.1079s for 2 clones -- as 113118.2/6.7 ops, or 56559.1/3.4 ops per clone), SPEEDUP of 16772.000x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 16772.000x (0.016s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.16s, 0.11s for constructor, 2 clones) obtained a SPEEDUP of 1677100.0% (1.677e+04x) (from 0.006002 to 100.7 Mflops). #pr: updating sample at index 5 (1^th of 8), 0^th touch for (0,1,0,0,0,0,0). -First run of RSB Autotuner took 0.000195026 s (1.097e-05 s -> 5.220e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.156013 s (1.600e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000627041 s and estimated a speedup of 1.000000 x (5.220e-08 s -> 5.220e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.799952 s and estimated a speedup of 1.000000 x (1.192e-06 s -> 1.192e-06 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.000079 0.001133 0.001212 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.001212 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.000079 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.001133 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.031554 0.052006 0.083560 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.083560 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.031554 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.052006 %:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 %:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 inf -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.001212 +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.083560 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.08 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.08 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 1.01 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.02 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 inf 1.01 0.02 0.08 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 1.05 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 1.05 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 1.26 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.92 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 inf 1.26 0.92 1.05 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3323,56 +3359,56 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 0.000159979 1e+09 1e+09 +%operation:A.mtx 0.135765 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 9.53674e-07 8.01086e-05 0 1.81198e-05 +%constructor:A.mtx 9.53674e-07 0.0397861 0 0.047965 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,2) -- now using nrhs 2. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x583cbd50]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.088s): (3 x 3)[0x51f4b0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (1 th.) took 4.101e-05s; avg 1.367e-05s ( +/- 42.44/ 83.14 %); best 7.868e-06s; worst 2.503e-05s; std dev. 8.037e-06 (taking best). -Reference operation time is 7.86781e-06 s (24.4 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 7.868e-06 Mflops: 24.403) -Merge (3 -> 1 leaves) took w.c.t. of 8.821e-06s, ~2.146e-06s of computing time (of which 1.192e-06s sorting, 9.537e-07s analysis) -3 iterations (1 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 96.87/142.86 %); best 5.22e-08s; worst 4.053e-06s; std dev. 1.73e-06 (taking best). -Reference operation time is 5.22017e-08 s (3678 Mflops) with 1 threads. -After merge step 1: tpop: 5.22e-08 s ~Mflops: 3678.038 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 150.719x: 7.868e-06s -> 5.22e-08s, so taking this instance. +3 iterations (1 th.) took 0.048s; avg 0.016s ( +/- 0.02/ 0.02 %); best 0.016s; worst 0.016s; std dev. 2.485e-06 (taking best). +Reference operation time is 0.015996 s (0.012 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.012) +Merge (3 -> 1 leaves) took w.c.t. of 2.408e-05s, ~9.06e-06s of computing time (of which 1.907e-06s sorting, 5.007e-06s analysis) +3 iterations (1 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 68.42/105.26 %); best 9.537e-07s; worst 6.199e-06s; std dev. 2.281e-06 (taking best). +Reference operation time is 9.53674e-07 s (201.3 Mflops) with 1 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 201.327 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 16773.000x: 0.016s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 5.078e-05s (of which 1.192e-05s partitioning, 0s I/O); computing times: 2.146e-06s in par. loops, 1.192e-06s sorting, 9.537e-07s analyzing) -Total merge + benchmarking process took 5.078e-05s, equivalent to 972.8/6.5 new/old ops (4.196e-05s for 2 clones -- as 803.8/5.3 ops, or 401.9/2.7 ops per clone), SPEEDUP of 150.719x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 150.719x (7.868e-06s -> 5.22e-08s), will amortize in 6.5 ops by saving 7.816e-06s per op. -In 1 tuning rounds (tot. 0.00014s, 4.2e-05s for constructor, 2 clones) obtained a SPEEDUP of 14971.9% (150.7x) (from 24.4 to 3678 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 3.099e-05s partitioning, 0s I/O); computing times: 9.06e-06s in par. loops, 1.907e-06s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.04798s, equivalent to 50315.0/3.0 new/old ops (0.09587s for 2 clones -- as 100527.0/6.0 ops, or 50263.5/3.0 ops per clone), SPEEDUP of 16773.000x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 16773.000x (0.016s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1677200.0% (1.677e+04x) (from 0.012 to 201.3 Mflops). #pr: updating sample at index 3 (2^th of 8), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.000156164 s (7.868e-06 s -> 5.220e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.144018 s (1.600e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000432968 s and estimated a speedup of 1.000000 x (5.220e-08 s -> 5.220e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.835951 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000000 0.000021 0.000015 0.000036 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000036 -%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.000021 -%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.000015 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.000036 +%:CONSTRUCTOR_TIMES:A.mtx S N 1 3 3 6 0.000001 0.023936 0.031983 0.055919 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.055920 +%:RSB_SUBDIVISION_TIME:A.mtx S N 1 3 3 6 0.023936 +%:RSB_SHUFFLE_TIME:A.mtx S N 1 3 3 6 0.031983 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 1 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 1 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 1 3 3 6 0.055919 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 1 3 3 6 0.000 %:UNSORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:SORTEDCOO2RSB_SCALING:A.mtx S N 1 3 3 6 1.00 %:RSB_SUBDIVISION_SCALING:A.mtx S N 1 3 3 6 1.00 %:RSB_SHUFFLE_SCALING:A.mtx S N 1 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 1 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 1 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 1 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 1 3 3 6 28 48 36 @@ -3381,47 +3417,47 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 1 3 3 6 6 6 6 # # Using 4 threads -# Constructed matrix (took 0.008s): (3 x 3)[0x583cbd50]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Constructed matrix (took 0.104s): (3 x 3)[0x51f4b0]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.1 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (4 th.) took 4.387e-05s; avg 1.462e-05s ( +/- 31.52/ 36.96 %); best 1.001e-05s; worst 2.003e-05s; std dev. 4.126e-06 (taking best). -Reference operation time is 1.00136e-05 s (19.17 Mflops) with 4 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.001e-05 Mflops: 19.174) -Merge (3 -> 1 leaves) took w.c.t. of 9.06e-06s, ~1.907e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (4 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 94.95/107.69 %); best 5.22e-08s; worst 2.146e-06s; std dev. 8.778e-07 (taking best). -Reference operation time is 5.22017e-08 s (3678 Mflops) with 4 threads. -After merge step 1: tpop: 5.22e-08 s ~Mflops: 3678.038 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 191.825x: 1.001e-05s -> 5.22e-08s, so taking this instance. +3 iterations (4 th.) took 0.052s; avg 0.01733s ( +/- 7.69/ 15.32 %); best 0.016s; worst 0.01999s; std dev. 0.001878 (taking best). +Reference operation time is 0.016 s (0.012 Mflops) with 4 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.012) +Merge (3 -> 1 leaves) took w.c.t. of 1.812e-05s, ~5.96e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) +3 iterations (4 th.) took 8.821e-06s; avg 2.94e-06s ( +/- 67.57/102.70 %); best 9.537e-07s; worst 5.96e-06s; std dev. 2.171e-06 (taking best). +Reference operation time is 9.53674e-07 s (201.3 Mflops) with 4 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 201.327 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 16777.250x: 0.016s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 4.983e-05s (of which 1.287e-05s partitioning, 0s I/O); computing times: 1.907e-06s in par. loops, 9.537e-07s sorting, 9.537e-07s analyzing) -Total merge + benchmarking process took 4.983e-05s, equivalent to 954.6/5.0 new/old ops (4.22e-05s for 2 clones -- as 808.4/4.2 ops, or 404.2/2.1 ops per clone), SPEEDUP of 191.825x -Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 191.825x (1.001e-05s -> 5.22e-08s), will amortize in 5.0 ops by saving 9.961e-06s per op. -In 1 tuning rounds (tot. 0.00015s, 4.2e-05s for constructor, 2 clones) obtained a SPEEDUP of 19082.5% (191.8x) (from 19.17 to 3678 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 2.289e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.04798s, equivalent to 50314.0/3.0 new/old ops (0.09587s for 2 clones -- as 100532.0/6.0 ops, or 50266.0/3.0 ops per clone), SPEEDUP of 16777.250x +Applying multi-merge (3 -> 1 leaves, 1 steps, 1 -> 1 th.sp.) yielded SPEEDUP of 16777.250x (0.016s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.15s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1677625.0% (1.678e+04x) (from 0.012 to 201.3 Mflops). #pr: updating sample at index 7 (3^th of 8), 0^th touch for (0,1,0,0,1,0,0). -First run of RSB Autotuner took 0.000159025 s (1.001e-05 s -> 5.220e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.148 s (1.600e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.00058794 s and estimated a speedup of 1.000000 x (5.220e-08 s -> 5.220e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.835964 s and estimated a speedup of 1.000000 x (1.907e-06 s -> 1.907e-06 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000000 0.003573 0.004005 0.007578 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.007578 -%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.003573 -%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.004005 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.007578 +%:CONSTRUCTOR_TIMES:A.mtx S N 4 3 3 6 0.000001 0.028577 0.039067 0.067644 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.067645 +%:RSB_SUBDIVISION_TIME:A.mtx S N 4 3 3 6 0.028577 +%:RSB_SHUFFLE_TIME:A.mtx S N 4 3 3 6 0.039067 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 4 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 4 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 4 3 3 6 0.067644 %:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 4 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.01 -%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 -nan 0.01 0.00 0.00 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.83 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 4 3 3 6 0.83 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 4 3 3 6 0.84 +%:RSB_SHUFFLE_SCALING:A.mtx S N 4 3 3 6 0.82 +%:CONSTRUCTOR_SCALING:A.mtx S N 4 3 3 6 1.00 0.84 0.82 0.83 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:A.mtx S N 4 3 3 6 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 4 3 3 6 28 48 36 @@ -3430,184 +3466,184 @@ %:SM_MINMAXAVGNNZ:A.mtx S N 4 3 3 6 6 6 6 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[4] -%operation:A.mtx 5.19753e-05 1e+09 1e+09 +%operation:A.mtx 0.087934 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:A.mtx 0 2.09808e-05 0 1.50204e-05 +%constructor:A.mtx 9.53674e-07 0.023936 0 0.0319831 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 7.018s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.003s/0.000s . +# so far, program took 10.150s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.924s/0.000s . getrusage() stats: -ru_maxrss: 29 (maximum resident set size -- MB) -ru_stime : 0.1275s (system CPU time used) -ru_utime : 8.335s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.05987s (system CPU time used) +ru_utime : 31.78s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1839.02 8.106e-06 0.000e+00 5.220e-08 0.000e+00 2.890e-04 5.13e+00 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1839.02 1.097e-05 0.000e+00 5.220e-08 0.000e+00 1.950e-04 5.13e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.998e-02 0.000e+00 9.537e-07 0.000e+00 2.040e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.560e-01 2.81e-01 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 18169.0 % faster, avg. sp. ratio 182.690x, max sp. ratio 210.094x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 4635.8/3736.0/5535.5/9271.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 26.7/ 17.8/ 35.6/ 53.4 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 26.9, min. 17.9, max. 35.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1886087.5 % faster, avg. sp. ratio 18861.875x, max sp. ratio 20951.750x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 188743.8/163591.5/213896.0/377487.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.0/ 9.8/ 10.2/ 20.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 10.0, min. 9.8, max. 10.2 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.214/ 4.214/ 4.214,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.268/ 5.134/ 5.134,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.231/ 0.231/ 0.231,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.562/ 0.281/ 0.281,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.839e+03, min 1.839e+03, max 1.839e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.030e+01, min 8.753e+00, max 1.184e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.220e-08s, min 5.220e-08s, max 5.220e-08s, tot 1.044e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.537e-06s, min 8.106e-06s, max 1.097e-05s, tot 1.907e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.396e+00 5.396e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.856e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.16 s, max 0.20 s, tot 0.36 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.16 s, max 0.20 s, tot 0.36 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.007e+02, min 1.007e+02, max 1.007e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.403e-03, min 4.805e-03, max 6.002e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.799e-02s, min 1.600e-02s, max 1.998e-02s, tot 3.598e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.555e+01 3.555e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.743e+00 #pr: ======== Limiting to nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3678.04 7.868e-06 0.000e+00 5.220e-08 0.000e+00 1.562e-04 7.89e+00 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3678.04 1.001e-05 0.000e+00 5.220e-08 0.000e+00 1.590e-04 7.89e+00 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 4.32e-01 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.480e-01 4.32e-01 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 17027.2 % faster, avg. sp. ratio 171.272x, max sp. ratio 191.825x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3019.0/2991.6/3046.4/6037.9 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 17.9/ 15.9/ 19.8/ 35.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 18.0, min. 16.0, max. 20.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1677412.5 % faster, avg. sp. ratio 16775.125x, max sp. ratio 16777.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 153101.5/151013.8/155189.2/306203.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 9.0/ 9.2/ 18.3 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 9.0, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 6.053/ 6.053/ 6.053,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.785/ 7.892/ 7.892,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.331/ 0.331/ 0.331,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.864/ 0.432/ 0.432,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.678e+03, min 3.678e+03, max 3.678e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 2.179e+01, min 1.917e+01, max 2.440e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.220e-08s, min 5.220e-08s, max 5.220e-08s, tot 1.044e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 8.941e-06s, min 7.868e-06s, max 1.001e-05s, tot 1.788e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.757e+00 3.757e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.856e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.15 s, tot 0.29 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.15 s, tot 0.29 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.013e+02, min 2.013e+02, max 2.013e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.200e-02, min 1.200e-02, max 1.200e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.600e-02s, min 1.600e-02s, max 1.600e-02s, tot 3.200e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.475e+01 2.475e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.743e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=N: #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1839.02 8.106e-06 0.000e+00 5.220e-08 0.000e+00 2.890e-04 5.13e+00 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3678.04 7.868e-06 0.000e+00 5.220e-08 0.000e+00 1.562e-04 7.89e+00 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1839.02 1.097e-05 0.000e+00 5.220e-08 0.000e+00 1.950e-04 5.13e+00 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3678.04 1.001e-05 0.000e+00 5.220e-08 0.000e+00 1.590e-04 7.89e+00 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.998e-02 0.000e+00 9.537e-07 0.000e+00 2.040e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 4.32e-01 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.560e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.480e-01 4.32e-01 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 17598.1 % faster, avg. sp. ratio 176.981x, max sp. ratio 210.094x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3827.4/2991.6/5535.5/15309.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 22.3/ 15.9/ 35.6/ 89.2 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 22.4, min. 16.0, max. 35.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1781750.0 % faster, avg. sp. ratio 17818.500x, max sp. ratio 20951.750x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 170922.6/151013.8/213896.0/683690.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.6/ 9.0/ 10.2/ 38.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.6, min. 9.0, max. 10.2 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.134/ 4.214/ 6.053,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 26.053/ 5.134/ 7.892,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.281/ 0.231/ 0.331,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.426/ 0.281/ 0.432,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.759e+03, min 1.839e+03, max 3.678e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.604e+01, min 8.753e+00, max 2.440e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 5.220e-08s, min 5.220e-08s, max 5.220e-08s, tot 2.088e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.239e-06s, min 7.868e-06s, max 1.097e-05s, tot 3.695e-05s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.757e+00 5.396e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.856e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.16 s, min 0.14 s, max 0.20 s, tot 0.65 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.16 s, min 0.14 s, max 0.20 s, tot 0.65 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.510e+02, min 1.007e+02, max 2.013e+02 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 8.702e-03, min 4.805e-03, max 1.200e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 3.815e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.699e-02s, min 1.600e-02s, max 1.998e-02s, tot 6.797e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.475e+01 3.555e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.743e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1839.02 8.106e-06 0.000e+00 5.220e-08 0.000e+00 2.890e-04 5.13e+00 2.29e+00 1 9.60e-05 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1839.02 1.097e-05 0.000e+00 5.220e-08 0.000e+00 1.950e-04 5.13e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.998e-02 0.000e+00 9.537e-07 0.000e+00 2.040e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.560e-01 2.81e-01 2.29e+00 1 9.60e-05 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 18169.0 % faster, avg. sp. ratio 182.690x, max sp. ratio 210.094x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 4635.8/3736.0/5535.5/9271.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 26.7/ 17.8/ 35.6/ 53.4 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 26.9, min. 17.9, max. 35.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1886087.5 % faster, avg. sp. ratio 18861.875x, max sp. ratio 20951.750x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 188743.8/163591.5/213896.0/377487.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 10.0/ 9.8/ 10.2/ 20.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 10.0, min. 9.8, max. 10.2 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 4.214/ 4.214/ 4.214,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.268/ 5.134/ 5.134,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.231/ 0.231/ 0.231,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.562/ 0.281/ 0.281,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.292/ 2.292/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.839e+03, min 1.839e+03, max 1.839e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.030e+01, min 8.753e+00, max 1.184e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.220e-08s, min 5.220e-08s, max 5.220e-08s, tot 1.044e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.537e-06s, min 8.106e-06s, max 1.097e-05s, tot 1.907e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.396e+00 5.396e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.856e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.18 s, min 0.16 s, max 0.20 s, tot 0.36 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.18 s, min 0.16 s, max 0.20 s, tot 0.36 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.007e+02, min 1.007e+02, max 1.007e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 5.403e-03, min 4.805e-03, max 6.002e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.799e-02s, min 1.600e-02s, max 1.998e-02s, tot 3.598e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.555e+01 3.555e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.743e+00 #pr: ======== Limiting to both transA=N and nrhs=2: #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3678.04 7.868e-06 0.000e+00 5.220e-08 0.000e+00 1.562e-04 7.89e+00 1.65e+00 1 1.92e-04 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3678.04 1.001e-05 0.000e+00 5.220e-08 0.000e+00 1.590e-04 7.89e+00 1.65e+00 1 1.92e-04 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 4.32e-01 1.65e+00 1 1.92e-04 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.480e-01 4.32e-01 1.65e+00 1 1.92e-04 #pr: 2 samples (out of 4) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 17027.2 % faster, avg. sp. ratio 171.272x, max sp. ratio 191.825x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3019.0/2991.6/3046.4/6037.9 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 17.9/ 15.9/ 19.8/ 35.7 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 18.0, min. 16.0, max. 20.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1677412.5 % faster, avg. sp. ratio 16775.125x, max sp. ratio 16777.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 153101.5/151013.8/155189.2/306203.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 9.0/ 9.2/ 18.3 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 9.0, max. 9.3 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 6.053/ 6.053/ 6.053,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 15.785/ 7.892/ 7.892,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.331/ 0.331/ 0.331,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.864/ 0.432/ 0.432,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.646/ 1.646/ 1.646) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 1 /1 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.678e+03, min 3.678e+03, max 3.678e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 2.179e+01, min 1.917e+01, max 2.440e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.220e-08s, min 5.220e-08s, max 5.220e-08s, tot 1.044e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 8.941e-06s, min 7.868e-06s, max 1.001e-05s, tot 1.788e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.757e+00 3.757e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.856e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.15 s, tot 0.29 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.15 s, tot 0.29 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.013e+02, min 2.013e+02, max 2.013e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.200e-02, min 1.200e-02, max 1.200e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.600e-02s, min 1.600e-02s, max 1.600e-02s, tot 3.200e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.475e+01 2.475e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.743e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=T: #pr: No sample (out of 4) matched the dump criteria -- skipping dump round. @@ -3618,44 +3654,44 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 8) ordered by (1,2,1,1,2,1,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 1839.02 8.106e-06 0.000e+00 5.220e-08 0.000e+00 2.890e-04 5.13e+00 2.29e+00 1 9.60e-05 -pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 3678.04 7.868e-06 0.000e+00 5.220e-08 0.000e+00 1.562e-04 7.89e+00 1.65e+00 1 1.92e-04 -pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 1839.02 1.097e-05 0.000e+00 5.220e-08 0.000e+00 1.950e-04 5.13e+00 2.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 3678.04 1.001e-05 0.000e+00 5.220e-08 0.000e+00 1.590e-04 7.89e+00 1.65e+00 1 1.92e-04 +pr: 1:R_R A 3 3 6 1 Z S N 1 1 0 4.0000 4.6667 3 1 100.66 1.998e-02 0.000e+00 9.537e-07 0.000e+00 2.040e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 2 Z S N 1 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 4.32e-01 1.65e+00 1 1.92e-04 +pr: 5:R_R A 3 3 6 1 Z S N 4 1 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.560e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 2 Z S N 4 1 0 4.0000 4.6667 3 1 201.33 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.480e-01 4.32e-01 1.65e+00 1 1.92e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 17598.1 % faster, avg. sp. ratio 176.981x, max sp. ratio 210.094x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 3827.4/2991.6/5535.5/15309.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 22.3/ 15.9/ 35.6/ 89.2 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 22.4, min. 16.0, max. 35.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1781750.0 % faster, avg. sp. ratio 17818.500x, max sp. ratio 20951.750x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 170922.6/151013.8/213896.0/683690.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.6/ 9.0/ 10.2/ 38.2 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.6, min. 9.0, max. 10.2 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 5.134/ 4.214/ 6.053,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 26.053/ 5.134/ 7.892,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.281/ 0.231/ 0.331,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.426/ 0.281/ 0.432,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.969/ 1.646/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 2 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.759e+03, min 1.839e+03, max 3.678e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.604e+01, min 8.753e+00, max 2.440e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 5.220e-08s, min 5.220e-08s, max 5.220e-08s, tot 2.088e-07s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.239e-06s, min 7.868e-06s, max 1.097e-05s, tot 3.695e-05s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.757e+00 5.396e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.856e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.16 s, min 0.14 s, max 0.20 s, tot 0.65 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.16 s, min 0.14 s, max 0.20 s, tot 0.65 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.510e+02, min 1.007e+02, max 2.013e+02 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 8.702e-03, min 4.805e-03, max 1.200e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 3.815e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.699e-02s, min 1.600e-02s, max 1.998e-02s, tot 6.797e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.475e+01 3.555e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.743e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (2 samples, the non-min-nrhs ones) -#pr: Record collection took 0.08 s. +#pr: Record collection took 3.82 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 90 environment variables in 3821 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 90 environment variables in 3875 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1740440648_gcc-12.2-1,4th.rpr -# Removing the temporary record file rsbench_pr__1740440648_gcc-12.2-1,4th.rpr.tmp. -# terminating run at 1740440655 (after 7.0s of w.c.t.) +#pr: ======== Saved a performance record of 8 samples to rsbench_pr__1706032571_gcc-12.2-1,4th.rpr +# Removing the temporary record file rsbench_pr__1706032571_gcc-12.2-1,4th.rpr.tmp. +# terminating run at 1706032581 (after 10.1s of w.c.t.) + ./rsbench -oa -Ob --help /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench is a swiss army knife for testing the library functionality and performance. You can use it to perform sparse matrix - unitary vector multiplication, specifying the blocking parameters, the times to perform multiplication. @@ -3909,43 +3945,35 @@ Written by michelemartone_AT_users_DOT_sourceforge_DOT_net. + ./rsbench -I -cache block size : 233016 -hwloc size of cache level 1: 32768 -hwloc size of cache level 2: 4194304 -detected max available cores/threads : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 -detected max OpenMP procs : 18 +cache block size : 52428 +hwloc size of cache level 1: 65536 +hwloc size of cache level 2: 524288 +detected max available cores/threads : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 +detected max OpenMP procs : 10 detected 2 levels of cache -L1 size: 32768 -L2 size: 4194304 +L1 size: 65536 +L2 size: 524288 sysconf() : 4096 bytes per pagesize -sysconf() : 2024100 physical pages -sysconf() : 3995746304 bytes (3810 MB) of physical memory -sysconf() : 1011810 available (free) physical pages -sysconf() : 4144373760 available (free) physical memory -sysconf() , processors : 128 -sysconf() , processors online : 18 -sysconf() : level 1 cache size 32768 -sysconf() : level 1 cache associativity 8 +sysconf() : 2052095 physical pages +sysconf() : 4110413824 bytes (3919 MB) of physical memory +sysconf() : 124962 available (free) physical pages +sysconf() : 511844352 available (free) physical memory +sysconf() , processors : 32 +sysconf() , processors online : 10 +sysconf() : level 1 cache size 65536 +sysconf() : level 1 cache associativity 2 sysconf() : level 1 cache line size 64 -sysconf() : level 2 cache size 2097152 -sysconf() : level 2 cache associativity 8 +sysconf() : level 2 cache size 524288 +sysconf() : level 2 cache associativity 16 sysconf() : level 2 cache line size 64 sysconf() : no level 3 cache sysconf() : no level 4 cache @@ -3967,33 +3995,30 @@ RSB_SUBM_IDX_MARKER : 2147483647 RSB_MAX_ALLOCATABLE_MEMORY_CHUNK: 4294967295 timing min delta (if negative, don't complain with us) : 0 s -timing granularity : 5.1105e-08 s +timing granularity : 6.15156e-07 s CFLAGS : -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 CXXFLAGS : -g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -fopenmp CC : gcc -memhinfo : L2:16/64/4M,L1:8/64/32K -detected free memory : -150593536 -detected total memory : -299220992 -for array sized 4194304 elems, took 0.00145388 s for linear search and 0 s for binary search for element 4194303, in 68 tries, for a total of 0.100067 s (ignore this:570425208) -for array sized 4194304 elems, took 0.000726938 s for linear search and 0 s for binary search for element 2097151, in 137 tries, for a total of 0.100164 s (ignore this:1145044582) -for array sized 4194304 elems, took 0.000362873 s for linear search and 0 s for binary search for element 1048575, in 274 tries, for a total of 0.100098 s (ignore this:1719663682) -for array sized 4194304 elems, took 0.00018096 s for linear search and 0 s for binary search for element 524287, in 548 tries, for a total of 0.100134 s (ignore this:-2000685062) -for array sized 4194304 elems, took 9.08375e-05 s for linear search and 0 s for binary search for element 262143, in 1094 tries, for a total of 0.100085 s (ignore this:-1427116178) -for array sized 4194304 elems, took 4.48227e-05 s for linear search and 0 s for binary search for element 131071, in 2176 tries, for a total of 0.100022 s (ignore this:-856695186) -for array sized 4194304 elems, took 2.19345e-05 s for linear search and 0 s for binary search for element 65535, in 4340 tries, for a total of 0.100018 s (ignore this:-287851386) -for array sized 4194304 elems, took 1.09673e-05 s for linear search and 0 s for binary search for element 32767, in 8655 tries, for a total of 0.100006 s (ignore this:279345384) -for array sized 4194304 elems, took 4.76837e-06 s for linear search and 0 s for binary search for element 16383, in 17063 tries, for a total of 0.100000 s (ignore this:838431642) -for array sized 4194304 elems, took 1.90735e-06 s for linear search and 0 s for binary search for element 8191, in 33216 tries, for a total of 0.100000 s (ignore this:1382576154) -for array sized 4194304 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 4095, in 63146 tries, for a total of 0.100000 s (ignore this:1899741894) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 2047, in 115367 tries, for a total of 0.100001 s (ignore this:-1922912904) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 1023, in 196049 tries, for a total of 0.100001 s (ignore this:-1521796650) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 511, in 300947 tries, for a total of 0.100001 s (ignore this:-1214228816) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 255, in 411643 tries, for a total of 0.100000 s (ignore this:-1004290886) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 127, in 504839 tries, for a total of 0.100001 s (ignore this:-876061780) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 63, in 571210 tries, for a total of 0.100000 s (ignore this:-804089320) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 31, in 679699 tries, for a total of 0.100000 s (ignore this:-761947982) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 15, in 735493 tries, for a total of 0.100000 s (ignore this:-739883192) -for array sized 4194304 elems, took 0 s for linear search and 0 s for binary search for element 7, in 753107 tries, for a total of 0.100001 s (ignore this:-729339694) +memhinfo : L2:16/64/512K,L1:2/64/64K +detected free memory : 511844352 +detected total memory : -184553472 +for array sized 524288 elems, took 0.000572205 s for linear search and 0 s for binary search for element 524287, in 122 tries, for a total of 0.100649 s (ignore this:127926028) +for array sized 524288 elems, took 0.000298023 s for linear search and 0 s for binary search for element 262143, in 300 tries, for a total of 0.100025 s (ignore this:285211828) +for array sized 524288 elems, took 0.000149012 s for linear search and 0 s for binary search for element 131071, in 595 tries, for a total of 0.100156 s (ignore this:441186318) +for array sized 524288 elems, took 8.17776e-05 s for linear search and 0 s for binary search for element 65535, in 1185 tries, for a total of 0.100056 s (ignore this:596504268) +for array sized 524288 elems, took 4.07696e-05 s for linear search and 0 s for binary search for element 32767, in 2335 tries, for a total of 0.100034 s (ignore this:749526158) +for array sized 524288 elems, took 2.09808e-05 s for linear search and 0 s for binary search for element 16383, in 4494 tries, for a total of 0.100019 s (ignore this:896776562) +for array sized 524288 elems, took 9.77516e-06 s for linear search and 0 s for binary search for element 8191, in 8482 tries, for a total of 0.100004 s (ignore this:1035728686) +for array sized 524288 elems, took 4.76837e-06 s for linear search and 0 s for binary search for element 4095, in 15136 tries, for a total of 0.100005 s (ignore this:1159692526) +for array sized 524288 elems, took 2.86102e-06 s for linear search and 0 s for binary search for element 2047, in 25518 tries, for a total of 0.100001 s (ignore this:1264163218) +for array sized 524288 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 1023, in 38221 tries, for a total of 0.100003 s (ignore this:1342363384) +for array sized 524288 elems, took 9.53674e-07 s for linear search and 0 s for binary search for element 511, in 51038 tries, for a total of 0.100001 s (ignore this:1394524220) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 255, in 60978 tries, for a total of 0.100001 s (ignore this:1425623000) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 127, in 68056 tries, for a total of 0.100001 s (ignore this:1442909224) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 63, in 71960 tries, for a total of 0.100000 s (ignore this:1451976184) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 31, in 74204 tries, for a total of 0.100001 s (ignore this:1456576832) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 15, in 74240 tries, for a total of 0.100001 s (ignore this:1458804032) +for array sized 524288 elems, took 0 s for linear search and 0 s for binary search for element 7, in 74835 tries, for a total of 0.100001 s (ignore this:1459851722) + ./rsbench -C /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench version: 1.3.0 format switches:br @@ -4020,7 +4045,7 @@ RSB_CONST_MAX_SUPPORTED_CORES:128 RSB_BLAS_MATRICES_MAX:2147482623 RSB_CONST_MIN_NNZ_PER_ROW_FOR_COO_SWITCH:2 -RSB_USER_SET_MEM_HIERARCHY_INFO:L2:16/64/4096K,L1:8/64/32K +RSB_USER_SET_MEM_HIERARCHY_INFO:L2:16/64/512K,L1:2/64/64K RSB_MAX_VALUE_FOR_TYPE(rsb_half_idx_t):65535 RSB_IOLEVEL:7 LIBRSBPP support: on. @@ -4040,17 +4065,17 @@ Adding matrix file: /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx # Sorting matrices list (use --no-sort-filenames-list to prevent this) # Using matrices: A.mtx -# beginning run at 1740440658 +# beginning run at 1706032584 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench -f /build/reproducible-path/librsb-1.3.0.2+dfsg/A.mtx --verbose --nrhs 1,4 --by-rows # compiled with: CC=gcc CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# User did not specify threads; assuming 1. Environment provides max 18 threads; this build supports max 128. -# User did not specify threads; assuming 1. Environment provides max 18 threads; this build supports max 128. -# average timer granularity: 5.21e-08 s -# Will write a final performance record to file rsbench_pr__1740440658_gcc-12.2.rpr and periodic checkpoints to rsbench_pr__1740440658_gcc-12.2.rpr.tmp +# User did not specify threads; assuming 1. Environment provides max 10 threads; this build supports max 128. +# User did not specify threads; assuming 1. Environment provides max 10 threads; this build supports max 128. +# average timer granularity: 6.12e-07 s +# Will write a final performance record to file rsbench_pr__1706032584_gcc-12.2.rpr and periodic checkpoints to rsbench_pr__1706032584_gcc-12.2.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs:/usr/lib/libeatmydata # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -4089,835 +4114,835 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos6-i386 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 1 (each --verbose occurrence counts +1) # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 6.896s +# Memory benchmark took 5.510s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 16 samples (4032 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 4194304 bytes, per-thread 233016 bytes -# so far, program took 6.898s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 524288 bytes, per-thread 52428 bytes +# so far, program took 5.536s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # reading A.mtx (184 bytes / 1 MiB / 6 nnz / 3 rows / 3 columns / 1 MiB COO) as type D... -# file input of A.mtx took 0.00 s (6 nnz, 60062 nnz/s ) (1.84 MB/s ) -#pre-sorting (6 elements) took 0.00119281 s -#weeding duplicates (to 6 elements) took 9.53674e-07 s (and check, 9.53674e-07 s ) +# file input of A.mtx took 0.00 s (6 nnz, 41054 nnz/s ) (1.26 MB/s ) +#pre-sorting (6 elements) took 0.0244482 s +#weeding duplicates (to 6 elements) took 3.09944e-06 s (and check, 2.86102e-06 s ) # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c74760]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.104s): (3 x 3)[0x238abb0]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 8.607e-05s; avg 2.869e-05s ( +/- 65.10/126.04 %); best 1.001e-05s; worst 6.485e-05s; std dev. 2.557e-05 (taking best). -Reference operation time is 1.00136e-05 s (2.397 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.001e-05 Mflops: 2.397) -Merge (3 -> 1 leaves) took w.c.t. of 1.597e-05s, ~5.96e-06s of computing time (of which 1.192e-06s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 1.788e-05s; avg 5.96e-06s ( +/- 99.15/184.00 %); best 5.095e-08s; worst 1.693e-05s; std dev. 7.765e-06 (taking best). -Reference operation time is 5.09501e-08 s (471 Mflops) with 18 threads. -After merge step 1: tpop: 5.095e-08 s ~Mflops: 471.050 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 196.537x: 1.001e-05s -> 5.095e-08s, so taking this instance. +3 iterations (10 th.) took 0.04799s; avg 0.016s ( +/- 0.03/ 0.02 %); best 0.01599s; worst 0.016s; std dev. 3.618e-06 (taking best). +Reference operation time is 0.0159919 s (0.001501 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.002) +Merge (3 -> 1 leaves) took w.c.t. of 3.6e-05s, ~1.121e-05s of computing time (of which 1.907e-06s sorting, 4.053e-06s analysis) +3 iterations (10 th.) took 2.098e-05s; avg 6.994e-06s ( +/- 86.36/172.73 %); best 9.537e-07s; worst 1.907e-05s; std dev. 8.542e-06 (taking best). +Reference operation time is 9.53674e-07 s (25.17 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 25.166 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 16768.750x: 0.01599s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 7.486e-05s (of which 2.003e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 1.192e-06s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 7.486e-05s, equivalent to 1469.3/7.5 new/old ops (4.601e-05s for 2 clones -- as 903.1/4.6 ops, or 451.6/2.3 ops per clone), SPEEDUP of 196.537x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 196.537x (1.001e-05s -> 5.095e-08s), will amortize in 7.5 ops by saving 9.963e-06s per op. -In 1 tuning rounds (tot. 0.00023s, 4.6e-05s for constructor, 2 clones) obtained a SPEEDUP of 19553.7% (196.5x) (from 2.397 to 471 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04798s (of which 4.196e-05s partitioning, 0s I/O); computing times: 1.121e-05s in par. loops, 1.907e-06s sorting, 4.053e-06s analyzing) +Total merge + benchmarking process took 0.04798s, equivalent to 50315.8/3.0 new/old ops (0.09584s for 2 clones -- as 100496.8/6.0 ops, or 50248.4/3.0 ops per clone), SPEEDUP of 16768.750x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16768.750x (0.01599s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1676775.0% (1.677e+04x) (from 0.001501 to 25.17 Mflops). #pr: updating sample at index 1 (0^th of 16), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.000255108 s (1.001e-05 s -> 5.095e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.144013 s (1.599e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000605106 s and estimated a speedup of 1.000000 x (5.095e-08 s -> 5.095e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.795978 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000000 0.000040 0.000018 0.000058 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000058 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000040 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000018 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000058 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000001 0.039521 0.031993 0.071514 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071515 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.039521 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.031993 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071514 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 8.4877e-05 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 0 4.00543e-05 0 1.81198e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.103525 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 9.53674e-07 0.039521 0 0.0319932 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c77740]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.107s): (3 x 3)[0x238d560]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 4.005e-05s; avg 1.335e-05s ( +/- 33.93/ 66.07 %); best 8.821e-06s; worst 2.217e-05s; std dev. 6.238e-06 (taking best). -Reference operation time is 8.82149e-06 s (10.88 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 8.821e-06 Mflops: 10.883) -Merge (3 -> 1 leaves) took w.c.t. of 8.106e-06s, ~1.907e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 1.121e-05s; avg 3.735e-06s ( +/- 98.64/168.09 %); best 5.095e-08s; worst 1.001e-05s; std dev. 4.466e-06 (taking best). -Reference operation time is 5.09501e-08 s (1884 Mflops) with 18 threads. -After merge step 1: tpop: 5.095e-08 s ~Mflops: 1884.198 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 173.140x: 8.821e-06s -> 5.095e-08s, so taking this instance. +3 iterations (10 th.) took 0.048s; avg 0.016s ( +/- 0.01/ 0.02 %); best 0.016s; worst 0.016s; std dev. 2.1e-06 (taking best). +Reference operation time is 0.0159972 s (0.006001 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type D, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 2.003e-05s, ~5.96e-06s of computing time (of which 9.537e-07s sorting, 4.053e-06s analysis) +3 iterations (10 th.) took 2.694e-05s; avg 8.98e-06s ( +/- 89.38/168.14 %); best 9.537e-07s; worst 2.408e-05s; std dev. 1.068e-05 (taking best). +Reference operation time is 9.53674e-07 s (100.7 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 16774.250x: 0.016s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 5.889e-05s (of which 1.287e-05s partitioning, 0s I/O); computing times: 1.907e-06s in par. loops, 9.537e-07s sorting, 9.537e-07s analyzing) -Total merge + benchmarking process took 5.889e-05s, equivalent to 1155.8/6.7 new/old ops (4.101e-05s for 2 clones -- as 804.9/4.6 ops, or 402.4/2.3 ops per clone), SPEEDUP of 173.140x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 173.140x (8.821e-06s -> 5.095e-08s), will amortize in 6.7 ops by saving 8.771e-06s per op. -In 1 tuning rounds (tot. 0.00015s, 4.1e-05s for constructor, 2 clones) obtained a SPEEDUP of 17214.0% (173.1x) (from 10.88 to 1884 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04399s (of which 2.503e-05s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 9.537e-07s sorting, 4.053e-06s analyzing) +Total merge + benchmarking process took 0.04399s, equivalent to 46132.0/2.8 new/old ops (0.09186s for 2 clones -- as 96318.0/5.7 ops, or 48159.0/2.9 ops per clone), SPEEDUP of 16774.250x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16774.250x (0.016s -> 9.537e-07s), will amortize in 2.8 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.14s, 0.092s for constructor, 2 clones) obtained a SPEEDUP of 1677325.0% (1.677e+04x) (from 0.006001 to 100.7 Mflops). #pr: updating sample at index 9 (1^th of 16), 0^th touch for (0,0,0,0,1,0,0). -First run of RSB Autotuner took 0.000161886 s (8.821e-06 s -> 5.095e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.139994 s (1.600e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000517845 s and estimated a speedup of 1.000000 x (5.095e-08 s -> 5.095e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.799957 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.7320508075688772 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000000 0.000051 0.000014 0.000065 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000065 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000051 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000014 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000065 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000001 0.039211 0.031968 0.071179 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071180 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.039211 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.031968 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071179 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 8.79765e-05 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 0 5.10216e-05 0 1.40667e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.107197 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 9.53674e-07 0.039211 0 0.0319679 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 6.933s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.002s/0.000s . +# so far, program took 7.813s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.880s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.07534s (system CPU time used) -ru_utime : 7.411s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.03601s (system CPU time used) +ru_utime : 19.83s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). -# Cache block size total 4194304 bytes, per-thread 233016 bytes -# so far, program took 6.933s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.002s/0.000s . +# Cache block size total 524288 bytes, per-thread 52428 bytes +# so far, program took 7.813s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.880s/0.000s . # Reusing type converted (D->S) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c7b1d0]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.103s): (3 x 3)[0x238ff10]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 4.315e-05s; avg 1.438e-05s ( +/- 37.02/ 67.40 %); best 9.06e-06s; worst 2.408e-05s; std dev. 6.867e-06 (taking best). -Reference operation time is 9.05991e-06 s (2.649 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 9.06e-06 Mflops: 2.649) -Merge (3 -> 1 leaves) took w.c.t. of 9.06e-06s, ~2.861e-06s of computing time (of which 0s sorting, 1.192e-06s analysis) -3 iterations (18 th.) took 4.721e-05s; avg 1.574e-05s ( +/- 93.94/186.36 %); best 9.537e-07s; worst 4.506e-05s; std dev. 2.074e-05 (taking best). -Reference operation time is 9.53674e-07 s (25.17 Mflops) with 18 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 25.166 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 9.500x: 9.06e-06s -> 9.537e-07s, so taking this instance. +3 iterations (10 th.) took 0.048s; avg 0.016s ( +/- 0.01/ 0.02 %); best 0.016s; worst 0.016s; std dev. 2.503e-06 (taking best). +Reference operation time is 0.0159981 s (0.0015 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.002) +Merge (3 -> 1 leaves) took w.c.t. of 1.979e-05s, ~8.106e-06s of computing time (of which 1.907e-06s sorting, 5.007e-06s analysis) +3 iterations (10 th.) took 1.502e-05s; avg 5.007e-06s ( +/- 80.95/157.14 %); best 9.537e-07s; worst 1.287e-05s; std dev. 5.564e-06 (taking best). +Reference operation time is 9.53674e-07 s (25.17 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 25.166 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 16775.250x: 0.016s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 9.608e-05s (of which 1.192e-05s partitioning, 0s I/O); computing times: 2.861e-06s in par. loops, 0s sorting, 1.192e-06s analyzing) -Total merge + benchmarking process took 9.608e-05s, equivalent to 100.8/10.6 new/old ops (4.387e-05s for 2 clones -- as 46.0/4.8 ops, or 23.0/2.4 ops per clone), SPEEDUP of 9.500x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 9.500x (9.06e-06s -> 9.537e-07s), will amortize in 11.9 ops by saving 8.106e-06s per op. -In 1 tuning rounds (tot. 0.00023s, 4.4e-05s for constructor, 2 clones) obtained a SPEEDUP of 850.0% (9.5x) (from 2.649 to 25.17 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.048s (of which 2.599e-05s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 1.907e-06s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.048s, equivalent to 50328.5/3.0 new/old ops (0.0959s for 2 clones -- as 100555.2/6.0 ops, or 50277.6/3.0 ops per clone), SPEEDUP of 16775.250x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16775.250x (0.016s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1677425.0% (1.678e+04x) (from 0.0015 to 25.17 Mflops). #pr: updating sample at index 3 (2^th of 16), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.000246048 s (9.060e-06 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.144193 s (1.600e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000416994 s and estimated a speedup of 1.000000 x (5.095e-08 s -> 5.095e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.703785 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000001 0.000027 0.000014 0.000041 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000042 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000027 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000014 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000001 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 1.000 -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000041 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 1.00 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000001 0.039368 0.031996 0.071364 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071365 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.039368 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.031996 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071364 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 5.91278e-05 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 9.53674e-07 2.69413e-05 0 1.40667e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.103375 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 1.19209e-06 0.0393682 0 0.031996 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c7d520]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.104s): (3 x 3)[0x2391710]{S} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 5.913e-05s; avg 1.971e-05s ( +/- 54.03/ 98.39 %); best 9.06e-06s; worst 3.91e-05s; std dev. 1.373e-05 (taking best). -Reference operation time is 9.05991e-06 s (10.6 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 9.06e-06 Mflops: 10.596) -Merge (3 -> 1 leaves) took w.c.t. of 8.821e-06s, ~3.099e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 95.07/ 84.62 %); best 5.095e-08s; worst 1.907e-06s; std dev. 7.867e-07 (taking best). -Reference operation time is 5.09501e-08 s (1884 Mflops) with 18 threads. -After merge step 1: tpop: 5.095e-08 s ~Mflops: 1884.198 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 177.819x: 9.06e-06s -> 5.095e-08s, so taking this instance. +3 iterations (10 th.) took 0.048s; avg 0.016s ( +/- 0.04/ 0.02 %); best 0.01599s; worst 0.016s; std dev. 4.065e-06 (taking best). +Reference operation time is 0.0159929 s (0.006003 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type S, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 1.597e-05s, ~6.199e-06s of computing time (of which 1.907e-06s sorting, 3.099e-06s analysis) +3 iterations (10 th.) took 6.199e-06s; avg 2.066e-06s ( +/- 53.85/ 96.15 %); best 9.537e-07s; worst 4.053e-06s; std dev. 1.408e-06 (taking best). +Reference operation time is 9.53674e-07 s (100.7 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 16769.750x: 0.01599s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 9.99e-05s (of which 1.192e-05s partitioning, 0s I/O); computing times: 3.099e-06s in par. loops, 0s sorting, 9.537e-07s analyzing) -Total merge + benchmarking process took 9.99e-05s, equivalent to 1960.7/11.0 new/old ops (3.91e-05s for 2 clones -- as 767.4/4.3 ops, or 383.7/2.2 ops per clone), SPEEDUP of 177.819x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 177.819x (9.06e-06s -> 5.095e-08s), will amortize in 11.1 ops by saving 9.009e-06s per op. -In 1 tuning rounds (tot. 0.00021s, 3.9e-05s for constructor, 2 clones) obtained a SPEEDUP of 17681.9% (177.8x) (from 10.6 to 1884 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04825s (of which 2.193e-05s partitioning, 0s I/O); computing times: 6.199e-06s in par. loops, 1.907e-06s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.04825s, equivalent to 50590.5/3.0 new/old ops (0.09591s for 2 clones -- as 100570.0/6.0 ops, or 50285.0/3.0 ops per clone), SPEEDUP of 16769.750x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16769.750x (0.01599s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1676875.0% (1.677e+04x) (from 0.006003 to 100.7 Mflops). #pr: updating sample at index 11 (3^th of 16), 0^th touch for (0,0,0,0,1,1,0). -First run of RSB Autotuner took 0.000224113 s (9.060e-06 s -> 5.095e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.144275 s (1.599e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000442982 s and estimated a speedup of 1.000000 x (5.095e-08 s -> 5.095e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.707699 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:3 #norm:1.73205078 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000000 0.000025 0.000014 0.000039 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000039 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000025 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000014 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000039 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000001 0.039712 0.031982 0.071694 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071695 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.039712 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.031982 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071694 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 96 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 96 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 5.79357e-05 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 0 2.5034e-05 0 1.38283e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.103704 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 9.53674e-07 0.039712 0 0.0319822 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 6.966s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.003s/0.000s . +# so far, program took 9.877s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.580s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.1032s (system CPU time used) -ru_utime : 7.959s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.03601s (system CPU time used) +ru_utime : 32.73s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was S). -# Cache block size total 4194304 bytes, per-thread 233016 bytes -# so far, program took 6.966s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.003s/0.000s . +# Cache block size total 524288 bytes, per-thread 52428 bytes +# so far, program took 9.877s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 3.580s/0.000s . # Reusing type converted (S->C) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c7db50]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.104s): (3 x 3)[0x2394a80]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 5.412e-05s; avg 1.804e-05s ( +/- 49.78/ 99.56 %); best 9.06e-06s; worst 3.6e-05s; std dev. 1.27e-05 (taking best). -Reference operation time is 9.05991e-06 s (10.6 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 9.06e-06 Mflops: 10.596) -Merge (3 -> 1 leaves) took w.c.t. of 8.821e-06s, ~3.099e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 2.408e-05s; avg 8.027e-06s ( +/- 88.12/173.27 %); best 9.537e-07s; worst 2.193e-05s; std dev. 9.835e-06 (taking best). -Reference operation time is 9.53674e-07 s (100.7 Mflops) with 18 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 9.500x: 9.06e-06s -> 9.537e-07s, so taking this instance. +3 iterations (10 th.) took 0.048s; avg 0.016s ( +/- 0.10/ 0.10 %); best 0.01598s; worst 0.01601s; std dev. 1.317e-05 (taking best). +Reference operation time is 0.0159819 s (0.006007 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01598 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 1.693e-05s, ~6.914e-06s of computing time (of which 2.861e-06s sorting, 3.099e-06s analysis) +3 iterations (10 th.) took 1.407e-05s; avg 4.689e-06s ( +/- 79.66/159.32 %); best 9.537e-07s; worst 1.216e-05s; std dev. 5.282e-06 (taking best). +Reference operation time is 9.53674e-07 s (100.7 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 16758.250x: 0.01598s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 6.986e-05s (of which 1.192e-05s partitioning, 0s I/O); computing times: 3.099e-06s in par. loops, 0s sorting, 9.537e-07s analyzing) -Total merge + benchmarking process took 6.986e-05s, equivalent to 73.2/7.7 new/old ops (8.821e-05s for 2 clones -- as 92.5/9.7 ops, or 46.2/4.9 ops per clone), SPEEDUP of 9.500x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 9.500x (9.06e-06s -> 9.537e-07s), will amortize in 8.6 ops by saving 8.106e-06s per op. -In 1 tuning rounds (tot. 0.00022s, 8.8e-05s for constructor, 2 clones) obtained a SPEEDUP of 850.0% (9.5x) (from 10.6 to 100.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04801s (of which 2.193e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 2.861e-06s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.04801s, equivalent to 50342.2/3.0 new/old ops (0.09568s for 2 clones -- as 100332.0/6.0 ops, or 50166.0/3.0 ops per clone), SPEEDUP of 16758.250x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16758.250x (0.01598s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01598s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1675725.0% (1.676e+04x) (from 0.006007 to 100.7 Mflops). #pr: updating sample at index 5 (4^th of 16), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.000234842 s (9.060e-06 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.143805 s (1.598e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000447035 s and estimated a speedup of 1.000000 x (5.095e-08 s -> 5.095e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.799938 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000000 0.000025 0.000014 0.000039 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000039 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000025 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000014 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000039 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000001 0.039675 0.032001 0.071676 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071677 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.039675 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.032001 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071676 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 5.60284e-05 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 0 2.5034e-05 0 1.40667e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.103691 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 9.53674e-07 0.039675 0 0.032001 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c7db50]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.107s): (3 x 3)[0x2394a80]{C} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 5.007e-05s; avg 1.669e-05s ( +/- 52.86/ 98.57 %); best 7.868e-06s; worst 3.314e-05s; std dev. 1.164e-05 (taking best). -Reference operation time is 7.86781e-06 s (48.81 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 7.868e-06 Mflops: 48.806) -Merge (3 -> 1 leaves) took w.c.t. of 7.868e-06s, ~2.146e-06s of computing time (of which 9.537e-07s sorting, 0s analysis) -3 iterations (18 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 96.23/129.41 %); best 5.095e-08s; worst 3.099e-06s; std dev. 1.296e-06 (taking best). -Reference operation time is 5.09501e-08 s (7537 Mflops) with 18 threads. -After merge step 1: tpop: 5.095e-08 s ~Mflops: 7536.793 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 154.422x: 7.868e-06s -> 5.095e-08s, so taking this instance. +3 iterations (10 th.) took 0.052s; avg 0.01733s ( +/- 7.80/ 15.48 %); best 0.01598s; worst 0.02002s; std dev. 0.001897 (taking best). +Reference operation time is 0.0159819 s (0.02403 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type C, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01598 Mflops: 0.024) +Merge (3 -> 1 leaves) took w.c.t. of 1.788e-05s, ~8.106e-06s of computing time (of which 1.907e-06s sorting, 2.861e-06s analysis) +3 iterations (10 th.) took 8.821e-06s; avg 2.94e-06s ( +/- 35.14/ 70.27 %); best 1.907e-06s; worst 5.007e-06s; std dev. 1.461e-06 (taking best). +Reference operation time is 1.90735e-06 s (201.3 Mflops) with 10 threads. +After merge step 1: tpop: 1.907e-06 s ~Mflops: 201.327 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 8379.125x: 0.01598s -> 1.907e-06s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 5.007e-05s (of which 1.097e-05s partitioning, 0s I/O); computing times: 2.146e-06s in par. loops, 9.537e-07s sorting, 0s analyzing) -Total merge + benchmarking process took 5.007e-05s, equivalent to 982.7/6.4 new/old ops (4.196e-05s for 2 clones -- as 823.6/5.3 ops, or 411.8/2.7 ops per clone), SPEEDUP of 154.422x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 154.422x (7.868e-06s -> 5.095e-08s), will amortize in 6.4 ops by saving 7.817e-06s per op. -In 1 tuning rounds (tot. 0.00015s, 4.2e-05s for constructor, 2 clones) obtained a SPEEDUP of 15342.2% (154.4x) (from 48.81 to 7537 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.056s (of which 2.384e-05s partitioning, 0s I/O); computing times: 8.106e-06s in par. loops, 1.907e-06s sorting, 2.861e-06s analyzing) +Total merge + benchmarking process took 0.056s, equivalent to 29360.1/3.5 new/old ops (0.1039s for 2 clones -- as 54474.6/6.5 ops, or 27237.3/3.3 ops per clone), SPEEDUP of 8379.125x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 8379.125x (0.01598s -> 1.907e-06s), will amortize in 3.5 ops by saving 0.01598s per op. +In 1 tuning rounds (tot. 0.16s, 0.1s for constructor, 2 clones) obtained a SPEEDUP of 837812.5% (8379x) (from 0.02403 to 201.3 Mflops). #pr: updating sample at index 13 (5^th of 16), 0^th touch for (0,0,0,0,1,2,0). -First run of RSB Autotuner took 0.000163078 s (7.868e-06 s -> 5.095e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.156045 s (1.598e-02 s -> 1.907e-06 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000444889 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.791927 s and estimated a speedup of 1.000000 x (1.907e-06 s -> 1.907e-06 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.73205078 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000000 0.000070 0.000014 0.000084 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000084 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000070 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000014 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000084 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000000 0.039435 0.032013 0.071448 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071448 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.039435 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.032013 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071448 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 156 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 156 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 0.000102997 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 0 6.98566e-05 0 1.40667e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.107443 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 0 0.0394351 0 0.0320129 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 6.998s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.004s/0.000s . +# so far, program took 12.141s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 5.472s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.1319s (system CPU time used) -ru_utime : 8.508s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.03601s (system CPU time used) +ru_utime : 47.07s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was C). -# Cache block size total 4194304 bytes, per-thread 233016 bytes -# so far, program took 6.998s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.004s/0.000s . +# Cache block size total 524288 bytes, per-thread 52428 bytes +# so far, program took 12.141s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 5.472s/0.000s . # Reusing type converted (C->Z) arrays from last iteration instead of reloading matrix file. # multi-nrhs benchmarking (1,4) -- now using nrhs 1. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c60220]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.112s): (3 x 3)[0x2394a80]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 5.102e-05s; avg 1.701e-05s ( +/- 46.73/ 93.46 %); best 9.06e-06s; worst 3.29e-05s; std dev. 1.124e-05 (taking best). -Reference operation time is 9.05991e-06 s (10.6 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 9.06e-06 Mflops: 10.596) -Merge (3 -> 1 leaves) took w.c.t. of 8.106e-06s, ~1.907e-06s of computing time (of which 0s sorting, 1.192e-06s analysis) -3 iterations (18 th.) took 1.812e-05s; avg 6.04e-06s ( +/- 99.16/184.21 %); best 5.095e-08s; worst 1.717e-05s; std dev. 7.877e-06 (taking best). -Reference operation time is 5.09501e-08 s (1884 Mflops) with 18 threads. -After merge step 1: tpop: 5.095e-08 s ~Mflops: 1884.198 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 177.819x: 9.06e-06s -> 5.095e-08s, so taking this instance. +3 iterations (10 th.) took 0.048s; avg 0.016s ( +/- 0.04/ 0.02 %); best 0.01599s; worst 0.016s; std dev. 4.608e-06 (taking best). +Reference operation time is 0.0159931 s (0.006003 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.006) +Merge (3 -> 1 leaves) took w.c.t. of 1.907e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) +3 iterations (10 th.) took 2.503e-05s; avg 8.345e-06s ( +/- 88.57/162.86 %); best 9.537e-07s; worst 2.193e-05s; std dev. 9.622e-06 (taking best). +Reference operation time is 9.53674e-07 s (100.7 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 100.663 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 16770.000x: 0.01599s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 6.413e-05s (of which 1.097e-05s partitioning, 0s I/O); computing times: 1.907e-06s in par. loops, 0s sorting, 1.192e-06s analyzing) -Total merge + benchmarking process took 6.413e-05s, equivalent to 1258.8/7.1 new/old ops (4.101e-05s for 2 clones -- as 804.9/4.5 ops, or 402.4/2.3 ops per clone), SPEEDUP of 177.819x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 177.819x (9.06e-06s -> 5.095e-08s), will amortize in 7.1 ops by saving 9.009e-06s per op. -In 1 tuning rounds (tot. 0.00016s, 4.1e-05s for constructor, 2 clones) obtained a SPEEDUP of 17681.9% (177.8x) (from 10.6 to 1884 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04799s (of which 2.408e-05s partitioning, 0s I/O); computing times: 6.914e-06s in par. loops, 2.146e-06s sorting, 3.099e-06s analyzing) +Total merge + benchmarking process took 0.04799s, equivalent to 50318.0/3.0 new/old ops (0.09586s for 2 clones -- as 100521.5/6.0 ops, or 50260.8/3.0 ops per clone), SPEEDUP of 16770.000x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16770.000x (0.01599s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1676900.0% (1.677e+04x) (from 0.006003 to 100.7 Mflops). #pr: updating sample at index 7 (6^th of 16), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.000175953 s (9.060e-06 s -> 5.095e-08 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.144009 s (1.599e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.000499964 s and estimated a speedup of 1.000000 x (5.095e-08 s -> 5.095e-08 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.791981 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000000 0.000026 0.000014 0.000040 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000040 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000026 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000014 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000000 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 -nan -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000040 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000000 0.047532 0.031993 0.079525 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.079525 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.047532 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.031993 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000000 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 -nan +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.079525 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 -nan 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 5.88894e-05 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 0 2.59876e-05 0 1.40667e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.111545 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 0 0.0475318 0 0.0319929 # symmetric matrix --- skipping transposed benchmarking # multi-nrhs benchmarking (1,4) -- now using nrhs 4. # Using alpha=1 beta=1 order=rows for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # multi-transpose benchmarking -- now using transA = N. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS -# Using 18 threads -# Constructed matrix (took 0.000s): (3 x 3)[0x57c60220]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' +# Using 10 threads +# Constructed matrix (took 0.103s): (3 x 3)[0x2394a80]{Z} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2442186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'S' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:3 -3 iterations (18 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 16.56/ 33.11 %); best 1.001e-05s; worst 1.597e-05s; std dev. 2.81e-06 (taking best). -Reference operation time is 1.00136e-05 s (38.35 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 1.001e-05 Mflops: 38.348) -Merge (3 -> 1 leaves) took w.c.t. of 8.106e-06s, ~1.907e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 42.86/ 85.71 %); best 9.537e-07s; worst 3.099e-06s; std dev. 1.012e-06 (taking best). -Reference operation time is 9.53674e-07 s (402.7 Mflops) with 18 threads. -After merge step 1: tpop: 9.537e-07 s ~Mflops: 402.653 nsubm:1 otn:18 -Applying merge (3 -> 1 leaves, 18 th.) yielded SPEEDUP of 10.500x: 1.001e-05s -> 9.537e-07s, so taking this instance. +3 iterations (10 th.) took 0.048s; avg 0.016s ( +/- 0.02/ 0.05 %); best 0.016s; worst 0.01601s; std dev. 5.17e-06 (taking best). +Reference operation time is 0.015996 s (0.02401 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=4, order=rows) (max 6 steps, inclusive 3 grace steps) on: 3 x 3, type Z, 6 nnz, 2 nnz/r, 4 subms, 3 lsubms, 4.0000 bpnz (tpop: 0.016 Mflops: 0.024) +Merge (3 -> 1 leaves) took w.c.t. of 2.813e-05s, ~1.001e-05s of computing time (of which 2.861e-06s sorting, 5.96e-06s analysis) +3 iterations (10 th.) took 9.06e-06s; avg 3.02e-06s ( +/- 68.42/105.26 %); best 9.537e-07s; worst 6.199e-06s; std dev. 2.281e-06 (taking best). +Reference operation time is 9.53674e-07 s (402.7 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 402.653 nsubm:1 otn:10 +Applying merge (3 -> 1 leaves, 10 th.) yielded SPEEDUP of 16773.000x: 0.016s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 5.102e-05s (of which 1.287e-05s partitioning, 0s I/O); computing times: 1.907e-06s in par. loops, 0s sorting, 9.537e-07s analyzing) -Total merge + benchmarking process took 5.102e-05s, equivalent to 53.5/5.1 new/old ops (4.029e-05s for 2 clones -- as 42.2/4.0 ops, or 21.1/2.0 ops per clone), SPEEDUP of 10.500x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 10.500x (1.001e-05s -> 9.537e-07s), will amortize in 5.6 ops by saving 9.06e-06s per op. -In 1 tuning rounds (tot. 0.00014s, 4e-05s for constructor, 2 clones) obtained a SPEEDUP of 950.0% (10.5x) (from 38.35 to 402.7 Mflops). +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.04797s (of which 3.409e-05s partitioning, 0s I/O); computing times: 1.001e-05s in par. loops, 2.861e-06s sorting, 5.96e-06s analyzing) +Total merge + benchmarking process took 0.04797s, equivalent to 50304.2/3.0 new/old ops (0.09584s for 2 clones -- as 100496.8/6.0 ops, or 50248.4/3.0 ops per clone), SPEEDUP of 16773.000x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16773.000x (0.016s -> 9.537e-07s), will amortize in 3.0 ops by saving 0.016s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1677200.0% (1.677e+04x) (from 0.02401 to 402.7 Mflops). #pr: updating sample at index 15 (7^th of 16), 0^th touch for (0,0,0,0,1,3,0). -First run of RSB Autotuner took 0.00015521 s (1.001e-05 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.144005 s (1.600e-02 s -> 9.537e-07 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. -Second run of RSB Autotuner took 0.00045085 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) +Second run of RSB Autotuner took 0.807912 s and estimated a speedup of 1.000000 x (9.537e-07 s -> 9.537e-07 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:3 0 #norm:1.7320508075688772 0 #used index storage compared to COO:28 vs 48 bytes (58.33%) ; compared to CSR:28 vs 40 bytes (77.78%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:A.mtx S N 18 3 3 6 0.000001 0.000026 0.000018 0.000044 -%:UNSORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000045 -%:RSB_SUBDIVISION_TIME:A.mtx S N 18 3 3 6 0.000026 -%:RSB_SHUFFLE_TIME:A.mtx S N 18 3 3 6 0.000018 -%:ROW_MAJOR_SORT_TIME:A.mtx S N 18 3 3 6 0.000001 -%:ROW_MAJOR_SORT_SCALING:A.mtx S N 18 3 3 6 1.000 -%:SORTEDCOO2RSB_TIME:A.mtx S N 18 3 3 6 0.000044 -%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 18 3 3 6 0.000 -%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:SORTEDCOO2RSB_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SUBDIVISION_SCALING:A.mtx S N 18 3 3 6 1.00 -%:RSB_SHUFFLE_SCALING:A.mtx S N 18 3 3 6 1.00 -%:CONSTRUCTOR_SCALING:A.mtx S N 18 3 3 6 1.00 1.00 1.00 1.00 +%:CONSTRUCTOR_TIMES:A.mtx S N 10 3 3 6 0.000001 0.047394 0.024016 0.071410 +%:UNSORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071411 +%:RSB_SUBDIVISION_TIME:A.mtx S N 10 3 3 6 0.047394 +%:RSB_SHUFFLE_TIME:A.mtx S N 10 3 3 6 0.024016 +%:ROW_MAJOR_SORT_TIME:A.mtx S N 10 3 3 6 0.000001 +%:ROW_MAJOR_SORT_SCALING:A.mtx S N 10 3 3 6 1.000 +%:SORTEDCOO2RSB_TIME:A.mtx S N 10 3 3 6 0.071410 +%:ROW_MAJOR_SORT_TO_MOP:A.mtx S N 10 3 3 6 0.000 +%:UNSORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:SORTEDCOO2RSB_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SUBDIVISION_SCALING:A.mtx S N 10 3 3 6 1.00 +%:RSB_SHUFFLE_SCALING:A.mtx S N 10 3 3 6 1.00 +%:CONSTRUCTOR_SCALING:A.mtx S N 10 3 3 6 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo -%:SM_COUNTS:A.mtx S N 18 3 3 6 1 1 0 0 0 -%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 18 3 3 6 28 48 36 -%:SM_IDXOCCUPATION:A.mtx S N 18 3 3 6 28 -%:SM_MEMTRAFFIC:A.mtx S N 18 3 3 6 276 -%:SM_MINMAXAVGNNZ:A.mtx S N 18 3 3 6 6 6 6 +%:SM_COUNTS:A.mtx S N 10 3 3 6 1 1 0 0 0 +%:SM_IDXOCCUPATIONRSBVSCOOANDCSR:A.mtx S N 10 3 3 6 28 48 36 +%:SM_IDXOCCUPATION:A.mtx S N 10 3 3 6 28 +%:SM_MEMTRAFFIC:A.mtx S N 10 3 3 6 276 +%:SM_MINMAXAVGNNZ:A.mtx S N 10 3 3 6 6 6 6 # -%operation:matrix CONSTRUCTOR[18] SPMV[18] SPMV[18] -%operation:A.mtx 6.19888e-05 1e+09 1e+09 -%constructor:matrix SORT[18] SCAN[18] SHUFFLE[18] INSERT[18] -%constructor:A.mtx 1.19209e-06 2.59876e-05 0 1.78814e-05 +%operation:matrix CONSTRUCTOR[10] SPMV[10] SPMV[10] +%operation:A.mtx 0.103418 1e+09 1e+09 +%constructor:matrix SORT[10] SCAN[10] SHUFFLE[10] INSERT[10] +%constructor:A.mtx 1.19209e-06 0.047394 0 0.0240159 # symmetric matrix --- skipping transposed benchmarking -# so far, program took 7.031s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.005s/0.000s . +# so far, program took 14.409s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 7.360s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.1519s (system CPU time used) -ru_utime : 9.065s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.03601s (system CPU time used) +ru_utime : 61.48s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== Limiting to type D: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 18 18 0 4.0000 4.6667 3 1 471.05 1.001e-05 0.000e+00 5.095e-08 0.000e+00 2.551e-04 2.90e+00 5.17e+00 1 2.40e-05 -pr: 9:R_R A 3 3 6 4 D S N 18 18 0 4.0000 4.6667 3 1 1884.20 8.821e-06 0.000e+00 5.095e-08 0.000e+00 1.619e-04 7.14e+00 2.79e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 10 10 0 4.0000 4.6667 3 1 25.17 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 1.55e-01 5.17e+00 1 2.40e-05 +pr: 9:R_R A 3 3 6 4 D S N 10 10 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.400e-01 3.82e-01 2.79e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 18383.9 % faster, avg. sp. ratio 184.839x, max sp. ratio 196.537x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 4092.2/3177.4/5007.0/8184.4 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 21.9/ 18.4/ 25.5/ 43.8 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 22.0, min. 18.5, max. 25.6 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1677050.0 % faster, avg. sp. ratio 16771.500x, max sp. ratio 16774.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 148901.5/146794.5/151008.5/297803.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 8.9/ 8.8/ 9.0/ 17.8 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 8.9, min. 8.8, max. 9.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.847/ 2.434/ 5.260,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.049/ 2.905/ 7.144,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.206/ 0.130/ 0.281,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.537/ 0.155/ 0.382,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 3.979/ 2.792/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.178e+03, min 4.710e+02, max 1.884e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 6.640e+00, min 2.397e+00, max 1.088e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.095e-08s, min 5.095e-08s, max 5.095e-08s, tot 1.019e-07s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.418e-06s, min 8.821e-06s, max 1.001e-05s, tot 1.884e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.372e+00 9.449e+00 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.28 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.28 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 6.291e+01, min 2.517e+01, max 1.007e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.751e-03, min 1.501e-03, max 6.001e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.600e-02s, tot 3.199e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.920e+01 6.311e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 #pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type S: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 3:R_R A 3 3 6 1 S S N 18 18 0 4.0000 4.6667 3 1 25.17 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.460e-04 9.23e-02 3.17e+00 1 2.40e-05 -pr: 11:R_R A 3 3 6 4 S S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 2.241e-04 3.85e+00 1.54e+00 1 9.60e-05 +pr: 3:R_R A 3 3 6 1 S S N 10 10 0 4.0000 4.6667 3 1 25.17 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.442e-01 9.23e-02 3.17e+00 1 2.40e-05 +pr: 11:R_R A 3 3 6 4 S S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 9266.0 % faster, avg. sp. ratio 93.660x, max sp. ratio 177.819x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2328.3/258.0/4398.7/4656.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 25.9/ 24.7/ 27.2/ 51.9 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 27.6, min. 24.9, max. 30.4 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1677150.0 % faster, avg. sp. ratio 16772.500x, max sp. ratio 16775.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 151240.4/151197.5/151283.2/302480.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 18.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 8/ 8/ 8) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 24/ 24/ 24) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.492/ 0.080/ 2.905,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 3.939/ 0.092/ 3.847,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.117/ 0.080/ 0.155,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.298/ 0.092/ 0.206,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.354/ 1.542/ 3.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 9.547e+02, min 2.517e+01, max 1.884e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 6.623e+00, min 2.649e+00, max 1.060e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.023e-07s, min 5.095e-08s, max 9.537e-07s, tot 1.005e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.060e-06s, min 9.060e-06s, max 9.060e-06s, tot 1.812e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 7.917e+00 2.886e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 7.487e+01 x, min 7.487e+01 x, max 7.487e+01 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 6.291e+01, min 2.517e+01, max 1.007e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.751e-03, min 1.500e-03, max 6.003e-03 (2 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.600e-02s, min 1.599e-02s, max 1.600e-02s, tot 3.199e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.288e+01 1.030e+02 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type C: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 5:R_R A 3 3 6 1 C S N 18 18 0 4.0000 4.6667 3 1 100.66 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.348e-04 1.55e-01 1.29e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 18 18 0 4.0000 4.6667 3 1 7536.79 7.868e-06 0.000e+00 5.095e-08 0.000e+00 1.631e-04 7.14e+00 6.98e-01 1 3.84e-04 +pr: 5:R_R A 3 3 6 1 C S N 10 10 0 4.0000 4.6667 3 1 100.66 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.438e-01 1.55e-01 1.29e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 10 10 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.560e-01 1.91e-01 6.98e-01 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 8096.1 % faster, avg. sp. ratio 81.961x, max sp. ratio 154.422x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1723.5/246.2/3200.7/3447.0 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 23.3/ 20.7/ 25.9/ 46.6 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 24.9, min. 20.9, max. 29.0 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1256768.8 % faster, avg. sp. ratio 12568.688x, max sp. ratio 16758.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 116301.5/81812.5/150790.5/232603.0 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.4/ 9.0/ 9.8/ 18.8 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.4, min. 9.0, max. 9.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 16/ 16/ 16) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 48/ 48/ 48) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.695/ 0.130/ 5.260,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 7.299/ 0.155/ 7.144,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.135/ 0.130/ 0.141,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.346/ 0.155/ 0.191,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 0.995/ 0.698/ 1.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 3.819e+03, min 1.007e+02, max 7.537e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 2.970e+01, min 1.060e+01, max 4.881e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.023e-07s, min 5.095e-08s, max 9.537e-07s, tot 1.005e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 8.464e-06s, min 7.868e-06s, max 9.060e-06s, tot 1.693e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.372e+00 1.769e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 7.487e+01 x, min 7.487e+01 x, max 7.487e+01 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.30 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.30 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.510e+02, min 1.007e+02, max 2.013e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.502e-02, min 6.007e-03, max 2.403e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 1.431e-06s, min 9.537e-07s, max 1.907e-06s, tot 2.861e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.598e-02s, min 1.598e-02s, max 1.598e-02s, tot 3.196e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.840e+01 6.311e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.000e+00 x, min 2.000e+00 x, max 2.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to type Z: #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 7:R_R A 3 3 6 1 Z S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 1.760e-04 5.26e+00 2.29e+00 1 9.60e-05 -pr: 15:R_R A 3 3 6 4 Z S N 18 18 0 4.0000 4.6667 3 1 402.65 1.001e-05 0.000e+00 9.537e-07 0.000e+00 1.552e-04 7.34e-01 1.32e+00 1 3.84e-04 +pr: 7:R_R A 3 3 6 1 Z S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 15:R_R A 3 3 6 4 Z S N 10 10 0 4.0000 4.6667 3 1 402.65 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 7.34e-01 1.32e+00 1 3.84e-04 #pr: 2 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 2 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 9316.0 % faster, avg. sp. ratio 94.160x, max sp. ratio 177.819x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 1808.1/162.8/3453.4/3616.2 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 17.5/ 15.5/ 19.4/ 34.9 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 18.3, min. 17.1, max. 19.5 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1677050.0 % faster, avg. sp. ratio 16771.500x, max sp. ratio 16773.000x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 151002.2/151000.2/151004.2/302004.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 18.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 32/ 32/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 96/ 96/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.425/ 0.533/ 4.318,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 5.994/ 0.734/ 5.260,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.382/ 0.231/ 0.533,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.015/ 0.281/ 0.734,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.807/ 1.323/ 2.292) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 2 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /2 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (2 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.143e+03, min 4.027e+02, max 1.884e+03 (2 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 2.447e+01, min 1.060e+01, max 3.835e+01 (2 samples) -#pr: best tun. rsb operation time was: on avg. 5.023e-07s, min 5.095e-08s, max 9.537e-07s, tot 1.005e-06s (2 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.537e-06s, min 9.060e-06s, max 1.001e-05s, tot 1.907e-05s (2 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.326e+00 4.317e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 2.137e-01 x, min 2.137e-01 x, max 2.137e-01 x (1 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.29 s (2 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.517e+02, min 1.007e+02, max 4.027e+02 (2 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.500e-02, min 6.003e-03, max 2.401e-02 (2 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 1.907e-06s (2 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.599e-02s, max 1.600e-02s, tot 3.199e-02s (2 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.540e+01 3.557e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 4.000e+00 x, min 4.000e+00 x, max 4.000e+00 x (1 samples, the non-min-nrhs ones) #pr: ======== Limiting to nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 18 18 0 4.0000 4.6667 3 1 471.05 1.001e-05 0.000e+00 5.095e-08 0.000e+00 2.551e-04 2.90e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 18 18 0 4.0000 4.6667 3 1 25.17 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.460e-04 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 18 18 0 4.0000 4.6667 3 1 100.66 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.348e-04 1.55e-01 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 1.760e-04 5.26e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 10 10 0 4.0000 4.6667 3 1 25.17 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 1.55e-01 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 10 10 0 4.0000 4.6667 3 1 25.17 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.442e-01 9.23e-02 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 10 10 0 4.0000 4.6667 3 1 100.66 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.438e-01 1.55e-01 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 9733.9 % faster, avg. sp. ratio 98.339x, max sp. ratio 196.537x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2241.2/246.2/5007.0/8964.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 24.5/ 19.4/ 27.2/ 98.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 26.1, min. 19.5, max. 30.4 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1676706.2 % faster, avg. sp. ratio 16768.062x, max sp. ratio 16775.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 151000.2/150790.5/151197.5/604000.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 36.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.740/ 0.080/ 4.318,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.412/ 0.092/ 5.260,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.143/ 0.080/ 0.231,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.684/ 0.092/ 0.281,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 6.203e+02, min 2.517e+01, max 1.884e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 6.560e+00, min 2.397e+00, max 1.060e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 5.023e-07s, min 5.095e-08s, max 9.537e-07s, tot 2.009e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.298e-06s, min 9.060e-06s, max 1.001e-05s, tot 3.719e-05s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.326e+00 2.886e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 6.291e+01, min 2.517e+01, max 1.007e+02 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.753e-03, min 1.500e-03, max 6.007e-03 (4 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 3.815e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.598e-02s, max 1.600e-02s, tot 6.397e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.557e+01 1.030e+02 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 #pr: ======== Limiting to nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 18 18 0 4.0000 4.6667 3 1 1884.20 8.821e-06 0.000e+00 5.095e-08 0.000e+00 1.619e-04 7.14e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 2.241e-04 3.85e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 18 18 0 4.0000 4.6667 3 1 7536.79 7.868e-06 0.000e+00 5.095e-08 0.000e+00 1.631e-04 7.14e+00 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 18 18 0 4.0000 4.6667 3 1 402.65 1.001e-05 0.000e+00 9.537e-07 0.000e+00 1.552e-04 7.34e-01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 10 10 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.400e-01 3.82e-01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 10 10 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.560e-01 1.91e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 10 10 0 4.0000 4.6667 3 1 402.65 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 7.34e-01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 12797.0 % faster, avg. sp. ratio 128.970x, max sp. ratio 177.819x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2734.9/162.8/4398.7/10939.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 19.8/ 15.5/ 24.7/ 79.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 20.3, min. 17.1, max. 24.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1467303.1 % faster, avg. sp. ratio 14674.031x, max sp. ratio 16774.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 132722.6/81812.5/151283.2/530890.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 8.8/ 9.8/ 36.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 8.8, max. 9.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.489/ 0.533/ 5.260,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 18.869/ 0.734/ 7.144,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.277/ 0.141/ 0.533,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.512/ 0.191/ 0.734,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.927e+03, min 4.027e+02, max 7.537e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 2.716e+01, min 1.060e+01, max 4.881e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.766e-07s, min 5.095e-08s, max 9.537e-07s, tot 1.107e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 8.941e-06s, min 7.868e-06s, max 1.001e-05s, tot 3.576e-05s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.372e+00 4.317e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.849e+01 x, min 2.137e-01 x, max 7.487e+01 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.58 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.58 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.013e+02, min 1.007e+02, max 4.027e+02 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.501e-02, min 6.001e-03, max 2.403e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 1.192e-06s, min 9.537e-07s, max 1.907e-06s, tot 4.768e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.598e-02s, max 1.600e-02s, tot 6.397e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.540e+01 5.840e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=N: #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 18 18 0 4.0000 4.6667 3 1 471.05 1.001e-05 0.000e+00 5.095e-08 0.000e+00 2.551e-04 2.90e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 18 18 0 4.0000 4.6667 3 1 25.17 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.460e-04 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 18 18 0 4.0000 4.6667 3 1 100.66 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.348e-04 1.55e-01 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 1.760e-04 5.26e+00 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 18 18 0 4.0000 4.6667 3 1 1884.20 8.821e-06 0.000e+00 5.095e-08 0.000e+00 1.619e-04 7.14e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 2.241e-04 3.85e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 18 18 0 4.0000 4.6667 3 1 7536.79 7.868e-06 0.000e+00 5.095e-08 0.000e+00 1.631e-04 7.14e+00 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 18 18 0 4.0000 4.6667 3 1 402.65 1.001e-05 0.000e+00 9.537e-07 0.000e+00 1.552e-04 7.34e-01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 10 10 0 4.0000 4.6667 3 1 25.17 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 1.55e-01 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 10 10 0 4.0000 4.6667 3 1 25.17 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.442e-01 9.23e-02 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 10 10 0 4.0000 4.6667 3 1 100.66 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.438e-01 1.55e-01 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 10 10 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.400e-01 3.82e-01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 10 10 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.560e-01 1.91e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 10 10 0 4.0000 4.6667 3 1 402.65 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 7.34e-01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 11265.5 % faster, avg. sp. ratio 113.655x, max sp. ratio 196.537x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2488.0/162.8/5007.0/19904.2 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 22.2/ 15.5/ 27.2/177.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 23.2, min. 17.1, max. 30.4 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1572004.7 % faster, avg. sp. ratio 15721.047x, max sp. ratio 16775.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 141861.4/81812.5/151283.2/1134891.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 8.8/ 9.8/ 72.6 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 8.8, max. 9.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.615/ 0.080/ 5.260,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 27.282/ 0.092/ 7.144,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.210/ 0.080/ 0.533,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 2.196/ 0.092/ 0.734,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.774e+03, min 2.517e+01, max 7.537e+03 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.686e+01, min 2.397e+00, max 4.881e+01 (8 samples) -#pr: best tun. rsb operation time was: on avg. 3.895e-07s, min 5.095e-08s, max 9.537e-07s, tot 3.116e-06s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.120e-06s, min 7.868e-06s, max 1.001e-05s, tot 7.296e-05s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.372e+00 2.886e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.849e+01 x, min 2.137e-01 x, max 7.487e+01 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 1.16 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 1.16 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.321e+02, min 2.517e+01, max 4.027e+02 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 9.381e-03, min 1.500e-03, max 2.403e-02 (8 samples) +#pr: best tun. rsb operation time was: on avg. 1.073e-06s, min 9.537e-07s, max 1.907e-06s, tot 8.583e-06s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.598e-02s, max 1.600e-02s, tot 1.279e-01s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.540e+01 1.030e+02 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to both transA=N and nrhs=1: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 18 18 0 4.0000 4.6667 3 1 471.05 1.001e-05 0.000e+00 5.095e-08 0.000e+00 2.551e-04 2.90e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 18 18 0 4.0000 4.6667 3 1 25.17 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.460e-04 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 18 18 0 4.0000 4.6667 3 1 100.66 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.348e-04 1.55e-01 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 1.760e-04 5.26e+00 2.29e+00 1 9.60e-05 +pr: 1:R_R A 3 3 6 1 D S N 10 10 0 4.0000 4.6667 3 1 25.17 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 1.55e-01 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 10 10 0 4.0000 4.6667 3 1 25.17 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.442e-01 9.23e-02 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 10 10 0 4.0000 4.6667 3 1 100.66 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.438e-01 1.55e-01 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 9733.9 % faster, avg. sp. ratio 98.339x, max sp. ratio 196.537x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2241.2/246.2/5007.0/8964.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 24.5/ 19.4/ 27.2/ 98.0 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 26.1, min. 19.5, max. 30.4 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1676706.2 % faster, avg. sp. ratio 16768.062x, max sp. ratio 16775.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 151000.2/150790.5/151197.5/604000.8 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.0/ 9.0/ 9.0/ 36.0 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.0, min. 9.0, max. 9.0 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 1.740/ 0.080/ 4.318,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.412/ 0.092/ 5.260,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.143/ 0.080/ 0.231,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 0.684/ 0.092/ 0.281,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.979/ 1.292/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 6.203e+02, min 2.517e+01, max 1.884e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 6.560e+00, min 2.397e+00, max 1.060e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 5.023e-07s, min 5.095e-08s, max 9.537e-07s, tot 2.009e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.298e-06s, min 9.060e-06s, max 1.001e-05s, tot 3.719e-05s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 5.326e+00 2.886e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.14 s, min 0.14 s, max 0.14 s, tot 0.58 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 6.291e+01, min 2.517e+01, max 1.007e+02 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.753e-03, min 1.500e-03, max 6.007e-03 (4 samples) +#pr: best tun. rsb operation time was: on avg. 9.537e-07s, min 9.537e-07s, max 9.537e-07s, tot 3.815e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.598e-02s, max 1.600e-02s, tot 6.397e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 3.557e+01 1.030e+02 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 #pr: ======== Limiting to both transA=N and nrhs=4: #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 9:R_R A 3 3 6 4 D S N 18 18 0 4.0000 4.6667 3 1 1884.20 8.821e-06 0.000e+00 5.095e-08 0.000e+00 1.619e-04 7.14e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 2.241e-04 3.85e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 18 18 0 4.0000 4.6667 3 1 7536.79 7.868e-06 0.000e+00 5.095e-08 0.000e+00 1.631e-04 7.14e+00 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 18 18 0 4.0000 4.6667 3 1 402.65 1.001e-05 0.000e+00 9.537e-07 0.000e+00 1.552e-04 7.34e-01 1.32e+00 1 3.84e-04 +pr: 9:R_R A 3 3 6 4 D S N 10 10 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.400e-01 3.82e-01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 10 10 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.560e-01 1.91e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 10 10 0 4.0000 4.6667 3 1 402.65 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 7.34e-01 1.32e+00 1 3.84e-04 #pr: 4 samples (out of 8) matched the dump limiting criteria. #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 12797.0 % faster, avg. sp. ratio 128.970x, max sp. ratio 177.819x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2734.9/162.8/4398.7/10939.5 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 19.8/ 15.5/ 24.7/ 79.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 20.3, min. 17.1, max. 24.9 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1467303.1 % faster, avg. sp. ratio 14674.031x, max sp. ratio 16774.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 132722.6/81812.5/151283.2/530890.5 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 8.8/ 9.8/ 36.5 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 8.8, max. 9.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 3.489/ 0.533/ 5.260,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 18.869/ 0.734/ 7.144,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.277/ 0.141/ 0.533,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 1.512/ 0.191/ 0.734,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.589/ 0.698/ 2.792) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 2.927e+03, min 4.027e+02, max 7.537e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 2.716e+01, min 1.060e+01, max 4.881e+01 (4 samples) -#pr: best tun. rsb operation time was: on avg. 2.766e-07s, min 5.095e-08s, max 9.537e-07s, tot 1.107e-06s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 8.941e-06s, min 7.868e-06s, max 1.001e-05s, tot 3.576e-05s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.372e+00 4.317e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.849e+01 x, min 2.137e-01 x, max 7.487e+01 x (4 samples, the non-min-nrhs ones) +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.58 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 0.58 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 2.013e+02, min 1.007e+02, max 4.027e+02 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 1.501e-02, min 6.001e-03, max 2.403e-02 (4 samples) +#pr: best tun. rsb operation time was: on avg. 1.192e-06s, min 9.537e-07s, max 1.907e-06s, tot 4.768e-06s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.598e-02s, max 1.600e-02s, tot 6.397e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.540e+01 5.840e+01 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) #pr: ======== Limiting to transA=T: #pr: No sample (out of 8) matched the dump criteria -- skipping dump round. #pr: ======== Limiting to both transA=T and nrhs=1: @@ -4927,49 +4952,49 @@ #pr: ======== All results (not limiting) #pr: Dump from a base of 8 samples (of max 16) ordered by (1,1,1,1,2,4,2) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R A 3 3 6 1 D S N 18 18 0 4.0000 4.6667 3 1 471.05 1.001e-05 0.000e+00 5.095e-08 0.000e+00 2.551e-04 2.90e+00 5.17e+00 1 2.40e-05 -pr: 3:R_R A 3 3 6 1 S S N 18 18 0 4.0000 4.6667 3 1 25.17 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.460e-04 9.23e-02 3.17e+00 1 2.40e-05 -pr: 5:R_R A 3 3 6 1 C S N 18 18 0 4.0000 4.6667 3 1 100.66 9.060e-06 0.000e+00 9.537e-07 0.000e+00 2.348e-04 1.55e-01 1.29e+00 1 9.60e-05 -pr: 7:R_R A 3 3 6 1 Z S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 1.760e-04 5.26e+00 2.29e+00 1 9.60e-05 -pr: 9:R_R A 3 3 6 4 D S N 18 18 0 4.0000 4.6667 3 1 1884.20 8.821e-06 0.000e+00 5.095e-08 0.000e+00 1.619e-04 7.14e+00 2.79e+00 1 9.60e-05 -pr: 11:R_R A 3 3 6 4 S S N 18 18 0 4.0000 4.6667 3 1 1884.20 9.060e-06 0.000e+00 5.095e-08 0.000e+00 2.241e-04 3.85e+00 1.54e+00 1 9.60e-05 -pr: 13:R_R A 3 3 6 4 C S N 18 18 0 4.0000 4.6667 3 1 7536.79 7.868e-06 0.000e+00 5.095e-08 0.000e+00 1.631e-04 7.14e+00 6.98e-01 1 3.84e-04 -pr: 15:R_R A 3 3 6 4 Z S N 18 18 0 4.0000 4.6667 3 1 402.65 1.001e-05 0.000e+00 9.537e-07 0.000e+00 1.552e-04 7.34e-01 1.32e+00 1 3.84e-04 +pr: 1:R_R A 3 3 6 1 D S N 10 10 0 4.0000 4.6667 3 1 25.17 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 1.55e-01 5.17e+00 1 2.40e-05 +pr: 3:R_R A 3 3 6 1 S S N 10 10 0 4.0000 4.6667 3 1 25.17 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.442e-01 9.23e-02 3.17e+00 1 2.40e-05 +pr: 5:R_R A 3 3 6 1 C S N 10 10 0 4.0000 4.6667 3 1 100.66 1.598e-02 0.000e+00 9.537e-07 0.000e+00 1.438e-01 1.55e-01 1.29e+00 1 9.60e-05 +pr: 7:R_R A 3 3 6 1 Z S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 2.81e-01 2.29e+00 1 9.60e-05 +pr: 9:R_R A 3 3 6 4 D S N 10 10 0 4.0000 4.6667 3 1 100.66 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.400e-01 3.82e-01 2.79e+00 1 9.60e-05 +pr: 11:R_R A 3 3 6 4 S S N 10 10 0 4.0000 4.6667 3 1 100.66 1.599e-02 0.000e+00 9.537e-07 0.000e+00 1.443e-01 2.06e-01 1.54e+00 1 9.60e-05 +pr: 13:R_R A 3 3 6 4 C S N 10 10 0 4.0000 4.6667 3 1 201.33 1.598e-02 0.000e+00 1.907e-06 0.000e+00 1.560e-01 1.91e-01 6.98e-01 1 3.84e-04 +pr: 15:R_R A 3 3 6 4 Z S N 10 10 0 4.0000 4.6667 3 1 402.65 1.600e-02 0.000e+00 9.537e-07 0.000e+00 1.440e-01 7.34e-01 1.32e+00 1 3.84e-04 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 8 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 11265.5 % faster, avg. sp. ratio 113.655x, max sp. ratio 196.537x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 2488.0/162.8/5007.0/19904.2 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 22.2/ 15.5/ 27.2/177.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 23.2, min. 17.1, max. 30.4 ops) +#pr: (in succ. cases rsb autotuning gave avg. 1572004.7 % faster, avg. sp. ratio 15721.047x, max sp. ratio 16775.250x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 141861.4/81812.5/151283.2/1134891.2 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 9.1/ 8.8/ 9.8/ 72.6 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 9.1, min. 8.8, max. 9.8 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 2/ 2/ 2) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 6/ 6/ 6) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 18/ 8/ 32) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 54/ 24/ 96) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 4.000/ 4.000/ 4.000) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.615/ 0.080/ 5.260,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 27.282/ 0.092/ 7.144,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 0.210/ 0.080/ 0.533,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 2.196/ 0.092/ 0.734,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 2.284/ 0.698/ 5.167) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 4.667/ 4.667/ 4.667) #pr: (matrix has been subdivided more/less/same in resp. 0 / 8 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /8 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (8 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.00 s, min 0.00 s, max 0.00 s, tot 0.00 s (8 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.774e+03, min 2.517e+01, max 7.537e+03 (8 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 1.686e+01, min 2.397e+00, max 4.881e+01 (8 samples) -#pr: best tun. rsb operation time was: on avg. 3.895e-07s, min 5.095e-08s, max 9.537e-07s, tot 3.116e-06s (8 samples) -#pr: ref. unt. rsb operation time was: on avg. 9.120e-06s, min 7.868e-06s, max 1.001e-05s, tot 7.296e-05s (8 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.372e+00 2.886e+02 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.728e+00 -#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.849e+01 x, min 2.137e-01 x, max 7.487e+01 x (4 samples, the non-min-nrhs ones) -#pr: Record collection took 0.12 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 1.16 s (8 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.15 s, min 0.14 s, max 0.16 s, tot 1.16 s (8 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.321e+02, min 2.517e+01, max 4.027e+02 (8 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 9.381e-03, min 1.500e-03, max 2.403e-02 (8 samples) +#pr: best tun. rsb operation time was: on avg. 1.073e-06s, min 9.537e-07s, max 1.907e-06s, tot 8.583e-06s (8 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.599e-02s, min 1.598e-02s, max 1.600e-02s, tot 1.279e-01s (8 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 1.540e+01 1.030e+02 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 2.037e+00 +#pr: rsb nrhs-to-overall-min-rhs speed ratio was: on avg. 3.500e+00 x, min 2.000e+00 x, max 4.000e+00 x (4 samples, the non-min-nrhs ones) +#pr: Record collection took 7.98 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 90 environment variables in 3821 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 90 environment variables in 3875 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. -#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1740440658_gcc-12.2.rpr -# Removing the temporary record file rsbench_pr__1740440658_gcc-12.2.rpr.tmp. -# terminating run at 1740440665 (after 7.0s of w.c.t.) -srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/bash ./scripts/doc-tests.sh +#pr: ======== Saved a performance record of 16 samples to rsbench_pr__1706032584_gcc-12.2.rpr +# Removing the temporary record file rsbench_pr__1706032584_gcc-12.2.rpr.tmp. +# terminating run at 1706032598 (after 14.4s of w.c.t.) +srcdir="/build/reproducible-path/librsb-1.3.0.2+dfsg" /bin/sh ./scripts/doc-tests.sh + set -o pipefail + test x/build/reproducible-path/librsb-1.3.0.2+dfsg = x + cat /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/autotune.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/hello.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/io-spblas.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/power.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/snippets.c /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/transpose.c @@ -5023,7 +5048,7 @@ type char codes:D S C Z gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' if test /build/reproducible-path/librsb-1.3.0.2+dfsg != /build/reproducible-path/librsb-1.3.0.2+dfsg ; then cp /build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/vf.mtx /build/reproducible-path/librsb-1.3.0.2+dfsg/examples ; fi -( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games" /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) +( PATH="/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path" /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/bench.sh; ) + which rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbench + BRF=test.rpr @@ -5033,15 +5058,15 @@ Will invoke autotuning for ~10.000000 s x 1 rounds, specifying verbosity=0 and threads=0. (>0 means no structure tuning; 0 means only structure tuning, <0 means tuning of both with (negated) thread count suggestion). # Requested no transposition. # performance record file set to: test.rpr -# beginning run at 1740440665 +# beginning run at 1706032599 # /build/reproducible-path/librsb-1.3.0.2+dfsg/.libs/rsbench -oa -Ob --bench --lower 100 --as-symmetric --types : -n 1 --notranspose --compare-competitors --verbose --verbose --write-performance-record=test.rpr # compiled with: CC=gcc CFLAGS=-g -O2 -ffile-prefix-map=/build/reproducible-path/librsb-1.3.0.2+dfsg=. -fstack-protector-strong -Wformat -Werror=format-security -O3 -std=c99 -# average timer granularity: 5.2e-08 s +# average timer granularity: 6.79e-07 s # Will write a final performance record to file test.rpr and periodic checkpoints to test.rpr.tmp # will NOT perform ancillary tests. # will flush cache memory: between each operation measurement series, and NOT between each operation. # will keep any zero encountered in the matrix. -# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games +# env: export PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/i/capture/the/path # env: export LD_LIBRARY_PATH=/build/reproducible-path/librsb-1.3.0.2+dfsg/.libs:/usr/lib/libeatmydata # env: HOSTNAME is not set # env: KMP_AFFINITY is not set @@ -5080,15 +5105,15 @@ # env: SLURM_NTASKS is not set # env: SLURM_STEP_TASKS_PER_NODE is not set # env: SLURM_TASKS_PER_NODE is not set -# detected hostname: ionos6-i386 +# detected hostname: i-capture-the-hostname # user specified a verbosity level of 2 (each --verbose occurrence counts +1) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5100,40 +5125,40 @@ # This test will measure times in scanning arrays sized and aligned to fit in caches. # 2 cache levels detected Will fill struct with 40 samples... -# Memory benchmark took 6.897s +# Memory benchmark took 5.630s # auto-tuning oriented output implies times==0 iterations and sort-after-load. #pr: allocated a performance record for 4 samples (1008 bytes). # multi-type benchmarking (DSCZ) -- now using typecode D (last was D). -# Cache block size total 4194304 bytes, per-thread 233016 bytes -# so far, program took 6.899s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . +# Cache block size total 524288 bytes, per-thread 52428 bytes +# so far, program took 5.657s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.000s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.441e-03 s (100.00 %) - analyzed arrays in 1.277e-03 s (88.62 %) - cleaned-up arrays in 1.001e-05 s (0.69 %) - deduplicated arrays in 9.060e-06 s (0.63 %) + converted COO to RSB in 7.918e-02 s (100.00 %) + analyzed arrays in 2.319e-02 s (29.29 %) + cleaned-up arrays in 2.098e-05 s (0.03 %) + deduplicated arrays in 1.812e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 8.988e-05 s (6.24 %) - memory allocations took 2.909e-05 s (2.02 %) - leafs setup took 3.099e-06 s (0.22 %) - halfword conversion took 2.003e-05 s (1.39 %) -Built (100 x 100)[0x57a8bbc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.002s): (100 x 100)[0x57a8bbc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + shuffled partitions in 3.196e-02 s (40.36 %) + memory allocations took 1.884e-05 s (0.02 %) + leafs setup took 8.106e-06 s (0.01 %) + halfword conversion took 2.397e-02 s (30.27 %) +Built (100 x 100)[0x1873210]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.079s): (100 x 100)[0x1873210]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--D-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5144,11 +5169,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5159,11 +5184,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5173,16 +5198,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000134s; avg 4.466e-05s ( +/- 28.47/ 56.94 %); best 3.195e-05s; worst 7.01e-05s; std dev. 1.798e-05 (taking best). -Reference operation time is 3.19481e-05 s (632.3 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 3.195e-05 Mflops: 632.276) -Merge (3 -> 1 leaves) took w.c.t. of 0.0002069s, ~0.000195s of computing time (of which 0.000114s sorting, 3.099e-06s analysis) +3 iterations (1 th.) took 0.04797s; avg 0.01599s ( +/- 0.10/ 0.08 %); best 0.01597s; worst 0.016s; std dev. 1.215e-05 (taking best). +Reference operation time is 0.015975 s (1.264 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01597 Mflops: 1.264) +Merge (3 -> 1 leaves) took w.c.t. of 0.0001431s, ~0.000103s of computing time (of which 4.101e-05s sorting, 5.96e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5193,11 +5218,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5207,14 +5232,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.695e-05s; avg 1.232e-05s ( +/- 10.97/ 12.26 %); best 1.097e-05s; worst 1.383e-05s; std dev. 1.173e-06 (taking best). -Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. +3 iterations (1 th.) took 5.007e-05s; avg 1.669e-05s ( +/- 10.00/ 14.29 %); best 1.502e-05s; worst 1.907e-05s; std dev. 1.73e-06 (taking best). +Reference operation time is 1.50204e-05 s (1345 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5224,25 +5249,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 1.097e-05 s ~Mflops: 1841.847 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 2.913x: 3.195e-05s -> 1.097e-05s, so taking this instance. +After merge step 1: tpop: 1.502e-05 s ~Mflops: 1344.840 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1063.556x: 0.01597s -> 1.502e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.004565s (of which 0.0002129s partitioning, 0.004007s I/O); computing times: 0.000195s in par. loops, 0.000114s sorting, 3.099e-06s analyzing) -Total merge + benchmarking process took 0.004565s, equivalent to 416.2/142.9 new/old ops (0.001313s for 2 clones -- as 119.7/41.1 ops, or 59.8/20.5 ops per clone), SPEEDUP of 2.913x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 2.913x (3.195e-05s -> 1.097e-05s), will amortize in 217.6 ops by saving 2.098e-05s per op. -In 1 tuning rounds (tot. 0.006s, 0.0013s for constructor, 2 clones) obtained a SPEEDUP of 191.3% (2.913x) (from 632.3 to 1842 Mflops). Employed 0.0062s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1443s (of which 0.0001519s partitioning, 0.09629s I/O); computing times: 0.000103s in par. loops, 4.101e-05s sorting, 5.96e-06s analyzing) +Total merge + benchmarking process took 0.1443s, equivalent to 9605.7/9.0 new/old ops (0.09517s for 2 clones -- as 6336.2/6.0 ops, or 3168.1/3.0 ops per clone), SPEEDUP of 1063.556x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1063.556x (0.01597s -> 1.502e-05s), will amortize in 9.0 ops by saving 0.01596s per op. +In 1 tuning rounds (tot. 0.24s, 0.095s for constructor, 2 clones) obtained a SPEEDUP of 106255.6% (1064x) (from 1.264 to 1345 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 1 (0^th of 4), 0^th touch for (0,0,0,0,0,0,0). -First run of RSB Autotuner took 0.0122049 s (3.195e-05 s -> 1.097e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.33635 s (1.597e-02 s -> 1.502e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type D, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5253,11 +5278,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5268,13 +5293,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 18. +Starting with requested 0 threads ; current default 1 ; at most 10. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5285,11 +5310,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5299,27 +5324,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.695e-05s; avg 1.232e-05s ( +/- 10.97/ 12.26 %); best 1.097e-05s; worst 1.383e-05s; std dev. 1.173e-06 (taking best). -Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. +3 iterations (1 th.) took 5.293e-05s; avg 1.764e-05s ( +/- 21.62/ 41.89 %); best 1.383e-05s; worst 2.503e-05s; std dev. 5.227e-06 (taking best). +Reference operation time is 1.38283e-05 s (1461 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.391e-05 s (100.00 %) - analyzed arrays in 1.788e-05 s (24.19 %) - cleaned-up arrays in 1.311e-05 s (17.74 %) - deduplicated arrays in 1.311e-05 s (17.74 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.003e-05 s (27.10 %) - memory allocations took 2.861e-06 s (3.87 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 5.960e-06 s (8.06 %) -Built (100 x 100)[0x57a8e470]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 18). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 5.511e-02 s (100.00 %) + analyzed arrays in 2.308e-02 s (41.87 %) + cleaned-up arrays in 2.003e-05 s (0.04 %) + deduplicated arrays in 1.693e-05 s (0.03 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.197e-02 s (58.01 %) + memory allocations took 6.914e-06 s (0.01 %) + leafs setup took 5.007e-06 s (0.01 %) + halfword conversion took 1.192e-05 s (0.02 %) +Built (100 x 100)[0x1876c20]{D} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 10). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5330,12 +5355,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5346,22 +5371,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.349e-04 s (100.00 %) - analyzed arrays in 5.698e-05 s (42.23 %) - cleaned-up arrays in 1.287e-05 s (9.54 %) - deduplicated arrays in 1.311e-05 s (9.72 %) + converted COO to RSB in 8.778e-02 s (100.00 %) + analyzed arrays in 2.376e-02 s (27.06 %) + cleaned-up arrays in 2.289e-05 s (0.03 %) + deduplicated arrays in 2.003e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.195e-05 s (23.67 %) - memory allocations took 2.861e-06 s (2.12 %) - leafs setup took 1.192e-06 s (0.88 %) - halfword conversion took 1.597e-05 s (11.84 %) -Built (100 x 100)[0x57a5f4c0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + shuffled partitions in 3.198e-02 s (36.43 %) + memory allocations took 8.106e-06 s (0.01 %) + leafs setup took 2.146e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (36.44 %) +Built (100 x 100)[0x1873840]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5372,11 +5397,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5387,11 +5412,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5401,16 +5426,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000104s; avg 3.465e-05s ( +/- 7.80/ 15.60 %); best 3.195e-05s; worst 4.005e-05s; std dev. 3.821e-06 (taking best). -Reference operation time is 3.19481e-05 s (632.3 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 3.19481e-05 s/0 threads (speedup 0.343284 x), same?n. +3 iterations (1 th.) took 0.04764s; avg 0.01588s ( +/- 24.45/ 23.74 %); best 0.012s; worst 0.01965s; std dev. 0.003125 (taking best). +Reference operation time is 0.0119989 s (1.683 Mflops) with 1 threads. +Challenging best inner round reference (1.38283e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.0119989 s/0 threads (speedup 0.00115246 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5421,12 +5446,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5437,22 +5462,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.309e-04 s (100.00 %) - analyzed arrays in 5.507e-05 s (42.08 %) - cleaned-up arrays in 1.311e-05 s (10.02 %) - deduplicated arrays in 1.287e-05 s (9.84 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.909e-05 s (22.22 %) - memory allocations took 3.815e-06 s (2.91 %) - leafs setup took 9.537e-07 s (0.73 %) - halfword conversion took 1.597e-05 s (12.20 %) -Built (100 x 100)[0x57a8ec50]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 9.572e-02 s (100.00 %) + analyzed arrays in 4.370e-02 s (45.65 %) + cleaned-up arrays in 2.694e-05 s (0.03 %) + deduplicated arrays in 1.884e-05 s (0.02 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 2.003e-02 s (20.93 %) + memory allocations took 1.502e-05 s (0.02 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.192e-02 s (33.35 %) +Built (100 x 100)[0x189f260]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5463,11 +5488,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5478,11 +5503,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5492,16 +5517,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001199s; avg 3.997e-05s ( +/- 7.55/ 12.13 %); best 3.695e-05s; worst 4.482e-05s; std dev. 3.462e-06 (taking best). -Reference operation time is 3.69549e-05 s (546.6 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 3.69549e-05 s/0 threads (speedup 0.296774 x), same?n. +3 iterations (1 th.) took 0.04765s; avg 0.01588s ( +/- 1.60/ 0.92 %); best 0.01563s; worst 0.01603s; std dev. 0.0001801 (taking best). +Reference operation time is 0.015631 s (1.292 Mflops) with 1 threads. +Challenging best inner round reference (1.38283e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 0.015631 s/0 threads (speedup 0.000884672 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5512,12 +5537,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5528,22 +5553,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.409e-04 s (100.00 %) - analyzed arrays in 5.603e-05 s (39.76 %) - cleaned-up arrays in 1.216e-05 s (8.63 %) - deduplicated arrays in 1.407e-05 s (9.98 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.791e-05 s (26.90 %) - memory allocations took 2.861e-06 s (2.03 %) - leafs setup took 3.099e-06 s (2.20 %) - halfword conversion took 1.478e-05 s (10.49 %) -Built (100 x 100)[0x57a8bbc0]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 22, symflags:'LS' + converted COO to RSB in 1.037e-01 s (100.00 %) + analyzed arrays in 3.962e-02 s (38.21 %) + cleaned-up arrays in 2.003e-05 s (0.02 %) + deduplicated arrays in 1.717e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.201e-02 s (30.87 %) + memory allocations took 1.192e-05 s (0.01 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.200e-02 s (30.86 %) +Built (100 x 100)[0x189f260]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 24, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5554,11 +5579,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5569,11 +5594,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5583,16 +5608,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001299s; avg 4.331e-05s ( +/- 7.52/ 12.84 %); best 4.005e-05s; worst 4.888e-05s; std dev. 3.953e-06 (taking best). -Reference operation time is 4.00543e-05 s (504.3 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 1, 22 leaves, 2.276 bytes/nz, 4.00543e-05 s/0 threads (speedup 0.27381 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 30 subms, 22 lsubms, 2.2756 bpnz +3 iterations (1 th.) took 0.02363s; avg 0.007876s ( +/- 3.00/ 1.50 %); best 0.00764s; worst 0.007994s; std dev. 0.000167 (taking best). +Reference operation time is 0.00763988 s (2.644 Mflops) with 1 threads. +Challenging best inner round reference (1.38283e-05 s/1 threads) with: subdivision 1, 24 leaves, 2.306 bytes/nz, 0.00763988 s/0 threads (speedup 0.00181001 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 33 subms, 24 lsubms, 2.3057 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5603,12 +5628,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5619,22 +5644,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.991e-04 s (100.00 %) - analyzed arrays in 8.798e-05 s (44.19 %) - cleaned-up arrays in 1.287e-05 s (6.47 %) - deduplicated arrays in 1.311e-05 s (6.59 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 6.294e-05 s (31.62 %) - memory allocations took 3.099e-06 s (1.56 %) - leafs setup took 4.053e-06 s (2.04 %) - halfword conversion took 1.502e-05 s (7.54 %) -Built (100 x 100)[0x57a39f40]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 50, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 5.176e-02 s (100.00 %) + analyzed arrays in 1.967e-02 s (38.01 %) + cleaned-up arrays in 1.907e-05 s (0.04 %) + deduplicated arrays in 1.693e-05 s (0.03 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 1.604e-02 s (30.99 %) + memory allocations took 1.287e-05 s (0.02 %) + leafs setup took 8.821e-06 s (0.02 %) + halfword conversion took 1.598e-02 s (30.88 %) +Built (100 x 100)[0x189f260]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5645,11 +5670,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5660,11 +5685,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5674,16 +5699,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001569s; avg 5.229e-05s ( +/- 6.53/ 10.79 %); best 4.888e-05s; worst 5.794e-05s; std dev. 4.019e-06 (taking best). -Reference operation time is 4.88758e-05 s (413.3 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 2, 50 leaves, 2.425 bytes/nz, 4.88758e-05 s/0 threads (speedup 0.22439 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 68 subms, 50 lsubms, 2.4253 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +3 iterations (1 th.) took 0.02761s; avg 0.009204s ( +/- 17.20/ 30.46 %); best 0.007621s; worst 0.01201s; std dev. 0.001988 (taking best). +Reference operation time is 0.00762105 s (2.651 Mflops) with 1 threads. +Challenging best inner round reference (1.38283e-05 s/1 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 0.00762105 s/0 threads (speedup 0.00181448 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5694,12 +5719,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -5710,22 +5735,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 4.709e-04 s (100.00 %) - analyzed arrays in 1.340e-04 s (28.46 %) - cleaned-up arrays in 1.287e-05 s (2.73 %) - deduplicated arrays in 1.311e-05 s (2.78 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.580e-04 s (54.78 %) - memory allocations took 2.909e-05 s (6.18 %) - leafs setup took 6.914e-06 s (1.47 %) - halfword conversion took 1.597e-05 s (3.39 %) -Built (100 x 100)[0x57a20130]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 103, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 6.384e-02 s (100.00 %) + analyzed arrays in 2.781e-02 s (43.57 %) + cleaned-up arrays in 2.098e-05 s (0.03 %) + deduplicated arrays in 1.812e-05 s (0.03 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 2.002e-02 s (31.36 %) + memory allocations took 1.597e-05 s (0.03 %) + leafs setup took 1.192e-05 s (0.02 %) + halfword conversion took 1.594e-02 s (24.96 %) +Built (100 x 100)[0x18a3660]{D} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5736,11 +5761,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5751,11 +5776,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5765,17 +5790,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0002031s; avg 6.771e-05s ( +/- 3.87/ 6.34 %); best 6.509e-05s; worst 7.2e-05s; std dev. 3.059e-06 (taking best). -Reference operation time is 6.50883e-05 s (310.3 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 4, 103 leaves, 2.653 bytes/nz, 6.50883e-05 s/0 threads (speedup 0.168498 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 139 subms, 103 lsubms, 2.6535 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1841.85 Mflops. +3 iterations (1 th.) took 0.03975s; avg 0.01325s ( +/- 9.39/ 18.16 %); best 0.01201s; worst 0.01566s; std dev. 0.001702 (taking best). +Reference operation time is 0.012006 s (1.682 Mflops) with 1 threads. +Challenging best inner round reference (1.38283e-05 s/1 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 0.012006 s/0 threads (speedup 0.00115178 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type D, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1460.77 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5785,23 +5810,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.00312901 s (eq. to 3e+02/ 3e+02 old/new op.times), gained local/global speedup 1 x (1.09673e-05 : 1.09673e-05) / 1 x (1.09673e-05 : 1.09673e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.647654 s (eq. to 5e+04/ 5e+04 old/new op.times), gained local/global speedup 1 x (1.38283e-05 : 1.38283e-05) / 1 x (1.38283e-05 : 1.38283e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.0031s, 0.002s for constructor, 0 clones) obtained NO speedup (best stays 1842 Mflops). -Second run of RSB Autotuner took 0.0031631 s and estimated a speedup of 1.000000 x (1.097e-05 s -> 1.097e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.65s, 0.46s for constructor, 0 clones) obtained NO speedup (best stays 1461 Mflops). +Second run of RSB Autotuner took 0.647724 s and estimated a speedup of 1.000000 x (1.383e-05 s -> 1.383e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.001277 0.000090 0.001367 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.001367 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.001277 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000090 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.023189 0.031955 0.055144 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.055144 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.023189 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.031955 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.001367 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.055144 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -5816,45 +5841,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.001441 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.0791819 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 0.00127697 0 8.98838e-05 -# so far, program took 6.935s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.015s/0.000s . +%constructor:lower-100x100-5050nz 0 0.0231891 0 0.031955 +# so far, program took 6.777s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.984s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.09291s (system CPU time used) -ru_utime : 7.356s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.05593s (system CPU time used) +ru_utime : 12.79s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode S (last was D). -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# so far, program took 6.935s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.015s/0.000s . +# Cache block size total 524288 bytes, per-thread 524288 bytes +# so far, program took 6.778s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.984s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.130e-04 s (100.00 %) - analyzed arrays in 2.599e-05 s (23.00 %) - cleaned-up arrays in 1.311e-05 s (11.60 %) - deduplicated arrays in 1.287e-05 s (11.39 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.886e-05 s (34.39 %) - memory allocations took 5.007e-06 s (4.43 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 1.717e-05 s (15.19 %) -Built (100 x 100)[0x57a8bbc0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.000s): (100 x 100)[0x57a8bbc0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + converted COO to RSB in 1.113e-01 s (100.00 %) + analyzed arrays in 1.530e-02 s (13.75 %) + cleaned-up arrays in 1.884e-05 s (0.02 %) + deduplicated arrays in 1.597e-05 s (0.01 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 6.395e-02 s (57.47 %) + memory allocations took 1.001e-05 s (0.01 %) + leafs setup took 5.007e-06 s (0.00 %) + halfword conversion took 3.197e-02 s (28.73 %) +Built (100 x 100)[0x189f890]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.111s): (100 x 100)[0x189f890]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--S-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5865,11 +5890,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5880,11 +5905,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5894,16 +5919,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001199s; avg 3.997e-05s ( +/- 22.47/ 25.25 %); best 3.099e-05s; worst 5.007e-05s; std dev. 7.826e-06 (taking best). -Reference operation time is 3.09944e-05 s (651.7 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 3.099e-05 Mflops: 651.730) -Merge (3 -> 1 leaves) took w.c.t. of 5.198e-05s, ~4.387e-05s of computing time (of which 1.788e-05s sorting, 1.907e-06s analysis) +3 iterations (1 th.) took 0.04798s; avg 0.01599s ( +/- 0.20/ 0.18 %); best 0.01596s; worst 0.01602s; std dev. 2.508e-05 (taking best). +Reference operation time is 0.0159628 s (1.265 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type S, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01596 Mflops: 1.265) +Merge (3 -> 1 leaves) took w.c.t. of 8.392e-05s, ~6.7e-05s of computing time (of which 2.718e-05s sorting, 5.007e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5914,11 +5939,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5928,14 +5953,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 0.66/ 1.32 %); best 1.192e-05s; worst 1.216e-05s; std dev. 1.124e-07 (taking best). -Reference operation time is 1.19209e-05 s (1694 Mflops) with 1 threads. +3 iterations (1 th.) took 4.911e-05s; avg 1.637e-05s ( +/- 2.43/ 4.85 %); best 1.597e-05s; worst 1.717e-05s; std dev. 5.62e-07 (taking best). +Reference operation time is 1.5974e-05 s (1265 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5945,25 +5970,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 1.192e-05 s ~Mflops: 1694.499 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 2.600x: 3.099e-05s -> 1.192e-05s, so taking this instance. +After merge step 1: tpop: 1.597e-05 s ~Mflops: 1264.551 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 999.299x: 0.01596s -> 1.597e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.003892s (of which 5.603e-05s partitioning, 0.003705s I/O); computing times: 4.387e-05s in par. loops, 1.788e-05s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.003892s, equivalent to 326.5/125.6 new/old ops (8.988e-05s for 2 clones -- as 7.5/2.9 ops, or 3.8/1.4 ops per clone), SPEEDUP of 2.600x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 2.600x (3.099e-05s -> 1.192e-05s), will amortize in 204.1 ops by saving 1.907e-05s per op. -In 1 tuning rounds (tot. 0.0041s, 9e-05s for constructor, 2 clones) obtained a SPEEDUP of 160.0% (2.6x) (from 651.7 to 1694 Mflops). Employed 0.0038s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1363s (of which 9.799e-05s partitioning, 0.08808s I/O); computing times: 6.7e-05s in par. loops, 2.718e-05s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.1363s, equivalent to 8533.7/8.5 new/old ops (0.09532s for 2 clones -- as 5967.4/6.0 ops, or 2983.7/3.0 ops per clone), SPEEDUP of 999.299x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 999.299x (0.01596s -> 1.597e-05s), will amortize in 8.5 ops by saving 0.01595s per op. +In 1 tuning rounds (tot. 0.23s, 0.095s for constructor, 2 clones) obtained a SPEEDUP of 99829.9% (999.3x) (from 1.265 to 1265 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 2 (1^th of 4), 0^th touch for (0,0,0,0,0,1,0). -First run of RSB Autotuner took 0.00799799 s (3.099e-05 s -> 1.192e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.328335 s (1.596e-02 s -> 1.597e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5974,11 +5999,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -5989,13 +6014,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 18. +Starting with requested 0 threads ; current default 1 ; at most 10. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6006,11 +6031,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6020,27 +6045,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.695e-05s; avg 1.232e-05s ( +/- 10.97/ 14.19 %); best 1.097e-05s; worst 1.407e-05s; std dev. 1.296e-06 (taking best). -Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. +3 iterations (1 th.) took 5.484e-05s; avg 1.828e-05s ( +/- 12.61/ 25.22 %); best 1.597e-05s; worst 2.289e-05s; std dev. 3.259e-06 (taking best). +Reference operation time is 1.5974e-05 s (1265 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.400e-04 s (100.00 %) - analyzed arrays in 3.791e-05 s (27.09 %) - cleaned-up arrays in 1.287e-05 s (9.20 %) - deduplicated arrays in 1.407e-05 s (10.05 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.003e-05 s (14.31 %) - memory allocations took 4.911e-05 s (35.09 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 5.960e-06 s (4.26 %) -Built (100 x 100)[0x57a8e470]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 18). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 6.337e-02 s (100.00 %) + analyzed arrays in 3.131e-02 s (49.40 %) + cleaned-up arrays in 2.289e-05 s (0.04 %) + deduplicated arrays in 1.884e-05 s (0.03 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 3.200e-02 s (50.49 %) + memory allocations took 9.060e-06 s (0.01 %) + leafs setup took 3.099e-06 s (0.00 %) + halfword conversion took 9.060e-06 s (0.01 %) +Built (100 x 100)[0x1876c20]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 10). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6051,12 +6076,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6067,22 +6092,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 6.795e-05 s (100.00 %) - analyzed arrays in 1.812e-05 s (26.67 %) - cleaned-up arrays in 1.287e-05 s (18.95 %) - deduplicated arrays in 1.311e-05 s (19.30 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.287e-05 s (18.95 %) - memory allocations took 1.907e-06 s (2.81 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 9.060e-06 s (13.33 %) -Built (100 x 100)[0x57a8f8c0]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 3.167e-02 s (100.00 %) + analyzed arrays in 1.560e-02 s (49.28 %) + cleaned-up arrays in 2.193e-05 s (0.07 %) + deduplicated arrays in 1.979e-05 s (0.06 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 1.600e-02 s (50.51 %) + memory allocations took 7.153e-06 s (0.02 %) + leafs setup took 2.146e-06 s (0.01 %) + halfword conversion took 1.192e-05 s (0.04 %) +Built (100 x 100)[0x18a1c00]{S} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6093,11 +6118,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6108,11 +6133,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6122,16 +6147,15 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 3.695e-05s; avg 1.232e-05s ( +/- 10.97/ 14.19 %); best 1.097e-05s; worst 1.407e-05s; std dev. 1.296e-06 (taking best). -Reference operation time is 1.09673e-05 s (1842 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.25, 1 leaves, 2.08 bytes/nz, 1.09673e-05 s/0 threads (speedup 1 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +3 iterations (1 th.) took 4.506e-05s; avg 1.502e-05s ( +/- 14.29/ 26.98 %); best 1.287e-05s; worst 1.907e-05s; std dev. 2.868e-06 (taking best). +Reference operation time is 1.28746e-05 s (1569 Mflops) with 1 threads. +Challenging best inner round reference (1.5974e-05 s/1 threads) with: subdivision 0.25, 1 leaves, 2.08 bytes/nz, 1.28746e-05 s/0 threads (speedup 1.24074 x), same?n. +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6142,12 +6166,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6158,22 +6182,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.149e-04 s (100.00 %) - analyzed arrays in 4.292e-05 s (37.34 %) - cleaned-up arrays in 1.216e-05 s (10.58 %) - deduplicated arrays in 1.287e-05 s (11.20 %) - sorted arrays in 9.537e-07 s (0.83 %) - shuffled partitions in 2.599e-05 s (22.61 %) - memory allocations took 3.099e-06 s (2.70 %) - leafs setup took 9.537e-07 s (0.83 %) - halfword conversion took 1.597e-05 s (13.90 %) -Built (100 x 100)[0x57a8c1f0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 6.768e-02 s (100.00 %) + analyzed arrays in 2.764e-02 s (40.84 %) + cleaned-up arrays in 1.788e-05 s (0.03 %) + deduplicated arrays in 1.621e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 2.398e-02 s (35.44 %) + memory allocations took 6.199e-06 s (0.01 %) + leafs setup took 1.907e-06 s (0.00 %) + halfword conversion took 1.601e-02 s (23.65 %) +Built (100 x 100)[0x189f890]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 8, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6184,11 +6208,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6199,11 +6223,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6213,16 +6237,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000113s; avg 3.767e-05s ( +/- 20.25/ 40.51 %); best 3.004e-05s; worst 5.293e-05s; std dev. 1.079e-05 (taking best). -Reference operation time is 3.00407e-05 s (672.4 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 0.5, 6 leaves, 2.163 bytes/nz, 3.00407e-05 s/0 threads (speedup 0.365079 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 8 subms, 6 lsubms, 2.1632 bpnz +3 iterations (1 th.) took 0.03994s; avg 0.01331s ( +/- 39.87/ 20.16 %); best 0.008004s; worst 0.016s; std dev. 0.003753 (taking best). +Reference operation time is 0.00800395 s (2.524 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/0 threads) with: subdivision 0.5, 8 leaves, 2.185 bytes/nz, 0.00800395 s/0 threads (speedup 0.00160853 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 11 subms, 8 lsubms, 2.1846 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6233,12 +6257,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6249,22 +6273,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.280e-04 s (100.00 %) - analyzed arrays in 5.388e-05 s (42.09 %) - cleaned-up arrays in 1.192e-05 s (9.31 %) - deduplicated arrays in 1.407e-05 s (10.99 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.789e-05 s (21.79 %) - memory allocations took 3.338e-06 s (2.61 %) - leafs setup took 9.537e-07 s (0.74 %) - halfword conversion took 1.502e-05 s (11.73 %) -Built (100 x 100)[0x57a650e0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 16, symflags:'LS' + converted COO to RSB in 7.181e-02 s (100.00 %) + analyzed arrays in 2.374e-02 s (33.06 %) + cleaned-up arrays in 1.788e-05 s (0.02 %) + deduplicated arrays in 1.478e-05 s (0.02 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 1.603e-02 s (22.33 %) + memory allocations took 7.153e-06 s (0.01 %) + leafs setup took 3.815e-06 s (0.01 %) + halfword conversion took 3.199e-02 s (44.55 %) +Built (100 x 100)[0x186e320]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 19, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6275,11 +6299,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6290,11 +6314,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6304,16 +6328,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001321s; avg 4.403e-05s ( +/- 18.23/ 34.30 %); best 3.6e-05s; worst 5.913e-05s; std dev. 1.068e-05 (taking best). -Reference operation time is 3.60012e-05 s (561.1 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 1, 16 leaves, 2.25 bytes/nz, 3.60012e-05 s/0 threads (speedup 0.304636 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 22 subms, 16 lsubms, 2.2503 bpnz +3 iterations (1 th.) took 0.04793s; avg 0.01598s ( +/- 0.34/ 0.21 %); best 0.01592s; worst 0.01601s; std dev. 3.91e-05 (taking best). +Reference operation time is 0.0159211 s (1.269 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/0 threads) with: subdivision 1, 19 leaves, 2.272 bytes/nz, 0.0159211 s/0 threads (speedup 0.00080865 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 26 subms, 19 lsubms, 2.2725 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6324,12 +6348,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6340,22 +6364,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.719e-04 s (100.00 %) - analyzed arrays in 7.510e-05 s (43.69 %) - cleaned-up arrays in 1.311e-05 s (7.63 %) - deduplicated arrays in 1.287e-05 s (7.49 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.792e-05 s (27.88 %) - memory allocations took 2.861e-06 s (1.66 %) - leafs setup took 3.099e-06 s (1.80 %) - halfword conversion took 1.597e-05 s (9.29 %) -Built (100 x 100)[0x57a650e0]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 37, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 6.378e-02 s (100.00 %) + analyzed arrays in 2.374e-02 s (37.22 %) + cleaned-up arrays in 1.812e-05 s (0.03 %) + deduplicated arrays in 1.597e-05 s (0.03 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 2.400e-02 s (37.63 %) + memory allocations took 7.153e-06 s (0.01 %) + leafs setup took 5.007e-06 s (0.01 %) + halfword conversion took 1.599e-02 s (25.08 %) +Built (100 x 100)[0x186e320]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6366,11 +6390,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6381,11 +6405,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6395,16 +6419,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001519s; avg 5.062e-05s ( +/- 17.11/ 28.10 %); best 4.196e-05s; worst 6.485e-05s; std dev. 1.014e-05 (taking best). -Reference operation time is 4.19617e-05 s (481.4 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 2, 37 leaves, 2.357 bytes/nz, 4.19617e-05 s/0 threads (speedup 0.261364 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 50 subms, 37 lsubms, 2.3572 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +3 iterations (1 th.) took 0.03199s; avg 0.01066s ( +/- 24.99/ 49.56 %); best 0.007998s; worst 0.01595s; std dev. 0.003737 (taking best). +Reference operation time is 0.00799799 s (2.526 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/0 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 0.00799799 s/0 threads (speedup 0.00160973 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6415,12 +6439,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6431,22 +6455,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 2.520e-04 s (100.00 %) - analyzed arrays in 1.140e-04 s (45.22 %) - cleaned-up arrays in 1.216e-05 s (4.82 %) - deduplicated arrays in 1.287e-05 s (5.11 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 8.488e-05 s (33.68 %) - memory allocations took 4.768e-06 s (1.89 %) - leafs setup took 5.007e-06 s (1.99 %) - halfword conversion took 1.621e-05 s (6.43 %) -Built (100 x 100)[0x57a30160]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 79, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 6.375e-02 s (100.00 %) + analyzed arrays in 3.170e-02 s (49.72 %) + cleaned-up arrays in 1.812e-05 s (0.03 %) + deduplicated arrays in 1.597e-05 s (0.03 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 1.602e-02 s (25.13 %) + memory allocations took 9.060e-06 s (0.01 %) + leafs setup took 5.007e-06 s (0.01 %) + halfword conversion took 1.598e-02 s (25.06 %) +Built (100 x 100)[0x18a3660]{S} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6457,11 +6481,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6472,11 +6496,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6486,17 +6510,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.00018s; avg 6e-05s ( +/- 5.03/ 8.48 %); best 5.698e-05s; worst 6.509e-05s; std dev. 3.618e-06 (taking best). -Reference operation time is 5.6982e-05 s (354.5 Mflops) with 1 threads. -Challenging best inner round reference (1.09673e-05 s/1 threads) with: subdivision 4, 79 leaves, 2.558 bytes/nz, 5.6982e-05 s/0 threads (speedup 0.192469 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 107 subms, 79 lsubms, 2.5576 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1841.85 Mflops. +3 iterations (1 th.) took 0.05595s; avg 0.01865s ( +/- 14.22/ 28.43 %); best 0.016s; worst 0.02395s; std dev. 0.003748 (taking best). +Reference operation time is 0.0159969 s (1.263 Mflops) with 1 threads. +Challenging best inner round reference (1.28746e-05 s/0 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 0.0159969 s/0 threads (speedup 0.000804817 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type S, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +Best sparse multiply performance with subdivision multiplier of 0.25: 1568.98 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6506,29 +6530,29 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.00225091 s (eq. to 2e+02/ 2e+02 old/new op.times), gained local/global speedup 1 x (1.09673e-05 : 1.09673e-05) / 1 x (1.09673e-05 : 1.09673e-05). This is not amortizable ! -Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.0023s, 0.0011s for constructor, 0 clones) obtained NO speedup (best stays 1842 Mflops). -Second run of RSB Autotuner took 0.002285 s and estimated a speedup of 1.000000 x (1.097e-05 s -> 1.097e-05 s per op) in same matrix (1 -> 1 lsubm) +Last tuner inner round (1 of 1) took 0.539695 s (eq. to 3e+04/ 4e+04 old/new op.times), gained local/global speedup 1.24074 x (1.5974e-05 : 1.28746e-05) / 1.24074 x (1.5974e-05 : 1.28746e-05). This is amortizable in 174127 op.times. +In 1 tuning rounds (tot. 0.54s, 0.36s for constructor, 0 clones) obtained a SPEEDUP of 24.1% (1.241x) (from 1265 to 1569 Mflops). +Second run of RSB Autotuner took 0.53977 s and estimated a speedup of 1.240741 x (1.597e-05 s -> 1.287e-05 s per op) in new matrix (1 -> 1 lsubm) +RSB Autotuner suggested a new matrix: freeing the old one. #min:1 #max:1 #sum:100 #norm:10 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000026 0.000039 0.000065 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000065 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000026 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000039 -%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 -%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000065 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000001 0.015295 0.063953 0.079248 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.079249 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.015295 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063953 +%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000001 +%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.000 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.079248 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SUBDIVISION_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SHUFFLE_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 -%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:lower-100x100-5050nz S N 1 100 100 5050 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:lower-100x100-5050nz S N 1 100 100 5050 10504 40400 20600 @@ -6537,45 +6561,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.00011301 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.111276 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 2.59876e-05 0 3.88622e-05 -# so far, program took 6.962s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.026s/0.000s . +%constructor:lower-100x100-5050nz 1.19209e-06 0.015295 0 0.0639529 +# so far, program took 7.837s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.852s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.105s (system CPU time used) -ru_utime : 7.783s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.05959s (system CPU time used) +ru_utime : 19.51s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode C (last was D). -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# so far, program took 6.962s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.026s/0.000s . +# Cache block size total 524288 bytes, per-thread 524288 bytes +# so far, program took 7.837s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 1.852s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.161e-04 s (100.00 %) - analyzed arrays in 2.599e-05 s (22.38 %) - cleaned-up arrays in 1.287e-05 s (11.09 %) - deduplicated arrays in 1.311e-05 s (11.29 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 4.315e-05 s (37.17 %) - memory allocations took 3.099e-06 s (2.67 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 1.597e-05 s (13.76 %) -Built (100 x 100)[0x57a650e0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.000s): (100 x 100)[0x57a650e0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + converted COO to RSB in 1.114e-01 s (100.00 %) + analyzed arrays in 1.538e-02 s (13.80 %) + cleaned-up arrays in 1.884e-05 s (0.02 %) + deduplicated arrays in 1.621e-05 s (0.01 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 6.400e-02 s (57.44 %) + memory allocations took 1.287e-05 s (0.01 %) + leafs setup took 4.053e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (28.70 %) +Built (100 x 100)[0x184ba90]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.111s): (100 x 100)[0x184ba90]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--C-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6586,11 +6610,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6601,11 +6625,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6615,16 +6639,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000283s; avg 9.433e-05s ( +/- 8.76/ 16.51 %); best 8.607e-05s; worst 0.0001099s; std dev. 1.102e-05 (taking best). -Reference operation time is 8.60691e-05 s (938.8 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 8.607e-05 Mflops: 938.781) -Merge (3 -> 1 leaves) took w.c.t. of 5.484e-05s, ~4.697e-05s of computing time (of which 2.098e-05s sorting, 1.907e-06s analysis) +3 iterations (1 th.) took 0.04806s; avg 0.01602s ( +/- 0.24/ 0.36 %); best 0.01598s; worst 0.01608s; std dev. 4.109e-05 (taking best). +Reference operation time is 0.0159819 s (5.056 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01598 Mflops: 5.056) +Merge (3 -> 1 leaves) took w.c.t. of 7.582e-05s, ~6.413e-05s of computing time (of which 2.789e-05s sorting, 2.861e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6635,11 +6659,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6649,14 +6673,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001771s; avg 5.905e-05s ( +/- 1.48/ 1.75 %); best 5.817e-05s; worst 6.008e-05s; std dev. 7.867e-07 (taking best). -Reference operation time is 5.81741e-05 s (1389 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001712s; avg 5.706e-05s ( +/- 1.81/ 3.62 %); best 5.603e-05s; worst 5.913e-05s; std dev. 1.461e-06 (taking best). +Reference operation time is 5.60284e-05 s (1442 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6666,25 +6690,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 5.817e-05 s ~Mflops: 1388.933 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1.480x: 8.607e-05s -> 5.817e-05s, so taking this instance. +After merge step 1: tpop: 5.603e-05 s ~Mflops: 1442.127 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 285.247x: 0.01598s -> 5.603e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.004183s (of which 0.0001068s partitioning, 0.003833s I/O); computing times: 4.697e-05s in par. loops, 2.098e-05s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.004183s, equivalent to 71.9/48.6 new/old ops (5.698e-05s for 2 clones -- as 1.0/0.7 ops, or 0.5/0.3 ops per clone), SPEEDUP of 1.480x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1.480x (8.607e-05s -> 5.817e-05s), will amortize in 149.9 ops by saving 2.789e-05s per op. -In 1 tuning rounds (tot. 0.0045s, 5.7e-05s for constructor, 2 clones) obtained a SPEEDUP of 48.0% (1.48x) (from 938.8 to 1389 Mflops). Employed 0.0039s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.144s (of which 8.202e-05s partitioning, 0.09606s I/O); computing times: 6.413e-05s in par. loops, 2.789e-05s sorting, 2.861e-06s analyzing) +Total merge + benchmarking process took 0.144s, equivalent to 2569.6/9.0 new/old ops (0.09524s for 2 clones -- as 1699.8/6.0 ops, or 849.9/3.0 ops per clone), SPEEDUP of 285.247x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 285.247x (0.01598s -> 5.603e-05s), will amortize in 9.0 ops by saving 0.01593s per op. +In 1 tuning rounds (tot. 0.24s, 0.095s for constructor, 2 clones) obtained a SPEEDUP of 28424.7% (285.2x) (from 5.056 to 1442 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 3 (2^th of 4), 0^th touch for (0,0,0,0,0,2,0). -First run of RSB Autotuner took 0.00848508 s (8.607e-05 s -> 5.817e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.336113 s (1.598e-02 s -> 5.603e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type C, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6695,11 +6719,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6710,13 +6734,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 18. +Starting with requested 0 threads ; current default 1 ; at most 10. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6727,11 +6751,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6741,27 +6765,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001781s; avg 5.937e-05s ( +/- 2.01/ 2.81 %); best 5.817e-05s; worst 6.104e-05s; std dev. 1.216e-06 (taking best). -Reference operation time is 5.81741e-05 s (1389 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001948s; avg 6.493e-05s ( +/- 1.59/ 3.18 %); best 6.39e-05s; worst 6.7e-05s; std dev. 1.461e-06 (taking best). +Reference operation time is 6.38962e-05 s (1265 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.296e-05 s (100.00 %) - analyzed arrays in 1.812e-05 s (24.84 %) - cleaned-up arrays in 1.311e-05 s (17.97 %) - deduplicated arrays in 1.287e-05 s (17.65 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.907e-05 s (26.14 %) - memory allocations took 2.861e-06 s (3.92 %) - leafs setup took 9.537e-07 s (1.31 %) - halfword conversion took 5.007e-06 s (6.86 %) -Built (100 x 100)[0x57a8e470]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 18). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 6.329e-02 s (100.00 %) + analyzed arrays in 3.124e-02 s (49.35 %) + cleaned-up arrays in 2.098e-05 s (0.03 %) + deduplicated arrays in 1.788e-05 s (0.03 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.199e-02 s (50.55 %) + memory allocations took 6.437e-06 s (0.01 %) + leafs setup took 3.099e-06 s (0.00 %) + halfword conversion took 9.060e-06 s (0.01 %) +Built (100 x 100)[0x1876c20]{C} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 10). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6772,12 +6796,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6788,22 +6812,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.571e-04 s (100.00 %) - analyzed arrays in 4.506e-05 s (28.68 %) - cleaned-up arrays in 1.287e-05 s (8.19 %) - deduplicated arrays in 1.311e-05 s (8.35 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.909e-05 s (18.51 %) - memory allocations took 4.005e-05 s (25.49 %) - leafs setup took 9.537e-07 s (0.61 %) - halfword conversion took 1.597e-05 s (10.17 %) -Built (100 x 100)[0x57a3ee30]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 7.989e-02 s (100.00 %) + analyzed arrays in 1.588e-02 s (19.87 %) + cleaned-up arrays in 2.003e-05 s (0.03 %) + deduplicated arrays in 1.788e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.198e-02 s (40.03 %) + memory allocations took 7.153e-06 s (0.01 %) + leafs setup took 1.907e-06 s (0.00 %) + halfword conversion took 3.198e-02 s (40.04 %) +Built (100 x 100)[0x18a2850]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 3, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6814,11 +6838,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6829,11 +6853,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6843,16 +6867,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0002639s; avg 8.798e-05s ( +/- 1.36/ 2.44 %); best 8.678e-05s; worst 9.012e-05s; std dev. 1.52e-06 (taking best). -Reference operation time is 8.67844e-05 s (931 Mflops) with 1 threads. -Challenging best inner round reference (5.81741e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 8.67844e-05 s/0 threads (speedup 0.67033 x), same?n. +3 iterations (1 th.) took 0.04794s; avg 0.01598s ( +/- 0.54/ 0.45 %); best 0.01589s; worst 0.01605s; std dev. 6.538e-05 (taking best). +Reference operation time is 0.0158949 s (5.083 Mflops) with 1 threads. +Challenging best inner round reference (6.38962e-05 s/1 threads) with: subdivision 0.25, 3 leaves, 2.121 bytes/nz, 0.0158949 s/0 threads (speedup 0.00401992 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6863,12 +6887,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6879,22 +6903,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.781e-04 s (100.00 %) - analyzed arrays in 6.890e-05 s (38.69 %) - cleaned-up arrays in 1.216e-05 s (6.83 %) - deduplicated arrays in 1.383e-05 s (7.76 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.694e-05 s (15.13 %) - memory allocations took 4.005e-05 s (22.49 %) - leafs setup took 1.192e-06 s (0.67 %) - halfword conversion took 1.502e-05 s (8.43 %) -Built (100 x 100)[0x57a6d390]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 10, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 9.976e-02 s (100.00 %) + analyzed arrays in 4.369e-02 s (43.79 %) + cleaned-up arrays in 2.098e-05 s (0.02 %) + deduplicated arrays in 1.907e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 2.399e-02 s (24.05 %) + memory allocations took 1.192e-05 s (0.01 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.202e-02 s (32.09 %) +Built (100 x 100)[0x189f8c0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 11, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6905,11 +6929,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6920,11 +6944,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6934,16 +6958,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000293s; avg 9.767e-05s ( +/- 1.63/ 2.28 %); best 9.608e-05s; worst 9.99e-05s; std dev. 1.621e-06 (taking best). -Reference operation time is 9.60827e-05 s (840.9 Mflops) with 1 threads. -Challenging best inner round reference (5.81741e-05 s/1 threads) with: subdivision 0.5, 10 leaves, 2.206 bytes/nz, 9.60827e-05 s/0 threads (speedup 0.605459 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 14 subms, 10 lsubms, 2.2059 bpnz +3 iterations (1 th.) took 0.05188s; avg 0.01729s ( +/- 8.10/ 15.73 %); best 0.01589s; worst 0.02001s; std dev. 0.001924 (taking best). +Reference operation time is 0.015892 s (5.084 Mflops) with 1 threads. +Challenging best inner round reference (6.38962e-05 s/1 threads) with: subdivision 0.5, 11 leaves, 2.207 bytes/nz, 0.015892 s/0 threads (speedup 0.00402064 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 15 subms, 11 lsubms, 2.2067 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6954,12 +6978,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -6970,22 +6994,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.910e-04 s (100.00 %) - analyzed arrays in 7.200e-05 s (37.70 %) - cleaned-up arrays in 1.311e-05 s (6.87 %) - deduplicated arrays in 1.311e-05 s (6.87 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.600e-05 s (18.85 %) - memory allocations took 3.982e-05 s (20.85 %) - leafs setup took 2.146e-06 s (1.12 %) - halfword conversion took 1.478e-05 s (7.74 %) -Built (100 x 100)[0x57a650e0]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 22, symflags:'LS' + converted COO to RSB in 9.987e-02 s (100.00 %) + analyzed arrays in 3.982e-02 s (39.88 %) + cleaned-up arrays in 2.003e-05 s (0.02 %) + deduplicated arrays in 1.788e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.200e-02 s (32.05 %) + memory allocations took 1.216e-05 s (0.01 %) + leafs setup took 6.199e-06 s (0.01 %) + halfword conversion took 2.798e-02 s (28.02 %) +Built (100 x 100)[0x1870070]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 24, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -6996,11 +7020,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7011,11 +7035,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7025,16 +7049,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0003028s; avg 0.0001009s ( +/- 1.97/ 2.99 %); best 9.894e-05s; worst 0.000104s; std dev. 2.171e-06 (taking best). -Reference operation time is 9.89437e-05 s (816.6 Mflops) with 1 threads. -Challenging best inner round reference (5.81741e-05 s/1 threads) with: subdivision 1, 22 leaves, 2.295 bytes/nz, 9.89437e-05 s/0 threads (speedup 0.587952 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 30 subms, 22 lsubms, 2.2947 bpnz +3 iterations (1 th.) took 0.04798s; avg 0.01599s ( +/- 2.25/ 2.23 %); best 0.01564s; worst 0.01635s; std dev. 0.0002923 (taking best). +Reference operation time is 0.015635 s (5.168 Mflops) with 1 threads. +Challenging best inner round reference (6.38962e-05 s/1 threads) with: subdivision 1, 24 leaves, 2.306 bytes/nz, 0.015635 s/0 threads (speedup 0.00408674 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 33 subms, 24 lsubms, 2.3057 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7045,12 +7069,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7061,22 +7085,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 2.849e-04 s (100.00 %) - analyzed arrays in 1.428e-04 s (50.13 %) - cleaned-up arrays in 1.216e-05 s (4.27 %) - deduplicated arrays in 1.287e-05 s (4.52 %) + converted COO to RSB in 1.075e-01 s (100.00 %) + analyzed arrays in 3.946e-02 s (36.70 %) + cleaned-up arrays in 1.979e-05 s (0.02 %) + deduplicated arrays in 1.717e-05 s (0.02 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.817e-05 s (20.42 %) - memory allocations took 4.101e-05 s (14.39 %) - leafs setup took 2.861e-06 s (1.00 %) - halfword conversion took 1.502e-05 s (5.27 %) -Built (100 x 100)[0x57a20130]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 49, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + shuffled partitions in 3.600e-02 s (33.48 %) + memory allocations took 1.693e-05 s (0.02 %) + leafs setup took 7.868e-06 s (0.01 %) + halfword conversion took 3.200e-02 s (29.76 %) +Built (100 x 100)[0x189a370]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7087,11 +7111,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7102,11 +7126,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7116,16 +7140,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0003431s; avg 0.0001144s ( +/- 2.02/ 3.20 %); best 0.0001121s; worst 0.000118s; std dev. 2.614e-06 (taking best). -Reference operation time is 0.000112057 s (721.1 Mflops) with 1 threads. -Challenging best inner round reference (5.81741e-05 s/1 threads) with: subdivision 2, 49 leaves, 2.433 bytes/nz, 0.000112057 s/0 threads (speedup 0.519149 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 67 subms, 49 lsubms, 2.4333 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +3 iterations (1 th.) took 0.0476s; avg 0.01587s ( +/- 1.65/ 0.84 %); best 0.0156s; worst 0.016s; std dev. 0.0001856 (taking best). +Reference operation time is 0.0156028 s (5.179 Mflops) with 1 threads. +Challenging best inner round reference (6.38962e-05 s/1 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 0.0156028 s/0 threads (speedup 0.00409517 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7136,12 +7160,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7152,22 +7176,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 4.010e-04 s (100.00 %) - analyzed arrays in 1.869e-04 s (46.61 %) - cleaned-up arrays in 1.287e-05 s (3.21 %) - deduplicated arrays in 1.311e-05 s (3.27 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.180e-04 s (29.43 %) - memory allocations took 4.506e-05 s (11.24 %) - leafs setup took 7.153e-06 s (1.78 %) - halfword conversion took 1.597e-05 s (3.98 %) -Built (100 x 100)[0x57a20130]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 102, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 9.987e-02 s (100.00 %) + analyzed arrays in 3.582e-02 s (35.87 %) + cleaned-up arrays in 2.003e-05 s (0.02 %) + deduplicated arrays in 1.693e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.202e-02 s (32.06 %) + memory allocations took 1.431e-05 s (0.01 %) + leafs setup took 8.106e-06 s (0.01 %) + halfword conversion took 3.196e-02 s (32.01 %) +Built (100 x 100)[0x18ad440]{C} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7178,11 +7202,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7193,11 +7217,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7207,17 +7231,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0004101s; avg 0.0001367s ( +/- 1.10/ 1.69 %); best 0.0001352s; worst 0.000139s; std dev. 1.656e-06 (taking best). -Reference operation time is 0.000135183 s (597.7 Mflops) with 1 threads. -Challenging best inner round reference (5.81741e-05 s/1 threads) with: subdivision 4, 102 leaves, 2.642 bytes/nz, 0.000135183 s/0 threads (speedup 0.430335 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 138 subms, 102 lsubms, 2.6424 bpnz -Best sparse multiply performance with subdivision multiplier of 1: 1388.93 Mflops. +3 iterations (1 th.) took 0.05167s; avg 0.01722s ( +/- 9.00/ 16.08 %); best 0.01567s; worst 0.01999s; std dev. 0.001963 (taking best). +Reference operation time is 0.0156739 s (5.155 Mflops) with 1 threads. +Challenging best inner round reference (6.38962e-05 s/1 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 0.0156739 s/0 threads (speedup 0.0040766 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type C, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +Best sparse multiply performance with subdivision multiplier of 1: 1264.55 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7227,29 +7251,29 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.00372815 s (eq. to 6e+01/ 6e+01 old/new op.times), gained local/global speedup 1 x (5.81741e-05 : 5.81741e-05) / 1 x (5.81741e-05 : 5.81741e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.799827 s (eq. to 1e+04/ 1e+04 old/new op.times), gained local/global speedup 1 x (6.38962e-05 : 6.38962e-05) / 1 x (6.38962e-05 : 6.38962e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.0037s, 0.0017s for constructor, 0 clones) obtained NO speedup (best stays 1389 Mflops). -Second run of RSB Autotuner took 0.00380611 s and estimated a speedup of 1.000000 x (5.817e-05 s -> 5.817e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.8s, 0.55s for constructor, 0 clones) obtained NO speedup (best stays 1265 Mflops). +Second run of RSB Autotuner took 0.799895 s and estimated a speedup of 1.000000 x (6.390e-05 s -> 6.390e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000000 0.000026 0.000043 0.000069 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000069 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000026 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000043 -%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000000 -%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000069 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000001 0.015381 0.063999 0.079380 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.079381 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.015381 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063999 +%:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000001 +%:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.000 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.079380 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SUBDIVISION_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:RSB_SHUFFLE_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 -%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 -nan 1.00 1.00 1.00 +%:CONSTRUCTOR_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 1.00 1.00 1.00 #%:SM_COUNTS: Tot HalfwordCsr FullwordCsr HalfwordCoo FullwordCoo %:SM_COUNTS:lower-100x100-5050nz S N 1 100 100 5050 1 1 0 0 0 %:SM_IDXOCCUPATIONRSBVSCOOANDCSR:lower-100x100-5050nz S N 1 100 100 5050 10504 40400 20600 @@ -7258,45 +7282,45 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.00011611 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.111417 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 0 2.59876e-05 0 4.31538e-05 -# so far, program took 6.990s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.038s/0.000s . +%constructor:lower-100x100-5050nz 9.53674e-07 0.0153811 0 0.0639989 +# so far, program took 9.165s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.988s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.1135s (system CPU time used) -ru_utime : 8.246s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.0596s (system CPU time used) +ru_utime : 27.81s (user CPU time used) # multi-type benchmarking (DSCZ) -- now using typecode Z (last was D). -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# so far, program took 6.990s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.038s/0.000s . +# Cache block size total 524288 bytes, per-thread 524288 bytes +# so far, program took 9.166s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 2.988s/0.000s . # Using 1 threads # Using alpha=1 beta=1 order=cols for rsb_spmv/rsb_spsv/rsb_spmm/rsb_spsm. # will use input matrix flags: RSB_FLAG_USE_HALFWORD_INDICES, RSB_FLAG_SORTED_INPUT, RSB_FLAG_LOWER, RSB_FLAG_QUAD_PARTITIONING, RSB_FLAG_SYMMETRIC, RSB_FLAG_OWN_PARTITIONING_ARRAYS # Using 1 threads Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.569e-04 s (100.00 %) - analyzed arrays in 2.503e-05 s (15.96 %) - cleaned-up arrays in 1.192e-05 s (7.60 %) - deduplicated arrays in 1.287e-05 s (8.21 %) - sorted arrays in 1.192e-06 s (0.76 %) - shuffled partitions in 8.202e-05 s (52.28 %) - memory allocations took 6.914e-06 s (4.41 %) - leafs setup took 9.537e-07 s (0.61 %) - halfword conversion took 1.597e-05 s (10.18 %) -Built (100 x 100)[0x57a86cd0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' -# Constructed matrix (took 0.000s): (100 x 100)[0x57a86cd0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' + converted COO to RSB in 1.153e-01 s (100.00 %) + analyzed arrays in 1.969e-02 s (17.08 %) + cleaned-up arrays in 2.217e-05 s (0.02 %) + deduplicated arrays in 1.788e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 6.354e-02 s (55.12 %) + memory allocations took 1.097e-05 s (0.01 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.199e-02 s (27.75 %) +Built (100 x 100)[0x185f640]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' +# Constructed matrix (took 0.115s): (100 x 100)[0x185f640]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x2446196 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LS' # matrix consistency check took 0.000s (ok) RSB Sparse Blocks Autotuner invoked requesting max 6 splits and max 6 merges in 1 rounds, threads spec.0 (specify negative values to enable threads tuning). Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz. Parameters: verbosity:2 mintimes:3 maxtimes:10 mindt:0 maxdt:3 Saved plot to test-tuning-lower-100x100-5050nz--Z-N-1--base.eps # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7307,11 +7331,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7322,11 +7346,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7336,16 +7360,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000308s; avg 0.0001027s ( +/- 6.66/ 13.08 %); best 9.584e-05s; worst 0.0001161s; std dev. 9.498e-06 (taking best). -Reference operation time is 9.58443e-05 s (843 Mflops) with 1 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 9.584e-05 Mflops: 843.034) -Merge (3 -> 1 leaves) took w.c.t. of 6.318e-05s, ~5.412e-05s of computing time (of which 2.909e-05s sorting, 2.146e-06s analysis) +3 iterations (1 th.) took 0.04768s; avg 0.01589s ( +/- 1.29/ 0.65 %); best 0.01569s; worst 0.016s; std dev. 0.000145 (taking best). +Reference operation time is 0.0156879 s (5.15 Mflops) with 1 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 4 subms, 3 lsubms, 2.1212 bpnz (tpop: 0.01569 Mflops: 5.150) +Merge (3 -> 1 leaves) took w.c.t. of 0.000164s, ~0.0001218s of computing time (of which 5.698e-05s sorting, 5.96e-06s analysis) # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7356,11 +7380,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7370,14 +7394,14 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0001729s; avg 5.762e-05s ( +/- 1.10/ 0.55 %); best 5.698e-05s; worst 5.794e-05s; std dev. 4.496e-07 (taking best). -Reference operation time is 5.6982e-05 s (1418 Mflops) with 1 threads. +3 iterations (1 th.) took 0.0001729s; avg 5.762e-05s ( +/- 2.76/ 3.86 %); best 5.603e-05s; worst 5.984e-05s; std dev. 1.621e-06 (taking best). +Reference operation time is 5.60284e-05 s (1442 Mflops) with 1 threads. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7387,25 +7411,25 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -After merge step 1: tpop: 5.698e-05 s ~Mflops: 1417.991 nsubm:1 otn:1 -Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 1.682x: 9.584e-05s -> 5.698e-05s, so taking this instance. +After merge step 1: tpop: 5.603e-05 s ~Mflops: 1442.127 nsubm:1 otn:1 +Applying merge (3 -> 1 leaves, 1 th.) yielded SPEEDUP of 280.000x: 0.01569s -> 5.603e-05s, so taking this instance. Saved plot to test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.004442s (of which 6.7e-05s partitioning, 0.004038s I/O); computing times: 5.412e-05s in par. loops, 2.909e-05s sorting, 2.146e-06s analyzing) -Total merge + benchmarking process took 0.004442s, equivalent to 78.0/46.3 new/old ops (0.0001621s for 2 clones -- as 2.8/1.7 ops, or 1.4/0.8 ops per clone), SPEEDUP of 1.682x -Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 1.682x (9.584e-05s -> 5.698e-05s), will amortize in 114.3 ops by saving 3.886e-05s per op. -In 1 tuning rounds (tot. 0.0049s, 0.00016s for constructor, 2 clones) obtained a SPEEDUP of 68.2% (1.682x) (from 843 to 1418 Mflops). Employed 0.0041s for I/O of matrix plots. +A total of 1 merge steps (of max 6) (3 -> 1 subms) took 0.1441s (of which 0.00017s partitioning, 0.09608s I/O); computing times: 0.0001218s in par. loops, 5.698e-05s sorting, 5.96e-06s analyzing) +Total merge + benchmarking process took 0.1441s, equivalent to 2571.5/9.2 new/old ops (0.09539s for 2 clones -- as 1702.5/6.1 ops, or 851.2/3.0 ops per clone), SPEEDUP of 280.000x +Applying multi-merge (3 -> 1 leaves, 1 steps, 0 -> 1 th.sp.) yielded SPEEDUP of 280.000x (0.01569s -> 5.603e-05s), will amortize in 9.2 ops by saving 0.01563s per op. +In 1 tuning rounds (tot. 0.24s, 0.095s for constructor, 2 clones) obtained a SPEEDUP of 27900.0% (280x) (from 5.15 to 1442 Mflops). Employed 0.096s for I/O of matrix plots. #pr: updating sample at index 4 (3^th of 4), 0^th touch for (0,0,0,0,0,3,0). -First run of RSB Autotuner took 0.00906682 s (9.584e-05 s -> 5.698e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). +First run of RSB Autotuner took 0.336404 s (1.569e-02 s -> 5.603e-05 s per spmv_sxsa) (tuned: 3 -> 1 lsubm). RSB Sparse Blocks Autotuner invoked requesting max 0 splits and max 0 merges in 1 rounds, auto threads spec. Will autotune matrix: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 1 subms, 1 lsubms, 2.0800 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:10 # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7416,11 +7440,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7431,13 +7455,13 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success Started tuning inner round: will search for an optimal matrix instance. -Starting with requested 0 threads ; current default 1 ; at most 18. +Starting with requested 0 threads ; current default 1 ; at most 10. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7448,11 +7472,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7462,27 +7486,27 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000175s; avg 5.833e-05s ( +/- 2.32/ 2.59 %); best 5.698e-05s; worst 5.984e-05s; std dev. 1.173e-06 (taking best). +3 iterations (1 th.) took 0.00018s; avg 6e-05s ( +/- 5.03/ 9.67 %); best 5.698e-05s; worst 6.58e-05s; std dev. 4.103e-06 (taking best). Reference operation time is 5.6982e-05 s (1418 Mflops) with 1 threads. Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 7.391e-05 s (100.00 %) - analyzed arrays in 1.812e-05 s (24.52 %) - cleaned-up arrays in 1.216e-05 s (16.45 %) - deduplicated arrays in 1.287e-05 s (17.42 %) - sorted arrays in 9.537e-07 s (1.29 %) - shuffled partitions in 1.907e-05 s (25.81 %) - memory allocations took 3.815e-06 s (5.16 %) - leafs setup took 9.537e-07 s (1.29 %) - halfword conversion took 5.007e-06 s (6.77 %) -Built (100 x 100)[0x57a8e470]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' -Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 18). -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 4.641e-02 s (100.00 %) + analyzed arrays in 1.437e-02 s (30.96 %) + cleaned-up arrays in 1.907e-05 s (0.04 %) + deduplicated arrays in 1.597e-05 s (0.03 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.198e-02 s (68.91 %) + memory allocations took 9.060e-06 s (0.02 %) + leafs setup took 3.815e-06 s (0.01 %) + halfword conversion took 9.060e-06 s (0.02 %) +Built (100 x 100)[0x1876c20]{Z} @ (0(0..100),0(0..100)) (5050 nnz, 50 nnz/r) flags 0x42644094 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'LS' +Starting autotuning stage, with subdivision of 1 (current threads=1, requested threads=0, max threads = 10). +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7493,12 +7517,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7509,22 +7533,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.252e-04 s (100.00 %) - analyzed arrays in 4.101e-05 s (32.76 %) - cleaned-up arrays in 1.192e-05 s (9.52 %) - deduplicated arrays in 1.311e-05 s (10.48 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.695e-05 s (29.52 %) - memory allocations took 4.053e-06 s (3.24 %) - leafs setup took 1.192e-06 s (0.95 %) - halfword conversion took 1.597e-05 s (12.76 %) -Built (100 x 100)[0x57a6d390]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 8.680e-02 s (100.00 %) + analyzed arrays in 2.275e-02 s (26.22 %) + cleaned-up arrays in 1.907e-05 s (0.02 %) + deduplicated arrays in 1.597e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.202e-02 s (36.89 %) + memory allocations took 1.097e-05 s (0.01 %) + leafs setup took 5.007e-06 s (0.01 %) + halfword conversion took 3.197e-02 s (36.83 %) +Built (100 x 100)[0x1870d60]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 6, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7535,11 +7559,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7550,11 +7574,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7564,16 +7588,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.25 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0002859s; avg 9.529e-05s ( +/- 3.42/ 5.84 %); best 9.203e-05s; worst 0.0001009s; std dev. 3.953e-06 (taking best). -Reference operation time is 9.20296e-05 s (878 Mflops) with 1 threads. -Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 0.25, 6 leaves, 2.163 bytes/nz, 9.20296e-05 s/0 threads (speedup 0.619171 x), same?n. +3 iterations (1 th.) took 0.04828s; avg 0.01609s ( +/- 1.00/ 1.60 %); best 0.01593s; worst 0.01635s; std dev. 0.0001841 (taking best). +Reference operation time is 0.0159318 s (5.072 Mflops) with 1 threads. +Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 0.25, 6 leaves, 2.163 bytes/nz, 0.0159318 s/0 threads (speedup 0.00357661 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 8 subms, 6 lsubms, 2.1632 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7584,12 +7608,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7600,22 +7624,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.400e-04 s (100.00 %) - analyzed arrays in 5.507e-05 s (39.35 %) - cleaned-up arrays in 1.311e-05 s (9.37 %) - deduplicated arrays in 1.407e-05 s (10.05 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.695e-05 s (26.41 %) - memory allocations took 4.768e-06 s (3.41 %) - leafs setup took 9.537e-07 s (0.68 %) - halfword conversion took 1.502e-05 s (10.73 %) -Built (100 x 100)[0x57a650e0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 16, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 1.024e-01 s (100.00 %) + analyzed arrays in 3.831e-02 s (37.43 %) + cleaned-up arrays in 3.004e-05 s (0.03 %) + deduplicated arrays in 2.003e-05 s (0.02 %) + sorted arrays in 9.537e-07 s (0.00 %) + shuffled partitions in 3.201e-02 s (31.27 %) + memory allocations took 1.097e-05 s (0.01 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.197e-02 s (31.23 %) +Built (100 x 100)[0x1870d60]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 16, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7626,11 +7650,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7641,11 +7665,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7655,16 +7679,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 0.5 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0003321s; avg 0.0001107s ( +/- 2.44/ 3.80 %); best 0.000108s; worst 0.0001149s; std dev. 3.018e-06 (taking best). -Reference operation time is 0.000108004 s (748.1 Mflops) with 1 threads. -Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 0.5, 16 leaves, 2.25 bytes/nz, 0.000108004 s/0 threads (speedup 0.527594 x), same?n. +3 iterations (1 th.) took 0.04794s; avg 0.01598s ( +/- 0.24/ 0.13 %); best 0.01594s; worst 0.016s; std dev. 2.717e-05 (taking best). +Reference operation time is 0.0159409 s (5.069 Mflops) with 1 threads. +Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 0.5, 16 leaves, 2.25 bytes/nz, 0.0159409 s/0 threads (speedup 0.00357458 x), same?n. New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 22 subms, 16 lsubms, 2.2503 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7675,12 +7699,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7691,22 +7715,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 1.769e-04 s (100.00 %) - analyzed arrays in 7.510e-05 s (42.45 %) - cleaned-up arrays in 1.192e-05 s (6.74 %) - deduplicated arrays in 1.407e-05 s (7.95 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 5.102e-05 s (28.84 %) - memory allocations took 5.960e-06 s (3.37 %) - leafs setup took 3.815e-06 s (2.16 %) - halfword conversion took 1.502e-05 s (8.49 %) -Built (100 x 100)[0x57a40190]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 34, symflags:'LS' + converted COO to RSB in 1.105e-01 s (100.00 %) + analyzed arrays in 3.846e-02 s (34.81 %) + cleaned-up arrays in 2.098e-05 s (0.02 %) + deduplicated arrays in 1.884e-05 s (0.02 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 3.203e-02 s (28.99 %) + memory allocations took 1.502e-05 s (0.01 %) + leafs setup took 8.106e-06 s (0.01 %) + halfword conversion took 3.994e-02 s (36.15 %) +Built (100 x 100)[0x18cfcb0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7717,11 +7741,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7732,11 +7756,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7746,16 +7770,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0003631s; avg 0.000121s ( +/- 2.50/ 4.20 %); best 0.000118s; worst 0.0001261s; std dev. 3.618e-06 (taking best). -Reference operation time is 0.000118017 s (684.6 Mflops) with 1 threads. -Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 1, 34 leaves, 2.343 bytes/nz, 0.000118017 s/0 threads (speedup 0.482828 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 47 subms, 34 lsubms, 2.3430 bpnz +3 iterations (1 th.) took 0.04393s; avg 0.01464s ( +/- 45.84/ 36.50 %); best 0.007931s; worst 0.01999s; std dev. 0.005017 (taking best). +Reference operation time is 0.00793099 s (10.19 Mflops) with 1 threads. +Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 1, 36 leaves, 2.383 bytes/nz, 0.00793099 s/0 threads (speedup 0.00718473 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7766,12 +7790,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7782,22 +7806,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 2.542e-04 s (100.00 %) - analyzed arrays in 1.142e-04 s (44.93 %) - cleaned-up arrays in 1.311e-05 s (5.16 %) - deduplicated arrays in 1.287e-05 s (5.07 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 8.798e-05 s (34.62 %) - memory allocations took 4.053e-06 s (1.59 %) - leafs setup took 5.007e-06 s (1.97 %) - halfword conversion took 1.597e-05 s (6.29 %) -Built (100 x 100)[0x57a40190]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 72, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + converted COO to RSB in 9.891e-02 s (100.00 %) + analyzed arrays in 3.487e-02 s (35.26 %) + cleaned-up arrays in 1.884e-05 s (0.02 %) + deduplicated arrays in 1.597e-05 s (0.02 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 4.000e-02 s (40.44 %) + memory allocations took 1.812e-05 s (0.02 %) + leafs setup took 9.060e-06 s (0.01 %) + halfword conversion took 2.397e-02 s (24.24 %) +Built (100 x 100)[0x18cfcb0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7808,11 +7832,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7823,11 +7847,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7837,16 +7861,16 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 2 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.000406s; avg 0.0001353s ( +/- 1.70/ 3.41 %); best 0.000133s; worst 0.00014s; std dev. 3.259e-06 (taking best). -Reference operation time is 0.000133038 s (607.3 Mflops) with 1 threads. -Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 2, 72 leaves, 2.531 bytes/nz, 0.000133038 s/0 threads (speedup 0.428315 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 98 subms, 72 lsubms, 2.5315 bpnz -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +3 iterations (1 th.) took 0.04794s; avg 0.01598s ( +/- 24.87/ 25.17 %); best 0.01201s; worst 0.02s; std dev. 0.003264 (taking best). +Reference operation time is 0.012007 s (6.729 Mflops) with 1 threads. +Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 2, 36 leaves, 2.383 bytes/nz, 0.012007 s/0 threads (speedup 0.00474574 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7857,12 +7881,12 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 233016 bytes +# Cache block size total 524288 bytes, per-thread 52428 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 -# RSB_IO_WANT_EXECUTING_THREADS: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 +# RSB_IO_WANT_EXECUTING_THREADS: 10 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout # RSB_IO_WANT_VERBOSE_ERRORS: stderr @@ -7873,22 +7897,22 @@ # librsb version 1.3.0.2 - 202212201855: Initialization success Building a matrix with 5050 nnz, 100 x 100 Duplicates check: 5050 - 0 = 5050 - converted COO to RSB in 3.519e-04 s (100.00 %) - analyzed arrays in 1.612e-04 s (45.80 %) - cleaned-up arrays in 1.287e-05 s (3.66 %) - deduplicated arrays in 1.311e-05 s (3.73 %) + converted COO to RSB in 1.025e-01 s (100.00 %) + analyzed arrays in 3.844e-02 s (37.50 %) + cleaned-up arrays in 1.812e-05 s (0.02 %) + deduplicated arrays in 1.478e-05 s (0.01 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 1.330e-04 s (37.80 %) - memory allocations took 5.960e-06 s (1.69 %) - leafs setup took 6.914e-06 s (1.96 %) - halfword conversion took 1.788e-05 s (5.08 %) -Built (100 x 100)[0x57a70220]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 120, symflags:'LS' -# librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes -# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 + shuffled partitions in 3.201e-02 s (31.23 %) + memory allocations took 2.503e-05 s (0.02 %) + leafs setup took 9.060e-06 s (0.01 %) + halfword conversion took 3.198e-02 s (31.20 %) +Built (100 x 100)[0x18cfcb0]{Z} @ (0(0..0),0(0..0)) (5050 nnz, 50 nnz/r) flags 0x42646096 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 36, symflags:'LS' +# librsb version 1.3.0.2 - 202212201855: Initializing +# Cache block size total 524288 bytes, per-thread 524288 bytes +# RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7899,11 +7923,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7914,11 +7938,11 @@ # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7928,17 +7952,17 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 4 # librsb version 1.3.0.2 - 202212201855: Initialization success -3 iterations (1 th.) took 0.0004601s; avg 0.0001534s ( +/- 2.23/ 3.06 %); best 0.00015s; worst 0.0001581s; std dev. 3.429e-06 (taking best). -Reference operation time is 0.000149965 s (538.8 Mflops) with 1 threads. -Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 4, 120 leaves, 2.726 bytes/nz, 0.000149965 s/0 threads (speedup 0.379968 x), same?n. -New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 162 subms, 120 lsubms, 2.7255 bpnz +3 iterations (1 th.) took 0.04795s; avg 0.01598s ( +/- 0.12/ 0.06 %); best 0.01596s; worst 0.01599s; std dev. 1.366e-05 (taking best). +Reference operation time is 0.015964 s (5.061 Mflops) with 1 threads. +Challenging best inner round reference (5.6982e-05 s/1 threads) with: subdivision 4, 36 leaves, 2.383 bytes/nz, 0.015964 s/0 threads (speedup 0.0035694 x), same?n. +New candidate clone performs slowly; discarding it: 100 x 100, type Z, 5050 nnz, 50 nnz/r, 50 subms, 36 lsubms, 2.3834 bpnz Best sparse multiply performance with subdivision multiplier of 1: 1417.99 Mflops. # librsb version 1.3.0.2 - 202212201855: Initializing -# Cache block size total 4194304 bytes, per-thread 4194304 bytes +# Cache block size total 524288 bytes, per-thread 524288 bytes # RSB_IO_WANT_MEMORY_HIERARCHY_INFO_STRING: unset -# min_leaf_matrix_bytes : 32768 -# avg_leaf_matrix_bytes : 8388608 -# rsb_g_threads: 18 +# min_leaf_matrix_bytes : 65536 +# avg_leaf_matrix_bytes : 1048576 +# rsb_g_threads: 10 # RSB_IO_WANT_EXECUTING_THREADS: 1 # RSB_WANT_RSBPP: 1 # RSB_IO_WANT_OUTPUT_STREAM: stdout @@ -7948,23 +7972,23 @@ # RSB_IO_WANT_SORT_METHOD: 0 # RSB_IO_WANT_SUBDIVISION_MULTIPLIER: 1 # librsb version 1.3.0.2 - 202212201855: Initialization success -Last tuner inner round (1 of 1) took 0.00564814 s (eq. to 1e+02/ 1e+02 old/new op.times), gained local/global speedup 1 x (5.6982e-05 : 5.6982e-05) / 1 x (5.6982e-05 : 5.6982e-05). This is not amortizable ! +Last tuner inner round (1 of 1) took 0.791984 s (eq. to 1e+04/ 1e+04 old/new op.times), gained local/global speedup 1 x (5.6982e-05 : 5.6982e-05) / 1 x (5.6982e-05 : 5.6982e-05). This is not amortizable ! Auto tuning inner round 1 did not find a configuration better than the original. -In 1 tuning rounds (tot. 0.0056s, 0.0029s for constructor, 0 clones) obtained NO speedup (best stays 1418 Mflops). -Second run of RSB Autotuner took 0.00568199 s and estimated a speedup of 1.000000 x (5.698e-05 s -> 5.698e-05 s per op) in same matrix (1 -> 1 lsubm) +In 1 tuning rounds (tot. 0.79s, 0.55s for constructor, 0 clones) obtained NO speedup (best stays 1418 Mflops). +Second run of RSB Autotuner took 0.792052 s and estimated a speedup of 1.000000 x (5.698e-05 s -> 5.698e-05 s per op) in same matrix (1 -> 1 lsubm) #min:1 0 #max:1 0 #sum:100 0 #norm:10 0 #used index storage compared to COO:10504 vs 40400 bytes (26.00%) ; compared to CSR:10504 vs 20604 bytes (50.99%) #%:CONSTRUCTOR_*:SORT SCAN INSERT SCAN+INSERT -%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000001 0.000025 0.000082 0.000107 -%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000108 -%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000025 -%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000082 +%:CONSTRUCTOR_TIMES:lower-100x100-5050nz S N 1 100 100 5050 0.000001 0.019685 0.063541 0.083226 +%:UNSORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.083227 +%:RSB_SUBDIVISION_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.019685 +%:RSB_SHUFFLE_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.063541 %:ROW_MAJOR_SORT_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000001 %:ROW_MAJOR_SORT_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.000 -%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.000107 +%:SORTEDCOO2RSB_TIME:lower-100x100-5050nz S N 1 100 100 5050 0.083226 %:ROW_MAJOR_SORT_TO_MOP:lower-100x100-5050nz S N 1 100 100 5050 0.000 %:UNSORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 %:SORTEDCOO2RSB_SCALING:lower-100x100-5050nz S N 1 100 100 5050 1.00 @@ -7979,68 +8003,68 @@ %:SM_MINMAXAVGNNZ:lower-100x100-5050nz S N 1 100 100 5050 5050 5050 5050 # %operation:matrix CONSTRUCTOR[1] SPMV[1] SPMV[1] -%operation:lower-100x100-5050nz 0.000156879 1e+09 1e+09 +%operation:lower-100x100-5050nz 0.115277 1e+09 1e+09 %constructor:matrix SORT[1] SCAN[1] SHUFFLE[1] INSERT[1] -%constructor:lower-100x100-5050nz 1.19209e-06 2.5034e-05 0 8.2016e-05 -# so far, program took 7.024s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 0.053s/0.000s . +%constructor:lower-100x100-5050nz 9.53674e-07 0.019685 0 0.0635412 +# so far, program took 10.489s of wall clock time; ancillary tests 0.000s; I/O 0.000s; checks 0.000s; conversions 0.000s; rsb/mkl tuning 4.117s/0.000s . getrusage() stats: -ru_maxrss: 27 (maximum resident set size -- MB) -ru_stime : 0.1294s (system CPU time used) -ru_utime : 8.787s (user CPU time used) +ru_maxrss: 9 (maximum resident set size -- MB) +ru_stime : 0.0596s (system CPU time used) +ru_utime : 36.06s (user CPU time used) # benchmarking terminated --- finalizing run. # ====== BEGIN Total summary record. #pr: ======== All results (not limiting) #pr: Dump from a base of 4 samples (of max 4) ordered by (1,1,1,1,1,4,1) = (filename x cores x incX x incY x nrhs x typecode x transA). pr: BESTCODE MTX NR NC NNZ NRHS TYPE SYM TRANS NT AT-NT AT-MKL-NT BPNZ AT-BPNZ NSUBM AT-SUBM RSBBEST-MFLOPS OPTIME MKL-OPTIME AT-OPTIME AT-MKL-OPTIME AT-TIME RWminBW-GBps CB-bpf AT-MS CMFLOPS -pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 1841.85 3.195e-05 0.000e+00 1.097e-05 0.000e+00 1.220e-02 4.86e+00 2.60e+00 1 2.02e-02 -pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 1694.50 3.099e-05 0.000e+00 1.192e-05 0.000e+00 7.998e-03 2.68e+00 1.56e+00 1 2.02e-02 -pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 1388.93 8.607e-05 0.000e+00 5.817e-05 0.000e+00 8.485e-03 9.16e-01 6.50e-01 1 8.08e-02 -pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 1417.99 9.584e-05 0.000e+00 5.698e-05 0.000e+00 9.067e-03 1.69e+00 1.17e+00 1 8.08e-02 +pr: 1:R_R lower-100x100-5050nz 100 100 5050 1 D S N 1 1 0 2.1212 2.0800 3 1 1344.84 1.597e-02 0.000e+00 1.502e-05 0.000e+00 3.363e-01 3.55e+00 2.60e+00 1 2.02e-02 +pr: 2:R_R lower-100x100-5050nz 100 100 5050 1 S S N 1 1 0 2.1212 2.0800 3 1 1264.55 1.596e-02 0.000e+00 1.597e-05 0.000e+00 3.283e-01 2.00e+00 1.56e+00 1 2.02e-02 +pr: 3:R_R lower-100x100-5050nz 100 100 5050 1 C S N 1 1 0 2.1212 2.0800 3 1 1442.13 1.598e-02 0.000e+00 5.603e-05 0.000e+00 3.361e-01 9.51e-01 6.50e-01 1 8.08e-02 +pr: 4:R_R lower-100x100-5050nz 100 100 5050 1 Z S N 1 1 0 2.1212 2.0800 3 1 1442.13 1.569e-02 0.000e+00 5.603e-05 0.000e+00 3.364e-01 1.72e+00 1.17e+00 1 8.08e-02 #pr: below, we define 'successful' autotuning when speedup of 1.010000x is exceeded, and 'tuned' results even the ones which are same as untuned #pr: rsb autotuning was successful in 4 cases (100.00 %) and unsuccessful in 0 cases (0.00 %) -#pr: (in succ. cases rsb autotuning gave avg. 116.9 % faster, avg. sp. ratio 2.169x, max sp. ratio 2.913x, avg. ratio 0.000x) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 522.2/145.9/1112.8/2088.7 tuned ops) -#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 208.3/ 94.6/382.0/833.3 untuned ops) -#pr: (and amortizes from untuned rsb in avg. 384.6, min. 233.3, max. 581.7 ops) +#pr: (in succ. cases rsb autotuning gave avg. 65602.5 % faster, avg. sp. ratio 657.025x, max sp. ratio 1063.556x, avg. ratio 0.000x) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 13737.6/5999.0/22392.9/54950.4 tuned ops) +#pr: (in succ. cases rsb autotuning took an avg/min/max/tot of: 21.0/ 20.6/ 21.4/ 84.1 untuned ops) +#pr: (and amortizes from untuned rsb in avg. 21.1, min. 20.6, max. 21.5 ops) #pr: (avg/min/max (avg) nnz per subm before successful tuning were 1683/ 1683/ 1683) #pr: (avg/min/max (avg) nnz per subm after successful tuning were 5050/ 5050/ 5050) #pr: (avg/min/max (avg) bytes per subm before successful tuning were 15150/ 6733/ 26933) #pr: (avg/min/max (avg) bytes per subm after successful tuning were 45450/ 20200/ 80800) #pr: (avg/min/max (avg) bytes per nnz before successful tuning were 2.121/ 2.121/ 2.121) -#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.498/ 0.903/ 4.787,GBps) -#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 10.139/ 0.916/ 4.860,GBps) +#pr: (avg/min/max operands (mtx,lhs,rhs) read bandwidth lower bound 2.023/ 0.937/ 3.496,GBps) +#pr: (avg/min/max operands (mtx,rhs:r;lhs:rw) bandwidth lower bound 8.213/ 0.951/ 3.549,GBps) #pr: (avg/min/max code balance (bytes read at least once per flop) 1.495/ 0.650/ 2.599) #pr: (avg/min/max (avg) bytes per nnz after successful tuning were 2.080/ 2.080/ 2.080) #pr: (matrix has been subdivided more/less/same in resp. 0 / 4 /0 cases) #pr: (matrix has used more/less/same threads in resp. 0 / 0 /4 cases) #pr: no unsuccessful rsb autotuning attempt (according to 1.01x threshold) -#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.01 s, min 0.01 s, max 0.01 s, tot 0.04 s (4 samples) -#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.01 s, min 0.01 s, max 0.01 s, tot 0.04 s (4 samples) -#pr: best tun. rsb canon. mflops were: on avg. 1.586e+03, min 1.389e+03, max 1.842e+03 (4 samples) -#pr: ref. unt. rsb canon. mflops were: on avg. 7.665e+02, min 6.323e+02, max 9.388e+02 (4 samples) -#pr: best tun. rsb operation time was: on avg. 3.451e-05s, min 1.097e-05s, max 5.817e-05s, tot 1.380e-04s (4 samples) -#pr: ref. unt. rsb operation time was: on avg. 6.121e-05s, min 3.099e-05s, max 9.584e-05s, tot 2.449e-04s (4 samples) -#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 4.663e+00 2.473e+01 -#pr: in-cache to in-memory MEMSET bandwidth ratio: 3.893e+00 -#pr: Record collection took 0.10 s. +#pr: rsb auto tuning (either succ. or uns.) time was: on avg.: 0.33 s, min 0.33 s, max 0.34 s, tot 1.34 s (4 samples) +#pr: rsb auto tuning ( only successful ) time was: on avg.: 0.33 s, min 0.33 s, max 0.34 s, tot 1.34 s (4 samples) +#pr: best tun. rsb canon. mflops were: on avg. 1.373e+03, min 1.265e+03, max 1.442e+03 (4 samples) +#pr: ref. unt. rsb canon. mflops were: on avg. 3.184e+00, min 1.264e+00, max 5.150e+00 (4 samples) +#pr: best tun. rsb operation time was: on avg. 3.576e-05s, min 1.502e-05s, max 5.603e-05s, tot 1.431e-04s (4 samples) +#pr: ref. unt. rsb operation time was: on avg. 1.590e-02s, min 1.569e-02s, max 1.598e-02s, tot 6.361e-02s (4 samples) +#pr: min / max ratio of in-memory MEMSET bandwidth to extrapolated read bandwidth ratio: 2.221e+00 8.286e+00 +#pr: in-cache to in-memory MEMSET bandwidth ratio: 1.826e+00 +#pr: Record collection took 3.96 s. #pr: Record comprises 40 memory benchmark samples (prepend RSB_PR_MBW=1 to dump this). -#pr: Record comprises 89 environment variables in 3867 bytes (prepend RSB_PR_ENV=1 to dump this). +#pr: Record comprises 89 environment variables in 3921 bytes (prepend RSB_PR_ENV=1 to dump this). # ====== END Total summary record. #pr: ======== Saved a performance record of 4 samples to test.rpr # Removing the temporary record file test.rpr.tmp. -# terminating run at 1740440672 (after 7.0s of w.c.t.) +# terminating run at 1706032609 (after 10.5s of w.c.t.) + ls -ltr test-tuning-lower-100x100-5050nz--C-N-1--base.eps test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--D-N-1--base.eps test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--S-N-1--base.eps test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps test-tuning-lower-100x100-5050nz--Z-N-1--base.eps test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85628 Feb 24 23:44 test-tuning-lower-100x100-5050nz--D-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84551 Feb 24 23:44 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85629 Feb 24 23:44 test-tuning-lower-100x100-5050nz--S-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84551 Feb 24 23:44 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85629 Feb 24 23:44 test-tuning-lower-100x100-5050nz--C-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84551 Feb 24 23:44 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps --rw-r--r-- 1 pbuilder1 pbuilder1 85629 Feb 24 23:44 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps --rw-r--r-- 1 pbuilder1 pbuilder1 84551 Feb 24 23:44 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85638 Jan 23 17:56 test-tuning-lower-100x100-5050nz--D-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84560 Jan 23 17:56 test-tuning-lower-100x100-5050nz--D-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85638 Jan 23 17:56 test-tuning-lower-100x100-5050nz--S-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84560 Jan 23 17:56 test-tuning-lower-100x100-5050nz--S-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85637 Jan 23 17:56 test-tuning-lower-100x100-5050nz--C-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84558 Jan 23 17:56 test-tuning-lower-100x100-5050nz--C-N-1--mv-tuned_merge1_1x1th.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 85637 Jan 23 17:56 test-tuning-lower-100x100-5050nz--Z-N-1--base.eps +-rw-r--r-- 1 pbuilder2 pbuilder2 84560 Jan 23 17:56 test-tuning-lower-100x100-5050nz--Z-N-1--mv-tuned_merge1_1x1th.eps + rsbench --read-performance-record test.rpr + ls -ltr test.txt --rw-r--r-- 1 pbuilder1 pbuilder1 4083 Feb 24 23:44 test.txt +-rw-r--r-- 1 pbuilder2 pbuilder2 4095 Jan 23 17:56 test.txt + RSB_PR_WLTC=2 + RSB_PR_SR=0 + rsbench --read-performance-record test.rpr @@ -8050,29 +8074,29 @@ /usr/bin/kpsepath ++ kpsepath tex ++ sed 's/!!//g;s/:/\n/g;' -+ find . /nonexistent/first-build/.texlive2022/texmf-config/tex/kpsewhich// /nonexistent/first-build/.texlive2022/texmf-var/tex/kpsewhich// /nonexistent/first-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/first-build/.texlive2022/texmf-config/tex/generic// /nonexistent/first-build/.texlive2022/texmf-var/tex/generic// /nonexistent/first-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/first-build/.texlive2022/texmf-config/tex/latex// /nonexistent/first-build/.texlive2022/texmf-var/tex/latex// /nonexistent/first-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/first-build/.texlive2022/texmf-config/tex/// /nonexistent/first-build/.texlive2022/texmf-var/tex/// /nonexistent/first-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/kpsewhich//': No such file or directory ++ find . /nonexistent/second-build/.texlive2022/texmf-config/tex/kpsewhich// /nonexistent/second-build/.texlive2022/texmf-var/tex/kpsewhich// /nonexistent/second-build/texmf/tex/kpsewhich// /usr/local/share/texmf/tex/kpsewhich// /etc/texmf/tex/kpsewhich// /var/lib/texmf/tex/kpsewhich// /usr/share/texmf/tex/kpsewhich// /usr/share/texlive/texmf-dist/tex/kpsewhich// /nonexistent/second-build/.texlive2022/texmf-config/tex/generic// /nonexistent/second-build/.texlive2022/texmf-var/tex/generic// /nonexistent/second-build/texmf/tex/generic// /usr/local/share/texmf/tex/generic// /etc/texmf/tex/generic// /var/lib/texmf/tex/generic// /usr/share/texmf/tex/generic// /usr/share/texlive/texmf-dist/tex/generic// /nonexistent/second-build/.texlive2022/texmf-config/tex/latex// /nonexistent/second-build/.texlive2022/texmf-var/tex/latex// /nonexistent/second-build/texmf/tex/latex// /usr/local/share/texmf/tex/latex// /etc/texmf/tex/latex// /var/lib/texmf/tex/latex// /usr/share/texmf/tex/latex// /usr/share/texlive/texmf-dist/tex/latex// /nonexistent/second-build/.texlive2022/texmf-config/tex/// /nonexistent/second-build/.texlive2022/texmf-var/tex/// /nonexistent/second-build/texmf/tex/// /usr/local/share/texmf/tex/// /etc/texmf/tex/// /var/lib/texmf/tex/// /usr/share/texmf/tex/// /usr/share/texlive/texmf-dist/tex/// -name sciposter.cls +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex/kpsewhich//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/kpsewhich//': No such file or directory find: '/usr/local/share/texmf/tex/kpsewhich//': No such file or directory find: '/etc/texmf/tex/kpsewhich//': No such file or directory find: '/var/lib/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texmf/tex/kpsewhich//': No such file or directory find: '/usr/share/texlive/texmf-dist/tex/kpsewhich//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex/generic//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex/generic//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex/generic//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/generic//': No such file or directory find: '/usr/local/share/texmf/tex/generic//': No such file or directory find: '/usr/share/texmf/tex/generic//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex/latex//': No such file or directory -find: '/nonexistent/first-build/texmf/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex/latex//': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex/latex//': No such file or directory +find: '/nonexistent/second-build/texmf/tex/latex//': No such file or directory find: '/usr/local/share/texmf/tex/latex//': No such file or directory find: '/etc/texmf/tex/latex//': No such file or directory find: '/var/lib/texmf/tex/latex//': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-config/tex///': No such file or directory -find: '/nonexistent/first-build/.texlive2022/texmf-var/tex///': No such file or directory -find: '/nonexistent/first-build/texmf/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-config/tex///': No such file or directory +find: '/nonexistent/second-build/.texlive2022/texmf-var/tex///': No such file or directory +find: '/nonexistent/second-build/texmf/tex///': No such file or directory find: '/usr/local/share/texmf/tex///': No such file or directory + exit 0 for mf in pd.mtx vf.mtx ; do if test -f /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; then true; else cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/$mf /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/$mf ; fi; done @@ -8087,7 +8111,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x56a9bcf0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0xc56ce0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8102,7 +8126,7 @@ This error may be safely ignored. Correctly allocated a matrix. Summary information of the matrix: -(3 x 3)[0x5763ccf0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +(3 x 3)[0x14e3ce0]{D} @ (0(0..3),0(0..3)) (3 nnz, 1 nnz/r) flags 0x2040384 (coo:1, csr:0, hw:0, ic:1, fi:0), storage: 40, subm: 1, symflags:'' Correctly performed a SPMV. Correctly freed the matrix. Correctly finalized the library. @@ -8166,51 +8190,51 @@ Done. Building a matrix with 5 nnz, 5 x 5 Duplicates check: 5 - 0 = 5 - converted COO to RSB in 6.700e-05 s (100.00 %) - analyzed arrays in 2.193e-05 s (32.74 %) - cleaned-up arrays in 9.537e-07 s (1.42 %) - deduplicated arrays in 0.000e+00 s (0.00 %) - sorted arrays in 1.001e-05 s (14.95 %) - shuffled partitions in 1.502e-05 s (22.42 %) - memory allocations took 4.053e-06 s (6.05 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 1.407e-05 s (21.00 %) -Built (5 x 5)[0x576414d0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' + converted COO to RSB in 9.343e-02 s (100.00 %) + analyzed arrays in 3.999e-02 s (42.81 %) + cleaned-up arrays in 1.907e-06 s (0.00 %) + deduplicated arrays in 9.537e-07 s (0.00 %) + sorted arrays in 1.340e-02 s (14.34 %) + shuffled partitions in 2.399e-02 s (25.68 %) + memory allocations took 1.001e-05 s (0.01 %) + leafs setup took 1.907e-06 s (0.00 %) + halfword conversion took 1.603e-02 s (17.15 %) +Built (5 x 5)[0x14e7ad0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' Allocated matrix of 5 nonzeroes: -(5 x 5)[0x576414d0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(5 x 5)[0x14e7ad0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -Before auto-tuning, 100 multiplications took 0.000718s. +Before auto-tuning, 100 multiplications took 1.315937s. Threads autotuning (may take more than 1.500000s)... Will use autotuning routine to sample matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=8.53062e-08), 18 suggested as starting thread count(default). -3 iterations (18 th.) took 2.503e-05s; avg 8.345e-06s ( +/- 17.14/ 20.00 %); best 6.914e-06s; worst 1.001e-05s; std dev. 1.277e-06 (taking best). -Reference operation time is 6.91414e-06 s (2.893 Mflops) with 18 threads. -3 iterations (18 th.) took 2.289e-05s; avg 7.629e-06s ( +/- 9.38/ 6.25 %); best 6.914e-06s; worst 8.106e-06s; std dev. 5.15e-07 (taking best). -Reference operation time is 6.91414e-06 s (2.893 Mflops) with 18 threads. -After 0.000089s, autotuning routine did not find a better threads count configuration. -(5 x 5)[0x576414d0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' -After threads auto-tuning, 100 multiplications took 0.000688s -- effective speedup of 1.04366 x -Matrix autotuning (may take more than 1.500000s; using 18 threads )... +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=1.10815e-06), 10 suggested as starting thread count(default). +3 iterations (10 th.) took 0.04793s; avg 0.01598s ( +/- 0.27/ 0.15 %); best 0.01593s; worst 0.016s; std dev. 3.088e-05 (taking best). +Reference operation time is 0.015934 s (0.001255 Mflops) with 10 threads. +3 iterations (10 th.) took 0.04798s; avg 0.01599s ( +/- 0.08/ 0.05 %); best 0.01598s; worst 0.016s; std dev. 9.425e-06 (taking best). +Reference operation time is 0.0159791 s (0.001252 Mflops) with 10 threads. +After 0.095993s, autotuning routine did not find a better threads count configuration. +(5 x 5)[0x14e7ad0]{D} @ (0(0..0),0(0..0)) (5 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +After threads auto-tuning, 100 multiplications took 1.591975s -- effective speedup of 0.826607 x +Matrix autotuning (may take more than 1.500000s; using 10 threads )... Will autotune matrix: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=8.53062e-08), 18 suggested as starting thread count. -3 iterations (18 th.) took 2.98e-05s; avg 9.934e-06s ( +/- 20.80/ 39.20 %); best 7.868e-06s; worst 1.383e-05s; std dev. 2.755e-06 (taking best). -Reference operation time is 7.86781e-06 s (2.542 Mflops) with 18 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 7.868e-06 Mflops: 2.542) -Merge (2 -> 1 leaves) took w.c.t. of 1.097e-05s, ~2.146e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 91.74/107.69 %); best 8.531e-08s; worst 2.146e-06s; std dev. 8.778e-07 (taking best). -Reference operation time is 8.53062e-08 s (234.4 Mflops) with 18 threads. -After merge step 1: tpop: 8.531e-08 s ~Mflops: 234.450 nsubm:1 otn:18 -Applying merge (2 -> 1 leaves, 18 th.) yielded SPEEDUP of 92.230x: 7.868e-06s -> 8.531e-08s, so taking this instance. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=1.10815e-06), 10 suggested as starting thread count. +3 iterations (10 th.) took 0.04801s; avg 0.016s ( +/- 0.07/ 0.08 %); best 0.01599s; worst 0.01602s; std dev. 1.003e-05 (taking best). +Reference operation time is 0.0159941 s (0.00125 Mflops) with 10 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 5 x 5, type D, 5 nnz, 1 nnz/r, 3 subms, 2 lsubms, 4.0000 bpnz (tpop: 0.01599 Mflops: 0.001) +Merge (2 -> 1 leaves) took w.c.t. of 2.408e-05s, ~5.96e-06s of computing time (of which 9.537e-07s sorting, 5.007e-06s analysis) +3 iterations (10 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 33.60/ 85.71 %); best 1.108e-06s; worst 3.099e-06s; std dev. 1.012e-06 (taking best). +Reference operation time is 1.10815e-06 s (18.05 Mflops) with 10 threads. +After merge step 1: tpop: 1.108e-06 s ~Mflops: 18.048 nsubm:1 otn:10 +Applying merge (2 -> 1 leaves, 10 th.) yielded SPEEDUP of 14433.185x: 0.01599s -> 1.108e-06s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (2 -> 1 subms) took 0.000227s (of which 0.000181s partitioning, 0s I/O); computing times: 2.146e-06s in par. loops, 9.537e-07s sorting, 1.907e-06s analyzing) -Total merge + benchmarking process took 0.000227s, equivalent to 2660.7/28.8 new/old ops (4.983e-05s for 2 clones -- as 584.1/6.3 ops, or 292.1/3.2 ops per clone), SPEEDUP of 92.230x -Applying multi-merge (2 -> 1 leaves, 1 steps, 18 -> 18 th.sp.) yielded SPEEDUP of 92.230x (7.868e-06s -> 8.531e-08s), will amortize in 29.2 ops by saving 7.783e-06s per op. -In 1 tuning rounds (tot. 0.00031s, 5e-05s for constructor, 2 clones) obtained a SPEEDUP of 9123.0% (92.23x) (from 2.542 to 234.4 Mflops). -After 0.000325s, autotuning routine declared speedup of 92.2303 x, when using threads count of 18. -(5 x 5)[0x5763f440]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' -After threads auto-tuning, 100 multiplications took 0.000031s -- further speedup of 22.2 x +A total of 1 merge steps (of max 6) (2 -> 1 subms) took 0.04797s (of which 0.00015s partitioning, 0s I/O); computing times: 5.96e-06s in par. loops, 9.537e-07s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.04797s, equivalent to 43289.4/3.0 new/old ops (0.09577s for 2 clones -- as 86419.2/6.0 ops, or 43209.6/3.0 ops per clone), SPEEDUP of 14433.185x +Applying multi-merge (2 -> 1 leaves, 1 steps, 10 -> 10 th.sp.) yielded SPEEDUP of 14433.185x (0.01599s -> 1.108e-06s), will amortize in 3.0 ops by saving 0.01599s per op. +In 1 tuning rounds (tot. 0.14s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 1443218.5% (1.443e+04x) (from 0.00125 to 18.05 Mflops). +After 0.144011s, autotuning routine declared speedup of 14433.2 x, when using threads count of 10. +(5 x 5)[0x14e59b0]{D} @ (0(0..5),0(0..5)) (5 nnz, 1 nnz/r) flags 0x2040186 (coo:1, csr:0, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'' +After threads auto-tuning, 100 multiplications took 0.000023s -- further speedup of 68837.4 x 0/2 0 0 -> 0 1/2 1 0 -> 5 0/2 0 3 -> 0 @@ -8221,7 +8245,7 @@ Correctly initialized the library. Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x576414d0]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x14e7ad0]{D} @ (0(1..2),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8375,76 +8399,56 @@ Creating 500 x 500 matrix with 62500 nonzeroes. Building a matrix with 62500 nnz, 500 x 500 Duplicates check: 62500 - 0 = 62500 - converted COO to RSB in 3.524e-03 s (100.00 %) - analyzed arrays in 1.001e-04 s (2.84 %) - cleaned-up arrays in 9.894e-05 s (2.81 %) - deduplicated arrays in 1.452e-04 s (4.12 %) - sorted arrays in 2.983e-03 s (84.64 %) - shuffled partitions in 1.349e-04 s (3.83 %) - memory allocations took 2.694e-05 s (0.76 %) - leafs setup took 4.053e-06 s (0.12 %) - halfword conversion took 3.004e-05 s (0.85 %) -Built (500 x 500)[0x5823bb20]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' + converted COO to RSB in 2.697e-01 s (100.00 %) + analyzed arrays in 4.768e-02 s (17.68 %) + cleaned-up arrays in 1.888e-04 s (0.07 %) + deduplicated arrays in 1.712e-04 s (0.06 %) + sorted arrays in 9.361e-02 s (34.71 %) + shuffled partitions in 7.999e-02 s (29.66 %) + memory allocations took 3.791e-05 s (0.01 %) + leafs setup took 1.001e-05 s (0.00 %) + halfword conversion took 4.799e-02 s (17.79 %) +Built (500 x 500)[0x21cdb10]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 49, symflags:'' Allocated matrix of 62500 nonzeroes: -(500 x 500)[0x5823bb20]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +(500 x 500)[0x21cdb10]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 49, symflags:'' -Before auto-tuning, 100 multiplications took 0.015339s. +Before auto-tuning, 100 multiplications took 1.603607s. Threads autotuning (may take more than 1.500000s)... -Will use autotuning routine to sample matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 39 subms, 28 lsubms, 2.0817 bpnz. +Will use autotuning routine to sample matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 67 subms, 49 lsubms, 2.1111 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.44548e-08), 18 suggested as starting thread count(default). -3 iterations (18 th.) took 0.0004239s; avg 0.0001413s ( +/- 6.69/ 4.78 %); best 0.0001318s; worst 0.0001481s; std dev. 6.889e-06 (taking best). -Reference operation time is 0.000131845 s (1896 Mflops) with 18 threads. -3 iterations (18 th.) took 0.000479s; avg 0.0001597s ( +/- 2.94/ 3.93 %); best 0.000155s; worst 0.0001659s; std dev. 4.616e-06 (taking best). -Reference operation time is 0.000154972 s (1613 Mflops) with 18 threads. -After 0.000948s, autotuning routine did not find a better threads count configuration. -(500 x 500)[0x5823bb20]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' -After threads auto-tuning, 100 multiplications took 0.015195s -- effective speedup of 1.00949 x -Matrix autotuning (may take more than 1.500000s; using 18 threads )... -Will autotune matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 39 subms, 28 lsubms, 2.0817 bpnz. +Sampling (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=6.33502e-07), 10 suggested as starting thread count(default). +3 iterations (10 th.) took 0.04794s; avg 0.01598s ( +/- 0.33/ 0.21 %); best 0.01593s; worst 0.01601s; std dev. 3.765e-05 (taking best). +Reference operation time is 0.0159278 s (15.7 Mflops) with 10 threads. +3 iterations (10 th.) took 0.04797s; avg 0.01599s ( +/- 0.15/ 0.08 %); best 0.01596s; worst 0.016s; std dev. 1.744e-05 (taking best). +Reference operation time is 0.015964 s (15.66 Mflops) with 10 threads. +After 0.095995s, autotuning routine did not find a better threads count configuration. +(500 x 500)[0x21cdb10]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 49, symflags:'' +After threads auto-tuning, 100 multiplications took 1.611982s -- effective speedup of 0.994804 x +Matrix autotuning (may take more than 1.500000s; using 10 threads )... +Will autotune matrix: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 67 subms, 49 lsubms, 2.1111 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=5.44548e-08), 18 suggested as starting thread count. -3 iterations (18 th.) took 0.000464s; avg 0.0001547s ( +/- 10.74/ 7.91 %); best 0.000138s; worst 0.0001669s; std dev. 1.218e-05 (taking best). -Reference operation time is 0.000138044 s (1811 Mflops) with 18 threads. -Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 39 subms, 28 lsubms, 2.0817 bpnz (tpop: 0.000138 Mflops: 1811.012) -Merge (28 -> 22 leaves) took w.c.t. of 0.0004501s, ~0.000855s of computing time (of which 7.2e-05s sorting, 5.007e-06s analysis) -3 iterations (18 th.) took 0.001172s; avg 0.0003907s ( +/- 59.54/118.59 %); best 0.0001581s; worst 0.000854s; std dev. 0.0003276 (taking best). -Reference operation time is 0.000158072 s (1582 Mflops) with 18 threads. -After merge step 1: tpop: 0.0001581 s ~Mflops: 1581.563 nsubm:22 otn:18 -Applying merge (28 -> 22 leaves, 18 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.145x: 0.000138s -> 0.0001581s. -Merge (22 -> 16 leaves) took w.c.t. of 7.582e-05s, ~0.0001168s of computing time (of which 5.817e-05s sorting, 4.053e-06s analysis) -3 iterations (18 th.) took 0.001773s; avg 0.000591s ( +/- 80.03/157.00 %); best 0.000118s; worst 0.001519s; std dev. 0.0006562 (taking best). -Reference operation time is 0.000118017 s (2118 Mflops) with 18 threads. -After merge step 2: tpop: 0.000118 s ~Mflops: 2118.335 nsubm:16 otn:18 -Applying merge (22 -> 16 leaves, 18 th.) yielded SPEEDUP of 1.170x: 0.000138s -> 0.000118s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 0.0003629s, ~0.000694s of computing time (of which 0.0004749s sorting, 3.099e-06s analysis) -3 iterations (18 th.) took 0.002106s; avg 0.000702s ( +/- 85.19/169.94 %); best 0.000104s; worst 0.001895s; std dev. 0.0008436 (taking best). -Reference operation time is 0.000103951 s (2405 Mflops) with 18 threads. -After merge step 3: tpop: 0.000104 s ~Mflops: 2404.991 nsubm:10 otn:18 -Applying merge (16 -> 10 leaves, 18 th.) yielded SPEEDUP of 1.135x: 0.000118s -> 0.000104s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 0.00016s, ~0.00015s of computing time (of which 7.486e-05s sorting, 3.099e-06s analysis) -3 iterations (18 th.) took 0.0003281s; avg 0.0001094s ( +/- 4.07/ 6.18 %); best 0.0001049s; worst 0.0001161s; std dev. 4.856e-06 (taking best). -Reference operation time is 0.000104904 s (2383 Mflops) with 18 threads. -After merge step 4: tpop: 0.0001049 s ~Mflops: 2383.127 nsubm:7 otn:18 -Applying merge (10 -> 7 leaves, 18 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99091x): 0.000104s -> 0.0001049s, so IGNORING this instance. -Merge (7 -> 4 leaves) took w.c.t. of 0.0001469s, ~0.000139s of computing time (of which 7.105e-05s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 0.0003021s; avg 0.0001007s ( +/- 2.68/ 2.29 %); best 9.799e-05s; worst 0.000103s; std dev. 2.063e-06 (taking best). -Reference operation time is 9.799e-05 s (2551 Mflops) with 18 threads. -After merge step 5: tpop: 9.799e-05 s ~Mflops: 2551.280 nsubm:4 otn:18 -Applying merge (7 -> 4 leaves, 18 th.) yielded SPEEDUP of 1.061x: 0.000104s -> 9.799e-05s, so taking this instance. -Merge (4 -> 3 leaves) took w.c.t. of 0.00014s, ~0.0001328s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 0.000586s; avg 0.0001953s ( +/- 0.65/ 0.33 %); best 0.0001941s; worst 0.000196s; std dev. 8.991e-07 (taking best). -Reference operation time is 0.000194073 s (1288 Mflops) with 18 threads. -After merge step 6: tpop: 0.0001941 s ~Mflops: 1288.177 nsubm:3 otn:18 -Applying merge (4 -> 3 leaves, 18 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.981x: 9.799e-05s -> 0.0001941s. +Starting autotuning (15 x 0.1 s stages, transA=N, nrhs=2, timer gran.=6.33502e-07), 10 suggested as starting thread count. +3 iterations (10 th.) took 0.04796s; avg 0.01599s ( +/- 0.03/ 0.04 %); best 0.01598s; worst 0.01599s; std dev. 4.616e-06 (taking best). +Reference operation time is 0.0159829 s (15.64 Mflops) with 10 threads. +Starting merge (user-supplied threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 500 x 500, type D, 62500 nnz, 1.2e+02 nnz/r, 67 subms, 49 lsubms, 2.1111 bpnz (tpop: 0.01598 Mflops: 15.642) +Merge (49 -> 34 leaves) took w.c.t. of 0.008872s, ~0.003283s of computing time (of which 0.0006607s sorting, 1.407e-05s analysis) +3 iterations (10 th.) took 0.0551s; avg 0.01837s ( +/- 31.98/ 47.56 %); best 0.01249s; worst 0.0271s; std dev. 0.006298 (taking best). +Reference operation time is 0.0124929 s (20.01 Mflops) with 10 threads. +After merge step 1: tpop: 0.01249 s ~Mflops: 20.011 nsubm:34 otn:10 +Applying merge (49 -> 34 leaves, 10 th.) yielded SPEEDUP of 1.279x: 0.01598s -> 0.01249s, so taking this instance. +Merge (34 -> 25 leaves) took w.c.t. of 0.008415s, ~0.0002811s of computing time (of which 0.0001111s sorting, 9.775e-06s analysis) +3 iterations (10 th.) took 0.06751s; avg 0.0225s ( +/- 28.89/ 40.05 %); best 0.016s; worst 0.03152s; std dev. 0.006579 (taking best). +Reference operation time is 0.0160022 s (15.62 Mflops) with 10 threads. +After merge step 2: tpop: 0.016 s ~Mflops: 15.623 nsubm:25 otn:10 +Applying merge (34 -> 25 leaves, 10 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.281x: 0.01249s -> 0.016s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 6 merge steps (of max 6) (28 -> 3 subms) took 0.008067s (of which 0.001418s partitioning, 0s I/O); computing times: 0.002088s in par. loops, 0.000752s sorting, 1.812e-05s analyzing) -Total merge + benchmarking process took 0.008067s, equivalent to 82.3/58.4 new/old ops (0.0005336s for 4 clones -- as 5.4/3.9 ops, or 1.4/1.0 ops per clone), SPEEDUP of 1.409x -Applying multi-merge (28 -> 4 leaves, 5 steps, 18 -> 18 th.sp.) yielded SPEEDUP of 1.409x (0.000138s -> 9.799e-05s), will amortize in 201.4 ops by saving 4.005e-05s per op. -In 1 tuning rounds (tot. 0.009s, 0.00053s for constructor, 4 clones) obtained a SPEEDUP of 40.9% (1.409x) (from 1811 to 2551 Mflops). -After 0.008978s, autotuning routine declared speedup of 1.40876 x, when using threads count of 18. -(500 x 500)[0x582420d0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 4, symflags:'' -After threads auto-tuning, 100 multiplications took 0.010434s -- further speedup of 1.4563 x +A total of 2 merge steps (of max 6) (49 -> 25 subms) took 0.188s (of which 0.01731s partitioning, 0s I/O); computing times: 0.003564s in par. loops, 0.0007718s sorting, 2.384e-05s analyzing) +Total merge + benchmarking process took 0.188s, equivalent to 15.0/11.8 new/old ops (0.09598s for 2 clones -- as 7.7/6.0 ops, or 3.8/3.0 ops per clone), SPEEDUP of 1.279x +Applying multi-merge (49 -> 34 leaves, 1 steps, 10 -> 10 th.sp.) yielded SPEEDUP of 1.279x (0.01598s -> 0.01249s), will amortize in 53.9 ops by saving 0.00349s per op. +In 1 tuning rounds (tot. 0.28s, 0.096s for constructor, 2 clones) obtained a SPEEDUP of 27.9% (1.279x) (from 15.64 to 20.01 Mflops). +After 0.284690s, autotuning routine declared speedup of 1.27936 x, when using threads count of 10. +(500 x 500)[0x21d4ff0]{D} @ (0(0..0),0(0..0)) (62500 nnz, 1.2e+02 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 34, symflags:'' +After threads auto-tuning, 100 multiplications took 1.599271s -- further speedup of 1.00795 x librsb timer-based profiling is not supported in this build. If you wish to have it, re-configure librsb with its support. So you can safely ignore the error you might just have seen printed out on screen. /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/backsolve Hello, RSB! @@ -8452,19 +8456,19 @@ Correctly initialized the library. Building a matrix with 7 nnz, 6 x 6 Duplicates check: 1 - 0 = 1 - converted COO to RSB in 1.090e-03 s (100.00 %) - analyzed arrays in 1.059e-03 s (97.18 %) - cleaned-up arrays in 3.099e-06 s (0.28 %) - deduplicated arrays in 0.000e+00 s (0.00 %) - sorted arrays in 9.537e-07 s (0.09 %) - shuffled partitions in 1.001e-05 s (0.92 %) - memory allocations took 1.073e-05 s (0.98 %) - leafs setup took 2.146e-06 s (0.20 %) - halfword conversion took 2.861e-06 s (0.26 %) -Built (6 x 6)[0x578f4d00]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' + converted COO to RSB in 6.150e-02 s (100.00 %) + analyzed arrays in 4.545e-02 s (73.90 %) + cleaned-up arrays in 7.153e-06 s (0.01 %) + deduplicated arrays in 9.537e-07 s (0.00 %) + sorted arrays in 1.907e-06 s (0.00 %) + shuffled partitions in 1.599e-02 s (26.00 %) + memory allocations took 3.099e-05 s (0.05 %) + leafs setup took 6.914e-06 s (0.01 %) + halfword conversion took 9.060e-06 s (0.01 %) +Built (6 x 6)[0x9adcf0]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Correctly allocated a matrix with 7 nonzeroes. Summary information of the matrix: -(6 x 6)[0x578f4d00]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' +(6 x 6)[0x9adcf0]{D} @ (0(0..1),0(5..6)) (1 nnz, 0.17 nnz/r) flags 0x20443ee (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 1, symflags:'UT' Matrix printout: %%MatrixMarket matrix coordinate real general 6 6 1 @@ -8490,20 +8494,55 @@ 1 1 Will autotune matrix: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.109e-08 -3 iterations (18 th.) took 3.004e-05s; avg 1.001e-05s ( +/- 90.48/178.57 %); best 9.537e-07s; worst 2.789e-05s; std dev. 1.264e-05 (taking best). -Reference operation time is 9.53674e-07 s (2.097 Mflops) with 18 threads. +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:7.017e-07 +3 iterations (10 th.) took 3.886e-05s; avg 1.295e-05s ( +/- 92.64/177.91 %); best 9.537e-07s; worst 3.6e-05s; std dev. 1.63e-05 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 9.537e-07 Mflops: 2.097) Merge (1 -> 1 leaves) took w.c.t. of 9.537e-07s, ~0s of computing time (of which 0s sorting, 0s analysis) -3 iterations (18 th.) took 1.192e-06s; avg 3.974e-07s ( +/- 87.14/200.00 %); best 5.109e-08s; worst 1.192e-06s; std dev. 5.62e-07 (taking best). -Reference operation time is 5.10931e-08 s (39.14 Mflops) with 18 threads. -After merge step 1: tpop: 5.109e-08 s ~Mflops: 39.144 nsubm:1 otn:18 -Applying merge (1 -> 1 leaves, 18 th.) yielded SPEEDUP of 18.665x: 9.537e-07s -> 5.109e-08s, so taking this instance. +3 iterations (10 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 58.82 %); best 9.537e-07s; worst 2.146e-06s; std dev. 5.62e-07 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +After merge step 1: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:10 +Applying merge (1 -> 1 leaves, 10 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 1 merge steps (of max 6) (1 -> 1 subms) took 3.886e-05s (of which 2.861e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) -Total merge + benchmarking process took 3.886e-05s, equivalent to 760.6/40.8 new/old ops (4.506e-05s for 2 clones -- as 881.9/47.2 ops, or 441.0/23.6 ops per clone), SPEEDUP of 18.665x -Applying multi-merge (1 -> 1 leaves, 1 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 18.665x (9.537e-07s -> 5.109e-08s), will amortize in 43.1 ops by saving 9.026e-07s per op. -In 1 tuning rounds (tot. 0.00013s, 4.5e-05s for constructor, 2 clones) obtained a SPEEDUP of 1766.5% (18.67x) (from 2.097 to 39.14 Mflops). +A total of 1 merge steps (of max 6) (1 -> 1 subms) took 3.219e-05s (of which 7.868e-06s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 0s analyzing) +Total merge + benchmarking process took 3.219e-05s, equivalent to 33.8/33.8 new/old ops (0.04781s for 1 clones -- as 50134.5/50134.5 ops, or 50134.5/50134.5 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (10 th.) took 1.097e-05s; avg 3.656e-06s ( +/- 73.91/121.74 %); best 9.537e-07s; worst 8.106e-06s; std dev. 3.171e-06 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 1 nnz, 0.17 nnz/r, 1 subms, 1 lsubms, 4.0000 bpnz (tpop: 9.537e-07 Mflops: 2.097) +Split (1 -> 1 leaves, 1 -> 1 subms) took 2.98e-05s (of which: 5.007e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 58.82 %); best 9.537e-07s; worst 2.146e-06s; std dev. 5.62e-07 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +After split step 1: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:10 +Applying split (1 -> 1 leaves, 10 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 8.106e-06s (of which: 1.192e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 58.82 %); best 9.537e-07s; worst 2.146e-06s; std dev. 5.62e-07 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +After split step 2: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:10 +Applying split (1 -> 1 leaves, 10 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 1.812e-05s (of which: 1.192e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 5.96e-06s; avg 1.987e-06s ( +/- 52.00/104.00 %); best 9.537e-07s; worst 4.053e-06s; std dev. 1.461e-06 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +After split step 3: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:10 +Applying split (1 -> 1 leaves, 10 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 8.106e-06s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 41.18 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.052e-07 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +After split step 4: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:10 +Applying split (1 -> 1 leaves, 10 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 1.192e-05s (of which: 9.537e-07s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 41.18 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.052e-07 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +After split step 5: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:10 +Applying split (1 -> 1 leaves, 10 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. +Split (1 -> 1 leaves, 1 -> 1 subms) took 8.106e-06s (of which: 1.192e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 42.86/ 28.57 %); best 9.537e-07s; worst 2.146e-06s; std dev. 5.15e-07 (taking best). +Reference operation time is 9.53674e-07 s (2.097 Mflops) with 10 threads. +After split step 6: tpop: 9.537e-07 s ~Mflops: 2.097 nsubm:1 otn:10 +Applying split (1 -> 1 leaves, 10 th.) yielded NEGLIGIBLE change (6th in a row) (old/new=1.00000x): 9.537e-07s -> 9.537e-07s, so IGNORING this instance. +A total of 6 split steps (of max 6) (1 -> 1 subms) took 0.0004928s (of which 0.0001409s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 1.049e-05s analyzing) +Total split + benchmarking process took 0.0004928s, equivalent to 516.8/516.8 new/old ops (0.04787s for 1 clones -- as 50197.5/50197.5 ops, or 50197.5/50197.5 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +In 1 tuning rounds (tot. 0.096s, 0.096s for constructor, 2 clones) obtained NO speedup (best stays 2.097 Mflops). Backsolving we should get a unitary vector: %%MatrixMarket matrix array real general @@ -8538,146 +8577,220 @@ /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran Building a matrix with 210 nnz, 20 x 20 Duplicates check: 210 - 0 = 210 - converted COO to RSB in 1.038e-03 s (100.00 %) - analyzed arrays in 5.102e-05 s (4.92 %) - cleaned-up arrays in 9.537e-07 s (0.09 %) - deduplicated arrays in 2.146e-06 s (0.21 %) - sorted arrays in 9.000e-04 s (86.70 %) - shuffled partitions in 2.980e-05 s (2.87 %) - memory allocations took 2.885e-05 s (2.78 %) - leafs setup took 3.099e-06 s (0.30 %) - halfword conversion took 1.502e-05 s (1.45 %) -Built (20 x 20)[0x57665c10]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'LS' -Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.111e-08 -Starting autotuning (16 x 5.1105e-08 s stages, transA=N, nrhs=1, timer gran.=5.1105e-08), 18 suggested as starting thread count(default). -3 iterations (18 th.) took 0.000526s; avg 0.0001753s ( +/- 22.35/ 28.24 %); best 0.0001361s; worst 0.0002248s; std dev. 3.694e-05 (taking best). -Reference operation time is 0.000136137 s (6.17 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 30 subms, 22 lsubms, 3.7524 bpnz (tpop: 0.0001361 Mflops: 6.170) -Merge (22 -> 16 leaves) took w.c.t. of 0.0003369s, ~0.0006359s of computing time (of which 1.907e-06s sorting, 3.099e-06s analysis) -3 iterations (18 th.) took 0.001289s; avg 0.0004296s ( +/- 68.59/132.52 %); best 0.0001349s; worst 0.000999s; std dev. 0.0004027 (taking best). -Reference operation time is 0.000134945 s (6.225 Mflops) with 18 threads. -After merge step 1: tpop: 0.0001349 s ~Mflops: 6.225 nsubm:16 otn:18 -Applying merge (22 -> 16 leaves, 18 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00883x): 0.0001361s -> 0.0001349s, so IGNORING this instance. -Merge (16 -> 10 leaves) took w.c.t. of 2.789e-05s, ~1.001e-05s of computing time (of which 1.907e-06s sorting, 4.053e-06s analysis) -3 iterations (18 th.) took 0.001577s; avg 0.0005257s ( +/- 81.54/155.46 %); best 9.704e-05s; worst 0.001343s; std dev. 0.0005781 (taking best). -Reference operation time is 9.70364e-05 s (8.657 Mflops) with 18 threads. -After merge step 2: tpop: 9.704e-05 s ~Mflops: 8.657 nsubm:10 otn:18 -Applying merge (16 -> 10 leaves, 18 th.) yielded SPEEDUP of 1.403x: 0.0001361s -> 9.704e-05s, so taking this instance. -Merge (10 -> 8 leaves) took w.c.t. of 1.597e-05s, ~4.053e-06s of computing time (of which 1.192e-06s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 0.0003641s; avg 0.0001214s ( +/- 44.01/ 45.78 %); best 6.795e-05s; worst 0.0001769s; std dev. 4.451e-05 (taking best). -Reference operation time is 6.79493e-05 s (12.36 Mflops) with 18 threads. -After merge step 3: tpop: 6.795e-05 s ~Mflops: 12.362 nsubm:8 otn:18 -Applying merge (10 -> 8 leaves, 18 th.) yielded SPEEDUP of 1.428x: 9.704e-05s -> 6.795e-05s, so taking this instance. -Merge (8 -> 6 leaves) took w.c.t. of 7.868e-06s, ~3.099e-06s of computing time (of which 1.192e-06s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 0.0002351s; avg 7.836e-05s ( +/- 66.84/ 96.55 %); best 2.599e-05s; worst 0.000154s; std dev. 5.48e-05 (taking best). -Reference operation time is 2.59876e-05 s (32.32 Mflops) with 18 threads. -After merge step 4: tpop: 2.599e-05 s ~Mflops: 32.323 nsubm:6 otn:18 -Applying merge (8 -> 6 leaves, 18 th.) yielded SPEEDUP of 2.615x: 6.795e-05s -> 2.599e-05s, so taking this instance. -Merge (6 -> 3 leaves) took w.c.t. of 8.821e-06s, ~3.099e-06s of computing time (of which 2.146e-06s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 0.000181s; avg 6.032e-05s ( +/- 35.57/ 19.37 %); best 3.886e-05s; worst 7.2e-05s; std dev. 1.519e-05 (taking best). -Reference operation time is 3.88622e-05 s (21.61 Mflops) with 18 threads. -After merge step 5: tpop: 3.886e-05 s ~Mflops: 21.615 nsubm:3 otn:18 -Applying merge (6 -> 3 leaves, 18 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.495x: 2.599e-05s -> 3.886e-05s. + converted COO to RSB in 1.267e-01 s (100.00 %) + analyzed arrays in 3.994e-02 s (31.52 %) + cleaned-up arrays in 3.099e-06 s (0.00 %) + deduplicated arrays in 4.053e-06 s (0.00 %) + sorted arrays in 2.271e-02 s (17.92 %) + shuffled partitions in 3.200e-02 s (25.25 %) + memory allocations took 5.078e-05 s (0.04 %) + leafs setup took 8.106e-06 s (0.01 %) + halfword conversion took 3.199e-02 s (25.24 %) +Built (20 x 20)[0x18a9c00]{D} @ (0(0..0),0(0..0)) (210 nnz, 10 nnz/r) flags 0x2446396 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 23, symflags:'LS' +Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 32 subms, 23 lsubms, 3.7524 bpnz. +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:6.444e-07 +Starting autotuning (16 x 6.4435e-07 s stages, transA=N, nrhs=1, timer gran.=6.4435e-07), 10 suggested as starting thread count(default). +3 iterations (10 th.) took 0.04799s; avg 0.016s ( +/- 0.03/ 0.02 %); best 0.01599s; worst 0.016s; std dev. 3.311e-06 (taking best). +Reference operation time is 0.0159938 s (0.05252 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 32 subms, 23 lsubms, 3.7524 bpnz (tpop: 0.01599 Mflops: 0.053) +Merge (23 -> 19 leaves) took w.c.t. of 0.0005848s, ~0.0005541s of computing time (of which 2.003e-05s sorting, 7.868e-06s analysis) +3 iterations (10 th.) took 0.05938s; avg 0.01979s ( +/- 19.16/ 38.32 %); best 0.016s; worst 0.02738s; std dev. 0.005363 (taking best). +Reference operation time is 0.016 s (0.0525 Mflops) with 10 threads. +After merge step 1: tpop: 0.016 s ~Mflops: 0.052 nsubm:19 otn:10 +Applying merge (23 -> 19 leaves, 10 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99961x): 0.01599s -> 0.016s, so IGNORING this instance. +Merge (19 -> 13 leaves) took w.c.t. of 0.007975s, ~2.789e-05s of computing time (of which 7.153e-06s sorting, 1.001e-05s analysis) +3 iterations (10 th.) took 0.05995s; avg 0.01998s ( +/- 19.99/ 39.92 %); best 0.01599s; worst 0.02796s; std dev. 0.005642 (taking best). +Reference operation time is 0.01599 s (0.05253 Mflops) with 10 threads. +After merge step 2: tpop: 0.01599 s ~Mflops: 0.053 nsubm:13 otn:10 +Applying merge (19 -> 13 leaves, 10 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00024x): 0.01599s -> 0.01599s, so IGNORING this instance. +Merge (13 -> 10 leaves) took w.c.t. of 2.098e-05s, ~8.821e-06s of computing time (of which 3.099e-06s sorting, 4.053e-06s analysis) +3 iterations (10 th.) took 0.04795s; avg 0.01598s ( +/- 0.23/ 0.12 %); best 0.01594s; worst 0.016s; std dev. 2.621e-05 (taking best). +Reference operation time is 0.015945 s (0.05268 Mflops) with 10 threads. +After merge step 3: tpop: 0.01594 s ~Mflops: 0.053 nsubm:10 otn:10 +Applying merge (13 -> 10 leaves, 10 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00307x): 0.01599s -> 0.01594s, so IGNORING this instance. +Merge (10 -> 8 leaves) took w.c.t. of 1.812e-05s, ~8.106e-06s of computing time (of which 1.907e-06s sorting, 3.099e-06s analysis) +3 iterations (10 th.) took 0.04795s; avg 0.01598s ( +/- 0.19/ 0.12 %); best 0.01595s; worst 0.016s; std dev. 2.156e-05 (taking best). +Reference operation time is 0.015954 s (0.05265 Mflops) with 10 threads. +After merge step 4: tpop: 0.01595 s ~Mflops: 0.053 nsubm:8 otn:10 +Applying merge (10 -> 8 leaves, 10 th.) yielded NEGLIGIBLE change (4th in a row) (old/new=1.00250x): 0.01599s -> 0.01595s, so IGNORING this instance. +Merge (8 -> 6 leaves) took w.c.t. of 1.907e-05s, ~5.96e-06s of computing time (of which 2.146e-06s sorting, 5.96e-06s analysis) +3 iterations (10 th.) took 0.04795s; avg 0.01598s ( +/- 0.21/ 0.10 %); best 0.01595s; worst 0.016s; std dev. 2.36e-05 (taking best). +Reference operation time is 0.01595 s (0.05266 Mflops) with 10 threads. +After merge step 5: tpop: 0.01595 s ~Mflops: 0.053 nsubm:6 otn:10 +Applying merge (8 -> 6 leaves, 10 th.) yielded NEGLIGIBLE change (5th in a row) (old/new=1.00275x): 0.01599s -> 0.01595s, so IGNORING this instance. +Merge (6 -> 3 leaves) took w.c.t. of 3.219e-05s, ~1.383e-05s of computing time (of which 4.053e-06s sorting, 1.907e-06s analysis) +3 iterations (10 th.) took 0.04792s; avg 0.01597s ( +/- 0.28/ 0.19 %); best 0.01593s; worst 0.01601s; std dev. 3.26e-05 (taking best). +Reference operation time is 0.0159299 s (0.05273 Mflops) with 10 threads. +After merge step 6: tpop: 0.01593 s ~Mflops: 0.053 nsubm:3 otn:10 +Applying merge (6 -> 3 leaves, 10 th.) yielded NEGLIGIBLE change (6th in a row) (old/new=1.00401x): 0.01599s -> 0.01593s, so IGNORING this instance. +A total of 6 merge steps (of max 6) (23 -> 3 subms) took 0.32s (of which 0.008698s partitioning, 0s I/O); computing times: 0.0006187s in par. loops, 3.839e-05s sorting, 3.29e-05s analyzing) +Total merge + benchmarking process took 0.32s, equivalent to 20.0/20.0 new/old ops (0.04789s for 1 clones -- as 3.0/3.0 ops, or 3.0/3.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (10 th.) took 0.04801s; avg 0.016s ( +/- 0.08/ 0.06 %); best 0.01599s; worst 0.01601s; std dev. 9.585e-06 (taking best). +Reference operation time is 0.01599 s (0.05253 Mflops) with 10 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 32 subms, 23 lsubms, 3.7524 bpnz (tpop: 0.01599 Mflops: 0.053) +Split (23 -> 53 leaves, 32 -> 73 subms) took 0.02394s (of which: 6.914e-06s analysis, -1.877e+10s mem.mgmt); compute time: 0.001684s overall, 2.217e-05s searches, 0.001662s shuffle, 0.0008135s switch, 3.481e-05s quadrants. +3 iterations (10 th.) took 0.04796s; avg 0.01599s ( +/- 0.16/ 0.10 %); best 0.01596s; worst 0.016s; std dev. 1.843e-05 (taking best). +Reference operation time is 0.0159612 s (0.05263 Mflops) with 10 threads. +After split step 1: tpop: 0.01596 s ~Mflops: 0.053 nsubm:53 otn:10 +Applying split (23 -> 53 leaves, 10 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00181x): 0.01599s -> 0.01596s, so IGNORING this instance. +Split (53 -> 128 leaves, 73 -> 174 subms) took 0.02391s (of which: 1.311e-05s analysis, -4.436e+10s mem.mgmt); compute time: 0.001371s overall, 3.743e-05s searches, 0.001334s shuffle, 0.000628s switch, 0.0001087s quadrants. +3 iterations (10 th.) took 0.04796s; avg 0.01599s ( +/- 0.01/ 0.02 %); best 0.01599s; worst 0.01599s; std dev. 2.36e-06 (taking best). +Reference operation time is 0.015986 s (0.05255 Mflops) with 10 threads. +After split step 2: tpop: 0.01599 s ~Mflops: 0.053 nsubm:128 otn:10 +Applying split (53 -> 128 leaves, 10 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00025x): 0.01599s -> 0.01599s, so IGNORING this instance. +Split (128 -> 146 leaves, 174 -> 198 subms) took 0.02388s (of which: 2.313e-05s analysis, -1.024e+10s mem.mgmt); compute time: 0.0001652s overall, 1.097e-05s searches, 0.0001543s shuffle, 3.457e-05s switch, 3.886e-05s quadrants. +3 iterations (10 th.) took 0.04797s; avg 0.01599s ( +/- 0.01/ 0.01 %); best 0.01599s; worst 0.01599s; std dev. 1.107e-06 (taking best). +Reference operation time is 0.0159872 s (0.05254 Mflops) with 10 threads. +After split step 3: tpop: 0.01599 s ~Mflops: 0.053 nsubm:146 otn:10 +Applying split (128 -> 146 leaves, 10 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00018x): 0.01599s -> 0.01599s, so IGNORING this instance. +Split (146 -> 146 leaves, 198 -> 198 subms) took 0.02395s (of which: 2.694e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 0.05599s; avg 0.01866s ( +/- 35.50/ 50.05 %); best 0.01204s; worst 0.02801s; std dev. 0.006795 (taking best). +Reference operation time is 0.012038 s (0.06978 Mflops) with 10 threads. +After split step 4: tpop: 0.01204 s ~Mflops: 0.070 nsubm:146 otn:10 +Applying split (146 -> 146 leaves, 10 th.) yielded SPEEDUP of 1.328x: 0.01599s -> 0.01204s, so taking this instance. +Split (146 -> 146 leaves, 198 -> 198 subms) took 0.02375s (of which: 2.503e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 0.04949s; avg 0.0165s ( +/- 3.41/ 6.35 %); best 0.01593s; worst 0.01754s; std dev. 0.0007415 (taking best). +Reference operation time is 0.0159349 s (0.05271 Mflops) with 10 threads. +After split step 5: tpop: 0.01593 s ~Mflops: 0.053 nsubm:146 otn:10 +Applying split (146 -> 146 leaves, 10 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.324x: 0.01204s -> 0.01593s. +Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 5 split steps (of max 6) (23 -> 146 subms) took 0.4176s (of which 0.1201s partitioning, 0s I/O); computing times: 0.003221s in par. loops, 7.057e-05s sorting, 9.513e-05s analyzing) +Total split + benchmarking process took 0.4176s, equivalent to 34.7/26.1 new/old ops (0.09595s for 2 clones -- as 8.0/6.0 ops, or 4.0/3.0 ops per clone), SPEEDUP of 1.328x +Applying multi-split (23 -> 146 leaves, 4 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 1.328x (0.01599s -> 0.01204s), will amortize in 105.7 ops by saving 0.003952s per op. +In 1 tuning rounds (tot. 0.93s, 0.14s for constructor, 3 clones) obtained a SPEEDUP of 32.8% (1.328x) (from 0.05253 to 0.06978 Mflops). + autotuner chose 10 threads +Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 198 subms, 146 lsubms, 4.2286 bpnz. +Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:6.444e-07 +Starting autotuning (16 x 6.4435e-07 s stages, transA=N, nrhs=1, timer gran.=6.4435e-07), 10 suggested as starting thread count(default). +3 iterations (10 th.) took 0.04801s; avg 0.016s ( +/- 0.17/ 0.11 %); best 0.01598s; worst 0.01602s; std dev. 1.932e-05 (taking best). +~ 10 threads: 0.01598s (0.053 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 0.03997s; avg 0.01332s ( +/- 10.14/ 20.18 %); best 0.01197s; worst 0.01601s; std dev. 0.001901 (taking best). + 9 threads: 0.01197s (0.07 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 0.04798s; avg 0.01599s ( +/- 16.68/ 16.78 %); best 0.01332s; worst 0.01868s; std dev. 0.002184 (taking best). + 8 threads: 0.01332s (0.063 Mflops) (1/2 degradations so far) - +3 iterations (7 th.) took 0.04796s; avg 0.01599s ( +/- 0.13/ 0.24 %); best 0.01597s; worst 0.01603s; std dev. 2.697e-05 (taking best). + 7 threads: 0.01597s (0.053 Mflops) (2/2 degradations so far) - +Best threads choice is 9; starting threads were 10; max speed gap is 1.3x; search took 0.18s. +Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 198 subms, 146 lsubms, 4.2286 bpnz (tpop: 0.01197 Mflops: 0.070) +Merge (146 -> 99 leaves) took w.c.t. of 0.01597s, ~0.0001938s of computing time (of which 3.6e-05s sorting, 1.907e-05s analysis) +3 iterations (10 th.) took 0.04796s; avg 0.01599s ( +/- 0.18/ 0.13 %); best 0.01596s; worst 0.01601s; std dev. 2.1e-05 (taking best). +~ 10 threads: 0.01596s (0.053 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 0.04796s; avg 0.01599s ( +/- 2.93/ 3.05 %); best 0.01552s; worst 0.01647s; std dev. 0.0003909 (taking best). + 9 threads: 0.01552s (0.054 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 0.05198s; avg 0.01733s ( +/- 7.61/ 9.13 %); best 0.01601s; worst 0.01891s; std dev. 0.001198 (taking best). + 8 threads: 0.01601s (0.052 Mflops) (1/2 degradations so far) - +3 iterations (7 th.) took 0.04796s; avg 0.01599s ( +/- 49.84/ 25.08 %); best 0.008018s; worst 0.01999s; std dev. 0.005634 (taking best). + 7 threads: 0.008018s (0.1 Mflops) (0/2 degradations so far) - +3 iterations (6 th.) took 0.08013s; avg 0.02671s ( +/- 40.00/ 79.40 %); best 0.01603s; worst 0.04792s; std dev. 0.015 (taking best). + 6 threads: 0.01603s (0.052 Mflops) (1/2 degradations so far) - +3 iterations (5 th.) took 0.05179s; avg 0.01726s ( +/- 30.57/ 37.82 %); best 0.01199s; worst 0.02379s; std dev. 0.004901 (taking best). + 5 threads: 0.01199s (0.07 Mflops) (2/2 degradations so far) - +Best threads choice is 7; starting threads were 10; max speed gap is 2x; search took 0.33s. +After merge step 1: tpop: 0.008018 s ~Mflops: 0.105 nsubm:99 otn:7 +Applying merge (146 -> 99 leaves, 7 th.) yielded SPEEDUP of 1.493x: 0.01197s -> 0.008018s, so taking this instance. +Merge (99 -> 68 leaves) took w.c.t. of 0.01573s, ~0.0001314s of computing time (of which 2.36e-05s sorting, 2.003e-05s analysis) +3 iterations (10 th.) took 0.05996s; avg 0.01999s ( +/- 19.94/ 39.86 %); best 0.016s; worst 0.02795s; std dev. 0.005633 (taking best). +~ 10 threads: 0.016s (0.053 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 0.04799s; avg 0.016s ( +/- 0.11/ 0.07 %); best 0.01598s; worst 0.01601s; std dev. 1.277e-05 (taking best). + 9 threads: 0.01598s (0.053 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 0.04797s; avg 0.01599s ( +/- 5.07/ 5.03 %); best 0.01518s; worst 0.0168s; std dev. 0.0006594 (taking best). + 8 threads: 0.01518s (0.055 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 0.05595s; avg 0.01865s ( +/- 35.67/ 49.96 %); best 0.012s; worst 0.02797s; std dev. 0.006787 (taking best). + 7 threads: 0.012s (0.07 Mflops) (0/2 degradations so far) - +3 iterations (6 th.) took 0.04796s; avg 0.01599s ( +/- 25.02/ 25.10 %); best 0.01199s; worst 0.02s; std dev. 0.003271 (taking best). + 6 threads: 0.01199s (0.07 Mflops) (0/2 degradations so far) - +3 iterations (5 th.) took 0.05194s; avg 0.01731s ( +/- 7.98/ 15.56 %); best 0.01593s; worst 0.02001s; std dev. 0.001905 (taking best). + 5 threads: 0.01593s (0.053 Mflops) (1/2 degradations so far) - +3 iterations (4 th.) took 0.03599s; avg 0.012s ( +/- 33.12/ 33.35 %); best 0.008023s; worst 0.016s; std dev. 0.003256 (taking best). + 4 threads: 0.008023s (0.1 Mflops) (0/2 degradations so far) - +3 iterations (3 th.) took 0.04796s; avg 0.01599s ( +/- 24.89/ 24.79 %); best 0.01201s; worst 0.01995s; std dev. 0.003243 (taking best). + 3 threads: 0.01201s (0.07 Mflops) (1/2 degradations so far) - +3 iterations (2 th.) took 0.05195s; avg 0.01732s ( +/- 7.79/ 15.38 %); best 0.01597s; worst 0.01998s; std dev. 0.001884 (taking best). + 2 threads: 0.01597s (0.053 Mflops) (2/2 degradations so far) - +Best threads choice is 4; starting threads were 10; max speed gap is 2x; search took 0.45s. +After merge step 2: tpop: 0.008023 s ~Mflops: 0.105 nsubm:68 otn:4 +Applying merge (99 -> 68 leaves, 4 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99938x): 0.008018s -> 0.008023s, so IGNORING this instance. +Merge (68 -> 51 leaves) took w.c.t. of 0.008002s, ~9.584e-05s of computing time (of which 2.527e-05s sorting, 1.693e-05s analysis) +3 iterations (10 th.) took 0.06393s; avg 0.02131s ( +/- 62.45/ 33.34 %); best 0.008002s; worst 0.02841s; std dev. 0.009417 (taking best). +~ 10 threads: 0.008002s (0.1 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 0.05597s; avg 0.01866s ( +/- 14.23/ 7.15 %); best 0.016s; worst 0.01999s; std dev. 0.001877 (taking best). + 9 threads: 0.016s (0.052 Mflops) (1/2 degradations so far) - +3 iterations (8 th.) took 0.04397s; avg 0.01466s ( +/- 45.64/ 36.49 %); best 0.007967s; worst 0.02s; std dev. 0.005005 (taking best). + 8 threads: 0.007967s (0.11 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 0.04798s; avg 0.01599s ( +/- 25.02/ 25.06 %); best 0.01199s; worst 0.02s; std dev. 0.00327 (taking best). + 7 threads: 0.01199s (0.07 Mflops) (1/2 degradations so far) - +3 iterations (6 th.) took 0.04398s; avg 0.01466s ( +/- 17.65/ 8.98 %); best 0.01207s; worst 0.01597s; std dev. 0.001829 (taking best). + 6 threads: 0.01207s (0.07 Mflops) (2/2 degradations so far) - +Best threads choice is 8; starting threads were 10; max speed gap is 2x; search took 0.26s. +After merge step 3: tpop: 0.007967 s ~Mflops: 0.105 nsubm:51 otn:8 +Applying merge (68 -> 51 leaves, 8 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00640x): 0.008018s -> 0.007967s, so IGNORING this instance. +Merge (51 -> 36 leaves) took w.c.t. of 0.01999s, ~9.322e-05s of computing time (of which 1.693e-05s sorting, 1.502e-05s analysis) +3 iterations (10 th.) took 0.05195s; avg 0.01732s ( +/- 30.43/ 38.32 %); best 0.01205s; worst 0.02395s; std dev. 0.004956 (taking best). +~ 10 threads: 0.01205s (0.07 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 0.05197s; avg 0.01732s ( +/- 7.65/ 15.18 %); best 0.016s; worst 0.01995s; std dev. 0.00186 (taking best). + 9 threads: 0.016s (0.053 Mflops) (1/2 degradations so far) - +3 iterations (8 th.) took 0.04799s; avg 0.016s ( +/- 24.77/ 24.73 %); best 0.01203s; worst 0.01995s; std dev. 0.003232 (taking best). + 8 threads: 0.01203s (0.07 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 0.05595s; avg 0.01865s ( +/- 14.26/ 7.28 %); best 0.01599s; worst 0.02001s; std dev. 0.001881 (taking best). + 7 threads: 0.01599s (0.053 Mflops) (1/2 degradations so far) - +3 iterations (6 th.) took 0.03198s; avg 0.01066s ( +/- 62.29/ 49.82 %); best 0.00402s; worst 0.01597s; std dev. 0.004968 (taking best). + 6 threads: 0.00402s (0.21 Mflops) (0/2 degradations so far) - +3 iterations (5 th.) took 0.03599s; avg 0.012s ( +/- 66.62/ 66.79 %); best 0.004004s; worst 0.02001s; std dev. 0.006534 (taking best). + 5 threads: 0.004004s (0.21 Mflops) (0/2 degradations so far) - +3 iterations (4 th.) took 0.03197s; avg 0.01066s ( +/- 23.91/ 12.20 %); best 0.008109s; worst 0.01196s; std dev. 0.001802 (taking best). + 4 threads: 0.008109s (0.1 Mflops) (1/2 degradations so far) - +3 iterations (3 th.) took 0.03582s; avg 0.01194s ( +/- 32.89/ 32.46 %); best 0.008013s; worst 0.01582s; std dev. 0.003185 (taking best). + 3 threads: 0.008013s (0.1 Mflops) (2/2 degradations so far) - +Best threads choice is 5; starting threads were 10; max speed gap is 4x; search took 0.34s. +After merge step 4: tpop: 0.004004 s ~Mflops: 0.210 nsubm:36 otn:5 +Applying merge (51 -> 36 leaves, 5 th.) yielded SPEEDUP of 2.003x: 0.008018s -> 0.004004s, so taking this instance. +Merge (36 -> 25 leaves) took w.c.t. of 0.012s, ~8.178e-05s of computing time (of which 1.407e-05s sorting, 1.121e-05s analysis) +3 iterations (10 th.) took 0.08398s; avg 0.02799s ( +/- 42.73/ 42.72 %); best 0.01603s; worst 0.03995s; std dev. 0.009766 (taking best). +~ 10 threads: 0.01603s (0.052 Mflops) (0/2 degradations so far) - +3 iterations (9 th.) took 0.07193s; avg 0.02398s ( +/- 33.26/ 33.50 %); best 0.016s; worst 0.03201s; std dev. 0.006535 (taking best). + 9 threads: 0.016s (0.052 Mflops) (0/2 degradations so far) - +3 iterations (8 th.) took 0.05597s; avg 0.01866s ( +/- 14.53/ 7.33 %); best 0.01594s; worst 0.02002s; std dev. 0.001917 (taking best). + 8 threads: 0.01594s (0.053 Mflops) (0/2 degradations so far) - +3 iterations (7 th.) took 0.03995s; avg 0.01332s ( +/- 39.82/ 19.92 %); best 0.008013s; worst 0.01597s; std dev. 0.003749 (taking best). + 7 threads: 0.008013s (0.1 Mflops) (0/2 degradations so far) - +3 iterations (6 th.) took 0.04798s; avg 0.01599s ( +/- 50.02/ 49.98 %); best 0.007994s; worst 0.02399s; std dev. 0.006529 (taking best). + 6 threads: 0.007994s (0.11 Mflops) (0/2 degradations so far) - +3 iterations (5 th.) took 0.048s; avg 0.016s ( +/- 24.87/ 24.93 %); best 0.01202s; worst 0.01999s; std dev. 0.003253 (taking best). + 5 threads: 0.01202s (0.07 Mflops) (1/2 degradations so far) - +3 iterations (4 th.) took 0.05192s; avg 0.01731s ( +/- 7.85/ 15.49 %); best 0.01595s; worst 0.01999s; std dev. 0.001896 (taking best). + 4 threads: 0.01595s (0.053 Mflops) (2/2 degradations so far) - +Best threads choice is 6; starting threads were 10; max speed gap is 2x; search took 0.4s. +After merge step 5: tpop: 0.007994 s ~Mflops: 0.105 nsubm:25 otn:6 +Applying merge (36 -> 25 leaves, 6 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.996x: 0.004004s -> 0.007994s. Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). -A total of 5 merge steps (of max 6) (22 -> 3 subms) took 0.004363s (of which 0.0004177s partitioning, 0s I/O); computing times: 0.0006561s in par. loops, 8.345e-06s sorting, 1.097e-05s analyzing) -Total merge + benchmarking process took 0.004363s, equivalent to 167.9/32.0 new/old ops (0.0001023s for 4 clones -- as 3.9/0.8 ops, or 1.0/0.2 ops per clone), SPEEDUP of 5.239x -Applying multi-merge (22 -> 6 leaves, 4 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 5.239x (0.0001361s -> 2.599e-05s), will amortize in 39.6 ops by saving 0.0001101s per op. -In 1 tuning rounds (tot. 0.005s, 0.0001s for constructor, 4 clones) obtained a SPEEDUP of 423.9% (5.239x) (from 6.17 to 32.32 Mflops). - autotuner chose 18 threads -Will autotune matrix: 20 x 20, type D, 210 nnz, 10 nnz/r, 8 subms, 6 lsubms, 2.8762 bpnz. -Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:5.111e-08 -Starting autotuning (16 x 5.1105e-08 s stages, transA=N, nrhs=1, timer gran.=5.1105e-08), 18 suggested as starting thread count(default). -3 iterations (18 th.) took 0.0003219s; avg 0.0001073s ( +/- 28.22/ 44.44 %); best 7.701e-05s; worst 0.000155s; std dev. 3.412e-05 (taking best). -~ 18 threads: 7.701e-05s (11 Mflops) (0/2 degradations so far) - -3 iterations (17 th.) took 0.0003481s; avg 0.000116s ( +/- 15.55/ 18.15 %); best 9.799e-05s; worst 0.0001371s; std dev. 1.61e-05 (taking best). - 17 threads: 9.799e-05s (8.6 Mflops) (1/2 degradations so far) - -3 iterations (16 th.) took 0.0002501s; avg 8.337e-05s ( +/- 38.80/ 57.29 %); best 5.102e-05s; worst 0.0001311s; std dev. 3.447e-05 (taking best). - 16 threads: 5.102e-05s (16 Mflops) (0/2 degradations so far) - -3 iterations (15 th.) took 0.0001781s; avg 5.937e-05s ( +/- 20.48/ 12.85 %); best 4.721e-05s; worst 6.7e-05s; std dev. 8.691e-06 (taking best). - 15 threads: 4.721e-05s (18 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 0.0002458s; avg 8.194e-05s ( +/- 72.07/ 90.30 %); best 2.289e-05s; worst 0.0001559s; std dev. 5.533e-05 (taking best). - 14 threads: 2.289e-05s (37 Mflops) (0/2 degradations so far) - -3 iterations (13 th.) took 0.000104s; avg 3.465e-05s ( +/- 45.64/ 73.39 %); best 1.884e-05s; worst 6.008e-05s; std dev. 1.816e-05 (taking best). - 13 threads: 1.884e-05s (45 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 0.000185s; avg 6.167e-05s ( +/- 38.14/ 27.96 %); best 3.815e-05s; worst 7.892e-05s; std dev. 1.723e-05 (taking best). - 12 threads: 3.815e-05s (22 Mflops) (1/2 degradations so far) - -3 iterations (11 th.) took 0.0001831s; avg 6.104e-05s ( +/- 19.53/ 29.30 %); best 4.911e-05s; worst 7.892e-05s; std dev. 1.288e-05 (taking best). - 11 threads: 4.911e-05s (17 Mflops) (2/2 degradations so far) - -Best threads choice is 13; starting threads were 18; max speed gap is 5.2x; search took 0.0019s. -Starting merge (and threads) based auto-tuning procedure (transA=N, nrhs=1, order=cols) (max 6 steps, inclusive 3 grace steps) on: 20 x 20, type D, 210 nnz, 10 nnz/r, 8 subms, 6 lsubms, 2.8762 bpnz (tpop: 1.884e-05 Mflops: 44.598) -Merge (6 -> 3 leaves) took w.c.t. of 1.216e-05s, ~5.007e-06s of computing time (of which 2.146e-06s sorting, 2.146e-06s analysis) -3 iterations (18 th.) took 6.89e-05s; avg 2.297e-05s ( +/- 26.30/ 48.44 %); best 1.693e-05s; worst 3.409e-05s; std dev. 7.877e-06 (taking best). -~ 18 threads: 1.693e-05s (50 Mflops) (0/2 degradations so far) - -3 iterations (17 th.) took 0.000114s; avg 3.799e-05s ( +/- 74.27/100.21 %); best 9.775e-06s; worst 7.606e-05s; std dev. 2.794e-05 (taking best). - 17 threads: 9.775e-06s (86 Mflops) (0/2 degradations so far) - -3 iterations (16 th.) took 4.816e-05s; avg 1.605e-05s ( +/- 43.56/ 75.25 %); best 9.06e-06s; worst 2.813e-05s; std dev. 8.577e-06 (taking best). - 16 threads: 9.06e-06s (93 Mflops) (0/2 degradations so far) - -3 iterations (15 th.) took 0.0001109s; avg 3.695e-05s ( +/- 78.71/143.23 %); best 7.868e-06s; worst 8.988e-05s; std dev. 3.749e-05 (taking best). - 15 threads: 7.868e-06s (1.1e+02 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 5.388e-05s; avg 1.796e-05s ( +/- 44.25/ 77.88 %); best 1.001e-05s; worst 3.195e-05s; std dev. 9.921e-06 (taking best). - 14 threads: 1.001e-05s (84 Mflops) (1/2 degradations so far) - -3 iterations (13 th.) took 3.791e-05s; avg 1.264e-05s ( +/- 37.74/ 58.49 %); best 7.868e-06s; worst 2.003e-05s; std dev. 5.299e-06 (taking best). - 13 threads: 7.868e-06s (1.1e+02 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 8.798e-05s; avg 2.933e-05s ( +/- 72.36/ 77.24 %); best 8.106e-06s; worst 5.198e-05s; std dev. 1.794e-05 (taking best). - 12 threads: 8.106e-06s (1e+02 Mflops) (1/2 degradations so far) - -3 iterations (11 th.) took 0.000124s; avg 4.133e-05s ( +/- 80.38/129.62 %); best 8.106e-06s; worst 9.489e-05s; std dev. 3.824e-05 (taking best). - 11 threads: 8.106e-06s (1e+02 Mflops) (2/2 degradations so far) - -Best threads choice is 15; starting threads were 18; max speed gap is 2.2x; search took 0.00084s. -After merge step 1: tpop: 7.868e-06 s ~Mflops: 106.764 nsubm:3 otn:15 -Applying merge (6 -> 3 leaves, 15 th.) yielded SPEEDUP of 2.394x: 1.884e-05s -> 7.868e-06s, so taking this instance. -Merge (3 -> 1 leaves) took w.c.t. of 1.407e-05s, ~5.96e-06s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) -3 iterations (18 th.) took 5.007e-06s; avg 1.669e-06s ( +/- 42.86/ 71.43 %); best 9.537e-07s; worst 2.861e-06s; std dev. 8.485e-07 (taking best). -~ 18 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - -3 iterations (17 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). - 17 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - -3 iterations (16 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). - 16 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - -3 iterations (15 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). - 15 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - -3 iterations (14 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). - 14 threads: 9.537e-07s (8.8e+02 Mflops) (0/2 degradations so far) - -3 iterations (13 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 91.96/ 50.00 %); best 5.111e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). - 13 threads: 5.111e-08s (1.6e+04 Mflops) (0/2 degradations so far) - -3 iterations (12 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). - 12 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - -3 iterations (11 th.) took 1.907e-06s; avg 6.358e-07s ( +/- 91.96/ 50.00 %); best 5.111e-08s; worst 9.537e-07s; std dev. 4.496e-07 (taking best). - 11 threads: 5.111e-08s (1.6e+04 Mflops) (0/2 degradations so far) - -3 iterations (10 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). - 10 threads: 9.537e-07s (8.8e+02 Mflops) (1/2 degradations so far) - -3 iterations (9 th.) took 2.861e-06s; avg 9.537e-07s ( +/- 0.00/ 0.00 %); best 9.537e-07s; worst 9.537e-07s; std dev. 0 (taking best). - 9 threads: 9.537e-07s (8.8e+02 Mflops) (2/2 degradations so far) - -Best threads choice is 13; starting threads were 18; max speed gap is 19x; search took 0.00011s. -After merge step 2: tpop: 5.111e-08 s ~Mflops: 16436.741 nsubm:1 otn:13 -Applying merge (3 -> 1 leaves, 13 th.) yielded SPEEDUP of 153.954x: 7.868e-06s -> 5.111e-08s, so taking this instance. -Merged all the matrix leaves: no reason to continue merging. -A total of 2 merge steps (of max 6) (6 -> 1 subms) took 0.001044s (of which 3.338e-05s partitioning, 0s I/O); computing times: 1.097e-05s in par. loops, 4.053e-06s sorting, 4.292e-06s analyzing) -Total merge + benchmarking process took 0.001044s, equivalent to 20429.2/55.4 new/old ops (7.129e-05s for 3 clones -- as 1394.9/3.8 ops, or 465.0/1.3 ops per clone), SPEEDUP of 368.556x -Applying multi-merge (6 -> 1 leaves, 2 steps, 13 -> 13 th.sp.) yielded SPEEDUP of 368.556x (1.884e-05s -> 5.111e-08s), will amortize in 55.6 ops by saving 1.878e-05s per op. -In 1 tuning rounds (tot. 0.003s, 7.1e-05s for constructor, 3 clones) obtained a SPEEDUP of 36755.6% (368.6x) (from 44.6 to 1.644e+04 Mflops). +A total of 5 merge steps (of max 6) (146 -> 25 subms) took 1.924s (of which 0.07177s partitioning, 0s I/O); computing times: 0.000596s in par. loops, 0.0001159s sorting, 8.225e-05s analyzing) +Total merge + benchmarking process took 1.924s, equivalent to 480.5/160.7 new/old ops (0.1242s for 3 clones -- as 31.0/10.4 ops, or 10.3/3.5 ops per clone), SPEEDUP of 2.990x +Applying multi-merge (146 -> 36 leaves, 4 steps, 9 -> 5 th.sp.) yielded SPEEDUP of 2.990x (0.01197s -> 0.004004s), will amortize in 241.5 ops by saving 0.007967s per op. +In 1 tuning rounds (tot. 2.2s, 0.12s for constructor, 3 clones) obtained a SPEEDUP of 199.0% (2.99x) (from 0.07017 to 0.2098 Mflops). check results are ok Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 4.315e-05 s (100.00 %) - analyzed arrays in 1.311e-05 s (30.39 %) - cleaned-up arrays in 9.537e-07 s (2.21 %) - deduplicated arrays in 9.537e-07 s (2.21 %) - sorted arrays in 1.407e-05 s (32.60 %) - shuffled partitions in 7.868e-06 s (18.23 %) - memory allocations took 3.099e-06 s (7.18 %) - leafs setup took 0.000e+00 s (0.00 %) - halfword conversion took 2.146e-06 s (4.97 %) -Built (6 x 6)[0x5766c030]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' + converted COO to RSB in 4.765e-02 s (100.00 %) + analyzed arrays in 1.556e-02 s (32.65 %) + cleaned-up arrays in 2.146e-06 s (0.00 %) + deduplicated arrays in 1.907e-06 s (0.00 %) + sorted arrays in 1.608e-02 s (33.75 %) + shuffled partitions in 1.598e-02 s (33.54 %) + memory allocations took 1.597e-05 s (0.03 %) + leafs setup took 1.907e-06 s (0.00 %) + halfword conversion took 3.099e-06 s (0.01 %) +Built (6 x 6)[0x18a8a90]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x20440b4 (coo:0, csr:1, hw:0, ic:1, fi:0), storage: 1, subm: 1, symflags:'UL' Read matrix pd.mtx 6 x 6 : 36 Matrix has no symmetry Using NRHS=4 -Repeated USMV took 0.3195E-04 s -A single USMM took 0.1907E-04 s -USMM-to-USMV speed ratio is is 1.675 x +Repeated USMV took 0.4506E-04 s +A single USMM took 0.1192E-04 s +USMM-to-USMV speed ratio is is 3.780 x Call auto-tuning routine.. Repeat measurement. -Tuned USMM took 0.4053E-05 s -Tuned-to-untuned speed ratio is is 4.706 x +Tuned USMM took 0.5960E-05 s +Tuned-to-untuned speed ratio is is 2.000 x FAILED: 0 PASSED: 2 /build/reproducible-path/librsb-1.3.0.2+dfsg/examples/fortran_rsb_fi @@ -8726,243 +8839,237 @@ Loading matrix from file "/build/reproducible-path/librsb-1.3.0.2+dfsg/pd.mtx". Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 1.013e-03 s (100.00 %) - analyzed arrays in 4.411e-05 s (4.35 %) - cleaned-up arrays in 4.053e-06 s (0.40 %) - deduplicated arrays in 0.000e+00 s (0.00 %) - sorted arrays in 9.048e-04 s (89.32 %) - shuffled partitions in 3.099e-05 s (3.06 %) - memory allocations took 5.245e-06 s (0.52 %) - leafs setup took 3.099e-06 s (0.31 %) - halfword conversion took 1.287e-05 s (1.27 %) -Built (6 x 6)[0x5768f170]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + converted COO to RSB in 1.930e-01 s (100.00 %) + analyzed arrays in 5.996e-02 s (31.06 %) + cleaned-up arrays in 1.192e-05 s (0.01 %) + deduplicated arrays in 4.053e-06 s (0.00 %) + sorted arrays in 4.103e-02 s (21.26 %) + shuffled partitions in 4.447e-02 s (23.04 %) + memory allocations took 2.098e-05 s (0.01 %) + leafs setup took 9.060e-06 s (0.00 %) + halfword conversion took 4.752e-02 s (24.61 %) +Built (6 x 6)[0x14fb160]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Considering D clone. Base matrix: -(6 x 6)[0x576925b0]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x14fdb10]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.000345s; avg 0.000115s ( +/- 11.26/ 11.33 %); best 0.000102s; worst 0.000128s; std dev. 1.061e-05 (taking best). -Reference operation time is 0.000102043 s (1.411 Mflops) with 18 threads. -After 0.000375s, autotuning routine did not find a better threads count configuration. +3 iterations (10 th.) took 0.04792s; avg 0.01597s ( +/- 25.38/ 50.14 %); best 0.01192s; worst 0.02398s; std dev. 0.005663 (taking best). +Reference operation time is 0.01192 s (0.01208 Mflops) with 10 threads. +After 0.048005s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.0004368s; avg 0.0001456s ( +/- 13.54/ 9.88 %); best 0.0001259s; worst 0.00016s; std dev. 1.442e-05 (taking best). -Reference operation time is 0.000125885 s (1.144 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.0001259 Mflops: 1.144) -Merge (22 -> 16 leaves) took w.c.t. of 0.0002761s, ~0.0005109s of computing time (of which 2.146e-06s sorting, 3.099e-06s analysis) -3 iterations (18 th.) took 0.001979s; avg 0.0006596s ( +/- 88.94/176.36 %); best 7.296e-05s; worst 0.001823s; std dev. 0.0008226 (taking best). -Reference operation time is 7.29561e-05 s (1.974 Mflops) with 18 threads. -After merge step 1: tpop: 7.296e-05 s ~Mflops: 1.974 nsubm:16 otn:18 -Applying merge (22 -> 16 leaves, 18 th.) yielded SPEEDUP of 1.725x: 0.0001259s -> 7.296e-05s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 2.599e-05s, ~1.097e-05s of computing time (of which 1.907e-06s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 0.001091s; avg 0.0003637s ( +/- 87.61/175.02 %); best 4.506e-05s; worst 0.001s; std dev. 0.0004501 (taking best). -Reference operation time is 4.50611e-05 s (3.196 Mflops) with 18 threads. -After merge step 2: tpop: 4.506e-05 s ~Mflops: 3.196 nsubm:10 otn:18 -Applying merge (16 -> 10 leaves, 18 th.) yielded SPEEDUP of 1.619x: 7.296e-05s -> 4.506e-05s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 1.121e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 6.413e-05s; avg 2.138e-05s ( +/- 16.36/ 27.14 %); best 1.788e-05s; worst 2.718e-05s; std dev. 4.131e-06 (taking best). -Reference operation time is 1.78814e-05 s (8.053 Mflops) with 18 threads. -After merge step 3: tpop: 1.788e-05 s ~Mflops: 8.053 nsubm:7 otn:18 -Applying merge (10 -> 7 leaves, 18 th.) yielded SPEEDUP of 2.520x: 4.506e-05s -> 1.788e-05s, so taking this instance. -Merge (7 -> 4 leaves) took w.c.t. of 7.153e-06s, ~1.907e-06s of computing time (of which 0s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 2.909e-05s; avg 9.696e-06s ( +/- 6.56/ 3.28 %); best 9.06e-06s; worst 1.001e-05s; std dev. 4.496e-07 (taking best). -Reference operation time is 9.05991e-06 s (15.89 Mflops) with 18 threads. -After merge step 4: tpop: 9.06e-06 s ~Mflops: 15.894 nsubm:4 otn:18 -Applying merge (7 -> 4 leaves, 18 th.) yielded SPEEDUP of 1.974x: 1.788e-05s -> 9.06e-06s, so taking this instance. -Merge (4 -> 1 leaves) took w.c.t. of 1.407e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 2.146e-06s analysis) -3 iterations (18 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 94.87/107.69 %); best 5.295e-08s; worst 2.146e-06s; std dev. 8.778e-07 (taking best). -Reference operation time is 5.29528e-08 s (2719 Mflops) with 18 threads. -After merge step 5: tpop: 5.295e-08 s ~Mflops: 2719.405 nsubm:1 otn:18 -Applying merge (4 -> 1 leaves, 18 th.) yielded SPEEDUP of 171.094x: 9.06e-06s -> 5.295e-08s, so taking this instance. -Merged all the matrix leaves: no reason to continue merging. -A total of 5 merge steps (of max 6) (22 -> 1 subms) took 0.003861s (of which 0.0003622s partitioning, 0s I/O); computing times: 0.0005319s in par. loops, 5.96e-06s sorting, 1.001e-05s analyzing) -Total merge + benchmarking process took 0.003861s, equivalent to 72913.1/30.7 new/old ops (0.000139s for 6 clones -- as 2624.9/1.1 ops, or 437.5/0.2 ops per clone), SPEEDUP of 2377.308x -Applying multi-merge (22 -> 1 leaves, 5 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 2377.308x (0.0001259s -> 5.295e-08s), will amortize in 30.7 ops by saving 0.0001258s per op. -In 1 tuning rounds (tot. 0.0044s, 0.00014s for constructor, 6 clones) obtained a SPEEDUP of 237630.8% (2377x) (from 1.144 to 2719 Mflops). -After 0.004367s, global autotuning declared speedup of 2377.31 x, when using threads count of 18 and a new matrix: -(6 x 6)[0x576918f0]{D} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +3 iterations (10 th.) took 0.04799s; avg 0.016s ( +/- 25.06/ 50.02 %); best 0.01199s; worst 0.024s; std dev. 0.005658 (taking best). +Reference operation time is 0.011987 s (0.01201 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type D, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.01199 Mflops: 0.012) +Merge (22 -> 16 leaves) took w.c.t. of 0.01203s, ~0.01597s of computing time (of which 7.153e-06s sorting, 1.121e-05s analysis) +3 iterations (10 th.) took 0.04391s; avg 0.01464s ( +/- 47.72/ 90.74 %); best 0.007651s; worst 0.02792s; std dev. 0.009395 (taking best). +Reference operation time is 0.00765109 s (0.01882 Mflops) with 10 threads. +After merge step 1: tpop: 0.007651 s ~Mflops: 0.019 nsubm:16 otn:10 +Applying merge (22 -> 16 leaves, 10 th.) yielded SPEEDUP of 1.567x: 0.01199s -> 0.007651s, so taking this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.004186s, ~3.6e-05s of computing time (of which 6.199e-06s sorting, 8.106e-06s analysis) +3 iterations (10 th.) took 0.05985s; avg 0.01995s ( +/- 19.41/ 25.12 %); best 0.01608s; worst 0.02496s; std dev. 0.003714 (taking best). +Reference operation time is 0.016077 s (0.008957 Mflops) with 10 threads. +After merge step 2: tpop: 0.01608 s ~Mflops: 0.009 nsubm:10 otn:10 +Applying merge (16 -> 10 leaves, 10 th.) yielded SLOWDOWN (1th of 3 tolerable) of 2.101x: 0.007651s -> 0.01608s. +Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 2 merge steps (of max 6) (22 -> 10 subms) took 0.1601s (of which 0.01626s partitioning, 0s I/O); computing times: 0.016s in par. loops, 1.335e-05s sorting, 1.931e-05s analyzing) +Total merge + benchmarking process took 0.1601s, equivalent to 20.9/13.4 new/old ops (0.08788s for 2 clones -- as 11.5/7.3 ops, or 5.7/3.7 ops per clone), SPEEDUP of 1.567x +Applying multi-merge (22 -> 16 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 1.567x (0.01199s -> 0.007651s), will amortize in 36.9 ops by saving 0.004336s per op. +In 1 tuning rounds (tot. 0.26s, 0.088s for constructor, 2 clones) obtained a SPEEDUP of 56.7% (1.567x) (from 0.01201 to 0.01882 Mflops). +After 0.256093s, global autotuning declared speedup of 1.5667 x, when using threads count of 10 and a new matrix: +(6 x 6)[0x14ff430]{D} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Considering S clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 7.796e-05 s (100.00 %) - analyzed arrays in 3.600e-05 s (46.18 %) - cleaned-up arrays in 0.000e+00 s (0.00 %) - deduplicated arrays in 9.537e-07 s (1.22 %) - sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.217e-05 s (28.44 %) - memory allocations took 2.861e-06 s (3.67 %) - leafs setup took 1.907e-06 s (2.45 %) - halfword conversion took 1.407e-05 s (18.04 %) -Built (6 x 6)[0x576925b0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' + converted COO to RSB in 1.038e-01 s (100.00 %) + analyzed arrays in 3.952e-02 s (38.07 %) + cleaned-up arrays in 1.907e-06 s (0.00 %) + deduplicated arrays in 9.537e-07 s (0.00 %) + sorted arrays in 1.192e-06 s (0.00 %) + shuffled partitions in 3.203e-02 s (30.85 %) + memory allocations took 2.849e-04 s (0.27 %) + leafs setup took 5.960e-06 s (0.01 %) + halfword conversion took 3.197e-02 s (30.79 %) +Built (6 x 6)[0x14ff430]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' Base matrix: -(6 x 6)[0x576925b0]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' +(6 x 6)[0x14ff430]{S} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 16, symflags:'' -Will use autotuning routine to sample matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. +Will use autotuning routine to sample matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.5000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.000248s; avg 8.265e-05s ( +/- 7.98/ 5.29 %); best 7.606e-05s; worst 8.702e-05s; std dev. 4.746e-06 (taking best). -Reference operation time is 7.60555e-05 s (1.893 Mflops) with 18 threads. -After 0.000263s, autotuning routine did not find a better threads count configuration. +3 iterations (10 th.) took 0.05594s; avg 0.01865s ( +/- 14.62/ 28.79 %); best 0.01592s; worst 0.02401s; std dev. 0.003796 (taking best). +Reference operation time is 0.0159199 s (0.009045 Mflops) with 10 threads. +After 0.055996s, autotuning routine did not find a better threads count configuration. -Will autotune matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz. +Will autotune matrix: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.5000 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.0002341s; avg 7.804e-05s ( +/- 11.41/ 7.54 %); best 6.914e-05s; worst 8.392e-05s; std dev. 6.401e-06 (taking best). -Reference operation time is 6.91414e-05 s (2.083 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.8889 bpnz (tpop: 6.914e-05 Mflops: 2.083) -Merge (16 -> 10 leaves) took w.c.t. of 2.289e-05s, ~7.153e-06s of computing time (of which 2.146e-06s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 0.001548s; avg 0.0005159s ( +/- 92.65/182.58 %); best 3.791e-05s; worst 0.001458s; std dev. 0.0006661 (taking best). -Reference operation time is 3.79086e-05 s (3.799 Mflops) with 18 threads. -After merge step 1: tpop: 3.791e-05 s ~Mflops: 3.799 nsubm:10 otn:18 -Applying merge (16 -> 10 leaves, 18 th.) yielded SPEEDUP of 1.824x: 6.914e-05s -> 3.791e-05s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 1.216e-05s, ~3.099e-06s of computing time (of which 1.192e-06s sorting, 2.146e-06s analysis) -3 iterations (18 th.) took 0.000108s; avg 3.6e-05s ( +/- 47.02/ 77.48 %); best 1.907e-05s; worst 6.39e-05s; std dev. 1.987e-05 (taking best). -Reference operation time is 1.90735e-05 s (7.55 Mflops) with 18 threads. -After merge step 2: tpop: 1.907e-05 s ~Mflops: 7.550 nsubm:7 otn:18 -Applying merge (10 -> 7 leaves, 18 th.) yielded SPEEDUP of 1.988x: 3.791e-05s -> 1.907e-05s, so taking this instance. -Merge (7 -> 4 leaves) took w.c.t. of 1.001e-05s, ~3.099e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 3.409e-05s; avg 1.136e-05s ( +/- 20.28/ 32.17 %); best 9.06e-06s; worst 1.502e-05s; std dev. 2.614e-06 (taking best). -Reference operation time is 9.05991e-06 s (15.89 Mflops) with 18 threads. -After merge step 3: tpop: 9.06e-06 s ~Mflops: 15.894 nsubm:4 otn:18 -Applying merge (7 -> 4 leaves, 18 th.) yielded SPEEDUP of 2.105x: 1.907e-05s -> 9.06e-06s, so taking this instance. -Merge (4 -> 1 leaves) took w.c.t. of 6.914e-06s, ~1.907e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 3.099e-06s; avg 1.033e-06s ( +/- 7.69/ 15.38 %); best 9.537e-07s; worst 1.192e-06s; std dev. 1.124e-07 (taking best). -Reference operation time is 9.53674e-07 s (151 Mflops) with 18 threads. -After merge step 4: tpop: 9.537e-07 s ~Mflops: 150.995 nsubm:1 otn:18 -Applying merge (4 -> 1 leaves, 18 th.) yielded SPEEDUP of 9.500x: 9.06e-06s -> 9.537e-07s, so taking this instance. +3 iterations (10 th.) took 0.05598s; avg 0.01866s ( +/- 14.35/ 25.11 %); best 0.01598s; worst 0.02335s; std dev. 0.003325 (taking best). +Reference operation time is 0.0159841 s (0.009009 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type S, 36 nnz, 6 nnz/r, 21 subms, 16 lsubms, 4.5000 bpnz (tpop: 0.01598 Mflops: 0.009) +Merge (16 -> 13 leaves) took w.c.t. of 3.409e-05s, ~1.001e-05s of computing time (of which 3.099e-06s sorting, 8.106e-06s analysis) +3 iterations (10 th.) took 0.05593s; avg 0.01864s ( +/- 14.16/ 28.25 %); best 0.016s; worst 0.02391s; std dev. 0.003725 (taking best). +Reference operation time is 0.0160038 s (0.008998 Mflops) with 10 threads. +After merge step 1: tpop: 0.016 s ~Mflops: 0.009 nsubm:13 otn:10 +Applying merge (16 -> 13 leaves, 10 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=0.99876x): 0.01598s -> 0.016s, so IGNORING this instance. +Merge (13 -> 10 leaves) took w.c.t. of 3.695e-05s, ~1.192e-05s of computing time (of which 2.861e-06s sorting, 1.001e-05s analysis) +3 iterations (10 th.) took 0.0479s; avg 0.01597s ( +/- 0.43/ 0.22 %); best 0.0159s; worst 0.016s; std dev. 4.861e-05 (taking best). +Reference operation time is 0.015898 s (0.009058 Mflops) with 10 threads. +After merge step 2: tpop: 0.0159 s ~Mflops: 0.009 nsubm:10 otn:10 +Applying merge (13 -> 10 leaves, 10 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00541x): 0.01598s -> 0.0159s, so IGNORING this instance. +Merge (10 -> 7 leaves) took w.c.t. of 3.6e-05s, ~1.287e-05s of computing time (of which 3.099e-06s sorting, 7.868e-06s analysis) +3 iterations (10 th.) took 0.05192s; avg 0.01731s ( +/- 7.99/ 15.57 %); best 0.01592s; worst 0.02s; std dev. 0.001905 (taking best). +Reference operation time is 0.015923 s (0.009044 Mflops) with 10 threads. +After merge step 3: tpop: 0.01592 s ~Mflops: 0.009 nsubm:7 otn:10 +Applying merge (10 -> 7 leaves, 10 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00383x): 0.01598s -> 0.01592s, so IGNORING this instance. +Merge (7 -> 4 leaves) took w.c.t. of 3.505e-05s, ~1.311e-05s of computing time (of which 2.861e-06s sorting, 6.914e-06s analysis) +3 iterations (10 th.) took 0.04795s; avg 0.01598s ( +/- 25.45/ 25.20 %); best 0.01191s; worst 0.02001s; std dev. 0.003305 (taking best). +Reference operation time is 0.011914 s (0.01209 Mflops) with 10 threads. +After merge step 4: tpop: 0.01191 s ~Mflops: 0.012 nsubm:4 otn:10 +Applying merge (7 -> 4 leaves, 10 th.) yielded SPEEDUP of 1.342x: 0.01598s -> 0.01191s, so taking this instance. +Merge (4 -> 1 leaves) took w.c.t. of 3.505e-05s, ~1.287e-05s of computing time (of which 4.053e-06s sorting, 5.96e-06s analysis) +3 iterations (10 th.) took 1.216e-05s; avg 4.053e-06s ( +/- 76.47/147.06 %); best 9.537e-07s; worst 1.001e-05s; std dev. 4.216e-06 (taking best). +Reference operation time is 9.53674e-07 s (151 Mflops) with 10 threads. +After merge step 5: tpop: 9.537e-07 s ~Mflops: 150.995 nsubm:1 otn:10 +Applying merge (4 -> 1 leaves, 10 th.) yielded SPEEDUP of 12492.750x: 0.01191s -> 9.537e-07s, so taking this instance. Merged all the matrix leaves: no reason to continue merging. -A total of 4 merge steps (of max 6) (16 -> 1 subms) took 0.001973s (of which 0.0001285s partitioning, 0s I/O); computing times: 1.526e-05s in par. loops, 5.245e-06s sorting, 6.914e-06s analyzing) -Total merge + benchmarking process took 0.001973s, equivalent to 2068.8/28.5 new/old ops (0.000113s for 5 clones -- as 118.5/1.6 ops, or 23.7/0.3 ops per clone), SPEEDUP of 72.500x -Applying multi-merge (16 -> 1 leaves, 4 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 72.500x (6.914e-05s -> 9.537e-07s), will amortize in 28.9 ops by saving 6.819e-05s per op. -In 1 tuning rounds (tot. 0.0023s, 0.00011s for constructor, 5 clones) obtained a SPEEDUP of 7150.0% (72.5x) (from 2.083 to 151 Mflops). -After 0.002270s, global autotuning declared speedup of 72.5 x, when using threads count of 18 and a new matrix: -(6 x 6)[0x576918f0]{S} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +A total of 5 merge steps (of max 6) (16 -> 1 subms) took 0.268s (of which 0.0004849s partitioning, 0s I/O); computing times: 6.08e-05s in par. loops, 1.597e-05s sorting, 3.886e-05s analyzing) +Total merge + benchmarking process took 0.268s, equivalent to 280995.5/16.8 new/old ops (0.1115s for 3 clones -- as 116934.0/7.0 ops, or 38978.0/2.3 ops per clone), SPEEDUP of 16760.500x +Applying multi-merge (16 -> 1 leaves, 5 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 16760.500x (0.01598s -> 9.537e-07s), will amortize in 16.8 ops by saving 0.01598s per op. +In 1 tuning rounds (tot. 0.37s, 0.11s for constructor, 3 clones) obtained a SPEEDUP of 1675950.0% (1.676e+04x) (from 0.009009 to 151 Mflops). +After 0.372017s, global autotuning declared speedup of 16760.5 x, when using threads count of 10 and a new matrix: +(6 x 6)[0x1501e70]{S} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' Considering C clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 8.702e-05 s (100.00 %) - analyzed arrays in 4.005e-05 s (46.03 %) - cleaned-up arrays in 9.537e-07 s (1.10 %) - deduplicated arrays in 0.000e+00 s (0.00 %) + converted COO to RSB in 7.191e-02 s (100.00 %) + analyzed arrays in 2.387e-02 s (33.20 %) + cleaned-up arrays in 1.907e-06 s (0.00 %) + deduplicated arrays in 2.146e-06 s (0.00 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 2.599e-05 s (29.86 %) - memory allocations took 3.099e-06 s (3.56 %) - leafs setup took 1.907e-06 s (2.19 %) - halfword conversion took 1.407e-05 s (16.16 %) -Built (6 x 6)[0x57699130]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' + shuffled partitions in 2.400e-02 s (33.38 %) + memory allocations took 2.694e-05 s (0.04 %) + leafs setup took 7.153e-06 s (0.01 %) + halfword conversion took 2.399e-02 s (33.36 %) +Built (6 x 6)[0x1504800]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Base matrix: -(6 x 6)[0x57699130]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' +(6 x 6)[0x1504800]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 22, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.00038s; avg 0.0001267s ( +/- 2.89/ 4.08 %); best 0.000123s; worst 0.0001318s; std dev. 3.756e-06 (taking best). -Reference operation time is 0.000123024 s (4.682 Mflops) with 18 threads. -After 0.000395s, autotuning routine did not find a better threads count configuration. +3 iterations (10 th.) took 0.03193s; avg 0.01064s ( +/- 24.97/ 49.80 %); best 0.007985s; worst 0.01594s; std dev. 0.003748 (taking best). +Reference operation time is 0.00798488 s (0.07214 Mflops) with 10 threads. +After 0.031977s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.00032s; avg 0.0001067s ( +/- 7.23/ 3.95 %); best 9.894e-05s; worst 0.0001109s; std dev. 5.459e-06 (taking best). -Reference operation time is 9.89437e-05 s (5.821 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 9.894e-05 Mflops: 5.821) -Merge (22 -> 16 leaves) took w.c.t. of 1.884e-05s, ~6.914e-06s of computing time (of which 2.146e-06s sorting, 2.861e-06s analysis) -3 iterations (18 th.) took 0.001394s; avg 0.0004647s ( +/- 81.68/163.21 %); best 8.512e-05s; worst 0.001223s; std dev. 0.0005363 (taking best). -Reference operation time is 8.51154e-05 s (6.767 Mflops) with 18 threads. -After merge step 1: tpop: 8.512e-05 s ~Mflops: 6.767 nsubm:16 otn:18 -Applying merge (22 -> 16 leaves, 18 th.) yielded SPEEDUP of 1.162x: 9.894e-05s -> 8.512e-05s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 1.502e-05s, ~7.868e-06s of computing time (of which 1.907e-06s sorting, 3.099e-06s analysis) -3 iterations (18 th.) took 0.001197s; avg 0.000399s ( +/- 90.98/179.92 %); best 3.6e-05s; worst 0.001117s; std dev. 0.0005077 (taking best). -Reference operation time is 3.60012e-05 s (16 Mflops) with 18 threads. -After merge step 2: tpop: 3.6e-05 s ~Mflops: 15.999 nsubm:10 otn:18 -Applying merge (16 -> 10 leaves, 18 th.) yielded SPEEDUP of 2.364x: 8.512e-05s -> 3.6e-05s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 1.097e-05s, ~3.815e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 6.294e-05s; avg 2.098e-05s ( +/- 9.09/ 13.64 %); best 1.907e-05s; worst 2.384e-05s; std dev. 2.06e-06 (taking best). -Reference operation time is 1.90735e-05 s (30.2 Mflops) with 18 threads. -After merge step 3: tpop: 1.907e-05 s ~Mflops: 30.199 nsubm:7 otn:18 -Applying merge (10 -> 7 leaves, 18 th.) yielded SPEEDUP of 1.887x: 3.6e-05s -> 1.907e-05s, so taking this instance. -Merge (7 -> 4 leaves) took w.c.t. of 6.914e-06s, ~1.907e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 3.791e-05s; avg 1.264e-05s ( +/- 5.66/ 11.32 %); best 1.192e-05s; worst 1.407e-05s; std dev. 1.012e-06 (taking best). -Reference operation time is 1.19209e-05 s (48.32 Mflops) with 18 threads. -After merge step 4: tpop: 1.192e-05 s ~Mflops: 48.318 nsubm:4 otn:18 -Applying merge (7 -> 4 leaves, 18 th.) yielded SPEEDUP of 1.600x: 1.907e-05s -> 1.192e-05s, so taking this instance. -Merge (4 -> 1 leaves) took w.c.t. of 7.153e-06s, ~1.907e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 4.053e-06s; avg 1.351e-06s ( +/- 29.41/ 58.82 %); best 9.537e-07s; worst 2.146e-06s; std dev. 5.62e-07 (taking best). -Reference operation time is 9.53674e-07 s (604 Mflops) with 18 threads. -After merge step 5: tpop: 9.537e-07 s ~Mflops: 603.980 nsubm:1 otn:18 -Applying merge (4 -> 1 leaves, 18 th.) yielded SPEEDUP of 12.500x: 1.192e-05s -> 9.537e-07s, so taking this instance. -Merged all the matrix leaves: no reason to continue merging. -A total of 5 merge steps (of max 6) (22 -> 1 subms) took 0.002959s (of which 7.629e-05s partitioning, 0s I/O); computing times: 2.241e-05s in par. loops, 6.914e-06s sorting, 9.775e-06s analyzing) -Total merge + benchmarking process took 0.002959s, equivalent to 3102.8/29.9 new/old ops (0.0001488s for 6 clones -- as 156.0/1.5 ops, or 26.0/0.3 ops per clone), SPEEDUP of 103.750x -Applying multi-merge (22 -> 1 leaves, 5 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 103.750x (9.894e-05s -> 9.537e-07s), will amortize in 30.2 ops by saving 9.799e-05s per op. -In 1 tuning rounds (tot. 0.0034s, 0.00015s for constructor, 6 clones) obtained a SPEEDUP of 10275.0% (103.8x) (from 5.821 to 604 Mflops). -After 0.003399s, global autotuning declared speedup of 103.75 x, when using threads count of 18 and a new matrix: -(6 x 6)[0x57695150]{C} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +3 iterations (10 th.) took 0.03994s; avg 0.01331s ( +/- 39.85/ 20.10 %); best 0.008008s; worst 0.01599s; std dev. 0.003752 (taking best). +Reference operation time is 0.008008 s (0.07193 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.008008 Mflops: 0.072) +Merge (22 -> 16 leaves) took w.c.t. of 4.506e-05s, ~2.48e-05s of computing time (of which 7.868e-06s sorting, 9.06e-06s analysis) +3 iterations (10 th.) took 0.07993s; avg 0.02664s ( +/- 70.03/ 79.85 %); best 0.007984s; worst 0.04792s; std dev. 0.01641 (taking best). +Reference operation time is 0.00798416 s (0.07214 Mflops) with 10 threads. +After merge step 1: tpop: 0.007984 s ~Mflops: 0.072 nsubm:16 otn:10 +Applying merge (22 -> 16 leaves, 10 th.) yielded NEGLIGIBLE change (1th in a row) (old/new=1.00299x): 0.008008s -> 0.007984s, so IGNORING this instance. +Merge (16 -> 10 leaves) took w.c.t. of 0.007998s, ~3.982e-05s of computing time (of which 6.199e-06s sorting, 7.868e-06s analysis) +3 iterations (10 th.) took 0.05193s; avg 0.01731s ( +/- 53.92/ 61.34 %); best 0.007976s; worst 0.02793s; std dev. 0.008195 (taking best). +Reference operation time is 0.00797606 s (0.07222 Mflops) with 10 threads. +After merge step 2: tpop: 0.007976 s ~Mflops: 0.072 nsubm:10 otn:10 +Applying merge (16 -> 10 leaves, 10 th.) yielded NEGLIGIBLE change (2th in a row) (old/new=1.00401x): 0.008008s -> 0.007976s, so IGNORING this instance. +Merge (10 -> 7 leaves) took w.c.t. of 3.099e-05s, ~1.097e-05s of computing time (of which 1.907e-06s sorting, 8.106e-06s analysis) +3 iterations (10 th.) took 0.0398s; avg 0.01327s ( +/- 39.70/ 20.21 %); best 0.008s; worst 0.01595s; std dev. 0.003725 (taking best). +Reference operation time is 0.00800014 s (0.072 Mflops) with 10 threads. +After merge step 3: tpop: 0.008 s ~Mflops: 0.072 nsubm:7 otn:10 +Applying merge (10 -> 7 leaves, 10 th.) yielded NEGLIGIBLE change (3th in a row) (old/new=1.00098x): 0.008008s -> 0.008s, so IGNORING this instance. +Merge (7 -> 4 leaves) took w.c.t. of 1.621e-05s, ~7.153e-06s of computing time (of which 2.146e-06s sorting, 1.907e-06s analysis) +3 iterations (10 th.) took 0.04001s; avg 0.01334s ( +/- 9.79/ 17.33 %); best 0.01203s; worst 0.01565s; std dev. 0.001639 (taking best). +Reference operation time is 0.0120292 s (0.04788 Mflops) with 10 threads. +After merge step 4: tpop: 0.01203 s ~Mflops: 0.048 nsubm:4 otn:10 +Applying merge (7 -> 4 leaves, 10 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.502x: 0.008008s -> 0.01203s. +Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 4 merge steps (of max 6) (22 -> 4 subms) took 0.22s (of which 0.008124s partitioning, 0s I/O); computing times: 8.273e-05s in par. loops, 1.812e-05s sorting, 2.694e-05s analyzing) +Total merge + benchmarking process took 0.22s, equivalent to 27.5/27.5 new/old ops (0.03199s for 1 clones -- as 4.0/4.0 ops, or 4.0/4.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (10 th.) took 0.03136s; avg 0.01045s ( +/- 78.32/ 47.62 %); best 0.002267s; worst 0.01543s; std dev. 0.005835 (taking best). +Reference operation time is 0.00226688 s (0.2541 Mflops) with 10 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type C, 36 nnz, 6 nnz/r, 29 subms, 22 lsubms, 4.6667 bpnz (tpop: 0.002267 Mflops: 0.254) +Split (22 -> 28 leaves, 29 -> 37 subms) took 0.0001349s (of which: 7.868e-06s analysis, -3.412e+09s mem.mgmt); compute time: 0.0001559s overall, 4.292e-06s searches, 0.0001516s shuffle, 6.509e-05s switch, 4.053e-06s quadrants. +3 iterations (10 th.) took 0.000983s; avg 0.0003277s ( +/- 16.98/ 20.57 %); best 0.000272s; worst 0.0003951s; std dev. 5.091e-05 (taking best). +Reference operation time is 0.000272036 s (2.117 Mflops) with 10 threads. +After split step 1: tpop: 0.000272 s ~Mflops: 2.117 nsubm:28 otn:10 +Applying split (22 -> 28 leaves, 10 th.) yielded SPEEDUP of 8.333x: 0.002267s -> 0.000272s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.000236s (of which: 1.311e-05s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 0.001125s; avg 0.000375s ( +/- 12.02/ 10.43 %); best 0.00033s; worst 0.0004141s; std dev. 3.462e-05 (taking best). +Reference operation time is 0.000329971 s (1.746 Mflops) with 10 threads. +After split step 2: tpop: 0.00033 s ~Mflops: 1.746 nsubm:28 otn:10 +Applying split (28 -> 28 leaves, 10 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.213x: 0.000272s -> 0.00033s. +Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 2 split steps (of max 6) (22 -> 28 subms) took 0.03472s (of which 0.00051s partitioning, 0s I/O); computing times: 0.0001559s in par. loops, 4.292e-06s sorting, 2.098e-05s analyzing) +Total split + benchmarking process took 0.03472s, equivalent to 127.6/15.3 new/old ops (0.07533s for 2 clones -- as 276.9/33.2 ops, or 138.5/16.6 ops per clone), SPEEDUP of 8.333x +Applying multi-split (22 -> 28 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 8.333x (0.002267s -> 0.000272s), will amortize in 17.4 ops by saving 0.001995s per op. +In 1 tuning rounds (tot. 0.4s, 0.11s for constructor, 3 clones) obtained a SPEEDUP of 733.3% (8.333x) (from 0.2541 to 2.117 Mflops). +After 0.402765s, global autotuning declared speedup of 8.33304 x, when using threads count of 10 and a new matrix: +(6 x 6)[0x1506fd0]{C} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Considering Z clone. Building a matrix with 36 nnz, 6 x 6 Duplicates check: 36 - 0 = 36 - converted COO to RSB in 2.019e-04 s (100.00 %) - analyzed arrays in 9.489e-05 s (46.99 %) - cleaned-up arrays in 9.537e-07 s (0.47 %) - deduplicated arrays in 1.192e-06 s (0.59 %) + converted COO to RSB in 1.831e-04 s (100.00 %) + analyzed arrays in 7.892e-05 s (43.10 %) + cleaned-up arrays in 2.146e-06 s (1.17 %) + deduplicated arrays in 1.907e-06 s (1.04 %) sorted arrays in 0.000e+00 s (0.00 %) - shuffled partitions in 3.314e-05 s (16.41 %) - memory allocations took 5.484e-05 s (27.15 %) - leafs setup took 2.861e-06 s (1.42 %) - halfword conversion took 1.407e-05 s (6.97 %) -Built (6 x 6)[0x57699130]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' + shuffled partitions in 6.795e-05 s (37.11 %) + memory allocations took 9.060e-06 s (4.95 %) + leafs setup took 2.861e-06 s (1.56 %) + halfword conversion took 1.597e-05 s (8.72 %) +Built (6 x 6)[0x1506fd0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x42046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Base matrix: -(6 x 6)[0x57699130]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' +(6 x 6)[0x1506fd0]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' Will use autotuning routine to sample matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.0004199s; avg 0.00014s ( +/- 12.78/ 22.15 %); best 0.0001221s; worst 0.0001709s; std dev. 2.2e-05 (taking best). -Reference operation time is 0.00012207 s (4.719 Mflops) with 18 threads. -After 0.000440s, autotuning routine did not find a better threads count configuration. +3 iterations (10 th.) took 0.0008359s; avg 0.0002786s ( +/- 7.07/ 4.82 %); best 0.0002589s; worst 0.0002921s; std dev. 1.424e-05 (taking best). +Reference operation time is 0.000258923 s (2.225 Mflops) with 10 threads. +After 0.000859s, autotuning routine did not find a better threads count configuration. Will autotune matrix: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz. Parameters: verbosity:1 mintimes:3 maxtimes:10 mindt:0 maxdt:0.1 -3 iterations (18 th.) took 0.000432s; avg 0.000144s ( +/- 7.62/ 5.63 %); best 0.000133s; worst 0.0001521s; std dev. 8.045e-06 (taking best). -Reference operation time is 0.000133038 s (4.33 Mflops) with 18 threads. -Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.000133 Mflops: 4.330) -Merge (28 -> 22 leaves) took w.c.t. of 1.693e-05s, ~8.106e-06s of computing time (of which 9.537e-07s sorting, 2.861e-06s analysis) -3 iterations (18 th.) took 0.001388s; avg 0.0004626s ( +/- 74.28/147.48 %); best 0.000119s; worst 0.001145s; std dev. 0.0004824 (taking best). -Reference operation time is 0.000118971 s (4.842 Mflops) with 18 threads. -After merge step 1: tpop: 0.000119 s ~Mflops: 4.842 nsubm:22 otn:18 -Applying merge (28 -> 22 leaves, 18 th.) yielded SPEEDUP of 1.118x: 0.000133s -> 0.000119s, so taking this instance. -Merge (22 -> 16 leaves) took w.c.t. of 1.788e-05s, ~7.868e-06s of computing time (of which 1.907e-06s sorting, 4.053e-06s analysis) -3 iterations (18 th.) took 0.00134s; avg 0.0004466s ( +/- 82.76/159.91 %); best 7.701e-05s; worst 0.001161s; std dev. 0.0005051 (taking best). -Reference operation time is 7.70092e-05 s (7.48 Mflops) with 18 threads. -After merge step 2: tpop: 7.701e-05 s ~Mflops: 7.480 nsubm:16 otn:18 -Applying merge (22 -> 16 leaves, 18 th.) yielded SPEEDUP of 1.545x: 0.000119s -> 7.701e-05s, so taking this instance. -Merge (16 -> 10 leaves) took w.c.t. of 1.597e-05s, ~9.06e-06s of computing time (of which 1.907e-06s sorting, 2.146e-06s analysis) -3 iterations (18 th.) took 0.001158s; avg 0.000386s ( +/- 89.62/178.51 %); best 4.005e-05s; worst 0.001075s; std dev. 0.0004872 (taking best). -Reference operation time is 4.00543e-05 s (14.38 Mflops) with 18 threads. -After merge step 3: tpop: 4.005e-05 s ~Mflops: 14.380 nsubm:10 otn:18 -Applying merge (16 -> 10 leaves, 18 th.) yielded SPEEDUP of 1.923x: 7.701e-05s -> 4.005e-05s, so taking this instance. -Merge (10 -> 7 leaves) took w.c.t. of 1.097e-05s, ~2.861e-06s of computing time (of which 9.537e-07s sorting, 1.907e-06s analysis) -3 iterations (18 th.) took 7.105e-05s; avg 2.368e-05s ( +/- 15.44/ 26.85 %); best 2.003e-05s; worst 3.004e-05s; std dev. 4.512e-06 (taking best). -Reference operation time is 2.00272e-05 s (28.76 Mflops) with 18 threads. -After merge step 4: tpop: 2.003e-05 s ~Mflops: 28.761 nsubm:7 otn:18 -Applying merge (10 -> 7 leaves, 18 th.) yielded SPEEDUP of 2.000x: 4.005e-05s -> 2.003e-05s, so taking this instance. -Merge (7 -> 4 leaves) took w.c.t. of 7.153e-06s, ~1.907e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 3.6e-05s; avg 1.2e-05s ( +/- 16.56/ 9.27 %); best 1.001e-05s; worst 1.311e-05s; std dev. 1.408e-06 (taking best). -Reference operation time is 1.00136e-05 s (57.52 Mflops) with 18 threads. -After merge step 5: tpop: 1.001e-05 s ~Mflops: 57.522 nsubm:4 otn:18 -Applying merge (7 -> 4 leaves, 18 th.) yielded SPEEDUP of 2.000x: 2.003e-05s -> 1.001e-05s, so taking this instance. -Merge (4 -> 1 leaves) took w.c.t. of 1.001e-05s, ~4.053e-06s of computing time (of which 9.537e-07s sorting, 9.537e-07s analysis) -3 iterations (18 th.) took 3.815e-06s; avg 1.272e-06s ( +/- 25.00/ 50.00 %); best 9.537e-07s; worst 1.907e-06s; std dev. 4.496e-07 (taking best). -Reference operation time is 9.53674e-07 s (604 Mflops) with 18 threads. -After merge step 6: tpop: 9.537e-07 s ~Mflops: 603.980 nsubm:1 otn:18 -Applying merge (4 -> 1 leaves, 18 th.) yielded SPEEDUP of 10.500x: 1.001e-05s -> 9.537e-07s, so taking this instance. -Merged all the matrix leaves: no reason to continue merging. -A total of 6 merge steps (of max 6) (28 -> 1 subms) took 0.004494s (of which 0.0002587s partitioning, 0s I/O); computing times: 3.386e-05s in par. loops, 7.629e-06s sorting, 1.287e-05s analyzing) -Total merge + benchmarking process took 0.004494s, equivalent to 4712.2/33.8 new/old ops (0.0001729s for 7 clones -- as 181.2/1.3 ops, or 25.9/0.2 ops per clone), SPEEDUP of 139.500x -Applying multi-merge (28 -> 1 leaves, 6 steps, 0 -> 18 th.sp.) yielded SPEEDUP of 139.500x (0.000133s -> 9.537e-07s), will amortize in 34.0 ops by saving 0.0001321s per op. -In 1 tuning rounds (tot. 0.005s, 0.00017s for constructor, 7 clones) obtained a SPEEDUP of 13850.0% (139.5x) (from 4.33 to 604 Mflops). -After 0.005002s, global autotuning declared speedup of 139.5 x, when using threads count of 18 and a new matrix: -(6 x 6)[0x57693f00]{Z} @ (0(0..6),0(0..6)) (36 nnz, 6 nnz/r) flags 0x2244086 (coo:0, csr:1, hw:1, ic:1, fi:0), storage: 1, subm: 1, symflags:'' +3 iterations (10 th.) took 0.03244s; avg 0.01081s ( +/- 28.82/ 17.38 %); best 0.007698s; worst 0.01269s; std dev. 0.002219 (taking best). +Reference operation time is 0.00769806 s (0.07482 Mflops) with 10 threads. +Starting merge (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.007698 Mflops: 0.075) +Merge (28 -> 22 leaves) took w.c.t. of 0.008031s, ~2.599e-05s of computing time (of which 4.768e-06s sorting, 5.007e-06s analysis) +3 iterations (10 th.) took 0.06753s; avg 0.02251s ( +/- 28.96/ 57.87 %); best 0.01599s; worst 0.03554s; std dev. 0.009211 (taking best). +Reference operation time is 0.0159891 s (0.03602 Mflops) with 10 threads. +After merge step 1: tpop: 0.01599 s ~Mflops: 0.036 nsubm:22 otn:10 +Applying merge (28 -> 22 leaves, 10 th.) yielded SLOWDOWN (1th of 3 tolerable) of 2.077x: 0.007698s -> 0.01599s. +Skipping further merge based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 1 merge steps (of max 6) (28 -> 22 subms) took 0.07561s (of which 0.008044s partitioning, 0s I/O); computing times: 2.599e-05s in par. loops, 4.768e-06s sorting, 5.007e-06s analyzing) +Total merge + benchmarking process took 0.07561s, equivalent to 9.8/9.8 new/old ops (2.289e-05s for 1 clones -- as 0.0/0.0 ops, or 0.0/0.0 ops per clone), SPEEDUP of 1.000x (NO SPEEDUP) +Merging based autotuning FAILED (=NO SPEEDUP); let's try splitting then... +3 iterations (10 th.) took 0.04801s; avg 0.016s ( +/- 0.06/ 0.06 %); best 0.016s; worst 0.01601s; std dev. 7.398e-06 (taking best). +Reference operation time is 0.015996 s (0.03601 Mflops) with 10 threads. +Starting split (same threads) based auto-tuning procedure (transA=N, nrhs=2, order=cols) (max 6 steps, inclusive 3 grace steps) on: 6 x 6, type Z, 36 nnz, 6 nnz/r, 37 subms, 28 lsubms, 4.4444 bpnz (tpop: 0.016 Mflops: 0.036) +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.02789s (of which: 8.106e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 0.04375s; avg 0.01458s ( +/- 19.43/ 9.72 %); best 0.01175s; worst 0.016s; std dev. 0.002003 (taking best). +Reference operation time is 0.0117509 s (0.04902 Mflops) with 10 threads. +After split step 1: tpop: 0.01175 s ~Mflops: 0.049 nsubm:28 otn:10 +Applying split (28 -> 28 leaves, 10 th.) yielded SPEEDUP of 1.361x: 0.016s -> 0.01175s, so taking this instance. +Split (28 -> 28 leaves, 37 -> 37 subms) took 0.02397s (of which: 5.96e-06s analysis, 0s mem.mgmt); compute time: 0s overall, 0s searches, 0s shuffle, 0s switch, 0s quadrants. +3 iterations (10 th.) took 0.04801s; avg 0.016s ( +/- 0.10/ 0.06 %); best 0.01599s; worst 0.01601s; std dev. 1.164e-05 (taking best). +Reference operation time is 0.015986 s (0.03603 Mflops) with 10 threads. +After split step 2: tpop: 0.01599 s ~Mflops: 0.036 nsubm:28 otn:10 +Applying split (28 -> 28 leaves, 10 th.) yielded SLOWDOWN (1th of 3 tolerable) of 1.360x: 0.01175s -> 0.01599s. +Skipping further split based tests after 1 definite performance degradations in a row (and last exceeding limit). +A total of 2 split steps (of max 6) (28 -> 28 subms) took 0.192s (of which 0.05214s partitioning, 0s I/O); computing times: 0s in par. loops, 0s sorting, 1.407e-05s analyzing) +Total split + benchmarking process took 0.192s, equivalent to 16.3/12.0 new/old ops (0.09593s for 2 clones -- as 8.2/6.0 ops, or 4.1/3.0 ops per clone), SPEEDUP of 1.361x +Applying multi-split (28 -> 28 leaves, 1 steps, 0 -> 10 th.sp.) yielded SPEEDUP of 1.361x (0.016s -> 0.01175s), will amortize in 45.2 ops by saving 0.004245s per op. +In 1 tuning rounds (tot. 0.4s, 0.096s for constructor, 3 clones) obtained a SPEEDUP of 36.1% (1.361x) (from 0.03601 to 0.04902 Mflops). +After 0.396209s, global autotuning declared speedup of 1.36125 x, when using threads count of 10 and a new matrix: +(6 x 6)[0x1509a20]{Z} @ (0(0..0),0(0..0)) (36 nnz, 6 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 28, symflags:'' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/examples' gmake[3]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' @@ -9093,10 +9200,10 @@ 4 0 DIFF PRINT TEST END Beginning large binary search test. -Detected 3995746304 bytes of memory, comprehensive of 4121923584 of free memory. +Detected 4110413824 bytes of memory, comprehensive of 748412928 of free memory. On this system, maximal array of coordinates can have 2147483137 elements and occupy 4294965252 bytes. -Will perform the test using less memory (900 MB) than on the maximal coordinate indices array (943959040) allows. -(c)allocated 235989760 nnz (943959040 bytes) +Will perform the test using less memory (535 MB) than on the maximal coordinate indices array (561309696) allows. +(c)allocated 140327424 nnz (561309696 bytes) Succeeded retrieving array last element. Successfully performed large binary search test. BASIC SPARSE BLAS TEST: BEGIN @@ -9105,7 +9212,7 @@ got RSB_IO_WANT_IS_INITIALIZED_MARKER: 1 INIT INTERFACE TEST: END (SUCCESS) DEVEL PRINT TEST: BEGIN -(4 x 4)[0x580fa4e0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(4 x 4)[0x1a78aa0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9114,8 +9221,8 @@ RSB_FLAG_ASSEMBLED_IN_COO_ARRAYS | RSB_FLAG_OWN_PARTITIONING_ARRAYS | RSB_FLAG_SORT_INPUT -(2 x 2)[0x580fa5b0]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x580fa680]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x1a78b70]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x1a78c40]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' #R 4 x 4, 4 nnz (16 bytes), 16 index space for bytes, 416 bytes for 2 structs (2 of which are on the diagonal) (1e+02% of nnz are on the diagonal) #N at 0 0, 4 x 4, 4 nnz ( 25%) #T at 0 0, 2 x 2, 2 nnz ( 50%) @@ -9123,9 +9230,9 @@ ( 0x2046186 = { rec:1 coo:1 css:1 hw:1 ic:1 fi:0 symflags: } ) DEVEL PRINT TEST: END PRINT TEST: BEGIN [QUIET] -(2 x 2)[0x580fa5b0]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(2 x 2)[0x580fa680]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' -(4 x 4)[0x580fa4e0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' +(2 x 2)[0x1a78b70]{S} @ (0(0..2),0(0..2)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(2 x 2)[0x1a78c40]{S} @ (2(2..4),2(2..4)) (2 nnz, 1 nnz/r) flags 0x2144386 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 0, symflags:'' +(4 x 4)[0x1a78aa0]{S} @ (0(0..0),0(0..0)) (4 nnz, 1 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 2, symflags:'' RSB_FLAG_USE_HALFWORD_INDICES | RSB_FLAG_SORTED_INPUT | RSB_FLAG_WANT_COO_STORAGE | @@ -9143,8 +9250,8 @@ BASIC PRIMITIVES TEST: BEGIN BASIC PRIMITIVES TEST: END (SUCCESS) ADVANCED SPARSE BLAS TEST: BEGIN [limit 30.000000s] [QUIET] -Terminating testing earlier due to user timeout request: test took 30.038932 s, max allowed was 30.000000. - PASSED:35526 +Terminating testing earlier due to user timeout request: test took 30.019950 s, max allowed was 30.000000. + PASSED:1211 FAILED:0 ADVANCED SPARSE BLAS TEST: END (SUCCESS) gmake qtests -C librsbpp @@ -9153,7 +9260,7 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' gmake[4]: Leaving directory '/build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp' ./rsbtt -if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/bash /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh +if ! test -f G.mtx ; then cp -p /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/G.mtx . ; fi ; /bin/sh /build/reproducible-path/librsb-1.3.0.2+dfsg/librsbpp/test.sh ++ ./rsbpp Td,s G.mtx ++ grep Z-sort ++ wc -l @@ -9171,81 +9278,81 @@ ++ wc -l + test 54 = 54 ++ ./rsbpp vTd,z G.mtx -++ grep Z-sort ++ wc -l +++ grep Z-sort + test 54 = 54 ++ ./rsbpp vvvTd,z G.mtx -++ grep Zorted ++ wc -l +++ grep Zorted + test 8 = 8 ++ ./rsbpp vvTd,z G.mtx -++ grep Z-sort ++ wc -l +++ grep Z-sort + test 54 = 54 ++ ./rsbpp vvTd,z G.mtx -++ grep Range ++ wc -l +++ grep Range + test 0 = 0 ++ ./rsbpp vvvTd,z G.mtx -++ grep Range ++ wc -l +++ grep Range + test 258 -gt 0 ++ ./rsbpp vvvTd,z S.mtx -++ grep Range ++ wc -l +++ grep Range + test 0 -eq 0 ++ ./rsbpp vvvTd,z G.mtx -++ grep Range ++ wc -l +++ grep Range + test 258 = 258 ++ OMP_NUM_THREADS=1 -++ ./rsbpp m10M10I1r1,4,8sFv ++ grep spmm- +++ ./rsbpp m10M10I1r1,4,8sFv ++ wc -l + test 9 = 9 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r1,4,8sFv -++ grep spmm- ++ wc -l +++ grep spmm- + test 9 = 9 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r1sFvtN,T -++ grep spmm- ++ wc -l +++ grep spmm- + test 3 = 3 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r1vtN,TsF -++ grep spmm- ++ wc -l +++ grep spmm- + test 2 = 2 ++ OMP_NUM_THREADS=1 ++ ./rsbpp C1000m100M100I1r0vtN,TsF -++ grep spmm- ++ wc -l +++ grep spmm- + test 0 = 0 ++ OMP_NUM_THREADS=1 ++ RSB_NUM_THREADS=1 ++ ./rsbpp vvvC1000m100M100I1r1vtN,TorsF -++ grep Recursing ++ wc -l +++ grep Recursing + test 4 = 4 ++ OMP_NUM_THREADS=2 ++ RSB_NUM_THREADS=2 ++ ./rsbpp vvvC1000m100M100I1r1vtN,TorsF -++ grep Recursing ++ wc -l +++ grep Recursing + test 4 = 4 ++ OMP_NUM_THREADS=1 ++ RSB_NUM_THREADS=1 ++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF -++ grep Recursing ++ wc -l +++ grep Recursing + test 208 = 208 ++ OMP_NUM_THREADS=2 ++ RSB_NUM_THREADS=2 -++ grep Recursing ++ ./rsbpp vvvC1000m100M100I1r1vtN,ToRsF ++ wc -l +++ grep Recursing + test 410 = 410 echo "Skipping tests based on Google Test (not detected at configure time)" Skipping tests based on Google Test (not detected at configure time) @@ -9292,7 +9399,7 @@ 0 0 0 0 before tuning for SPMV: -(3 x 3)[0x580a2770]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' +(3 x 3)[0x9a4f60]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x2046186 (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'' ** x: 1.1 @@ -9381,18 +9488,18 @@ BEGIN Rsb_Matrix_test_multimatrix_ms_mnrhs BEGIN -(3 x 3)[0x580d1580]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.26667: -(3 x 3)[0x580d5660]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x580d1580]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.10619: -(3 x 3)[0x580d4180]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x580d1580]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.17241: -(3 x 3)[0x580d5c90]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -(3 x 3)[0x580d1580]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' -Tuned with speedup factor of 1.03175: -(3 x 3)[0x580cdb40]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x9d2260]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.88813: +(3 x 3)[0x9d8070]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x9d2260]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1: +(3 x 3)[0x9d2260]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x9ce550]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1.3669: +(3 x 3)[0x9dc760]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +(3 x 3)[0x9ce550]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' +Tuned with speedup factor of 1: +(3 x 3)[0x9ce550]{D} @ (0(0..0),0(0..0)) (6 nnz, 2 nnz/r) flags 0x204619e (coo:1, csr:1, hw:1, ic:1, fi:0), storage: 40, subm: 3, symflags:'LT' END OK: terminating with no allocations registered in librsb [*] tests terminated successfully ! @@ -9409,10 +9516,10 @@ ./rsbtest --no-tune --max_t 0.01 --serial | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q Building ./rsbtest --no-tune --max_t 0.01 --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --no-trans --alpha 1 --type d --rand --serial . | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q adding ! ./rsbtest --mkl A.mkl -running on ionos6-i386 +running on i-capture-the-hostname Built without the MKL. ( ! ./rsbtest --unrecognized-option-triggers-abort ) -running on ionos6-i386 +running on i-capture-the-hostname /build/reproducible-path/librsb-1.3.0.2+dfsg/rsbtest/.libs/rsbtest: unrecognized option '--unrecognized-option-triggers-abort' unrecognized option, aborting. ( ./rsbtest --no-tune --max_t 0.01 --skip-loading-hermitian-matrices --skip-loading-unsymmetric-matrices --tune-maxt 10 --tune-maxr 10 --verbose-tuning --extra-verbose-interface --min_t 0.01 --max_t 0.01 --mintimes 1 --maxtimes 1 --verbose --skip-loading-symmetric-matrices A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q skip ) @@ -9421,7 +9528,7 @@ ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --types all --nthreads 1,2 --maxtimes 1 -+ A.mtx | dd if=/dev/stdin of=/dev/stdout bs=16M status=none iflag=fullblock | grep -q 2.threads ) ( ! ./rsbtest --no-tune --max_t 0.01 --quiet --render-only A.mtx > /dev/null ) ! ./rsbtest --no-tune --max_t 0.01 --quiet --max 1 --nrhs 1 --beta 1 --incy 1 --incx 1 --render --no-trans --alpha 1 --type all A.mtx -running on ionos6-i386 +running on i-capture-the-hostname Will not invoke autotuning routine. Benchmark will sample for at most 0.01 s Built without render support! @@ -9474,7 +9581,7 @@ gmake[4]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' gmake[5]: Entering directory '/build/reproducible-path/librsb-1.3.0.2+dfsg' /bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu' - /bin/bash ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu' + /bin/sh ./libtool --mode=install /usr/bin/install -c librsb.la '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu' libtool: install: /usr/bin/install -c .libs/librsb.so.0.0.0 /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu/librsb.so.0.0.0 libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so.0 || { rm -f librsb.so.0 && ln -s librsb.so.0.0.0 librsb.so.0; }; }) libtool: install: (cd /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu && { ln -s -f librsb.so.0.0.0 librsb.so || { rm -f librsb.so && ln -s librsb.so.0.0.0 librsb.so; }; }) @@ -9484,7 +9591,7 @@ libtool: install: ranlib /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/lib/i386-linux-gnu/librsb.a libtool: warning: remember to run 'libtool --finish /usr/lib/i386-linux-gnu' /bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' - /bin/bash ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' + /bin/sh ./libtool --mode=install /usr/bin/install -c rsbench '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' libtool: warning: 'librsb.la' has not been installed in '/usr/lib/i386-linux-gnu' libtool: install: /usr/bin/install -c .libs/rsbench /build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin/rsbench /bin/mkdir -p '/build/reproducible-path/librsb-1.3.0.2+dfsg/debian/tmp/usr/bin' @@ -9612,12 +9719,12 @@ dh_gencontrol -O--no-parallel dh_md5sums -O--no-parallel dh_builddeb -O--no-parallel -dpkg-deb: building package 'librsb0-dbgsym' in '../librsb0-dbgsym_1.3.0.2+dfsg-4_i386.deb'. dpkg-deb: building package 'librsb0' in '../librsb0_1.3.0.2+dfsg-4_i386.deb'. -dpkg-deb: building package 'librsb-dev' in '../librsb-dev_1.3.0.2+dfsg-4_i386.deb'. dpkg-deb: building package 'librsb-tools' in '../librsb-tools_1.3.0.2+dfsg-4_i386.deb'. -dpkg-deb: building package 'librsb-tools-dbgsym' in '../librsb-tools-dbgsym_1.3.0.2+dfsg-4_i386.deb'. +dpkg-deb: building package 'librsb-dev' in '../librsb-dev_1.3.0.2+dfsg-4_i386.deb'. +dpkg-deb: building package 'librsb0-dbgsym' in '../librsb0-dbgsym_1.3.0.2+dfsg-4_i386.deb'. dpkg-deb: building package 'librsb-doc' in '../librsb-doc_1.3.0.2+dfsg-4_all.deb'. +dpkg-deb: building package 'librsb-tools-dbgsym' in '../librsb-tools-dbgsym_1.3.0.2+dfsg-4_i386.deb'. dpkg-genbuildinfo --build=binary -O../librsb_1.3.0.2+dfsg-4_i386.buildinfo dpkg-genchanges --build=binary -O../librsb_1.3.0.2+dfsg-4_i386.changes dpkg-genchanges: info: binary-only upload (no source code included) @@ -9626,12 +9733,14 @@ dpkg-buildpackage: info: binary-only upload (no source included) dpkg-genchanges: info: not including original source code in upload I: copying local configuration +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/B01_cleanup starting +I: user script /srv/workspace/pbuilder/27792/tmp/hooks/B01_cleanup finished I: unmounting dev/ptmx filesystem I: unmounting dev/pts filesystem I: unmounting dev/shm filesystem I: unmounting proc filesystem I: unmounting sys filesystem I: cleaning the build env -I: removing directory /srv/workspace/pbuilder/5587 and its subdirectories -I: Current time: Mon Feb 24 11:45:22 -12 2025 -I: pbuilder-time-stamp: 1740440722 +I: removing directory /srv/workspace/pbuilder/27792 and its subdirectories +I: Current time: Wed Jan 24 08:09:41 +14 2024 +I: pbuilder-time-stamp: 1706033381