{"diffoscope-json-version": 1, "source1": "/srv/reproducible-results/rbuild-debian/r-b-build.iJwJmUjh/b1/pandas_2.1.4+dfsg-8_arm64.changes", "source2": "/srv/reproducible-results/rbuild-debian/r-b-build.iJwJmUjh/b2/pandas_2.1.4+dfsg-8_arm64.changes", "unified_diff": null, "details": [{"source1": "Files", "source2": "Files", "unified_diff": "@@ -1,5 +1,5 @@\n \n- 60c867b08781c76ba632688f64a2128b 10530152 doc optional python-pandas-doc_2.1.4+dfsg-8_all.deb\n+ 2be4689a4d451b81d0937bdbce5c3e94 10530128 doc optional python-pandas-doc_2.1.4+dfsg-8_all.deb\n c4e9494b6e4e00338c5e543c834a38af 63524172 debug optional python3-pandas-lib-dbgsym_2.1.4+dfsg-8_arm64.deb\n cd7b04861e8737c9881954e6ed600919 6609396 python optional python3-pandas-lib_2.1.4+dfsg-8_arm64.deb\n 98be946dcc3a490faf0f4fbc303f4b2a 3015192 python optional python3-pandas_2.1.4+dfsg-8_all.deb\n"}, {"source1": "python-pandas-doc_2.1.4+dfsg-8_all.deb", "source2": "python-pandas-doc_2.1.4+dfsg-8_all.deb", "unified_diff": null, "details": [{"source1": "file list", "source2": "file list", "unified_diff": "@@ -1,3 +1,3 @@\n -rw-r--r-- 0 0 0 4 2024-04-21 12:50:13.000000 debian-binary\n--rw-r--r-- 0 0 0 153744 2024-04-21 12:50:13.000000 control.tar.xz\n--rw-r--r-- 0 0 0 10376216 2024-04-21 12:50:13.000000 data.tar.xz\n+-rw-r--r-- 0 0 0 153748 2024-04-21 12:50:13.000000 control.tar.xz\n+-rw-r--r-- 0 0 0 10376188 2024-04-21 12:50:13.000000 data.tar.xz\n"}, {"source1": "control.tar.xz", "source2": "control.tar.xz", "unified_diff": null, "details": [{"source1": "control.tar", "source2": "control.tar", "unified_diff": null, "details": [{"source1": "./control", "source2": "./control", "unified_diff": "@@ -1,13 +1,13 @@\n Package: python-pandas-doc\n Source: pandas\n Version: 2.1.4+dfsg-8\n Architecture: all\n Maintainer: Debian Science Team \n-Installed-Size: 215695\n+Installed-Size: 215696\n Depends: libjs-sphinxdoc (>= 7.2.2), libjs-mathjax\n Suggests: python3-pandas\n Section: doc\n Priority: optional\n Multi-Arch: foreign\n Homepage: https://pandas.pydata.org/\n Description: data structures for \"relational\" or \"labeled\" data - documentation\n"}, {"source1": "./md5sums", "source2": "./md5sums", "unified_diff": null, "details": [{"source1": "./md5sums", "source2": "./md5sums", "comments": ["Files differ"], "unified_diff": null}]}]}]}, {"source1": "data.tar.xz", "source2": "data.tar.xz", "unified_diff": null, "details": [{"source1": "data.tar", "source2": "data.tar", "unified_diff": null, "details": [{"source1": "file list", "source2": "file list", "unified_diff": "@@ -5663,17 +5663,17 @@\n -rw-r--r-- 0 root (0) root (0) 22391 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.set_uuid.html\n -rw-r--r-- 0 root (0) root (0) 18903 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.template_html.html\n -rw-r--r-- 0 root (0) root (0) 18997 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.template_html_style.html\n -rw-r--r-- 0 root (0) root (0) 19001 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.template_html_table.html\n -rw-r--r-- 0 root (0) root (0) 18960 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.template_latex.html\n -rw-r--r-- 0 root (0) root (0) 18913 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.template_string.html\n -rw-r--r-- 0 root (0) root (0) 34113 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.text_gradient.html\n--rw-r--r-- 0 root (0) root (0) 33008 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_excel.html\n--rw-r--r-- 0 root (0) root (0) 29383 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_html.html\n--rw-r--r-- 0 root (0) root (0) 75382 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_latex.html\n+-rw-r--r-- 0 root (0) root (0) 33327 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_excel.html\n+-rw-r--r-- 0 root (0) root (0) 29702 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_html.html\n+-rw-r--r-- 0 root (0) root (0) 75701 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_latex.html\n -rw-r--r-- 0 root (0) root (0) 24503 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_string.html\n -rw-r--r-- 0 root (0) root (0) 23165 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.use.html\n -rw-r--r-- 0 root (0) root (0) 24110 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.json.build_table_schema.html\n -rw-r--r-- 0 root (0) root (0) 254 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.json.json_normalize.html\n -rw-r--r-- 0 root (0) root (0) 21790 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.stata.StataReader.data_label.html\n -rw-r--r-- 0 root (0) root (0) 22790 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.stata.StataReader.value_labels.html\n -rw-r--r-- 0 root (0) root (0) 22821 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.stata.StataReader.variable_labels.html\n@@ -6536,15 +6536,15 @@\n -rw-r--r-- 0 root (0) root (0) 198274 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/series.html\n -rw-r--r-- 0 root (0) root (0) 38687 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/style.html\n -rw-r--r-- 0 root (0) root (0) 38825 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/testing.html\n -rw-r--r-- 0 root (0) root (0) 43429 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reference/window.html\n -rw-r--r-- 0 root (0) root (0) 244 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/release.html\n -rw-r--r-- 0 root (0) root (0) 269 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/reshaping.html\n -rw-r--r-- 0 root (0) root (0) 7354 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/search.html\n--rw-r--r-- 0 root (0) root (0) 2482606 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/searchindex.js\n+-rw-r--r-- 0 root (0) root (0) 2482556 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/searchindex.js\n -rw-r--r-- 0 root (0) root (0) 259 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/sparse.html\n -rw-r--r-- 0 root (0) root (0) 244 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/style.html\n -rw-r--r-- 0 root (0) root (0) 255 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/text.html\n -rw-r--r-- 0 root (0) root (0) 256 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/timedeltas.html\n -rw-r--r-- 0 root (0) root (0) 277 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/timeseries.html\n -rw-r--r-- 0 root (0) root (0) 272 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/tutorials.html\n drwxr-xr-x 0 root (0) root (0) 0 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/\n@@ -6553,44 +6553,44 @@\n -rw-r--r-- 0 root (0) root (0) 429343 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/basics.html\n -rw-r--r-- 0 root (0) root (0) 26041 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/boolean.html\n -rw-r--r-- 0 root (0) root (0) 207764 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/categorical.html\n -rw-r--r-- 0 root (0) root (0) 7742 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/cookbook.html\n -rw-r--r-- 0 root (0) root (0) 43487 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/copy_on_write.html\n -rw-r--r-- 0 root (0) root (0) 150222 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/dsintro.html\n -rw-r--r-- 0 root (0) root (0) 69858 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/duplicates.html\n--rw-r--r-- 0 root (0) root (0) 105363 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/enhancingperf.html\n+-rw-r--r-- 0 root (0) root (0) 105352 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/enhancingperf.html\n -rw-r--r-- 0 root (0) root (0) 98893 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/gotchas.html\n -rw-r--r-- 0 root (0) root (0) 287735 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/groupby.html\n -rw-r--r-- 0 root (0) root (0) 49149 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/index.html\n -rw-r--r-- 0 root (0) root (0) 375555 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/indexing.html\n -rw-r--r-- 0 root (0) root (0) 31232 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/integer_na.html\n -rw-r--r-- 0 root (0) root (0) 1117010 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/io.html\n -rw-r--r-- 0 root (0) root (0) 207719 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/merging.html\n -rw-r--r-- 0 root (0) root (0) 153800 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/missing_data.html\n -rw-r--r-- 0 root (0) root (0) 101227 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/options.html\n -rw-r--r-- 0 root (0) root (0) 136937 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/pyarrow.html\n -rw-r--r-- 0 root (0) root (0) 152700 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/reshaping.html\n--rw-r--r-- 0 root (0) root (0) 158229 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/scale.html\n+-rw-r--r-- 0 root (0) root (0) 158230 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/scale.html\n -rw-r--r-- 0 root (0) root (0) 55502 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/sparse.html\n -rw-r--r-- 0 root (0) root (0) 694833 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/style.html\n--rw-r--r-- 0 root (0) root (0) 87920 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/style.ipynb.gz\n+-rw-r--r-- 0 root (0) root (0) 87896 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/style.ipynb.gz\n -rw-r--r-- 0 root (0) root (0) 155009 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/text.html\n -rw-r--r-- 0 root (0) root (0) 90618 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/timedeltas.html\n -rw-r--r-- 0 root (0) root (0) 474930 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/timeseries.html\n -rw-r--r-- 0 root (0) root (0) 192427 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/visualization.html\n -rw-r--r-- 0 root (0) root (0) 131402 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/user_guide/window.html\n -rw-r--r-- 0 root (0) root (0) 270 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/visualization.html\n drwxr-xr-x 0 root (0) root (0) 0 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/\n -rw-r--r-- 0 root (0) root (0) 94736 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/index.html\n -rw-r--r-- 0 root (0) root (0) 8504 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/index.html.gz\n -rw-r--r-- 0 root (0) root (0) 73511 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.10.0.html\n -rw-r--r-- 0 root (0) root (0) 55915 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.10.1.html\n -rw-r--r-- 0 root (0) root (0) 78522 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.11.0.html\n -rw-r--r-- 0 root (0) root (0) 92042 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.12.0.html\n--rw-r--r-- 0 root (0) root (0) 222389 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.13.0.html\n+-rw-r--r-- 0 root (0) root (0) 222390 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.13.0.html\n -rw-r--r-- 0 root (0) root (0) 78511 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.13.1.html\n -rw-r--r-- 0 root (0) root (0) 229509 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.14.0.html\n -rw-r--r-- 0 root (0) root (0) 71498 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.14.1.html\n -rw-r--r-- 0 root (0) root (0) 236679 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.15.0.html\n -rw-r--r-- 0 root (0) root (0) 57437 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.15.1.html\n -rw-r--r-- 0 root (0) root (0) 65187 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.15.2.html\n -rw-r--r-- 0 root (0) root (0) 133329 2024-04-21 12:50:13.000000 ./usr/share/doc/python-pandas-doc/html/whatsnew/v0.16.0.html\n"}, {"source1": "./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_excel.html", "source2": "./usr/share/doc/python-pandas-doc/html/reference/api/pandas.io.formats.style.Styler.to_excel.html", "unified_diff": "@@ -152,287 +152,352 @@\n
\n
In [9]: %timeit df.apply(lambda x: integrate_f_plain(x["a"], x["b"], x["N"]), axis=1)\n-154 ms +- 22 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+86.5 ms +- 29.4 us per loop (mean +- std. dev. of 7 runs, 10 loops each)\n 
\n
\n

This has improved the performance compared to the pure Python approach by one-third.

\n \n
\n

Declaring C types\u00b6

\n

We can annotate the function variables and return types as well as use cdef\n@@ -595,36 +595,36 @@\n ....: for i in range(N):\n ....: s += f_typed(a + i * dx)\n ....: return s * dx\n ....: \n \n \n

In [11]: %timeit df.apply(lambda x: integrate_f_typed(x["a"], x["b"], x["N"]), axis=1)\n-20.2 ms +- 1.33 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+10 ms +- 5.07 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n

Annotating the functions with C types yields an over ten times performance improvement compared to\n the original Python implementation.

\n
\n
\n

Using ndarray\u00b6

\n

When re-profiling, time is spent creating a Series from each row, and calling __getitem__ from both\n the index and the series (three times for each row). These Python function calls are expensive and\n can be improved by passing an np.ndarray.

\n
In [12]: %prun -l 4 df.apply(lambda x: integrate_f_typed(x["a"], x["b"], x["N"]), axis=1)\n-         52540 function calls (52522 primitive calls) in 0.053 seconds\n+         52540 function calls (52522 primitive calls) in 0.027 seconds\n \n    Ordered by: internal time\n    List reduced from 165 to 4 due to restriction <4>\n \n    ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n-     3000    0.013    0.000    0.035    0.000 series.py:1016(__getitem__)\n-     3000    0.007    0.000    0.016    0.000 series.py:1139(_get_value)\n-     1000    0.006    0.000    0.041    0.000 <string>:1(<lambda>)\n-    16094    0.006    0.000    0.007    0.000 {built-in method builtins.isinstance}\n+     3000    0.005    0.000    0.018    0.000 series.py:1016(__getitem__)\n+     3000    0.003    0.000    0.008    0.000 series.py:1139(_get_value)\n+     3000    0.002    0.000    0.004    0.000 indexing.py:2678(check_dict_or_set_indexers)\n+     3000    0.002    0.000    0.003    0.000 base.py:3763(get_loc)\n 
\n
\n
In [13]: %%cython\n    ....: cimport numpy as np\n    ....: import numpy as np\n    ....: cdef double f_typed(double x) except? -2:\n    ....:     return x * (x - 1)\n@@ -659,32 +659,32 @@\n 
\n

This implementation creates an array of zeros and inserts the result\n of integrate_f_typed applied over each row. Looping over an ndarray is faster\n in Cython than looping over a Series object.

\n

Since apply_integrate_f is typed to accept an np.ndarray, Series.to_numpy()\n calls are needed to utilize this function.

\n
In [14]: %timeit apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())\n-2.19 ms +- 318 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+1.17 ms +- 377 ns per loop (mean +- std. dev. of 7 runs, 1,000 loops each)\n 
\n
\n

Performance has improved from the prior implementation by almost ten times.

\n
\n
\n

Disabling compiler directives\u00b6

\n

The majority of the time is now spent in apply_integrate_f. Disabling Cython\u2019s boundscheck\n and wraparound checks can yield more performance.

\n
In [15]: %prun -l 4 apply_integrate_f(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())\n-         75 function calls in 0.005 seconds\n+         75 function calls in 0.001 seconds\n \n    Ordered by: internal time\n    List reduced from 20 to 4 due to restriction <4>\n \n    ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n-        1    0.005    0.005    0.005    0.005 <string>:1(<module>)\n-        1    0.000    0.000    0.005    0.005 {built-in method builtins.exec}\n+        1    0.001    0.001    0.001    0.001 <string>:1(<module>)\n+        1    0.000    0.000    0.001    0.001 {built-in method builtins.exec}\n         3    0.000    0.000    0.000    0.000 frame.py:3853(__getitem__)\n         3    0.000    0.000    0.000    0.000 base.py:541(to_numpy)\n 
\n
\n
In [16]: %%cython\n    ....: cimport cython\n    ....: cimport numpy as np\n@@ -719,15 +719,15 @@\n                  from /build/reproducible-path/pandas-2.1.4+dfsg/buildtmp/.cache/ipython/cython/_cython_magic_dbd2f2be48d2a3825f8cbd16b561de5210461988.c:1251:\n /usr/lib/python3/dist-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h:17:2: warning: #warning "Using deprecated NumPy API, disable it with " "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" [-Wcpp]\n    17 | #warning "Using deprecated NumPy API, disable it with " \\\n       |  ^~~~~~~\n 
\n
\n
In [17]: %timeit apply_integrate_f_wrap(df["a"].to_numpy(), df["b"].to_numpy(), df["N"].to_numpy())\n-2.18 ms +- 53.3 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+818 us +- 105 ns per loop (mean +- std. dev. of 7 runs, 1,000 loops each)\n 
\n
\n

However, a loop indexer i accessing an invalid location in an array would cause a segfault because memory access isn\u2019t checked.\n For more about boundscheck and wraparound, see the Cython docs on\n compiler directives.

\n
\n \n@@ -1085,19 +1085,19 @@\n compared to standard Python syntax for large DataFrame. This engine requires the\n optional dependency numexpr to be installed.

\n

The 'python' engine is generally not useful except for testing\n other evaluation engines against it. You will achieve no performance\n benefits using eval() with engine='python' and may\n incur a performance hit.

\n
In [40]: %timeit df1 + df2 + df3 + df4\n-27.1 ms +- 1.69 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+8.29 ms +- 29.4 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n
In [41]: %timeit pd.eval("df1 + df2 + df3 + df4", engine="python")\n-29.1 ms +- 1.04 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+11.4 ms +- 22.3 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n \n
\n

The DataFrame.eval() method\u00b6

\n

In addition to the top level pandas.eval() function you can also\n evaluate an expression in the \u201ccontext\u201d of a DataFrame.

\n@@ -1212,39 +1212,39 @@\n
In [58]: nrows, ncols = 20000, 100\n \n In [59]: df1, df2, df3, df4 = [pd.DataFrame(np.random.randn(nrows, ncols)) for _ in range(4)]\n 
\n
\n

DataFrame arithmetic:

\n
In [60]: %timeit df1 + df2 + df3 + df4\n-28.3 ms +- 2.33 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+8.33 ms +- 37.6 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n
In [61]: %timeit pd.eval("df1 + df2 + df3 + df4")\n-13.4 ms +- 1.69 ms per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+7.38 ms +- 18.7 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n

DataFrame comparison:

\n
In [62]: %timeit (df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)\n-30.6 ms +- 5.23 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+10.6 ms +- 23.6 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n
In [63]: %timeit pd.eval("(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)")\n-26.3 ms +- 3.92 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+14.7 ms +- 13.5 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n

DataFrame arithmetic with unaligned axes.

\n
In [64]: s = pd.Series(np.random.randn(50))\n \n In [65]: %timeit df1 + df2 + df3 + df4 + s\n-35.3 ms +- 2.82 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+19.2 ms +- 65 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n
In [66]: %timeit pd.eval("df1 + df2 + df3 + df4 + s")\n-15.4 ms +- 878 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+11 ms +- 26.3 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n 
\n
\n
\n

Note

\n

Operations such as

\n
1 and 2  # would parse to 1 & 2, but should evaluate to 2\n 3 or 4  # would parse to 3 | 4, but should evaluate to 3\n", "details": [{"source1": "html2text {}", "source2": "html2text {}", "unified_diff": "@@ -113,33 +113,32 @@\n    ...:     dx = (b - a) / N\n    ...:     for i in range(N):\n    ...:         s += f(a + i * dx)\n    ...:     return s * dx\n    ...:\n We achieve our result by using _\bD_\ba_\bt_\ba_\bF_\br_\ba_\bm_\be_\b._\ba_\bp_\bp_\bl_\by_\b(_\b) (row-wise):\n In [5]: %timeit df.apply(lambda x: integrate_f(x[\"a\"], x[\"b\"], x[\"N\"]), axis=1)\n-185 ms +- 27.3 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+96.9 ms +- 74.6 us per loop (mean +- std. dev. of 7 runs, 10 loops each)\n Let\u2019s take a look and see where the time is spent during this operation using\n the _\bp_\br_\bu_\bn_\b _\bi_\bp_\by_\bt_\bh_\bo_\bn_\b _\bm_\ba_\bg_\bi_\bc_\b _\bf_\bu_\bn_\bc_\bt_\bi_\bo_\bn:\n # most time consuming 4 calls\n In [6]: %prun -l 4 df.apply(lambda x: integrate_f(x[\"a\"], x[\"b\"], x[\"N\"]),\n axis=1)  # noqa E999\n-         605963 function calls (605945 primitive calls) in 0.724 seconds\n+         605963 function calls (605945 primitive calls) in 0.304 seconds\n \n    Ordered by: internal time\n    List reduced from 167 to 4 due to restriction <4>\n \n    ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n-     1000    0.500    0.001    0.664    0.001 :1\n+     1000    0.197    0.000    0.276    0.000 :1\n (integrate_f)\n-   552423    0.163    0.000    0.163    0.000 :1\n+   552423    0.079    0.000    0.079    0.000 :1\n (f)\n-    16094    0.015    0.000    0.016    0.000 {built-in method\n-builtins.isinstance}\n-     1001    0.011    0.000    0.019    0.000 apply.py:1085(series_generator)\n+     3000    0.005    0.000    0.018    0.000 series.py:1016(__getitem__)\n+     3000    0.003    0.000    0.008    0.000 series.py:1139(_get_value)\n By far the majority of time is spend inside either integrate_f or f, hence\n we\u2019ll concentrate our efforts cythonizing these two functions.\n *\b**\b**\b**\b* P\bPl\bla\bai\bin\bn C\bCy\byt\bth\bho\bon\bn_\b?\b\u00b6 *\b**\b**\b**\b*\n First we\u2019re going to need to import the Cython magic function to IPython:\n In [7]: %load_ext Cython\n Now, let\u2019s simply copy our functions over to Cython:\n In [8]: %%cython\n@@ -150,15 +149,15 @@\n    ...:     dx = (b - a) / N\n    ...:     for i in range(N):\n    ...:         s += f_plain(a + i * dx)\n    ...:     return s * dx\n    ...:\n In [9]: %timeit df.apply(lambda x: integrate_f_plain(x[\"a\"], x[\"b\"], x[\"N\"]),\n axis=1)\n-154 ms +- 22 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+86.5 ms +- 29.4 us per loop (mean +- std. dev. of 7 runs, 10 loops each)\n This has improved the performance compared to the pure Python approach by one-\n third.\n *\b**\b**\b**\b* D\bDe\bec\bcl\bla\bar\bri\bin\bng\bg C\bC t\bty\byp\bpe\bes\bs_\b?\b\u00b6 *\b**\b**\b**\b*\n We can annotate the function variables and return types as well as use cdef and\n cpdef to improve performance:\n In [10]: %%cython\n    ....: cdef double f_typed(double x) except? -2:\n@@ -170,35 +169,35 @@\n    ....:     dx = (b - a) / N\n    ....:     for i in range(N):\n    ....:         s += f_typed(a + i * dx)\n    ....:     return s * dx\n    ....:\n In [11]: %timeit df.apply(lambda x: integrate_f_typed(x[\"a\"], x[\"b\"], x[\"N\"]),\n axis=1)\n-20.2 ms +- 1.33 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+10 ms +- 5.07 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n Annotating the functions with C types yields an over ten times performance\n improvement compared to the original Python implementation.\n *\b**\b**\b**\b* U\bUs\bsi\bin\bng\bg n\bnd\bda\bar\brr\bra\bay\by_\b?\b\u00b6 *\b**\b**\b**\b*\n When re-profiling, time is spent creating a _\bS_\be_\br_\bi_\be_\bs from each row, and calling\n __getitem__ from both the index and the series (three times for each row).\n These Python function calls are expensive and can be improved by passing an\n np.ndarray.\n In [12]: %prun -l 4 df.apply(lambda x: integrate_f_typed(x[\"a\"], x[\"b\"], x\n [\"N\"]), axis=1)\n-         52540 function calls (52522 primitive calls) in 0.053 seconds\n+         52540 function calls (52522 primitive calls) in 0.027 seconds\n \n    Ordered by: internal time\n    List reduced from 165 to 4 due to restriction <4>\n \n    ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n-     3000    0.013    0.000    0.035    0.000 series.py:1016(__getitem__)\n-     3000    0.007    0.000    0.016    0.000 series.py:1139(_get_value)\n-     1000    0.006    0.000    0.041    0.000 :1()\n-    16094    0.006    0.000    0.007    0.000 {built-in method\n-builtins.isinstance}\n+     3000    0.005    0.000    0.018    0.000 series.py:1016(__getitem__)\n+     3000    0.003    0.000    0.008    0.000 series.py:1139(_get_value)\n+     3000    0.002    0.000    0.004    0.000 indexing.py:2678\n+(check_dict_or_set_indexers)\n+     3000    0.002    0.000    0.003    0.000 base.py:3763(get_loc)\n In [13]: %%cython\n    ....: cimport numpy as np\n    ....: import numpy as np\n    ....: cdef double f_typed(double x) except? -2:\n    ....:     return x * (x - 1)\n    ....: cpdef double integrate_f_typed(double a, double b, int N):\n    ....:     cdef int i\n@@ -239,29 +238,29 @@\n This implementation creates an array of zeros and inserts the result of\n integrate_f_typed applied over each row. Looping over an ndarray is faster in\n Cython than looping over a _\bS_\be_\br_\bi_\be_\bs object.\n Since apply_integrate_f is typed to accept an np.ndarray, _\bS_\be_\br_\bi_\be_\bs_\b._\bt_\bo_\b__\bn_\bu_\bm_\bp_\by_\b(_\b)\n calls are needed to utilize this function.\n In [14]: %timeit apply_integrate_f(df[\"a\"].to_numpy(), df[\"b\"].to_numpy(), df\n [\"N\"].to_numpy())\n-2.19 ms +- 318 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+1.17 ms +- 377 ns per loop (mean +- std. dev. of 7 runs, 1,000 loops each)\n Performance has improved from the prior implementation by almost ten times.\n *\b**\b**\b**\b* D\bDi\bis\bsa\bab\bbl\bli\bin\bng\bg c\bco\bom\bmp\bpi\bil\ble\ber\br d\bdi\bir\bre\bec\bct\bti\biv\bve\bes\bs_\b?\b\u00b6 *\b**\b**\b**\b*\n The majority of the time is now spent in apply_integrate_f. Disabling Cython\u2019s\n boundscheck and wraparound checks can yield more performance.\n In [15]: %prun -l 4 apply_integrate_f(df[\"a\"].to_numpy(), df[\"b\"].to_numpy(),\n df[\"N\"].to_numpy())\n-         75 function calls in 0.005 seconds\n+         75 function calls in 0.001 seconds\n \n    Ordered by: internal time\n    List reduced from 20 to 4 due to restriction <4>\n \n    ncalls  tottime  percall  cumtime  percall filename:lineno(function)\n-        1    0.005    0.005    0.005    0.005 :1()\n-        1    0.000    0.000    0.005    0.005 {built-in method builtins.exec}\n+        1    0.001    0.001    0.001    0.001 :1()\n+        1    0.000    0.000    0.001    0.001 {built-in method builtins.exec}\n         3    0.000    0.000    0.000    0.000 frame.py:3853(__getitem__)\n         3    0.000    0.000    0.000    0.000 base.py:541(to_numpy)\n In [16]: %%cython\n    ....: cimport cython\n    ....: cimport numpy as np\n    ....: import numpy as np\n    ....: cdef np.float64_t f_typed(np.float64_t x) except? -2:\n@@ -301,15 +300,15 @@\n /usr/lib/python3/dist-packages/numpy/core/include/numpy/\n npy_1_7_deprecated_api.h:17:2: warning: #warning \"Using deprecated NumPy API,\n disable it with \" \"#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION\" [-Wcpp]\n    17 | #warning \"Using deprecated NumPy API, disable it with \" \\\n       |  ^~~~~~~\n In [17]: %timeit apply_integrate_f_wrap(df[\"a\"].to_numpy(), df[\"b\"].to_numpy(),\n df[\"N\"].to_numpy())\n-2.18 ms +- 53.3 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+818 us +- 105 ns per loop (mean +- std. dev. of 7 runs, 1,000 loops each)\n However, a loop indexer i accessing an invalid location in an array would cause\n a segfault because memory access isn\u2019t checked. For more about boundscheck and\n wraparound, see the Cython docs on _\bc_\bo_\bm_\bp_\bi_\bl_\be_\br_\b _\bd_\bi_\br_\be_\bc_\bt_\bi_\bv_\be_\bs.\n *\b**\b**\b**\b**\b* N\bNu\bum\bmb\bba\ba (\b(J\bJI\bIT\bT c\bco\bom\bmp\bpi\bil\bla\bat\bti\bio\bon\bn)\b)_\b?\b\u00b6 *\b**\b**\b**\b**\b*\n An alternative to statically compiling Cython code is to use a dynamic just-in-\n time (JIT) compiler with _\bN_\bu_\bm_\bb_\ba.\n Numba allows you to write a pure Python function which can be JIT compiled to\n@@ -612,17 +611,17 @@\n The 'numexpr' engine is the more performant engine that can yield performance\n improvements compared to standard Python syntax for large _\bD_\ba_\bt_\ba_\bF_\br_\ba_\bm_\be. This\n engine requires the optional dependency numexpr to be installed.\n The 'python' engine is generally n\bno\bot\bt useful except for testing other evaluation\n engines against it. You will achieve n\bno\bo performance benefits using _\be_\bv_\ba_\bl_\b(_\b) with\n engine='python' and may incur a performance hit.\n In [40]: %timeit df1 + df2 + df3 + df4\n-27.1 ms +- 1.69 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+8.29 ms +- 29.4 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n In [41]: %timeit pd.eval(\"df1 + df2 + df3 + df4\", engine=\"python\")\n-29.1 ms +- 1.04 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+11.4 ms +- 22.3 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n *\b**\b**\b**\b* T\bTh\bhe\be _\bD\bD_\ba\ba_\bt\bt_\ba\ba_\bF\bF_\br\br_\ba\ba_\bm\bm_\be\be_\b.\b._\be\be_\bv\bv_\ba\ba_\bl\bl_\b(\b(_\b)\b) m\bme\bet\bth\bho\bod\bd_\b?\b\u00b6 *\b**\b**\b**\b*\n In addition to the top level _\bp_\ba_\bn_\bd_\ba_\bs_\b._\be_\bv_\ba_\bl_\b(_\b) function you can also evaluate an\n expression in the \u201ccontext\u201d of a _\bD_\ba_\bt_\ba_\bF_\br_\ba_\bm_\be.\n In [42]: df = pd.DataFrame(np.random.randn(5, 2), columns=[\"a\", \"b\"])\n \n In [43]: df.eval(\"a + b\")\n Out[43]:\n@@ -719,29 +718,29 @@\n _\bp_\ba_\bn_\bd_\ba_\bs_\b._\be_\bv_\ba_\bl_\b(_\b) works well with expressions containing large arrays.\n In [58]: nrows, ncols = 20000, 100\n \n In [59]: df1, df2, df3, df4 = [pd.DataFrame(np.random.randn(nrows, ncols)) for\n _ in range(4)]\n _\bD_\ba_\bt_\ba_\bF_\br_\ba_\bm_\be arithmetic:\n In [60]: %timeit df1 + df2 + df3 + df4\n-28.3 ms +- 2.33 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+8.33 ms +- 37.6 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n In [61]: %timeit pd.eval(\"df1 + df2 + df3 + df4\")\n-13.4 ms +- 1.69 ms per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+7.38 ms +- 18.7 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n _\bD_\ba_\bt_\ba_\bF_\br_\ba_\bm_\be comparison:\n In [62]: %timeit (df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)\n-30.6 ms +- 5.23 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+10.6 ms +- 23.6 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n In [63]: %timeit pd.eval(\"(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)\")\n-26.3 ms +- 3.92 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+14.7 ms +- 13.5 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n _\bD_\ba_\bt_\ba_\bF_\br_\ba_\bm_\be arithmetic with unaligned axes.\n In [64]: s = pd.Series(np.random.randn(50))\n \n In [65]: %timeit df1 + df2 + df3 + df4 + s\n-35.3 ms +- 2.82 ms per loop (mean +- std. dev. of 7 runs, 10 loops each)\n+19.2 ms +- 65 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n In [66]: %timeit pd.eval(\"df1 + df2 + df3 + df4 + s\")\n-15.4 ms +- 878 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n+11 ms +- 26.3 us per loop (mean +- std. dev. of 7 runs, 100 loops each)\n Note\n Operations such as\n 1 and 2  # would parse to 1 & 2, but should evaluate to 2\n 3 or 4  # would parse to 3 | 4, but should evaluate to 3\n ~1  # this is okay, but slower when using eval\n should be performed in Python. An exception will be raised if you try to\n perform any boolean/bitwise operations with scalar operands that are not of\n"}]}, {"source1": "./usr/share/doc/python-pandas-doc/html/user_guide/scale.html", "source2": "./usr/share/doc/python-pandas-doc/html/user_guide/scale.html", "unified_diff": "@@ -889,16 +889,16 @@\n    ....: files = pathlib.Path("data/timeseries/").glob("ts*.parquet")\n    ....: counts = pd.Series(dtype=int)\n    ....: for path in files:\n    ....:     df = pd.read_parquet(path)\n    ....:     counts = counts.add(df["name"].value_counts(), fill_value=0)\n    ....: counts.astype(int)\n    ....: \n-CPU times: user 695 us, sys: 46 us, total: 741 us\n-Wall time: 750 us\n+CPU times: user 490 us, sys: 135 us, total: 625 us\n+Wall time: 633 us\n Out[32]: Series([], dtype: int64)\n 
\n
\n

Some readers, like pandas.read_csv(), offer parameters to control the\n chunksize when reading a single file.

\n

Manually chunking is an OK option for workflows that don\u2019t\n require too sophisticated of operations. Some operations, like pandas.DataFrame.groupby(), are\n", "details": [{"source1": "html2text {}", "source2": "html2text {}", "unified_diff": "@@ -600,16 +600,16 @@\n ....: files = pathlib.Path(\"data/timeseries/\").glob(\"ts*.parquet\")\n ....: counts = pd.Series(dtype=int)\n ....: for path in files:\n ....: df = pd.read_parquet(path)\n ....: counts = counts.add(df[\"name\"].value_counts(), fill_value=0)\n ....: counts.astype(int)\n ....:\n-CPU times: user 695 us, sys: 46 us, total: 741 us\n-Wall time: 750 us\n+CPU times: user 490 us, sys: 135 us, total: 625 us\n+Wall time: 633 us\n Out[32]: Series([], dtype: int64)\n Some readers, like _\bp_\ba_\bn_\bd_\ba_\bs_\b._\br_\be_\ba_\bd_\b__\bc_\bs_\bv_\b(_\b), offer parameters to control the chunksize\n when reading a single file.\n Manually chunking is an OK option for workflows that don\u2019t require too\n sophisticated of operations. Some operations, like _\bp_\ba_\bn_\bd_\ba_\bs_\b._\bD_\ba_\bt_\ba_\bF_\br_\ba_\bm_\be_\b._\bg_\br_\bo_\bu_\bp_\bb_\by_\b(_\b),\n are much harder to do chunkwise. In these cases, you may be better switching to\n a different library that implements these out-of-core algorithms for you.\n"}]}, {"source1": "./usr/share/doc/python-pandas-doc/html/user_guide/style.ipynb.gz", "source2": "./usr/share/doc/python-pandas-doc/html/user_guide/style.ipynb.gz", "unified_diff": null, "details": [{"source1": "style.ipynb", "source2": "style.ipynb", "unified_diff": null, "details": [{"source1": "Pretty-printed", "source2": "Pretty-printed", "comments": ["Similarity: 0.9985610875706213%", "Differences: {\"'cells'\": \"{1: {'metadata': {'execution': {'iopub.execute_input': '2024-05-06T05:03:50.282603Z', \"", " \"'iopub.status.busy': '2024-05-06T05:03:50.282016Z', 'iopub.status.idle': \"", " \"'2024-05-06T05:03:50.783818Z', 'shell.execute_reply': \"", " \"'2024-05-06T05:03:50.782955Z'}}}, 3: {'metadata': {'execution': \"", " \"{'iopub.execute_input': '2024-05-06T05:03:50.789304Z', 'iopub.status.busy': \"", " \"'2024-05-06T05:03:50.788918Z', 'iopub.status.idle': '2024-05-06T05:03:5 [\u2026]"], "unified_diff": "@@ -39,18 +39,18 @@\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"metadata\": {\n \"execution\": {\n- \"iopub.execute_input\": \"2025-06-08T08:48:55.062107Z\",\n- \"iopub.status.busy\": \"2025-06-08T08:48:55.061773Z\",\n- \"iopub.status.idle\": \"2025-06-08T08:48:55.915066Z\",\n- \"shell.execute_reply\": \"2025-06-08T08:48:55.914203Z\"\n+ \"iopub.execute_input\": \"2024-05-06T05:03:50.282603Z\",\n+ \"iopub.status.busy\": \"2024-05-06T05:03:50.282016Z\",\n+ \"iopub.status.idle\": \"2024-05-06T05:03:50.783818Z\",\n+ \"shell.execute_reply\": \"2024-05-06T05:03:50.782955Z\"\n },\n \"nbsphinx\": \"hidden\"\n },\n \"outputs\": [],\n \"source\": [\n \"import matplotlib.pyplot\\n\",\n \"# We have this here to trigger matplotlib's font cache stuff.\\n\",\n@@ -77,36 +77,36 @@\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 2,\n \"metadata\": {\n \"execution\": {\n- \"iopub.execute_input\": \"2025-06-08T08:48:55.919677Z\",\n- \"iopub.status.busy\": \"2025-06-08T08:48:55.919258Z\",\n- \"iopub.status.idle\": \"2025-06-08T08:48:56.503053Z\",\n- \"shell.execute_reply\": \"2025-06-08T08:48:56.502205Z\"\n+ \"iopub.execute_input\": \"2024-05-06T05:03:50.789304Z\",\n+ \"iopub.status.busy\": \"2024-05-06T05:03:50.788918Z\",\n+ \"iopub.status.idle\": \"2024-05-06T05:03:51.023677Z\",\n+ \"shell.execute_reply\": \"2024-05-06T05:03:51.022556Z\"\n }\n },\n \"outputs\": [],\n \"source\": [\n \"import pandas as pd\\n\",\n \"import numpy as np\\n\",\n \"import matplotlib as mpl\\n\"\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 3,\n \"metadata\": {\n \"execution\": {\n- \"iopub.execute_input\": \"2025-06-08T08:48:56.511528Z\",\n- \"iopub.status.busy\": \"2025-06-08T08:48:56.511080Z\",\n- \"iopub.status.idle\": \"2025-06-08T08:48:56.823101Z\",\n- \"shell.execute_reply\": \"2025-06-08T08:48:56.822200Z\"\n+ \"iopub.execute_input\": \"2024-05-06T05:03:51.028692Z\",\n+ \"iopub.status.busy\": \"2024-05-06T05:03:51.028294Z\",\n+ \"iopub.status.idle\": \"2024-05-06T05:03:51.157146Z\",\n+ \"shell.execute_reply\": \"2024-05-06T05:03:51.156084Z\"\n },\n \"nbsphinx\": \"hidden\"\n },\n \"outputs\": [],\n \"source\": [\n \"# For reproducibility - this doesn't respect uuid_len or positionally-passed uuid but the places here that use that coincidentally bypass this anyway\\n\",\n \"from pandas.io.formats.style import Styler\\n\",\n@@ -123,18 +123,18 @@\n ]\n },\n {\n \"cell_type\": \"code\",\n \"execution_count\": 4,\n \"metadata\": {\n \"execution\": {\n- \"iopub.execute_input\": \"2025-06-08T08:48:56.831520Z\",\n- \"iopub.status.busy\": \"2025-06-08T08:48:56.831080Z\",\n- \"iopub.status.idle\": \"2025-06-08T08:48:56.855005Z\",\n- \"shell.execute_reply\": \"2025-06-08T08:48:56.854179Z\"\n+ \"iopub.execute_input\": \"2024-05-06T05:03:51.162164Z\",\n+ \"iopub.status.busy\": \"2024-05-06T05:03:51.161758Z\",\n+ \"iopub.status.idle\": \"2024-05-06T05:03:51.173976Z\",\n+ \"shell.execute_reply\": \"2024-05-06T05:03:51.173152Z\"\n }\n },\n \"outputs\": [\n {\n \"data\": {\n \"text/html\": [\n \"