diff --git a/docs/changelog.md b/docs/changelog.md index cad8da54..9046fd18 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,6 +1,13 @@ ITables ChangeLog ================= +1.5.3 (2023-03-??) +------------------ + +**Added** +- We have added support for LaTeX/MathJax equations ([#154](https://github.com/mwouts/itables/issues/154)) + + 1.5.2 (2023-03-26) ------------------ diff --git a/docs/polars_dataframes.md b/docs/polars_dataframes.md index 064ac2fe..deeeb236 100644 --- a/docs/polars_dataframes.md +++ b/docs/polars_dataframes.md @@ -150,3 +150,9 @@ opt.warn_on_int_to_str_conversion = False show(dict_of_test_dfs["big_integers"]) ``` + +## maths + +```{code-cell} +show(dict_of_test_dfs["maths"]) +``` diff --git a/docs/sample_dataframes.md b/docs/sample_dataframes.md index 57940a98..fe93529e 100644 --- a/docs/sample_dataframes.md +++ b/docs/sample_dataframes.md @@ -21,7 +21,7 @@ from itables import init_notebook_mode, show from itables.sample_dfs import get_dict_of_test_dfs dict_of_test_dfs = get_dict_of_test_dfs() -init_notebook_mode(all_interactive=True) +init_notebook_mode(all_interactive=True, connected=True) ``` ## empty @@ -161,3 +161,9 @@ opt.warn_on_int_to_str_conversion = False show(dict_of_test_dfs["big_integers"]) ``` + +## maths + +```{code-cell} +show(dict_of_test_dfs["maths"]) +``` diff --git a/itables/__init__.py b/itables/__init__.py index 7ecb5f1d..4ae174fb 100644 --- a/itables/__init__.py +++ b/itables/__init__.py @@ -1,10 +1,5 @@ -from .javascript import ( - JavascriptCode, - JavascriptFunction, - init_notebook_mode, - show, - to_html_datatable, -) +from .datatables_repr import init_notebook_mode, show, to_html_datatable +from .javascript import JavascriptCode, JavascriptFunction from .version import __version__ __all__ = [ diff --git a/itables/datatables_repr.py b/itables/datatables_repr.py new file mode 100644 index 00000000..520e6aa8 --- /dev/null +++ b/itables/datatables_repr.py @@ -0,0 +1,405 @@ +"""HTML/js representation of Pandas dataframes""" + +import logging +import re +import sys +import uuid +import warnings +from base64 import b64encode + +import numpy as np +import pandas as pd + +try: + import polars as pl +except ImportError: + # Define pl.Series as pd.Series + import pandas as pl + +from IPython.display import HTML, Javascript, display + +import itables.options as opt + +from .datatables_format import datatables_rows +from .downsample import downsample +from .javascript import JavascriptFunction, json_dumps +from .utils import read_package_file + +logging.basicConfig() +logger = logging.getLogger(__name__) + +_ORIGINAL_DATAFRAME_REPR_HTML = pd.DataFrame._repr_html_ +_ORIGINAL_POLARS_DATAFRAME_REPR_HTML = pl.DataFrame._repr_html_ +_CONNECTED = True + +try: + import google.colab + + # I can't find out how to suppress the LGTM alert about unused-import + # (Tried with # lgtm[py/unused-import] # noqa: F401) + # So we use the import: + assert google.colab.output + + GOOGLE_COLAB = True +except ImportError: + GOOGLE_COLAB = False + + +def init_notebook_mode( + all_interactive=False, connected=GOOGLE_COLAB, warn_if_call_is_superfluous=True +): + """Load the datatables.net library and the corresponding css (if connected=False), + and (if all_interactive=True), activate the datatables representation for all the Pandas DataFrames and Series. + + Warning: make sure you keep the output of this cell when 'connected=False', + otherwise the interactive tables will stop working. + """ + global _CONNECTED + if GOOGLE_COLAB and not connected: + warnings.warn( + "The offline mode for itables is not supposed to work in Google Colab. " + "This is because HTML outputs in Google Colab are encapsulated in iframes." + ) + + if ( + all_interactive is False + and pd.DataFrame._repr_html_ == _ORIGINAL_DATAFRAME_REPR_HTML + and connected is True + and _CONNECTED == connected + ): + if warn_if_call_is_superfluous: + warnings.warn( + "Did you know? " + "init_notebook_mode(all_interactive=False, connected=True) does nothing. " + "Feel free to remove this line, or pass warn_if_call_is_superfluous=False." + ) + return + + _CONNECTED = connected + + if all_interactive: + pd.DataFrame._repr_html_ = _datatables_repr_ + pd.Series._repr_html_ = _datatables_repr_ + pl.DataFrame._repr_html_ = _datatables_repr_ + pl.Series._repr_html_ = _datatables_repr_ + else: + pd.DataFrame._repr_html_ = _ORIGINAL_DATAFRAME_REPR_HTML + pl.DataFrame._repr_html_ = _ORIGINAL_POLARS_DATAFRAME_REPR_HTML + if hasattr(pd.Series, "_repr_html_"): + del pd.Series._repr_html_ + if hasattr(pl.Series, "_repr_html_"): + del pl.Series._repr_html_ + + if not connected: + display(Javascript(read_package_file("external/jquery.min.js"))) + # We use datatables' ES module version because the non module version + # fails to load as a simple script in the presence of require.js + dt64 = b64encode( + read_package_file("external/jquery.dataTables.mjs").encode("utf-8") + ).decode("ascii") + display( + HTML( + replace_value( + read_package_file("html/initialize_offline_datatable.html"), + "dt_src", + "data:text/javascript;base64,{}".format(dt64), + ) + ) + ) + display( + HTML( + "" + ) + ) + + +def _table_header( + df, table_id, show_index, classes, style, tags, footer, column_filters +): + """This function returns the HTML table header. Rows are not included.""" + # Generate table head using pandas.to_html(), see issue 63 + pattern = re.compile(r".*(.*)", flags=re.MULTILINE | re.DOTALL) + try: + html_header = df.head(0).to_html() + except AttributeError: + # Polars DataFrames + html_header = pd.DataFrame(data=[], columns=df.columns).to_html() + # Don't remove the index header for empty dfs + if not len(df.columns): + show_index = True + match = pattern.match(html_header) + thead = match.groups()[0] + if not show_index: + thead = thead.replace("", "", 1) + + if column_filters: + # We use this header in the column filters, so we need to remove any column multiindex first""" + thead_flat = "" + if show_index: + for index in df.index.names: + thead_flat += "{}".format(index) + + for column in df.columns: + thead_flat += "{}".format(column) + + loading = "Loading... (need help?)" + tbody = "{}".format(loading) + + if style: + style = 'style="{}"'.format(style) + else: + style = "" + + if column_filters == "header": + header = "{}".format(thead_flat) + else: + header = "{}".format(thead) + + if column_filters == "footer": + footer = "{}".format(thead_flat) + elif footer: + footer = "{}".format(thead) + else: + footer = "" + + return """{tags}{header}{tbody}{footer}
""".format( + table_id=table_id, + classes=classes, + style=style, + tags=tags, + header=header, + tbody=tbody, + footer=footer, + ) + + +def replace_value(template, pattern, value): + """Set the given pattern to the desired value in the template, + after making sure that the pattern is found exactly once.""" + if sys.version_info >= (3,): + assert isinstance(template, str), template + assert template.count(pattern) == 1, pattern + return template.replace(pattern, value) + + +def _datatables_repr_(df=None, tableId=None, **kwargs): + return to_html_datatable(df, tableId, connected=_CONNECTED, **kwargs) + + +def to_html_datatable(df=None, caption=None, tableId=None, connected=True, **kwargs): + """Return the HTML representation of the given dataframe as an interactive datatable""" + # Default options + for option in dir(opt): + if ( + option not in kwargs + and not option.startswith("__") + and option not in ["read_package_file"] + ): + kwargs[option] = getattr(opt, option) + + for name, value in kwargs.items(): + if value is None: + raise ValueError( + "Please don't pass an option with a value equal to None ('{}=None')".format( + name + ) + ) + + # These options are used here, not in DataTable + classes = kwargs.pop("classes") + style = kwargs.pop("style") + css = kwargs.pop("css") + tags = kwargs.pop("tags") + + if caption is not None: + tags = '{}{}'.format( + tags, caption + ) + + showIndex = kwargs.pop("showIndex") + maxBytes = kwargs.pop("maxBytes", 0) + maxRows = kwargs.pop("maxRows", 0) + maxColumns = kwargs.pop("maxColumns", pd.get_option("display.max_columns") or 0) + eval_functions = kwargs.pop("eval_functions", None) + pre_dt_code = kwargs.pop("pre_dt_code") + warn_on_unexpected_types = kwargs.pop("warn_on_unexpected_types", False) + warn_on_int_to_str_conversion = kwargs.pop("warn_on_int_to_str_conversion", False) + + if isinstance(df, (np.ndarray, np.generic)): + df = pd.DataFrame(df) + + if isinstance(df, (pd.Series, pl.Series)): + df = df.to_frame() + + if showIndex == "auto": + try: + showIndex = df.index.name is not None or not isinstance( + df.index, pd.RangeIndex + ) + except AttributeError: + # Polars DataFrame + showIndex = False + + df, downsampling_warning = downsample( + df, max_rows=maxRows, max_columns=maxColumns, max_bytes=maxBytes + ) + + if downsampling_warning and "fnInfoCallback" not in kwargs: + kwargs["fnInfoCallback"] = JavascriptFunction( + "function (oSettings, iStart, iEnd, iMax, iTotal, sPre) {{ return sPre + ' ({warning})'; }}".format( + warning=downsampling_warning + ) + ) + + if "dom" not in kwargs and _df_fits_in_one_page(df, kwargs): + kwargs["dom"] = "ti" if downsampling_warning else "t" + + footer = kwargs.pop("footer") + column_filters = kwargs.pop("column_filters") + if column_filters == "header": + pass + elif column_filters == "footer": + footer = True + elif column_filters is not False: + raise ValueError( + "column_filters should be either " + "'header', 'footer' or False, not {}".format(column_filters) + ) + + # Load the HTML template + if connected: + output = read_package_file("html/datatables_template_connected.html") + else: + output = read_package_file("html/datatables_template.html") + + tableId = tableId or str(uuid.uuid4()) + if isinstance(classes, list): + classes = " ".join(classes) + + if not showIndex: + try: + df = df.set_index(pd.RangeIndex(len(df.index))) + except AttributeError: + # Polars DataFrames + pass + + table_header = _table_header( + df, tableId, showIndex, classes, style, tags, footer, column_filters + ) + output = replace_value( + output, + '
$\\pi+\\epsilon$
', + table_header, + ) + output = replace_value(output, "#table_id", "#{}".format(tableId)) + output = replace_value( + output, + "", + "".format(css), + ) + + if column_filters: + # If the below was false, we would need to concatenate the JS code + # which might not be trivial... + assert pre_dt_code == "" + assert "initComplete" not in kwargs + + pre_dt_code = replace_value( + read_package_file("html/column_filters/pre_dt_code.js"), + "thead_or_tfoot", + "thead" if column_filters == "header" else "tfoot", + ) + kwargs["initComplete"] = JavascriptFunction( + replace_value( + replace_value( + read_package_file("html/column_filters/initComplete.js"), + "const initComplete = ", + "", + ), + "header", + column_filters, + ) + ) + + # Export the DT args to JSON + dt_args = json_dumps(kwargs, eval_functions) + + output = replace_value( + output, "let dt_args = {};", "let dt_args = {};".format(dt_args) + ) + output = replace_value( + output, + "// [pre-dt-code]", + pre_dt_code.replace("#table_id", "#{}".format(tableId)), + ) + + # Export the table data to JSON and include this in the HTML + if showIndex: + df = safe_reset_index(df) + + # When the header has an extra column, we add + # an extra empty column in the table data #141 + column_count = _column_count_in_header(table_header) + dt_data = datatables_rows( + df, + column_count, + warn_on_unexpected_types=warn_on_unexpected_types, + warn_on_int_to_str_conversion=warn_on_int_to_str_conversion, + ) + + output = replace_value( + output, "const data = [];", "const data = {};".format(dt_data) + ) + + return output + + +def _column_count_in_header(table_header): + return max(line.count("") for line in table_header.split("")) + + +def _min_rows(kwargs): + if "lengthMenu" not in kwargs: + return 10 + + lengthMenu = kwargs["lengthMenu"] + min_rows = lengthMenu[0] + + if isinstance(min_rows, (int, float)): + return min_rows + + return min_rows[0] + + +def _df_fits_in_one_page(df, kwargs): + """Display just the table (not the search box, etc...) if the rows fit on one 'page'""" + return len(df) <= _min_rows(kwargs) + + +def safe_reset_index(df): + try: + return df.reset_index() + except ValueError: + # Issue #134: the above might fail if the index has duplicated names or if one of the + # index names is already a column, with e.g "ValueError: cannot insert A, already exists" + index_levels = [ + pd.Series( + df.index.get_level_values(i), + name=name + or ( + "index{}".format(i) + if isinstance(df.index, pd.MultiIndex) + else "index" + ), + ) + for i, name in enumerate(df.index.names) + ] + return pd.concat(index_levels + [df.reset_index(drop=True)], axis=1) + + +def show(df=None, caption=None, **kwargs): + """Show a dataframe""" + html = to_html_datatable(df, caption=caption, connected=_CONNECTED, **kwargs) + display(HTML(html)) diff --git a/itables/html/datatables_template.html b/itables/html/datatables_template.html index 056aa477..285ed5a6 100644 --- a/itables/html/datatables_template.html +++ b/itables/html/datatables_template.html @@ -1,6 +1,6 @@
-
A
+
$\pi+\epsilon$
+ +