# :Id: $Id: tex2mathml_extern.py 9536 2024-02-01 13:04:22Z milde $
# :Copyright: © 2015 Günter Milde.
# :License: Released under the terms of the `2-Clause BSD license`__, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# __ https://opensource.org/licenses/BSD-2-Clause

"""Wrappers for TeX->MathML conversion by external tools

This module is provisional:
the API is not settled and may change with any minor Docutils version.
"""

import subprocess

from docutils import nodes
from docutils.utils.math import MathError, wrap_math_code

# `latexml` expects a complete document.
# The "%s" placeholder is filled with the wrapped math code
# (via the "%" string formatting operator) before the call.
document_template = r"""\documentclass{article}
\begin{document}
%s
\end{document}
"""


def _check_result(result, details=None):
    """Raise `MathError` if the conversion went wrong.

    :result:  object describing the finished converter run. The attributes
              `args`, `returncode`, `stdout`, and `stderr` are read.
    :details: list of doctree nodes with additional info (optional).
              If empty and `result.stderr` is set, a paragraph with the
              content of `result.stderr` is used instead.

    Return `None` if there is no sign of a problem, i.e. no details,
    no error output, zero return code, and non-empty `result.stdout`.
    """
    # Create a fresh list for every call: the list is handed over to
    # `MathError`, so a shared default object could be modified later and
    # would then turn every following call into a reported failure.
    if details is None:
        details = []
    if not details and result.stderr:
        details = [nodes.paragraph('', result.stderr, classes=['pre-wrap'])]

    if details:
        msg = f'TeX to MathML converter `{result.args[0]}` failed:'
    elif result.returncode:
        msg = (f'TeX to MathML converter `{result.args[0]}` '
               f'exited with Errno {result.returncode}.')
    elif not result.stdout:
        msg = f'TeX to MathML converter `{result.args[0]}` returned no MathML.'
    else:
        return
    raise MathError(msg, details=details)


def blahtexml(math_code, as_block=False):
    """Convert LaTeX math code to MathML with blahtexml__.

    :math_code: LaTeX math source without math-mode switch.
    :as_block:  if true, add ``display="block"`` to the ``<math>`` element.

    Return a ``<math>`` element (string) wrapping the converter's markup.
    Raise `MathError` if blahtexml reports an error or returns no markup.

    __ http://gva.noekeon.org/blahtexml/
    """
    args = ['blahtexml',
            '--mathml',
            '--indented',
            '--spacing', 'moderate',
            '--mathml-encoding', 'raw',
            '--other-encoding', 'raw',
            '--doctype-xhtml+mathml',
            '--annotate-TeX',
            ]
    # "blahtexml" expects LaTeX code without math-mode-switch.
    # We still need to tell it about displayed equation(s).
    mathml_args = ' display="block"' if as_block else ''
    # Only if wrapping results in an "align*" environment, pass the wrapped
    # code (using "aligned" instead); else pass the code unchanged.
    _wrapped = wrap_math_code(math_code, as_block)
    if '{align*}' in _wrapped:
        math_code = _wrapped.replace('{align*}', '{aligned}')

    result = subprocess.run(args, input=math_code,
                            capture_output=True, text=True)

    # blahtexml writes <error> messages to stdout
    output = result.stdout
    if '<error>' in output:
        start = output.find('<message>')
        stop = output.find('</message>', start)
        if start != -1 and stop != -1:
            message = output[start+9:stop]  # len('<message>') == 9
        else:
            message = ''
        # Fall back to the complete output so that an error report without
        # (or with empty) <message> is never mistaken for a valid result.
        result.stderr = message or output
    else:
        start = output.find('<markup>')
        stop = output.find('</markup>', start)
        if start == -1 or stop == -1:
            # No markup found: do not slice with index -1 (this would
            # return arbitrary text), let `_check_result()` complain.
            result.stdout = ''
        else:
            # Skip the 8-character tag and one more character.
            # NOTE(review): presumably the line break following the tag
            # in the "--indented" output -- confirm.
            result.stdout = output[start+9:stop]
    _check_result(result)
    return (f'<math xmlns="http://www.w3.org/1998/Math/MathML"{mathml_args}>'
            f'\n{result.stdout}</math>')


def latexml(math_code, as_block=False):
    """Convert LaTeX math code to MathML with LaTeXML__.

    Comprehensive macro support but **very** slow.

    :math_code: LaTeX math source.
    :as_block:  passed on to `wrap_math_code()`.

    Return the markup extracted from the converter output (string):
    a ``<table>`` with ``<math>`` elements in its cells if the output
    contains an "ltx_equationgroup" table, else a ``<math>`` element.
    Raise `MathError` if one of the two conversion stages fails.

    __ http://dlmf.nist.gov/LaTeXML/
    """

    # LaTeXML works in 2 stages, expects complete documents.
    #
    # The `latexmlmath`__ convenience wrapper does not support block-level
    # (displayed) equations.
    #
    # __ https://metacpan.org/dist/LaTeXML/view/bin/latexmlmath
    args1 = ['latexml',
             '-',  # read from stdin
             '--preload=amsmath',
             '--preload=amssymb',  # also loads amsfonts
             '--inputencoding=utf8',
             '--',
             ]
    math_code = document_template % wrap_math_code(math_code, as_block)

    # Stage 1: LaTeX -> LaTeXML's intermediate format (read from stdout).
    result1 = subprocess.run(args1, input=math_code,
                             capture_output=True, text=True)
    # Drop progress info etc., keep only the actual problem reports.
    result1.stderr = _latexml_messages(result1.stderr)
    _check_result(result1)

    args2 = ['latexmlpost',
             '-',
             '--nonumbersections',
             '--format=html5',  # maths included as MathML
             '--omitdoctype',   # Make it simple, we only need the maths.
             '--noscan',        # ...
             '--nocrossref',
             '--nographicimages',
             '--nopictureimages',
             '--nodefaultresources',  # do not copy *.css files to output dir
             '--'
             ]
    # Stage 2: intermediate format -> HTML5 with embedded MathML.
    result2 = subprocess.run(args2, input=result1.stdout,
                             capture_output=True, text=True)
    # Extract MathML from HTML document:
    # <table> with <math> in cells for "align", <math> element else.
    # A missing start or end tag results in an empty string (instead of
    # slicing with index -1) so that `_check_result()` reports the problem.
    output = result2.stdout
    start = output.find('<table class="ltx_equationgroup')
    if start != -1:
        stop = output.find('</table>', start)
        mathml = output[start:stop+8] if stop != -1 else ''
        mathml = mathml.replace('ltx_equationgroup', 'borderless align-center')
    else:
        start = output.find('<math')
        stop = output.find('</math>', start)
        if start != -1 and stop != -1:
            mathml = output[start:stop+7]
        else:
            mathml = ''
    result2.stdout = mathml
    # Search for error messages:
    # latexmlpost reports errors in output, use stderr just in case.
    result2.stderr = _latexml_messages(result2.stdout or result2.stderr)
    _check_result(result2)
    return result2.stdout


def _latexml_messages(text):
    # Return the lines of `text` starting with "Error:", "Warning:",
    # or "Fatal:" as one string (lines joined with newlines).
    return '\n'.join(line for line in text.splitlines()
                     if line.startswith(('Error:', 'Warning:', 'Fatal:')))


def pandoc(math_code, as_block=False):
    """Convert LaTeX math code to MathML with pandoc__.

    :math_code: LaTeX math source.
    :as_block:  passed on to `wrap_math_code()`.

    Return the first ``<math>`` element of the pandoc output (string).
    Raise `MathError` if pandoc writes to stderr, exits with an error,
    or returns no ``<math>`` element.

    __ https://pandoc.org/
    """
    args = ['pandoc',
            '--mathml',
            '--from=latex',
            ]
    result = subprocess.run(args, input=wrap_math_code(math_code, as_block),
                            capture_output=True, text=True)

    # Extract the <math> element. A missing start or end tag results in an
    # empty string (instead of slicing with index -1) so that
    # `_check_result()` reports "no MathML".
    output = result.stdout
    start = output.find('<math')
    stop = output.find('</math>', start)
    if start != -1 and stop != -1:
        result.stdout = output[start:stop+7]
    else:
        result.stdout = ''
    # Pandoc (2.9.2.1) messages are pre-formatted for the terminal:
    #   1. summary
    #   2. math source (part)
    #   3. error spot indicator '^' (works only in a literal block)
    #   4. assumed problem
    #   5. assumed solution (may be wrong or confusing)
    # Construct a "details" list:
    details = []
    if result.stderr:
        lines = result.stderr.splitlines()
        # summary
        details.append(nodes.paragraph('', lines[0]))
        # source and error spot indicator (keep the alignment)
        details.append(nodes.literal_block('', '\n'.join(lines[1:3])))
        # remaining lines
        details.append(nodes.paragraph('', '\n'.join(lines[3:]),
                                       classes=['pre-wrap']))
    _check_result(result, details=details)
    return result.stdout


def ttm(math_code, as_block=False):
    """Convert LaTeX math code to MathML with TtM__.

    Aged, limited, but fast.

    :math_code: LaTeX math source.
    :as_block:  if true, add ``display="block"`` to the ``<math>`` element.

    Return the first ``<math>`` element of the ttm output (string).
    Raise `MathError` if the input cannot be encoded as ISO-8859-1,
    ttm reports a problem, or returns no ``<math>`` element.

    __ http://silas.psfc.mit.edu/tth/mml/
    """
    args = ['ttm',
            '-L',  # source is LaTeX snippet
            '-r']  # output MathML snippet
    math_code = wrap_math_code(math_code, as_block)

    # "ttm" does not support UTF-8 input. (Docutils converts most math
    # characters to LaTeX commands before calling this function.)
    try:
        result = subprocess.run(args, input=math_code,
                                capture_output=True, text=True,
                                encoding='ISO-8859-1')
    except UnicodeEncodeError as err:
        raise MathError(err) from err

    # Extract the <math> element. A missing start or end tag results in an
    # empty string (instead of slicing with index -1) so that
    # `_check_result()` reports "no MathML".
    output = result.stdout
    start = output.find('<math')
    stop = output.find('</math>', start)
    if start != -1 and stop != -1:
        result.stdout = output[start:stop+7]
    else:
        result.stdout = ''
    if as_block:
        result.stdout = result.stdout.replace('<math xmlns=',
                                              '<math display="block" xmlns=')
    # Keep only lines marked with "**** " (strip the marker, add a full stop).
    result.stderr = '\n'.join(line[5:] + '.'
                              for line in result.stderr.splitlines()
                              if line.startswith('**** '))
    _check_result(result)
    return result.stdout


# self-test

if __name__ == "__main__":
    # Run all converters on a sample formula and write a HTML document
    # with the results to stdout (requires the external tools).
    example = (r'\frac{\partial \sin^2(\alpha)}{\partial \vec r}'
               r'\varpi \mathbb{R} \, \text{Grüße}')

    # The <main> element opened here is closed together with
    # <body> and <html> after the block-level examples.
    print("""<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>test external mathml converters</title>
</head>
<body>
<main>
<p>Test external converters</p>
<p>
""")
    # inline formulas
    # (ttm gets "\mathbf" instead of "\mathbb" --
    # NOTE(review): presumably "\mathbb" is not supported by ttm, confirm.)
    print(f'latexml: {latexml(example)},')
    print(f'ttm: {ttm(example.replace("mathbb", "mathbf"))},')
    print(f'blahtexml: {blahtexml(example)},')
    print(f'pandoc: {pandoc(example)}.')
    print('</p>')

    # displayed (block-level) formulas
    print('<p>latexml:</p>')
    print(latexml(example, as_block=True))
    print('<p>ttm:</p>')
    print(ttm(example.replace('mathbb', 'mathbf'), as_block=True))
    print('<p>blahtexml:</p>')
    print(blahtexml(example, as_block=True))
    print('<p>pandoc:</p>')
    print(pandoc(example, as_block=True))

    print('</main>\n</body>\n</html>')

    # Error reporting: convert a formula with unknown macros and
    # print the error message and details.
    # (Uncomment one of the alternatives to test another converter/sample.)
    buggy = r'\sinc \phy'
    # buggy = '\sqrt[e]'
    try:
        # print(blahtexml(buggy))
        # print(latexml(f'${buggy}$'))
        print(pandoc(f'${buggy}$'))
        # print(ttm(f'${buggy}$'))
    except MathError as err:
        print(err)
        print(err.details)
        for node in err.details:
            print(node.astext())
