# Source code for EduNLP.SIF.tokenization.formula.formula

# coding: utf-8
# 2021/5/18 @ tongshiwei

import warnings

from .linear_token import linear_tokenize
from .ast_token import ast_tokenize


def tokenize(formula, method="linear", errors="raise", **kwargs):
    """
    Tokenize a formula with either the linear or the ast tokenizer.

    Parameters
    ----------
    formula
        The formula to tokenize; the examples below pass a LaTeX string.
    method
        "linear": delegate to ``linear_tokenize``
        "ast": delegate to ``ast_tokenize``
    errors: how to handle the exception occurs in ast tokenize
        "coerce": use linear_tokenize
        "raise": raise exception
        Only consulted when ``method`` is "ast" and ``ast_tokenize`` raises
        a TypeError; any value other than "coerce" re-raises.
    kwargs
        Extra keyword arguments forwarded unchanged to the selected
        tokenizer.

    Returns
    -------
    Whatever the selected tokenizer returns (see Examples).

    Raises
    ------
    TypeError
        If ``method`` is neither "linear" nor "ast", or if ``ast_tokenize``
        raises a TypeError and ``errors`` is not "coerce".

    Examples
    --------
    >>> tokenize(r"\\frac{\\pi}{x + y} + 1 = x")
    ['\\\\frac', '{', '\\\\pi', '}', '{', 'x', '+', 'y', '}', '+', '1', '=', 'x']
    >>> tokenize(r"\\frac{\\pi}{x + y} + 1 = x", method="ast", ord2token=True)
    <Formula: \\frac{\\pi}{x + y} + 1 = x>
    >>> tokenize(r"\\frac{\\pi}{x + y} + 1 = x", method="ast", ord2token=True, return_type="list")
    ['mathord', '{ }', 'mathord', '+', 'mathord', '{ }', '\\\\frac', '+', 'textord', '=', 'mathord']
    """
    if method == "linear":
        return linear_tokenize(formula, **kwargs)

    if method == "ast":
        try:
            return ast_tokenize(formula, **kwargs)
        except TypeError:  # pragma: no cover
            if errors == "coerce":
                warnings.warn("A type error is detected, linear tokenize is applied")
                # kwargs are deliberately not forwarded here, as in the
                # original code -- presumably because they are ast-specific
                # options (e.g. ord2token, return_type); confirm against
                # linear_tokenize's signature.
                return linear_tokenize(formula)
            # Bare raise re-raises the active exception unchanged.
            raise

    # Wrap in a one-element tuple so a tuple-valued ``method`` is reported
    # verbatim instead of being unpacked as %-format arguments.
    raise TypeError("Unknown method type: %s" % (method,))