# Source code for fastpyxl.xml.functions

# Copyright (c) 2010-2024 fastpyxl

"""
XML compatibility functions
"""

# Python stdlib imports
import re

from fastpyxl import DEFUSEDXML, LXML

if LXML is True:
    from lxml.etree import (  # noqa: F401
        Element,
        SubElement,
        register_namespace,
        QName,
        xmlfile,
        XMLParser,
    )
    from lxml.etree import XMLSyntaxError
    from lxml.etree import fromstring as _lxml_fromstring, tostring

    # Parser used whenever the caller does not supply one: entity resolution,
    # DTD loading and network access are switched off, and huge_tree is left
    # disabled so lxml's built-in size/depth restrictions stay in force.
    safe_parser = XMLParser(
        resolve_entities=False,
        load_dtd=False,
        no_network=True,
        huge_tree=False,
    )

    def fromstring(source, parser=None):
        """Parse XML with lxml and return the root element.

        :param source: XML as ``str``/``bytes``, or an object with a
            ``read()`` method whose result is parsed.
        :param parser: optional lxml parser; ``safe_parser`` is used when
            this is ``None`` (or otherwise falsy).
        :raises ValueError: if content read from a file-like *source*
            contains ``<!doctype`` (case-insensitive), or if lxml reports an
            ``XMLSyntaxError`` (re-raised as ``ValueError`` with the original
            exception chained).

        Only content obtained through ``read()`` is scanned for a DOCTYPE;
        strings and bytes passed in directly go straight to the parser.
        """
        if hasattr(source, "read"):
            source = source.read()
            # The needle must match the payload type: bytes for binary
            # reads, str for text reads. Anything else is left unchecked.
            if isinstance(source, (bytes, bytearray)):
                has_doctype = b"<!doctype" in source.lower()
            elif isinstance(source, str):
                has_doctype = "<!doctype" in source.lower()
            else:
                has_doctype = False
            if has_doctype:
                raise ValueError("DOCTYPE declarations are not supported")
        try:
            return _lxml_fromstring(source, parser=parser or safe_parser)
        except XMLSyntaxError as exc:
            # Callers get ValueError rather than an lxml-specific exception.
            raise ValueError(str(exc)) from exc

else:
    from xml.etree.ElementTree import (  # noqa: F401
        Element,
        SubElement,
        fromstring as _stdlib_fromstring,
        tostring,
        QName,
        register_namespace,
    )
    from et_xmlfile import xmlfile  # noqa: F401
    if DEFUSEDXML is True:
        # Replaces the ElementTree parser imported just above.
        from defusedxml.ElementTree import fromstring as _stdlib_fromstring

    def fromstring(source, parser=None):
        """Parse XML without lxml and return the root element.

        :param source: XML as ``str``/``bytes``, or an object with a
            ``read()`` method whose result is parsed.
        :param parser: accepted so the signature matches the lxml variant;
            it is not used here.

        Exceptions raised by the underlying parser propagate unchanged.
        """
        if hasattr(source, "read"):
            source = source.read()
        return _stdlib_fromstring(source)
from xml.etree.ElementTree import iterparse  # noqa: F401
from fastpyxl.xml.constants import (
    CHART_NS,
    DRAWING_NS,
    SHEET_DRAWING_NS,
    CHART_DRAWING_NS,
    SHEET_MAIN_NS,
    REL_NS,
    VTYPES_NS,
    COREPROPS_NS,
    CUSTPROPS_NS,
    DCTERMS_NS,
    DCTERMS_PREFIX,
    XML_NS
)

# Bind fixed prefixes to the namespaces used by this package, using whichever
# register_namespace implementation was imported above.
register_namespace(DCTERMS_PREFIX, DCTERMS_NS)
register_namespace('dcmitype', 'http://purl.org/dc/dcmitype/')
register_namespace('cp', COREPROPS_NS)
register_namespace('c', CHART_NS)
register_namespace('a', DRAWING_NS)
register_namespace('s', SHEET_MAIN_NS)
register_namespace('r', REL_NS)
register_namespace('vt', VTYPES_NS)
register_namespace('xdr', SHEET_DRAWING_NS)
register_namespace('cdr', CHART_DRAWING_NS)
register_namespace('xml', XML_NS)
register_namespace('cust', CUSTPROPS_NS)

from functools import partial

# Rebind tostring so that callers get UTF-8 output unless they say otherwise.
tostring = partial(tostring, encoding="utf-8")

# Splits an optional leading "{namespace}" from the rest of a tag name.
NS_REGEX = re.compile("({(?P<namespace>.*)})?(?P<localname>.*)")
def localname(node):
    """Return the tag of *node* without its ``{namespace}`` prefix.

    :param node: an element-like object exposing a ``tag`` attribute.
    :returns: ``"comment"`` when ``node.tag`` is callable (ElementTree uses
        a factory function as the tag of comments and processing
        instructions, so both are reported this way); otherwise the
        ``localname`` group matched by ``NS_REGEX``.
    """
    if callable(node.tag):
        return "comment"
    m = NS_REGEX.match(node.tag)
    if m is None:
        # Defensive fallback: hand the tag back untouched if nothing matched.
        return node.tag
    return m.group('localname')
def whitespace(node):
    """Flag *node* with ``xml:space="preserve"`` when its text needs it.

    The attribute is set only when the text has leading or trailing
    whitespace around some non-whitespace content. Text that is absent,
    empty, or made up solely of whitespace leaves the node untouched.

    :param node: an element-like object with a ``text`` attribute and a
        ``set(name, value)`` method.
    :returns: ``None``; the node is modified in place.
    """
    text = node.text
    if text is None:
        # An element without text has nothing to preserve.
        return
    stripped = text.strip()
    if stripped and text != stripped:
        node.set("{%s}space" % XML_NS, "preserve")