# This file is part of CairoSVG # Copyright © 2010-2018 Kozea # # This library is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) any # later version. # # This library is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more # details. # # You should have received a copy of the GNU Lesser General Public License # along with CairoSVG. If not, see . """ SVG Parser. """ import gzip import re from urllib.parse import urlunparse from xml.etree.ElementTree import Element import cssselect2 from defusedxml import ElementTree from . import css from .features import match_features from .helpers import flatten, pop_rotation, rotations from .url import fetch, parse_url, read_url # 'display' is actually inherited but handled differently because some markers # are part of a none-displaying group (see test painting-marker-07-f.svg) NOT_INHERITED_ATTRIBUTES = frozenset(( 'clip', 'clip-path', 'display', 'filter', 'height', 'id', 'mask', 'opacity', 'overflow', 'rotate', 'stop-color', 'stop-opacity', 'style', 'transform', 'viewBox', 'width', 'x', 'y', 'dx', 'dy', '{http://www.w3.org/1999/xlink}href', 'href', )) COLOR_ATTRIBUTES = frozenset(( 'fill', 'flood-color', 'lighting-color', 'stop-color', 'stroke', )) def handle_white_spaces(string, preserve): """Handle white spaces in text nodes. See http://www.w3.org/TR/SVG/text.html#WhiteSpace """ if not string: return '' if preserve: return re.sub('[\n\r\t]', ' ', string) else: string = re.sub('[\n\r]', '', string) string = re.sub('\t', ' ', string) return re.sub(' +', ' ', string) def normalize_style_declaration(name, value): """Normalize style declaration consisting of name/value pair. Names are always case insensitive, make all lowercase. Values are case insensitive in most cases. Adapt for 'specials': id - case sensitive identifier class - case sensitive identifier(s) font-family - case sensitive name(s) font - shorthand in which font-family is case sensitive any declaration with url in value - url is case sensitive """ name = name.strip().lower() value = value.strip() if name in CASE_SENSITIVE_STYLE_METHODS: value = CASE_SENSITIVE_STYLE_METHODS[name](value) else: value = value.lower() return name, value def normalize_noop_style_declaration(value): """No-operation for normalization where value is case sensitive. This is actually the exception to the rule. Normally value will be made lowercase (see normalize_style_declaration above). """ return value def normalize_url_style_declaration(value): """Normalize style declaration, but keep URL's as-is. Lowercase everything except for the URL. """ regex_style = re.compile(r""" (.*?) # non-URL part (will be normalized) (?: url\(\s* # url( (?: "(?:\\.|[^"])*" # "" | \'(?:\\.|[^\'])*\' # '' | (?:\\.|[^\)])* # ) \s*\) # ) |$ ) """, re.IGNORECASE | re.VERBOSE) for match in regex_style.finditer(value): value_start = value[:match.start()] if match.start() > 0 else '' normalized_value = match.group(1).lower() value_end = value[match.start() + len(normalized_value):] value = value_start + normalized_value + value_end return value def normalize_font_style_declaration(value): """Make first part of font style declaration lowercase (case insensitive). Lowercase first part of declaration. Only the font name is case sensitive. The font name is at the end of the declaration and can be 'recognized' by being preceded by a size or line height. There can actually be multiple names. So the first part is 'calculated' by selecting everything up to and including the last valid token followed by a size or line height (both starting with a number). A valid token is either a size/length or an identifier. See http://www.w3.org/TR/css-fonts-3/#font-prop """ return re.sub(r""" ^( (\d[^\s,]*|\w[^\s,]*) # , or (\s+|\s*,\s*) # and/or comma )* # Repeat until last \d[^\s,]* # or """, lambda match: match.group().lower(), value, 0, re.VERBOSE) class Node(dict): """SVG node with dict-like properties and children.""" def __init__(self, element, style, url_fetcher, parent=None, parent_children=False, url=None, unsafe=False): """Create the Node from ElementTree ``node``, with ``parent`` Node.""" super().__init__() self.children = () self.root = False node = element.etree_element self.element = element self.style = style self.tag = ( element.local_name if element.namespace_url in ('', 'http://www.w3.org/2000/svg') else '{%s}%s' % (element.namespace_url, element.local_name)) self.text = node.text self.url_fetcher = url_fetcher self.unsafe = unsafe # Only set xml_tree if it's not been set before (ie. if node is a tree) self.xml_tree = getattr(self, 'xml_tree', node) # Inherits from parent properties if parent is not None: self.update([ (attribute, parent[attribute]) for attribute in parent if attribute not in NOT_INHERITED_ATTRIBUTES]) self.url = url or parent.url self.parent = parent else: self.url = getattr(self, 'url', None) self.parent = getattr(self, 'parent', None) self.update(self.xml_tree.attrib) # Apply CSS rules style_attr = node.get('style') if style_attr: normal_attr, important_attr = css.parse_declarations(style_attr) else: normal_attr = [] important_attr = [] normal_matcher, important_matcher = style normal = [rule[-1] for rule in normal_matcher.match(element)] important = [rule[-1] for rule in important_matcher.match(element)] for declaration_lists in ( normal, [normal_attr], important, [important_attr]): for declarations in declaration_lists: for name, value in declarations: self[name] = value.strip() # Replace currentColor by a real color value for attribute in COLOR_ATTRIBUTES: if self.get(attribute) == 'currentColor': self[attribute] = self.get('color', 'black') # Replace inherit by the parent value for attribute in [ attribute for attribute in self if self[attribute] == 'inherit']: if parent is not None and attribute in parent: self[attribute] = parent.get(attribute) else: del self[attribute] # Manage text by creating children if self.tag in ('text', 'textPath', 'a'): self.children, _ = self.text_children( element, trailing_space=True, text_root=True) if parent_children: self.children = [ Node(child.element, style, self.url_fetcher, parent=self, unsafe=self.unsafe) for child in parent.children] elif not self.children: self.children = [] for child in element.iter_children(): if match_features(child.etree_element): self.children.append( Node(child, style, self.url_fetcher, parent=self, unsafe=self.unsafe)) if self.tag == 'switch': break def fetch_url(self, url, resource_type): return read_url(url, self.url_fetcher, resource_type) def text_children(self, element, trailing_space, text_root=False): """Create children and return them.""" children = [] space = '{http://www.w3.org/XML/1998/namespace}space' preserve = self.get(space) == 'preserve' self.text = handle_white_spaces(element.etree_element.text, preserve) if trailing_space and not preserve: self.text = self.text.lstrip(' ') original_rotate = rotations(self) rotate = list(original_rotate) if original_rotate: pop_rotation(self, original_rotate, rotate) if self.text: trailing_space = self.text.endswith(' ') for child_element in element.iter_children(): child = child_element.etree_element if child.tag in ('{http://www.w3.org/2000/svg}tref', 'tref'): href = child.get( '{http://www.w3.org/1999/xlink}href', child.get('href')) url = parse_url(href).geturl() child_tree = Tree( url=url, url_fetcher=self.url_fetcher, parent=self, unsafe=self.unsafe) child_tree.clear() child_tree.update(self) child_node = Node( child_element, self.style, self.url_fetcher, parent=child_tree, parent_children=True, unsafe=self.unsafe) child_node.tag = 'tspan' # Retrieve the referenced node and get its flattened text # and remove the node children. child = child_tree.xml_tree child.text = flatten(child) child_element = cssselect2.ElementWrapper.from_xml_root(child) else: child_node = Node( child_element, self.style, self.url_fetcher, parent=self, unsafe=self.unsafe) child_preserve = child_node.get(space) == 'preserve' child_node.text = handle_white_spaces(child.text, child_preserve) child_node.children, trailing_space = child_node.text_children( child_element, trailing_space) trailing_space = child_node.text.endswith(' ') if original_rotate and 'rotate' not in child_node: pop_rotation(child_node, original_rotate, rotate) children.append(child_node) if child.tail: anonymous_etree = Element('{http://www.w3.org/2000/svg}tspan') anonymous = Node( cssselect2.ElementWrapper.from_xml_root(anonymous_etree), self.style, self.url_fetcher, parent=self, unsafe=self.unsafe) anonymous.text = handle_white_spaces(child.tail, preserve) if original_rotate: pop_rotation(anonymous, original_rotate, rotate) if trailing_space and not preserve: anonymous.text = anonymous.text.lstrip(' ') if anonymous.text: trailing_space = anonymous.text.endswith(' ') children.append(anonymous) if text_root and not children and not preserve: self.text = self.text.rstrip(' ') return children, trailing_space def get_href(self): return self.get('{http://www.w3.org/1999/xlink}href', self.get('href')) class Tree(Node): """SVG tree.""" def __new__(cls, **kwargs): tree_cache = kwargs.get('tree_cache') if tree_cache and kwargs.get('url'): parsed_url = parse_url(kwargs['url']) element_id = parsed_url.fragment parent = kwargs.get('parent') unsafe = kwargs.get('unsafe') if any(parsed_url[:-1]): url = urlunparse(parsed_url[:-1] + ('',)) elif parent: url = parent.url else: url = None if url and (url, element_id) in tree_cache: cached_tree = tree_cache[(url, element_id)] new_tree = Node( cached_tree.element, cached_tree.style, cached_tree.url_fetcher, parent, unsafe=unsafe) new_tree.xml_tree = cached_tree.xml_tree new_tree.url = url new_tree.tag = cached_tree.tag new_tree.root = True return new_tree return super().__new__(cls) def __init__(self, **kwargs): """Create the Tree from SVG ``text``.""" bytestring = kwargs.get('bytestring') file_obj = kwargs.get('file_obj') url = kwargs.get('url') unsafe = kwargs.get('unsafe') parent = kwargs.get('parent') parent_children = kwargs.get('parent_children') tree_cache = kwargs.get('tree_cache') element_id = None self.url_fetcher = kwargs.get('url_fetcher', fetch) if bytestring is not None: self.url = url elif file_obj is not None: bytestring = file_obj.read() self.url = getattr(file_obj, 'name', None) if self.url == '': self.url = None elif url is not None: parent_url = parent.url if parent else None parsed_url = parse_url(url, parent_url) if parsed_url.fragment: self.url = urlunparse(parsed_url[:-1] + ('',)) element_id = parsed_url.fragment else: self.url = parsed_url.geturl() element_id = None self.url = self.url or None else: raise TypeError( 'No input. Use one of bytestring, file_obj or url.') self_is_parent = ( (parent and self.url == parent.url) or (url and url.startswith('#') and not self.url)) if self_is_parent: root_parent = parent while root_parent.parent is not None: root_parent = root_parent.parent tree = root_parent.xml_tree else: if not bytestring: bytestring = self.fetch_url( parse_url(self.url), 'image/svg+xml') if len(bytestring) >= 2 and bytestring[:2] == b'\x1f\x8b': bytestring = gzip.decompress(bytestring) tree = ElementTree.fromstring( bytestring, forbid_entities=not unsafe, forbid_external=not unsafe) self.xml_tree = tree root = cssselect2.ElementWrapper.from_xml_root(tree) style = parent.style if parent else css.parse_stylesheets(self, url) if element_id: for element in root.iter_subtree(): if element.id == element_id: root = element self.xml_tree = element.etree_element break else: raise TypeError( 'No tag with id="{}" found.'.format(element_id)) super().__init__( root, style, self.url_fetcher, parent, parent_children, self.url, unsafe) self.root = True if tree_cache is not None and self.url: tree_cache[(self.url, self.get('id'))] = self CASE_SENSITIVE_STYLE_METHODS = { 'id': normalize_noop_style_declaration, 'class': normalize_noop_style_declaration, 'font-family': normalize_noop_style_declaration, 'font': normalize_font_style_declaration, 'clip-path': normalize_url_style_declaration, 'color-profile': normalize_url_style_declaration, 'cursor': normalize_url_style_declaration, 'fill': normalize_url_style_declaration, 'filter': normalize_url_style_declaration, 'marker-start': normalize_url_style_declaration, 'marker-mid': normalize_url_style_declaration, 'marker-end': normalize_url_style_declaration, 'mask': normalize_url_style_declaration, 'stroke': normalize_url_style_declaration, }