# coding: utf8 """ cssselect2.parser ----------------- A parser for CSS selectors, based on the tinycss tokenizer. :copyright: (c) 2012 by Simon Sapin, 2017 by Guillaume Ayoub. :license: BSD, see LICENSE for more details. """ from __future__ import unicode_literals from tinycss2 import parse_component_value_list from ._compat import basestring __all__ = ['parse'] def parse(input, namespaces=None): """ :param input: A :term:`string`, or an iterable of :term:`component values`. """ if isinstance(input, basestring): input = parse_component_value_list(input) tokens = TokenStream(input) namespaces = namespaces or {} yield parse_selector(tokens, namespaces) tokens.skip_whitespace_and_comment() while 1: next = tokens.next() if next is None: return elif next == ',': yield parse_selector(tokens, namespaces) else: raise SelectorError(next, 'unpexpected %s token.' % next.type) def parse_selector(tokens, namespaces): result, pseudo_element = parse_compound_selector(tokens, namespaces) while 1: has_whitespace = tokens.skip_whitespace() while tokens.skip_comment(): has_whitespace = tokens.skip_whitespace() or has_whitespace if pseudo_element is not None: return Selector(result, pseudo_element) peek = tokens.peek() if peek is None or peek == ',': return Selector(result, pseudo_element) elif peek in ('>', '+', '~'): combinator = peek.value tokens.next() elif has_whitespace: combinator = ' ' else: return Selector(result, pseudo_element) compound, pseudo_element = parse_compound_selector(tokens, namespaces) result = CombinedSelector(result, combinator, compound) def parse_compound_selector(tokens, namespaces): type_selectors = parse_type_selector(tokens, namespaces) simple_selectors = type_selectors if type_selectors is not None else [] while 1: simple_selector, pseudo_element = parse_simple_selector( tokens, namespaces) if pseudo_element is not None or simple_selector is None: break simple_selectors.append(simple_selector) if (simple_selectors or type_selectors is not None or pseudo_element is not None): return CompoundSelector(simple_selectors), pseudo_element else: peek = tokens.peek() raise SelectorError(peek, 'expected a compound selector, got %s' % (peek.type if peek else 'EOF')) def parse_type_selector(tokens, namespaces): tokens.skip_whitespace() qualified_name = parse_qualified_name(tokens, namespaces) if qualified_name is None: return None simple_selectors = [] namespace, local_name = qualified_name if local_name is not None: simple_selectors.append(LocalNameSelector(local_name)) if namespace is not None: simple_selectors.append(NamespaceSelector(namespace)) return simple_selectors def parse_simple_selector(tokens, namespaces, in_negation=False): peek = tokens.peek() if peek is None: return None, None if peek.type == 'hash' and peek.is_identifier: tokens.next() return IDSelector(peek.value), None elif peek == '.': tokens.next() next = tokens.next() if next is None or next.type != 'ident': raise SelectorError( next, 'Expected a class name, got %s' % next) return ClassSelector(next.value), None elif peek.type == '[] block': tokens.next() attr = parse_attribute_selector(TokenStream(peek.content), namespaces) return attr, None elif peek == ':': tokens.next() next = tokens.next() if next == ':': next = tokens.next() if next is None or next.type != 'ident': raise SelectorError( next, 'Expected a pseudo-element name, got %s' % next) return None, next.lower_value elif next is not None and next.type == 'ident': name = next.lower_value if name in ('before', 'after', 'first-line', 'first-letter'): return None, name else: return PseudoClassSelector(name), None elif next is not None and next.type == 'function': name = next.lower_name if name == 'not': if in_negation: raise SelectorError(next, 'nested :not()') return parse_negation(next, namespaces), None else: return ( FunctionalPseudoClassSelector(name, next.arguments), None) else: raise SelectorError(next, 'unexpected %s token.' % next) else: return None, None def parse_negation(negation_token, namespaces): tokens = TokenStream(negation_token.arguments) type_selectors = parse_type_selector(tokens, namespaces) if type_selectors is not None: return NegationSelector(type_selectors) simple_selector, pseudo_element = parse_simple_selector( tokens, namespaces, in_negation=True) tokens.skip_whitespace() if pseudo_element is None and tokens.next() is None: return NegationSelector([simple_selector]) else: raise SelectorError( negation_token, ':not() only accepts a simple selector') def parse_attribute_selector(tokens, namespaces): tokens.skip_whitespace() qualified_name = parse_qualified_name( tokens, namespaces, is_attribute=True) if qualified_name is None: next = tokens.next() raise SelectorError( next, 'expected attribute name, got %s' % next) namespace, local_name = qualified_name tokens.skip_whitespace() peek = tokens.peek() if peek is None: operator = None value = None elif peek in ('=', '~=', '|=', '^=', '$=', '*='): operator = peek.value tokens.next() tokens.skip_whitespace() next = tokens.next() if next is None or next.type not in ('ident', 'string'): next_type = 'None' if next is None else next.type raise SelectorError( next, 'expected attribute value, got %s' % next_type) value = next.value else: raise SelectorError( peek, 'expected attribute selector operator, got %s' % peek) tokens.skip_whitespace() next = tokens.next() if next is not None: raise SelectorError(next, 'expected ], got %s' % next.type) return AttributeSelector(namespace, local_name, operator, value) def parse_qualified_name(tokens, namespaces, is_attribute=False): """Returns None (not a qualified name) or (ns, local), in which None is a wildcard. The empty string for ns is "no namespace". """ peek = tokens.peek() if peek is None: return None if peek.type == 'ident': first_ident = tokens.next() peek = tokens.peek() if peek != '|': namespace = '' if is_attribute else namespaces.get(None, None) return namespace, (first_ident.value, first_ident.lower_value) tokens.next() namespace = namespaces.get(first_ident.value) if namespace is None: raise SelectorError( first_ident, 'undefined namespace prefix: ' + first_ident.value) elif peek == '*': next = tokens.next() peek = tokens.peek() if peek != '|': if is_attribute: raise SelectorError( next, 'Expected local name, got %s' % next.type) return namespaces.get(None, None), None tokens.next() namespace = None elif peek == '|': tokens.next() namespace = '' else: return None # If we get here, we just consumed '|' and set ``namespace`` next = tokens.next() if next.type == 'ident': return namespace, (next.value, next.lower_value) elif next == '*' and not is_attribute: return namespace, None else: raise SelectorError(next, 'Expected local name, got %s' % next.type) class SelectorError(ValueError): """A specialized ``ValueError`` for invalid selectors.""" class TokenStream(object): def __init__(self, tokens): self.tokens = iter(tokens) self.peeked = [] # In reversed order def next(self): if self.peeked: return self.peeked.pop() else: return next(self.tokens, None) def peek(self): if not self.peeked: self.peeked.append(next(self.tokens, None)) return self.peeked[-1] def skip(self, skip_types): found = False while 1: peek = self.peek() if peek is None or peek.type not in skip_types: break self.next() found = True return found def skip_whitespace(self): return self.skip(['whitespace']) def skip_comment(self): return self.skip(['comment']) def skip_whitespace_and_comment(self): return self.skip(['comment', 'whitespace']) class Selector(object): def __init__(self, tree, pseudo_element=None): self.parsed_tree = tree if pseudo_element is None: self.pseudo_element = pseudo_element #: Tuple of 3 integers: http://www.w3.org/TR/selectors/#specificity self.specificity = tree.specificity else: self.pseudo_element = pseudo_element a, b, c = tree.specificity self.specificity = a, b, c + 1 def __repr__(self): if self.pseudo_element is None: return repr(self.parsed_tree) else: return '%r::%s' % (self.parsed_tree, self.pseudo_element) class CombinedSelector(object): def __init__(self, left, combinator, right): #: Combined or compound selector self.left = left # One of `` `` (a single space), ``>``, ``+`` or ``~``. self.combinator = combinator #: compound selector self.right = right @property def specificity(self): a1, b1, c1 = self.left.specificity a2, b2, c2 = self.right.specificity return a1 + a2, b1 + b2, c1 + c2 def __repr__(self): return '%r%s%r' % (self.left, self.combinator, self.right) class CompoundSelector(object): """Aka. sequence of simple selectors, in Level 3.""" def __init__(self, simple_selectors): self.simple_selectors = simple_selectors @property def specificity(self): if self.simple_selectors: # zip(*foo) turns [(a1, b1, c1), (a2, b2, c2), ...] # into [(a1, a2, ...), (b1, b2, ...), (c1, c2, ...)] return tuple(map(sum, zip( *(sel.specificity for sel in self.simple_selectors)))) else: return 0, 0, 0 def __repr__(self): return ''.join(map(repr, self.simple_selectors)) class LocalNameSelector(object): specificity = 0, 0, 1 def __init__(self, local_name): self.local_name, self.lower_local_name = local_name def __repr__(self): return self.local_name class NamespaceSelector(object): specificity = 0, 0, 0 def __init__(self, namespace): #: The namespace URL as a string, #: or the empty string for elements not in any namespace. self.namespace = namespace def __repr__(self): if self.namespace == '': return '|' else: return '{%s}|' % self.namespace class IDSelector(object): specificity = 1, 0, 0 def __init__(self, ident): self.ident = ident def __repr__(self): return '#' + self.ident class ClassSelector(object): specificity = 0, 1, 0 def __init__(self, class_name): self.class_name = class_name def __repr__(self): return '.' + self.class_name class AttributeSelector(object): specificity = 0, 1, 0 def __init__(self, namespace, name, operator, value): self.namespace = namespace self.name, self.lower_name = name #: A string like ``=`` or ``~=``, or None for ``[attr]`` selectors self.operator = operator #: A string, or None for ``[attr]`` selectors self.value = value def __repr__(self): namespace = ('*|' if self.namespace is None else '{%s}' % self.namespace) return '[%s%s%s%r]' % (namespace, self.name, self.operator, self.value) class PseudoClassSelector(object): specificity = 0, 1, 0 def __init__(self, name): self.name = name def __repr__(self): return ':' + self.name class FunctionalPseudoClassSelector(object): specificity = 0, 1, 0 def __init__(self, name, arguments): self.name = name self.arguments = arguments def __repr__(self): return ':%s%r' % (self.name, tuple(self.arguments)) class NegationSelector(CompoundSelector): def __repr__(self): return ':not(%r)' % CompoundSelector.__repr__(self)