guessit
This commit is contained in:
10
lib/rebulk/__init__.py
Normal file
10
lib/rebulk/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Define simple search patterns in bulk to perform advanced matching on any string.
|
||||
"""
|
||||
# pylint:disable=import-self
|
||||
from .rebulk import Rebulk
|
||||
from .rules import Rule, CustomRule, AppendMatch, RemoveMatch, RenameMatch, AppendTags, RemoveTags
|
||||
from .processors import ConflictSolver, PrivateRemover, POST_PROCESS, PRE_PROCESS
|
||||
from .pattern import REGEX_AVAILABLE
|
||||
7
lib/rebulk/__version__.py
Normal file
7
lib/rebulk/__version__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '2.0.1.dev0'
|
||||
217
lib/rebulk/builder.py
Normal file
217
lib/rebulk/builder.py
Normal file
@@ -0,0 +1,217 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Base builder class for Rebulk
|
||||
"""
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from copy import deepcopy
|
||||
from logging import getLogger
|
||||
|
||||
from six import add_metaclass
|
||||
|
||||
from .loose import set_defaults
|
||||
from .pattern import RePattern, StringPattern, FunctionalPattern
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
@add_metaclass(ABCMeta)
|
||||
class Builder(object):
|
||||
"""
|
||||
Base builder class for patterns
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._defaults = {}
|
||||
self._regex_defaults = {}
|
||||
self._string_defaults = {}
|
||||
self._functional_defaults = {}
|
||||
self._chain_defaults = {}
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset all defaults.
|
||||
|
||||
:return:
|
||||
"""
|
||||
self.__init__()
|
||||
|
||||
def defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for all patterns
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._defaults, override=True)
|
||||
return self
|
||||
|
||||
def regex_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._regex_defaults, override=True)
|
||||
return self
|
||||
|
||||
def string_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for string patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._string_defaults, override=True)
|
||||
return self
|
||||
|
||||
def functional_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for functional patterns.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._functional_defaults, override=True)
|
||||
return self
|
||||
|
||||
def chain_defaults(self, **kwargs):
|
||||
"""
|
||||
Define default keyword arguments for patterns chain.
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(kwargs, self._chain_defaults, override=True)
|
||||
return self
|
||||
|
||||
def build_re(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new regular expression pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._regex_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return RePattern(*pattern, **kwargs)
|
||||
|
||||
def build_string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._string_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return StringPattern(*pattern, **kwargs)
|
||||
|
||||
def build_functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Builds a new functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
set_defaults(self._functional_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
return FunctionalPattern(*pattern, **kwargs)
|
||||
|
||||
def build_chain(self, **kwargs):
|
||||
"""
|
||||
Builds a new patterns chain
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
from .chain import Chain
|
||||
set_defaults(self._chain_defaults, kwargs)
|
||||
set_defaults(self._defaults, kwargs)
|
||||
chain = Chain(self, **kwargs)
|
||||
chain._defaults = deepcopy(self._defaults) # pylint: disable=protected-access
|
||||
chain._regex_defaults = deepcopy(self._regex_defaults) # pylint: disable=protected-access
|
||||
chain._functional_defaults = deepcopy(self._functional_defaults) # pylint: disable=protected-access
|
||||
chain._string_defaults = deepcopy(self._string_defaults) # pylint: disable=protected-access
|
||||
chain._chain_defaults = deepcopy(self._chain_defaults) # pylint: disable=protected-access
|
||||
return chain
|
||||
|
||||
@abstractmethod
|
||||
def pattern(self, *pattern):
|
||||
"""
|
||||
Register a list of Pattern instance
|
||||
:param pattern:
|
||||
:return:
|
||||
"""
|
||||
pass
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
return self.pattern(self.build_re(*pattern, **kwargs))
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
return self.pattern(self.build_string(*pattern, **kwargs))
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
functional = self.build_functional(*pattern, **kwargs)
|
||||
return self.pattern(functional)
|
||||
|
||||
def chain(self, **kwargs):
|
||||
"""
|
||||
Add patterns chain, using configuration of this rebulk
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
chain = self.build_chain(**kwargs)
|
||||
self.pattern(chain)
|
||||
return chain
|
||||
380
lib/rebulk/chain.py
Normal file
380
lib/rebulk/chain.py
Normal file
@@ -0,0 +1,380 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Chain patterns and handle repetiting capture group
|
||||
"""
|
||||
# pylint: disable=super-init-not-called
|
||||
import itertools
|
||||
|
||||
from .builder import Builder
|
||||
from .loose import call
|
||||
from .match import Match, Matches
|
||||
from .pattern import Pattern, filter_match_kwargs, BasePattern
|
||||
from .remodule import re
|
||||
|
||||
|
||||
class _InvalidChainException(Exception):
|
||||
"""
|
||||
Internal exception raised when a chain is not valid
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class Chain(Pattern, Builder):
|
||||
"""
|
||||
Definition of a pattern chain to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, parent, chain_breaker=None, **kwargs):
|
||||
Builder.__init__(self)
|
||||
call(Pattern.__init__, self, **kwargs)
|
||||
self._kwargs = kwargs
|
||||
self._match_kwargs = filter_match_kwargs(kwargs)
|
||||
if callable(chain_breaker):
|
||||
self.chain_breaker = chain_breaker
|
||||
else:
|
||||
self.chain_breaker = None
|
||||
self.parent = parent
|
||||
self.parts = []
|
||||
|
||||
def pattern(self, *pattern):
|
||||
"""
|
||||
|
||||
:param pattern:
|
||||
:return:
|
||||
"""
|
||||
if not pattern:
|
||||
raise ValueError("One pattern should be given to the chain")
|
||||
if len(pattern) > 1:
|
||||
raise ValueError("Only one pattern can be given to the chain")
|
||||
part = ChainPart(self, pattern[0])
|
||||
self.parts.append(part)
|
||||
return part
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Deeply close the chain
|
||||
:return: Rebulk instance
|
||||
"""
|
||||
parent = self.parent
|
||||
while isinstance(parent, Chain):
|
||||
parent = parent.parent
|
||||
return parent
|
||||
|
||||
def _match(self, pattern, input_string, context=None):
|
||||
# pylint: disable=too-many-locals,too-many-nested-blocks
|
||||
chain_matches = []
|
||||
chain_input_string = input_string
|
||||
offset = 0
|
||||
while offset < len(input_string):
|
||||
chain_found = False
|
||||
current_chain_matches = []
|
||||
valid_chain = True
|
||||
for chain_part in self.parts:
|
||||
try:
|
||||
chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
|
||||
context,
|
||||
with_raw_matches=True)
|
||||
|
||||
chain_found, chain_input_string, offset = \
|
||||
self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
|
||||
input_string, chain_input_string, offset, current_chain_matches)
|
||||
except _InvalidChainException:
|
||||
valid_chain = False
|
||||
if current_chain_matches:
|
||||
offset = current_chain_matches[0].raw_end
|
||||
break
|
||||
if not chain_found:
|
||||
break
|
||||
if current_chain_matches and valid_chain:
|
||||
match = self._build_chain_match(current_chain_matches, input_string)
|
||||
chain_matches.append(match)
|
||||
|
||||
return chain_matches
|
||||
|
||||
def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
|
||||
input_string, chain_input_string, offset, current_chain_matches):
|
||||
Chain._fix_matches_offset(chain_part_matches, input_string, offset)
|
||||
Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
|
||||
|
||||
if raw_chain_part_matches:
|
||||
grouped_matches_dict = self._group_by_match_index(chain_part_matches)
|
||||
grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)
|
||||
|
||||
for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
|
||||
chain_found = True
|
||||
offset = grouped_raw_matches[-1].raw_end
|
||||
chain_input_string = input_string[offset:]
|
||||
|
||||
if not chain_part.is_hidden:
|
||||
grouped_matches = grouped_matches_dict.get(match_index, [])
|
||||
if self._chain_breaker_eval(current_chain_matches + grouped_matches):
|
||||
current_chain_matches.extend(grouped_matches)
|
||||
return chain_found, chain_input_string, offset
|
||||
|
||||
def _process_match(self, match, match_index, child=False):
|
||||
"""
|
||||
Handle a match
|
||||
:param match:
|
||||
:type match:
|
||||
:param match_index:
|
||||
:type match_index:
|
||||
:param child:
|
||||
:type child:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
# pylint: disable=too-many-locals
|
||||
ret = super(Chain, self)._process_match(match, match_index, child=child)
|
||||
if ret:
|
||||
return True
|
||||
|
||||
if match.children:
|
||||
last_pattern = match.children[-1].pattern
|
||||
last_pattern_groups = self._group_by_match_index(
|
||||
[child_ for child_ in match.children if child_.pattern == last_pattern]
|
||||
)
|
||||
|
||||
if last_pattern_groups:
|
||||
original_children = Matches(match.children)
|
||||
original_end = match.end
|
||||
|
||||
for index in reversed(list(last_pattern_groups)):
|
||||
last_matches = last_pattern_groups[index]
|
||||
for last_match in last_matches:
|
||||
match.children.remove(last_match)
|
||||
match.end = match.children[-1].end if match.children else match.start
|
||||
ret = super(Chain, self)._process_match(match, match_index, child=child)
|
||||
if ret:
|
||||
return True
|
||||
|
||||
match.children = original_children
|
||||
match.end = original_end
|
||||
|
||||
return False
|
||||
|
||||
def _build_chain_match(self, current_chain_matches, input_string):
|
||||
start = None
|
||||
end = None
|
||||
for match in current_chain_matches:
|
||||
if start is None or start > match.start:
|
||||
start = match.start
|
||||
if end is None or end < match.end:
|
||||
end = match.end
|
||||
match = call(Match, start, end, pattern=self, input_string=input_string, **self._match_kwargs)
|
||||
for chain_match in current_chain_matches:
|
||||
if chain_match.children:
|
||||
for child in chain_match.children:
|
||||
match.children.append(child)
|
||||
if chain_match not in match.children:
|
||||
match.children.append(chain_match)
|
||||
chain_match.parent = match
|
||||
return match
|
||||
|
||||
def _chain_breaker_eval(self, matches):
|
||||
return not self.chain_breaker or not self.chain_breaker(Matches(matches))
|
||||
|
||||
@staticmethod
|
||||
def _fix_matches_offset(chain_part_matches, input_string, offset):
|
||||
for chain_part_match in chain_part_matches:
|
||||
if chain_part_match.input_string != input_string:
|
||||
chain_part_match.input_string = input_string
|
||||
chain_part_match.end += offset
|
||||
chain_part_match.start += offset
|
||||
if chain_part_match.children:
|
||||
Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
|
||||
|
||||
@staticmethod
|
||||
def _group_by_match_index(matches):
|
||||
grouped_matches_dict = dict()
|
||||
for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
|
||||
grouped_matches_dict[match_index] = list(match)
|
||||
return grouped_matches_dict
|
||||
|
||||
@property
|
||||
def match_options(self):
|
||||
return {}
|
||||
|
||||
@property
|
||||
def patterns(self):
|
||||
return [self]
|
||||
|
||||
def __repr__(self):
|
||||
defined = ""
|
||||
if self.defined_at:
|
||||
defined = "@%s" % (self.defined_at,)
|
||||
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
|
||||
|
||||
|
||||
class ChainPart(BasePattern):
|
||||
"""
|
||||
Part of a pattern chain.
|
||||
"""
|
||||
|
||||
def __init__(self, chain, pattern):
|
||||
self._chain = chain
|
||||
self.pattern = pattern
|
||||
self.repeater_start = 1
|
||||
self.repeater_end = 1
|
||||
self._hidden = False
|
||||
|
||||
@property
|
||||
def _is_chain_start(self):
|
||||
return self._chain.parts[0] == self
|
||||
|
||||
def matches(self, input_string, context=None, with_raw_matches=False):
|
||||
matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)
|
||||
|
||||
matches = self._truncate_repeater(matches, input_string)
|
||||
raw_matches = self._truncate_repeater(raw_matches, input_string)
|
||||
|
||||
self._validate_repeater(raw_matches)
|
||||
|
||||
if with_raw_matches:
|
||||
return matches, raw_matches
|
||||
|
||||
return matches
|
||||
|
||||
def _truncate_repeater(self, matches, input_string):
|
||||
if not matches:
|
||||
return matches
|
||||
|
||||
if not self._is_chain_start:
|
||||
separator = input_string[0:matches[0].initiator.raw_start]
|
||||
if separator:
|
||||
return []
|
||||
|
||||
j = 1
|
||||
for i in range(0, len(matches) - 1):
|
||||
separator = input_string[matches[i].initiator.raw_end:
|
||||
matches[i + 1].initiator.raw_start]
|
||||
if separator:
|
||||
break
|
||||
j += 1
|
||||
truncated = matches[:j]
|
||||
if self.repeater_end is not None:
|
||||
truncated = [m for m in truncated if m.match_index < self.repeater_end]
|
||||
return truncated
|
||||
|
||||
def _validate_repeater(self, matches):
|
||||
max_match_index = -1
|
||||
if matches:
|
||||
max_match_index = max([m.match_index for m in matches])
|
||||
if max_match_index + 1 < self.repeater_start:
|
||||
raise _InvalidChainException
|
||||
|
||||
def chain(self):
|
||||
"""
|
||||
Add patterns chain, using configuration from this chain
|
||||
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self._chain.chain()
|
||||
|
||||
def hidden(self, hidden=True):
|
||||
"""
|
||||
Hide chain part results from global chain result
|
||||
|
||||
:param hidden:
|
||||
:type hidden:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._hidden = hidden
|
||||
return self
|
||||
|
||||
@property
|
||||
def is_hidden(self):
|
||||
"""
|
||||
Check if the chain part is hidden
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self._hidden
|
||||
|
||||
def regex(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add re pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self._chain.regex(*pattern, **kwargs)
|
||||
|
||||
def functional(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add functional pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self._chain.functional(*pattern, **kwargs)
|
||||
|
||||
def string(self, *pattern, **kwargs):
|
||||
"""
|
||||
Add string pattern
|
||||
|
||||
:param pattern:
|
||||
:type pattern:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self._chain.string(*pattern, **kwargs)
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Close the chain builder to continue registering other patterns
|
||||
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self._chain.close()
|
||||
|
||||
def repeater(self, value):
|
||||
"""
|
||||
Define the repeater of the current chain part.
|
||||
|
||||
:param value:
|
||||
:type value:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
try:
|
||||
value = int(value)
|
||||
self.repeater_start = value
|
||||
self.repeater_end = value
|
||||
return self
|
||||
except ValueError:
|
||||
pass
|
||||
if value == '+':
|
||||
self.repeater_start = 1
|
||||
self.repeater_end = None
|
||||
if value == '*':
|
||||
self.repeater_start = 0
|
||||
self.repeater_end = None
|
||||
elif value == '?':
|
||||
self.repeater_start = 0
|
||||
self.repeater_end = 1
|
||||
else:
|
||||
match = re.match(r'\{\s*(\d*)\s*,?\s*(\d*)\s*\}', value)
|
||||
if match:
|
||||
start = match.group(1)
|
||||
end = match.group(2)
|
||||
if start or end:
|
||||
self.repeater_start = int(start) if start else 0
|
||||
self.repeater_end = int(end) if end else None
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
return "%s({%s,%s})" % (self.pattern, self.repeater_start, self.repeater_end)
|
||||
56
lib/rebulk/debug.py
Normal file
56
lib/rebulk/debug.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Debug tools.
|
||||
|
||||
Can be configured by changing values of those variable.
|
||||
|
||||
DEBUG = False
|
||||
Enable this variable to activate debug features (like defined_at parameters). It can slow down Rebulk
|
||||
|
||||
LOG_LEVEL = 0
|
||||
Default log level of generated rebulk logs.
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import logging
|
||||
import os
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
DEBUG = False
|
||||
LOG_LEVEL = logging.DEBUG
|
||||
|
||||
|
||||
class Frame(namedtuple('Frame', ['lineno', 'package', 'name', 'filename'])):
|
||||
"""
|
||||
Stack frame representation.
|
||||
"""
|
||||
__slots__ = ()
|
||||
|
||||
def __repr__(self):
|
||||
return "%s#L%s" % (os.path.basename(self.filename), self.lineno)
|
||||
|
||||
|
||||
def defined_at():
|
||||
"""
|
||||
Get definition location of a pattern or a match (outside of rebulk package).
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if DEBUG:
|
||||
frame = inspect.currentframe()
|
||||
while frame:
|
||||
try:
|
||||
if frame.f_globals['__package__'] != __package__:
|
||||
break
|
||||
except KeyError: # pragma:no cover
|
||||
# If package is missing, consider we are in. Workaround for python 3.3.
|
||||
break
|
||||
frame = frame.f_back
|
||||
ret = Frame(frame.f_lineno,
|
||||
frame.f_globals.get('__package__'),
|
||||
frame.f_globals.get('__name__'),
|
||||
frame.f_code.co_filename)
|
||||
del frame
|
||||
return ret
|
||||
33
lib/rebulk/formatters.py
Normal file
33
lib/rebulk/formatters.py
Normal file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Formatter functions to use in patterns.
|
||||
|
||||
All those function have last argument as match.value (str).
|
||||
"""
|
||||
|
||||
|
||||
def formatters(*chained_formatters):
|
||||
"""
|
||||
Chain formatter functions.
|
||||
:param chained_formatters:
|
||||
:type chained_formatters:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
|
||||
def formatters_chain(input_string): # pylint:disable=missing-docstring
|
||||
for chained_formatter in chained_formatters:
|
||||
input_string = chained_formatter(input_string)
|
||||
return input_string
|
||||
|
||||
return formatters_chain
|
||||
|
||||
|
||||
def default_formatter(input_string):
|
||||
"""
|
||||
Default formatter
|
||||
:param input_string:
|
||||
:return:
|
||||
"""
|
||||
return input_string
|
||||
127
lib/rebulk/introspector.py
Normal file
127
lib/rebulk/introspector.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Introspect rebulk object to retrieve capabilities.
|
||||
"""
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import defaultdict
|
||||
|
||||
import six
|
||||
from .pattern import StringPattern, RePattern, FunctionalPattern
|
||||
from .utils import extend_safe
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Description(object):
|
||||
"""
|
||||
Abstract class for a description.
|
||||
"""
|
||||
@property
|
||||
@abstractmethod
|
||||
def properties(self): # pragma: no cover
|
||||
"""
|
||||
Properties of described object.
|
||||
:return: all properties that described object can generate grouped by name.
|
||||
:rtype: dict
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class PatternDescription(Description):
|
||||
"""
|
||||
Description of a pattern.
|
||||
"""
|
||||
def __init__(self, pattern): # pylint:disable=too-many-branches
|
||||
self.pattern = pattern
|
||||
self._properties = defaultdict(list)
|
||||
|
||||
if pattern.properties:
|
||||
for key, values in pattern.properties.items():
|
||||
extend_safe(self._properties[key], values)
|
||||
elif 'value' in pattern.match_options:
|
||||
self._properties[pattern.name].append(pattern.match_options['value'])
|
||||
elif isinstance(pattern, StringPattern):
|
||||
extend_safe(self._properties[pattern.name], pattern.patterns)
|
||||
elif isinstance(pattern, RePattern):
|
||||
if pattern.name and pattern.name not in pattern.private_names:
|
||||
extend_safe(self._properties[pattern.name], [None])
|
||||
if not pattern.private_children:
|
||||
for regex_pattern in pattern.patterns:
|
||||
for group_name, values in regex_pattern.groupindex.items():
|
||||
if group_name not in pattern.private_names:
|
||||
extend_safe(self._properties[group_name], [None])
|
||||
elif isinstance(pattern, FunctionalPattern):
|
||||
if pattern.name and pattern.name not in pattern.private_names:
|
||||
extend_safe(self._properties[pattern.name], [None])
|
||||
|
||||
|
||||
@property
|
||||
def properties(self):
|
||||
"""
|
||||
Properties for this rule.
|
||||
:return:
|
||||
:rtype: dict
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
|
||||
class RuleDescription(Description):
|
||||
"""
|
||||
Description of a rule.
|
||||
"""
|
||||
def __init__(self, rule):
|
||||
self.rule = rule
|
||||
|
||||
self._properties = defaultdict(list)
|
||||
|
||||
if rule.properties:
|
||||
for key, values in rule.properties.items():
|
||||
extend_safe(self._properties[key], values)
|
||||
|
||||
@property
|
||||
def properties(self):
|
||||
"""
|
||||
Properties for this rule.
|
||||
:return:
|
||||
:rtype: dict
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
|
||||
class Introspection(Description):
|
||||
"""
|
||||
Introspection results.
|
||||
"""
|
||||
def __init__(self, rebulk, context=None):
|
||||
self.patterns = [PatternDescription(pattern) for pattern in rebulk.effective_patterns(context)
|
||||
if not pattern.private and not pattern.marker]
|
||||
self.rules = [RuleDescription(rule) for rule in rebulk.effective_rules(context)]
|
||||
|
||||
@property
|
||||
def properties(self):
|
||||
"""
|
||||
Properties for Introspection results.
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
properties = defaultdict(list)
|
||||
for pattern in self.patterns:
|
||||
for key, values in pattern.properties.items():
|
||||
extend_safe(properties[key], values)
|
||||
for rule in self.rules:
|
||||
for key, values in rule.properties.items():
|
||||
extend_safe(properties[key], values)
|
||||
return properties
|
||||
|
||||
|
||||
def introspect(rebulk, context=None):
|
||||
"""
|
||||
Introspect a Rebulk instance to grab defined objects and properties that can be generated.
|
||||
:param rebulk:
|
||||
:type rebulk: Rebulk
|
||||
:param context:
|
||||
:type context:
|
||||
:return: Introspection instance
|
||||
:rtype: Introspection
|
||||
"""
|
||||
return Introspection(rebulk, context)
|
||||
242
lib/rebulk/loose.py
Normal file
242
lib/rebulk/loose.py
Normal file
@@ -0,0 +1,242 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Various utilities functions
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
from inspect import isclass
|
||||
try:
|
||||
from inspect import getfullargspec as getargspec
|
||||
|
||||
_fullargspec_supported = True
|
||||
except ImportError:
|
||||
_fullargspec_supported = False
|
||||
from inspect import getargspec
|
||||
|
||||
from .utils import is_iterable
|
||||
|
||||
if sys.version_info < (3, 4, 0): # pragma: no cover
|
||||
def _constructor(class_):
|
||||
"""
|
||||
Retrieves constructor from given class
|
||||
|
||||
:param class_:
|
||||
:type class_: class
|
||||
:return: constructor from given class
|
||||
:rtype: callable
|
||||
"""
|
||||
return class_.__init__
|
||||
else: # pragma: no cover
|
||||
def _constructor(class_):
|
||||
"""
|
||||
Retrieves constructor from given class
|
||||
|
||||
:param class_:
|
||||
:type class_: class
|
||||
:return: constructor from given class
|
||||
:rtype: callable
|
||||
"""
|
||||
return class_
|
||||
|
||||
|
||||
def call(function, *args, **kwargs):
|
||||
"""
|
||||
Call a function or constructor with given args and kwargs after removing args and kwargs that doesn't match
|
||||
function or constructor signature
|
||||
|
||||
:param function: Function or constructor to call
|
||||
:type function: callable
|
||||
:param args:
|
||||
:type args:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return: sale vakye as default function call
|
||||
:rtype: object
|
||||
"""
|
||||
func = constructor_args if isclass(function) else function_args
|
||||
call_args, call_kwargs = func(function, *args, **kwargs)
|
||||
return function(*call_args, **call_kwargs)
|
||||
|
||||
|
||||
def function_args(callable_, *args, **kwargs):
|
||||
"""
|
||||
Return (args, kwargs) matching the function signature
|
||||
|
||||
:param callable: callable to inspect
|
||||
:type callable: callable
|
||||
:param args:
|
||||
:type args:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return: (args, kwargs) matching the function signature
|
||||
:rtype: tuple
|
||||
"""
|
||||
argspec = getargspec(callable_) # pylint:disable=deprecated-method
|
||||
return argspec_args(argspec, False, *args, **kwargs)
|
||||
|
||||
|
||||
def constructor_args(class_, *args, **kwargs):
|
||||
"""
|
||||
Return (args, kwargs) matching the function signature
|
||||
|
||||
:param callable: callable to inspect
|
||||
:type callable: Callable
|
||||
:param args:
|
||||
:type args:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return: (args, kwargs) matching the function signature
|
||||
:rtype: tuple
|
||||
"""
|
||||
argspec = getargspec(_constructor(class_)) # pylint:disable=deprecated-method
|
||||
return argspec_args(argspec, True, *args, **kwargs)
|
||||
|
||||
|
||||
def argspec_args(argspec, constructor, *args, **kwargs):
|
||||
"""
|
||||
Return (args, kwargs) matching the argspec object
|
||||
|
||||
:param argspec: argspec to use
|
||||
:type argspec: argspec
|
||||
:param constructor: is it a constructor ?
|
||||
:type constructor: bool
|
||||
:param args:
|
||||
:type args:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return: (args, kwargs) matching the function signature
|
||||
:rtype: tuple
|
||||
"""
|
||||
if argspec.varkw:
|
||||
call_kwarg = kwargs
|
||||
else:
|
||||
call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension
|
||||
if argspec.varargs:
|
||||
call_args = args
|
||||
else:
|
||||
call_args = args[:len(argspec.args) - (1 if constructor else 0)]
|
||||
return call_args, call_kwarg
|
||||
|
||||
|
||||
if not _fullargspec_supported:
|
||||
def argspec_args_legacy(argspec, constructor, *args, **kwargs):
|
||||
"""
|
||||
Return (args, kwargs) matching the argspec object
|
||||
|
||||
:param argspec: argspec to use
|
||||
:type argspec: argspec
|
||||
:param constructor: is it a constructor ?
|
||||
:type constructor: bool
|
||||
:param args:
|
||||
:type args:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return: (args, kwargs) matching the function signature
|
||||
:rtype: tuple
|
||||
"""
|
||||
if argspec.keywords:
|
||||
call_kwarg = kwargs
|
||||
else:
|
||||
call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args) # Python 2.6 dict comprehension
|
||||
if argspec.varargs:
|
||||
call_args = args
|
||||
else:
|
||||
call_args = args[:len(argspec.args) - (1 if constructor else 0)]
|
||||
return call_args, call_kwarg
|
||||
|
||||
|
||||
argspec_args = argspec_args_legacy
|
||||
|
||||
|
||||
def ensure_list(param):
|
||||
"""
|
||||
Retrieves a list from given parameter.
|
||||
|
||||
:param param:
|
||||
:type param:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not param:
|
||||
param = []
|
||||
elif not is_iterable(param):
|
||||
param = [param]
|
||||
return param
|
||||
|
||||
|
||||
def ensure_dict(param, default_value, default_key=None):
|
||||
"""
|
||||
Retrieves a dict and a default value from given parameter.
|
||||
|
||||
if parameter is not a dict, it will be promoted as the default value.
|
||||
|
||||
:param param:
|
||||
:type param:
|
||||
:param default_value:
|
||||
:type default_value:
|
||||
:param default_key:
|
||||
:type default_key:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not param:
|
||||
param = default_value
|
||||
if not isinstance(param, dict):
|
||||
if param:
|
||||
default_value = param
|
||||
return {default_key: param}, default_value
|
||||
return param, default_value
|
||||
|
||||
|
||||
def filter_index(collection, predicate=None, index=None):
|
||||
"""
|
||||
Filter collection with predicate function and index.
|
||||
|
||||
If index is not found, returns None.
|
||||
:param collection:
|
||||
:type collection: collection supporting iteration and slicing
|
||||
:param predicate: function to filter the collection with
|
||||
:type predicate: function
|
||||
:param index: position of a single element to retrieve
|
||||
:type index: int
|
||||
:return: filtered list, or single element of filtered list if index is defined
|
||||
:rtype: list or object
|
||||
"""
|
||||
if index is None and isinstance(predicate, int):
|
||||
index = predicate
|
||||
predicate = None
|
||||
if predicate:
|
||||
collection = collection.__class__(filter(predicate, collection))
|
||||
if index is not None:
|
||||
try:
|
||||
collection = collection[index]
|
||||
except IndexError:
|
||||
collection = None
|
||||
return collection
|
||||
|
||||
|
||||
def set_defaults(defaults, kwargs, override=False):
|
||||
"""
|
||||
Set defaults from defaults dict to kwargs dict
|
||||
|
||||
:param override:
|
||||
:type override:
|
||||
:param defaults:
|
||||
:type defaults:
|
||||
:param kwargs:
|
||||
:type kwargs:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if 'clear' in defaults.keys() and defaults.pop('clear'):
|
||||
kwargs.clear()
|
||||
for key, value in defaults.items():
|
||||
if key in kwargs:
|
||||
if isinstance(value, list) and isinstance(kwargs[key], list):
|
||||
kwargs[key] = list(value) + kwargs[key]
|
||||
elif isinstance(value, dict) and isinstance(kwargs[key], dict):
|
||||
set_defaults(value, kwargs[key])
|
||||
if key not in kwargs or override:
|
||||
kwargs[key] = value
|
||||
890
lib/rebulk/match.py
Normal file
890
lib/rebulk/match.py
Normal file
@@ -0,0 +1,890 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Classes and functions related to matches
|
||||
"""
|
||||
import copy
|
||||
import itertools
|
||||
from collections import defaultdict
|
||||
try:
|
||||
from collections.abc import MutableSequence
|
||||
except ImportError:
|
||||
from collections import MutableSequence
|
||||
|
||||
try:
|
||||
from collections import OrderedDict # pylint:disable=ungrouped-imports
|
||||
except ImportError: # pragma: no cover
|
||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
||||
import six
|
||||
|
||||
from .loose import ensure_list, filter_index
|
||||
from .utils import is_iterable
|
||||
from .debug import defined_at
|
||||
|
||||
|
||||
class MatchesDict(OrderedDict):
|
||||
"""
|
||||
A custom dict with matches property.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(MatchesDict, self).__init__()
|
||||
self.matches = defaultdict(list)
|
||||
self.values_list = defaultdict(list)
|
||||
|
||||
|
||||
class _BaseMatches(MutableSequence):
|
||||
"""
|
||||
A custom list[Match] that automatically maintains name, tag, start and end lookup structures.
|
||||
"""
|
||||
_base = list
|
||||
_base_add = _base.append
|
||||
_base_remove = _base.remove
|
||||
_base_extend = _base.extend
|
||||
|
||||
def __init__(self, matches=None, input_string=None): # pylint: disable=super-init-not-called
|
||||
self.input_string = input_string
|
||||
self._max_end = 0
|
||||
self._delegate = []
|
||||
self.__name_dict = None
|
||||
self.__tag_dict = None
|
||||
self.__start_dict = None
|
||||
self.__end_dict = None
|
||||
self.__index_dict = None
|
||||
if matches:
|
||||
self.extend(matches)
|
||||
|
||||
@property
|
||||
def _name_dict(self):
|
||||
if self.__name_dict is None:
|
||||
self.__name_dict = defaultdict(_BaseMatches._base)
|
||||
for name, values in itertools.groupby([m for m in self._delegate if m.name], lambda item: item.name):
|
||||
_BaseMatches._base_extend(self.__name_dict[name], values)
|
||||
|
||||
return self.__name_dict
|
||||
|
||||
@property
|
||||
def _start_dict(self):
|
||||
if self.__start_dict is None:
|
||||
self.__start_dict = defaultdict(_BaseMatches._base)
|
||||
for start, values in itertools.groupby([m for m in self._delegate], lambda item: item.start):
|
||||
_BaseMatches._base_extend(self.__start_dict[start], values)
|
||||
|
||||
return self.__start_dict
|
||||
|
||||
@property
|
||||
def _end_dict(self):
|
||||
if self.__end_dict is None:
|
||||
self.__end_dict = defaultdict(_BaseMatches._base)
|
||||
for start, values in itertools.groupby([m for m in self._delegate], lambda item: item.end):
|
||||
_BaseMatches._base_extend(self.__end_dict[start], values)
|
||||
|
||||
return self.__end_dict
|
||||
|
||||
@property
|
||||
def _tag_dict(self):
|
||||
if self.__tag_dict is None:
|
||||
self.__tag_dict = defaultdict(_BaseMatches._base)
|
||||
for match in self._delegate:
|
||||
for tag in match.tags:
|
||||
_BaseMatches._base_add(self.__tag_dict[tag], match)
|
||||
|
||||
return self.__tag_dict
|
||||
|
||||
@property
|
||||
def _index_dict(self):
|
||||
if self.__index_dict is None:
|
||||
self.__index_dict = defaultdict(_BaseMatches._base)
|
||||
for match in self._delegate:
|
||||
for index in range(*match.span):
|
||||
_BaseMatches._base_add(self.__index_dict[index], match)
|
||||
|
||||
return self.__index_dict
|
||||
|
||||
def _add_match(self, match):
|
||||
"""
|
||||
Add a match
|
||||
:param match:
|
||||
:type match: Match
|
||||
"""
|
||||
if self.__name_dict is not None:
|
||||
if match.name:
|
||||
_BaseMatches._base_add(self._name_dict[match.name], (match))
|
||||
if self.__tag_dict is not None:
|
||||
for tag in match.tags:
|
||||
_BaseMatches._base_add(self._tag_dict[tag], match)
|
||||
if self.__start_dict is not None:
|
||||
_BaseMatches._base_add(self._start_dict[match.start], match)
|
||||
if self.__end_dict is not None:
|
||||
_BaseMatches._base_add(self._end_dict[match.end], match)
|
||||
if self.__index_dict is not None:
|
||||
for index in range(*match.span):
|
||||
_BaseMatches._base_add(self._index_dict[index], match)
|
||||
if match.end > self._max_end:
|
||||
self._max_end = match.end
|
||||
|
||||
def _remove_match(self, match):
|
||||
"""
|
||||
Remove a match
|
||||
:param match:
|
||||
:type match: Match
|
||||
"""
|
||||
if self.__name_dict is not None:
|
||||
if match.name:
|
||||
_BaseMatches._base_remove(self._name_dict[match.name], match)
|
||||
if self.__tag_dict is not None:
|
||||
for tag in match.tags:
|
||||
_BaseMatches._base_remove(self._tag_dict[tag], match)
|
||||
if self.__start_dict is not None:
|
||||
_BaseMatches._base_remove(self._start_dict[match.start], match)
|
||||
if self.__end_dict is not None:
|
||||
_BaseMatches._base_remove(self._end_dict[match.end], match)
|
||||
if self.__index_dict is not None:
|
||||
for index in range(*match.span):
|
||||
_BaseMatches._base_remove(self._index_dict[index], match)
|
||||
if match.end >= self._max_end and not self._end_dict[match.end]:
|
||||
self._max_end = max(self._end_dict.keys())
|
||||
|
||||
def previous(self, match, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves the nearest previous matches.
|
||||
:param match:
|
||||
:type match:
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index: int
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
current = match.start
|
||||
while current > -1:
|
||||
previous_matches = self.ending(current)
|
||||
if previous_matches:
|
||||
return filter_index(previous_matches, predicate, index)
|
||||
current -= 1
|
||||
return filter_index(_BaseMatches._base(), predicate, index)
|
||||
|
||||
def next(self, match, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves the nearest next matches.
|
||||
:param match:
|
||||
:type match:
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index: int
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
current = match.start + 1
|
||||
while current <= self._max_end:
|
||||
next_matches = self.starting(current)
|
||||
if next_matches:
|
||||
return filter_index(next_matches, predicate, index)
|
||||
current += 1
|
||||
return filter_index(_BaseMatches._base(), predicate, index)
|
||||
|
||||
def named(self, name, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a set of Match objects that have the given name.
|
||||
:param name:
|
||||
:type name: str
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index: int
|
||||
:return: set of matches
|
||||
:rtype: set[Match]
|
||||
"""
|
||||
return filter_index(_BaseMatches._base(self._name_dict[name]), predicate, index)
|
||||
|
||||
def tagged(self, tag, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a set of Match objects that have the given tag defined.
|
||||
:param tag:
|
||||
:type tag: str
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index: int
|
||||
:return: set of matches
|
||||
:rtype: set[Match]
|
||||
"""
|
||||
return filter_index(_BaseMatches._base(self._tag_dict[tag]), predicate, index)
|
||||
|
||||
def starting(self, start, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a set of Match objects that starts at given index.
|
||||
:param start: the starting index
|
||||
:type start: int
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index: int
|
||||
:return: set of matches
|
||||
:rtype: set[Match]
|
||||
"""
|
||||
return filter_index(_BaseMatches._base(self._start_dict[start]), predicate, index)
|
||||
|
||||
def ending(self, end, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a set of Match objects that ends at given index.
|
||||
:param end: the ending index
|
||||
:type end: int
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:return: set of matches
|
||||
:rtype: set[Match]
|
||||
"""
|
||||
return filter_index(_BaseMatches._base(self._end_dict[end]), predicate, index)
|
||||
|
||||
def range(self, start=0, end=None, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a set of Match objects that are available in given range, sorted from start to end.
|
||||
:param start: the starting index
|
||||
:type start: int
|
||||
:param end: the ending index
|
||||
:type end: int
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index: int
|
||||
:return: set of matches
|
||||
:rtype: set[Match]
|
||||
"""
|
||||
if end is None:
|
||||
end = self.max_end
|
||||
else:
|
||||
end = min(self.max_end, end)
|
||||
ret = _BaseMatches._base()
|
||||
for match in sorted(self):
|
||||
if match.start < end and match.end > start:
|
||||
ret.append(match)
|
||||
return filter_index(ret, predicate, index)
|
||||
|
||||
def chain_before(self, position, seps, start=0, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a list of chained matches, before position, matching predicate and separated by characters from seps
|
||||
only.
|
||||
:param position:
|
||||
:type position:
|
||||
:param seps:
|
||||
:type seps:
|
||||
:param start:
|
||||
:type start:
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if hasattr(position, 'start'):
|
||||
position = position.start
|
||||
|
||||
chain = _BaseMatches._base()
|
||||
position = min(self.max_end, position)
|
||||
|
||||
for i in reversed(range(start, position)):
|
||||
index_matches = self.at_index(i)
|
||||
filtered_matches = [index_match for index_match in index_matches if not predicate or predicate(index_match)]
|
||||
if filtered_matches:
|
||||
for chain_match in filtered_matches:
|
||||
if chain_match not in chain:
|
||||
chain.append(chain_match)
|
||||
elif self.input_string[i] not in seps:
|
||||
break
|
||||
|
||||
return filter_index(chain, predicate, index)
|
||||
|
||||
def chain_after(self, position, seps, end=None, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a list of chained matches, after position, matching predicate and separated by characters from seps
|
||||
only.
|
||||
:param position:
|
||||
:type position:
|
||||
:param seps:
|
||||
:type seps:
|
||||
:param end:
|
||||
:type end:
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if hasattr(position, 'end'):
|
||||
position = position.end
|
||||
chain = _BaseMatches._base()
|
||||
|
||||
if end is None:
|
||||
end = self.max_end
|
||||
else:
|
||||
end = min(self.max_end, end)
|
||||
|
||||
for i in range(position, end):
|
||||
index_matches = self.at_index(i)
|
||||
filtered_matches = [index_match for index_match in index_matches if not predicate or predicate(index_match)]
|
||||
if filtered_matches:
|
||||
for chain_match in filtered_matches:
|
||||
if chain_match not in chain:
|
||||
chain.append(chain_match)
|
||||
elif self.input_string[i] not in seps:
|
||||
break
|
||||
|
||||
return filter_index(chain, predicate, index)
|
||||
|
||||
@property
|
||||
def max_end(self):
|
||||
"""
|
||||
Retrieves the maximum index.
|
||||
:return:
|
||||
"""
|
||||
return max(len(self.input_string), self._max_end) if self.input_string else self._max_end
|
||||
|
||||
def _hole_start(self, position, ignore=None):
|
||||
"""
|
||||
Retrieves the start of hole index from position.
|
||||
:param position:
|
||||
:type position:
|
||||
:param ignore:
|
||||
:type ignore:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
for lindex in reversed(range(0, position)):
|
||||
for starting in self.starting(lindex):
|
||||
if not ignore or not ignore(starting):
|
||||
return lindex
|
||||
return 0
|
||||
|
||||
def _hole_end(self, position, ignore=None):
|
||||
"""
|
||||
Retrieves the end of hole index from position.
|
||||
:param position:
|
||||
:type position:
|
||||
:param ignore:
|
||||
:type ignore:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
for rindex in range(position, self.max_end):
|
||||
for starting in self.starting(rindex):
|
||||
if not ignore or not ignore(starting):
|
||||
return rindex
|
||||
return self.max_end
|
||||
|
||||
def holes(self, start=0, end=None, formatter=None, ignore=None, seps=None, predicate=None,
|
||||
index=None): # pylint: disable=too-many-branches,too-many-locals
|
||||
"""
|
||||
Retrieves a set of Match objects that are not defined in given range.
|
||||
:param start:
|
||||
:type start:
|
||||
:param end:
|
||||
:type end:
|
||||
:param formatter:
|
||||
:type formatter:
|
||||
:param ignore:
|
||||
:type ignore:
|
||||
:param seps:
|
||||
:type seps:
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
assert self.input_string if seps else True, "input_string must be defined when using seps parameter"
|
||||
if end is None:
|
||||
end = self.max_end
|
||||
else:
|
||||
end = min(self.max_end, end)
|
||||
ret = _BaseMatches._base()
|
||||
hole = False
|
||||
rindex = start
|
||||
|
||||
loop_start = self._hole_start(start, ignore)
|
||||
|
||||
for rindex in range(loop_start, end):
|
||||
current = []
|
||||
for at_index in self.at_index(rindex):
|
||||
if not ignore or not ignore(at_index):
|
||||
current.append(at_index)
|
||||
|
||||
if seps and hole and self.input_string and self.input_string[rindex] in seps:
|
||||
hole = False
|
||||
ret[-1].end = rindex
|
||||
else:
|
||||
if not current and not hole:
|
||||
# Open a new hole match
|
||||
hole = True
|
||||
ret.append(Match(max(rindex, start), None, input_string=self.input_string, formatter=formatter))
|
||||
elif current and hole:
|
||||
# Close current hole match
|
||||
hole = False
|
||||
ret[-1].end = rindex
|
||||
|
||||
if ret and hole:
|
||||
# go the the next starting element ...
|
||||
ret[-1].end = min(self._hole_end(rindex, ignore), end)
|
||||
return filter_index(ret, predicate, index)
|
||||
|
||||
def conflicting(self, match, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a list of ``Match`` objects that conflicts with given match.
|
||||
:param match:
|
||||
:type match:
|
||||
:param predicate:
|
||||
:type predicate:
|
||||
:param index:
|
||||
:type index:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
ret = _BaseMatches._base()
|
||||
|
||||
for i in range(*match.span):
|
||||
for at_match in self.at_index(i):
|
||||
if at_match not in ret:
|
||||
ret.append(at_match)
|
||||
|
||||
ret.remove(match)
|
||||
|
||||
return filter_index(ret, predicate, index)
|
||||
|
||||
def at_match(self, match, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a list of matches from given match.
|
||||
"""
|
||||
return self.at_span(match.span, predicate, index)
|
||||
|
||||
def at_span(self, span, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a list of matches from given (start, end) tuple.
|
||||
"""
|
||||
starting = self._index_dict[span[0]]
|
||||
ending = self._index_dict[span[1] - 1]
|
||||
|
||||
merged = list(starting)
|
||||
for marker in ending:
|
||||
if marker not in merged:
|
||||
merged.append(marker)
|
||||
|
||||
return filter_index(merged, predicate, index)
|
||||
|
||||
def at_index(self, pos, predicate=None, index=None):
|
||||
"""
|
||||
Retrieves a list of matches from given position
|
||||
"""
|
||||
return filter_index(self._index_dict[pos], predicate, index)
|
||||
|
||||
@property
|
||||
def names(self):
|
||||
"""
|
||||
Retrieve all names.
|
||||
:return:
|
||||
"""
|
||||
return self._name_dict.keys()
|
||||
|
||||
@property
|
||||
def tags(self):
|
||||
"""
|
||||
Retrieve all tags.
|
||||
:return:
|
||||
"""
|
||||
return self._tag_dict.keys()
|
||||
|
||||
def to_dict(self, details=False, first_value=False, enforce_list=False):
|
||||
"""
|
||||
Converts matches to a dict object.
|
||||
:param details if True, values will be complete Match object, else it will be only string Match.value property
|
||||
:type details: bool
|
||||
:param first_value if True, only the first value will be kept. Else, multiple values will be set as a list in
|
||||
the dict.
|
||||
:type first_value: bool
|
||||
:param enforce_list: if True, value is wrapped in a list even when a single value is found. Else, list values
|
||||
are available under `values_list` property of the returned dict object.
|
||||
:type enforce_list: bool
|
||||
:return:
|
||||
:rtype: dict
|
||||
"""
|
||||
ret = MatchesDict()
|
||||
for match in sorted(self):
|
||||
value = match if details else match.value
|
||||
ret.matches[match.name].append(match)
|
||||
if not enforce_list and value not in ret.values_list[match.name]:
|
||||
ret.values_list[match.name].append(value)
|
||||
if match.name in ret.keys():
|
||||
if not first_value:
|
||||
if not isinstance(ret[match.name], list):
|
||||
if ret[match.name] == value:
|
||||
continue
|
||||
ret[match.name] = [ret[match.name]]
|
||||
else:
|
||||
if value in ret[match.name]:
|
||||
continue
|
||||
ret[match.name].append(value)
|
||||
else:
|
||||
if enforce_list and not isinstance(value, list):
|
||||
ret[match.name] = [value]
|
||||
else:
|
||||
ret[match.name] = value
|
||||
return ret
|
||||
|
||||
if six.PY2: # pragma: no cover
|
||||
def clear(self):
|
||||
"""
|
||||
Python 3 backport
|
||||
"""
|
||||
del self[:]
|
||||
|
||||
def __len__(self):
|
||||
return len(self._delegate)
|
||||
|
||||
def __getitem__(self, index):
|
||||
ret = self._delegate[index]
|
||||
if isinstance(ret, list):
|
||||
return Matches(ret)
|
||||
return ret
|
||||
|
||||
def __setitem__(self, index, match):
|
||||
self._delegate[index] = match
|
||||
if isinstance(index, slice):
|
||||
for match_item in match:
|
||||
self._add_match(match_item)
|
||||
return
|
||||
self._add_match(match)
|
||||
|
||||
def __delitem__(self, index):
|
||||
match = self._delegate[index]
|
||||
del self._delegate[index]
|
||||
if isinstance(match, list):
|
||||
# if index is a slice, we has a match list
|
||||
for match_item in match:
|
||||
self._remove_match(match_item)
|
||||
else:
|
||||
self._remove_match(match)
|
||||
|
||||
def __repr__(self):
|
||||
return self._delegate.__repr__()
|
||||
|
||||
def insert(self, index, value):
|
||||
self._delegate.insert(index, value)
|
||||
self._add_match(value)
|
||||
|
||||
|
||||
class Matches(_BaseMatches):
|
||||
"""
|
||||
A custom list[Match] contains matches list.
|
||||
"""
|
||||
|
||||
def __init__(self, matches=None, input_string=None):
|
||||
self.markers = Markers(input_string=input_string)
|
||||
super(Matches, self).__init__(matches=matches, input_string=input_string)
|
||||
|
||||
def _add_match(self, match):
|
||||
assert not match.marker, "A marker match should not be added to <Matches> object"
|
||||
super(Matches, self)._add_match(match)
|
||||
|
||||
|
||||
class Markers(_BaseMatches):
|
||||
"""
|
||||
A custom list[Match] containing markers list.
|
||||
"""
|
||||
|
||||
def __init__(self, matches=None, input_string=None):
|
||||
super(Markers, self).__init__(matches=None, input_string=input_string)
|
||||
|
||||
def _add_match(self, match):
|
||||
assert match.marker, "A non-marker match should not be added to <Markers> object"
|
||||
super(Markers, self)._add_match(match)
|
||||
|
||||
|
||||
class Match(object):
|
||||
"""
|
||||
Object storing values related to a single match
|
||||
"""
|
||||
|
||||
def __init__(self, start, end, value=None, name=None, tags=None, marker=None, parent=None, private=None,
|
||||
pattern=None, input_string=None, formatter=None, conflict_solver=None, **kwargs):
|
||||
# pylint: disable=unused-argument
|
||||
self.start = start
|
||||
self.end = end
|
||||
self.name = name
|
||||
self._value = value
|
||||
self.tags = ensure_list(tags)
|
||||
self.marker = marker
|
||||
self.parent = parent
|
||||
self.input_string = input_string
|
||||
self.formatter = formatter
|
||||
self.pattern = pattern
|
||||
self.private = private
|
||||
self.conflict_solver = conflict_solver
|
||||
self._children = None
|
||||
self._raw_start = None
|
||||
self._raw_end = None
|
||||
self.defined_at = pattern.defined_at if pattern else defined_at()
|
||||
|
||||
@property
|
||||
def span(self):
|
||||
"""
|
||||
2-tuple with start and end indices of the match
|
||||
"""
|
||||
return self.start, self.end
|
||||
|
||||
@property
|
||||
def children(self):
|
||||
"""
|
||||
Children matches.
|
||||
"""
|
||||
if self._children is None:
|
||||
self._children = Matches(None, self.input_string)
|
||||
return self._children
|
||||
|
||||
@children.setter
|
||||
def children(self, value):
|
||||
self._children = value
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
"""
|
||||
Get the value of the match, using formatter if defined.
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self._value:
|
||||
return self._value
|
||||
if self.formatter:
|
||||
return self.formatter(self.raw)
|
||||
return self.raw
|
||||
|
||||
@value.setter
|
||||
def value(self, value):
|
||||
"""
|
||||
Set the value (hardcode)
|
||||
:param value:
|
||||
:type value:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._value = value # pylint: disable=attribute-defined-outside-init
|
||||
|
||||
@property
|
||||
def names(self):
|
||||
"""
|
||||
Get all names of children
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not self.children:
|
||||
return set([self.name])
|
||||
ret = set()
|
||||
for child in self.children:
|
||||
for name in child.names:
|
||||
ret.add(name)
|
||||
return ret
|
||||
|
||||
@property
|
||||
def raw_start(self):
|
||||
"""
|
||||
start index of raw value
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self._raw_start is None:
|
||||
return self.start
|
||||
return self._raw_start
|
||||
|
||||
@raw_start.setter
|
||||
def raw_start(self, value):
|
||||
"""
|
||||
Set start index of raw value
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._raw_start = value
|
||||
|
||||
@property
|
||||
def raw_end(self):
|
||||
"""
|
||||
end index of raw value
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self._raw_end is None:
|
||||
return self.end
|
||||
return self._raw_end
|
||||
|
||||
@raw_end.setter
|
||||
def raw_end(self, value):
|
||||
"""
|
||||
Set end index of raw value
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self._raw_end = value
|
||||
|
||||
@property
|
||||
def raw(self):
|
||||
"""
|
||||
Get the raw value of the match, without using hardcoded value nor formatter.
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self.input_string:
|
||||
return self.input_string[self.raw_start:self.raw_end]
|
||||
return None
|
||||
|
||||
@property
|
||||
def initiator(self):
|
||||
"""
|
||||
Retrieve the initiator parent of a match
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
match = self
|
||||
while match.parent:
|
||||
match = match.parent
|
||||
return match
|
||||
|
||||
def crop(self, crops, predicate=None, index=None):
|
||||
"""
|
||||
crop the match with given Match objects or spans tuples
|
||||
:param crops:
|
||||
:type crops: list or object
|
||||
:return: a list of Match objects
|
||||
:rtype: list[Match]
|
||||
"""
|
||||
if not is_iterable(crops) or len(crops) == 2 and isinstance(crops[0], int):
|
||||
crops = [crops]
|
||||
initial = copy.deepcopy(self)
|
||||
ret = [initial]
|
||||
for crop in crops:
|
||||
if hasattr(crop, 'span'):
|
||||
start, end = crop.span
|
||||
else:
|
||||
start, end = crop
|
||||
for current in list(ret):
|
||||
if start <= current.start and end >= current.end:
|
||||
# self is included in crop, remove current ...
|
||||
ret.remove(current)
|
||||
elif start >= current.start and end <= current.end:
|
||||
# crop is included in self, split current ...
|
||||
right = copy.deepcopy(current)
|
||||
current.end = start
|
||||
if not current:
|
||||
ret.remove(current)
|
||||
right.start = end
|
||||
if right:
|
||||
ret.append(right)
|
||||
elif current.end >= end > current.start:
|
||||
current.start = end
|
||||
elif current.start <= start < current.end:
|
||||
current.end = start
|
||||
return filter_index(ret, predicate, index)
|
||||
|
||||
def split(self, seps, predicate=None, index=None):
|
||||
"""
|
||||
Split this match in multiple matches using given separators.
|
||||
:param seps:
|
||||
:type seps: string containing separator characters
|
||||
:return: list of new Match objects
|
||||
:rtype: list
|
||||
"""
|
||||
split_match = copy.deepcopy(self)
|
||||
current_match = split_match
|
||||
ret = []
|
||||
|
||||
for i in range(0, len(self.raw)):
|
||||
if self.raw[i] in seps:
|
||||
if not split_match:
|
||||
split_match = copy.deepcopy(current_match)
|
||||
current_match.end = self.start + i
|
||||
|
||||
else:
|
||||
if split_match:
|
||||
split_match.start = self.start + i
|
||||
current_match = split_match
|
||||
ret.append(split_match)
|
||||
split_match = None
|
||||
|
||||
return filter_index(ret, predicate, index)
|
||||
|
||||
def tagged(self, *tags):
|
||||
"""
|
||||
Check if this match has at least one of the provided tags.
|
||||
|
||||
:param tags:
|
||||
:return: True if this match has at least one of the provided tags, False otherwise.
|
||||
"""
|
||||
return any(tag in self.tags for tag in tags)
|
||||
|
||||
def named(self, *names):
|
||||
"""
|
||||
Check if one of the children matches has one of the provided names.
|
||||
|
||||
:param names:
|
||||
:return: True if at least one child match has one of the provided names, False otherwise.
|
||||
"""
|
||||
return any(name in self.names for name in names)
|
||||
|
||||
def __len__(self):
|
||||
return self.end - self.start
|
||||
|
||||
def __hash__(self):
|
||||
return hash(Match) + hash(self.start) + hash(self.end) + hash(self.value)
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, Match):
|
||||
return self.span == other.span and self.value == other.value and self.name == other.name and \
|
||||
self.parent == other.parent
|
||||
return NotImplemented
|
||||
|
||||
def __ne__(self, other):
|
||||
if isinstance(other, Match):
|
||||
return self.span != other.span or self.value != other.value or self.name != other.name or \
|
||||
self.parent != other.parent
|
||||
return NotImplemented
|
||||
|
||||
def __lt__(self, other):
|
||||
if isinstance(other, Match):
|
||||
return self.span < other.span
|
||||
return NotImplemented
|
||||
|
||||
def __gt__(self, other):
|
||||
if isinstance(other, Match):
|
||||
return self.span > other.span
|
||||
return NotImplemented
|
||||
|
||||
def __le__(self, other):
|
||||
if isinstance(other, Match):
|
||||
return self.span <= other.span
|
||||
return NotImplemented
|
||||
|
||||
def __ge__(self, other):
|
||||
if isinstance(other, Match):
|
||||
return self.span >= other.span
|
||||
return NotImplemented
|
||||
|
||||
def __repr__(self):
|
||||
flags = ""
|
||||
name = ""
|
||||
tags = ""
|
||||
defined = ""
|
||||
initiator = ""
|
||||
if self.initiator.value != self.value:
|
||||
initiator = "+initiator=" + self.initiator.value
|
||||
if self.private:
|
||||
flags += '+private'
|
||||
if self.name:
|
||||
name = "+name=%s" % (self.name,)
|
||||
if self.tags:
|
||||
tags = "+tags=%s" % (self.tags,)
|
||||
if self.defined_at:
|
||||
defined += "@%s" % (self.defined_at,)
|
||||
return "<%s:%s%s%s%s%s%s>" % (self.value, self.span, flags, name, tags, initiator, defined)
|
||||
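Illustrative sketch (not part of the committed file) of how the ``split`` and ``crop`` helpers above behave; the positional ``Match(start, end, input_string=...)`` constructor signature is assumed from its call sites elsewhere in this commit.

from rebulk.match import Match

match = Match(4, 19, input_string="The quick brown fox jumps")  # raw == "quick brown fox"
words = match.split(' ')       # three matches: "quick", "brown", "fox"
parts = match.crop((10, 15))   # two matches around the cropped span: "quick " and " fox"
print([m.raw for m in words], [m.raw for m in parts])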
559
lib/rebulk/pattern.py
Normal file
@@ -0,0 +1,559 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Abstract pattern class definition along with various implementations (regexp, string, functional)
|
||||
"""
|
||||
# pylint: disable=super-init-not-called,wrong-import-position
|
||||
|
||||
from abc import ABCMeta, abstractmethod, abstractproperty
|
||||
|
||||
import six
|
||||
|
||||
from . import debug
|
||||
from .formatters import default_formatter
|
||||
from .loose import call, ensure_list, ensure_dict
|
||||
from .match import Match
|
||||
from .remodule import re, REGEX_AVAILABLE
|
||||
from .utils import find_all, is_iterable, get_first_defined
|
||||
from .validators import allways_true
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class BasePattern(object):
|
||||
"""
|
||||
Base class for Pattern like objects
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def matches(self, input_string, context=None, with_raw_matches=False):
|
||||
"""
|
||||
Computes all matches for a given input
|
||||
|
||||
:param input_string: the string to parse
|
||||
:type input_string: str
|
||||
:param context: the context
|
||||
:type context: dict
|
||||
:param with_raw_matches: if True, also return the raw (unprocessed) matches
|
||||
:type with_raw_matches: bool
|
||||
:return: matches based on input_string for this pattern
|
||||
:rtype: iterator[Match]
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Pattern(BasePattern):
|
||||
"""
|
||||
Definition of a particular pattern to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
|
||||
private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
|
||||
marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
|
||||
properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
|
||||
"""
|
||||
:param name: Name of this pattern
|
||||
:type name: str
|
||||
:param tags: List of tags related to this pattern
|
||||
:type tags: list[str]
|
||||
:param formatter: dict (name, func) of formatter to use with this pattern. name is the match name to support,
|
||||
and func a function(input_string) that returns the formatted string. A single formatter function can also be
|
||||
passed as a shortcut for {None: formatter}. The returned formatted string will be set in the Match.value property.
|
||||
:type formatter: dict[str, func] || func
|
||||
:param value: dict (name, value) of value to use with this pattern. name is the match name to support,
|
||||
and value an object for the match value. A single object value can also be
|
||||
passed as a shortcut for {None: value}. The value will be set in the Match.value property.
|
||||
:type value: dict[str, object] || object
|
||||
:param validator: dict (name, func) of validator to use with this pattern. name is the match name to support,
|
||||
and func a function(match) that returns a boolean. A single validator function can also be
|
||||
passed as a shortcut for {None: validator}. If return value is False, match will be ignored.
|
||||
:param children: generates children instead of parent
|
||||
:type children: bool
|
||||
:param every: generates both parent and children.
|
||||
:type every: bool
|
||||
:param private: flag this pattern as being private.
|
||||
:type private: bool
|
||||
:param private_parent: force return of parent and flag parent matches as private.
|
||||
:type private_parent: bool
|
||||
:param private_children: force return of children and flag children matches as private.
|
||||
:type private_children: bool
|
||||
:param private_names: force return of named matches as private.
|
||||
:type private_names: bool
|
||||
:param ignore_names: drop some named matches after validation.
|
||||
:type ignore_names: bool
|
||||
:param marker: flag this pattern as being a marker.
|
||||
:type marker: bool
|
||||
:param format_all: if True, pattern will format every match in the hierarchy (even matches not yielded).
|
||||
:type format_all: bool
|
||||
:param validate_all: if True, pattern will validate every match in the hierarchy (even matches not yielded).
|
||||
:type validate_all: bool
|
||||
:param disabled: if True, this pattern is disabled. Can also be a function(context).
|
||||
:type disabled: bool|function
|
||||
:param log_level: Log level associated with this pattern
|
||||
:type log_level: int
|
||||
:param post_processor: Post processing function
|
||||
:type post_processor: func
|
||||
:param pre_match_processor: Pre match processing function
|
||||
:type pre_match_processor: func
|
||||
:param post_match_processor: Post match processing function
|
||||
:type post_match_processor: func
|
||||
"""
|
||||
# pylint:disable=too-many-locals,unused-argument
|
||||
self.name = name
|
||||
self.tags = ensure_list(tags)
|
||||
self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
|
||||
self.values, self._default_value = ensure_dict(value, None)
|
||||
self.validators, self._default_validator = ensure_dict(validator, allways_true)
|
||||
self.every = every
|
||||
self.children = children
|
||||
self.private = private
|
||||
self.private_names = private_names if private_names else []
|
||||
self.ignore_names = ignore_names if ignore_names else []
|
||||
self.private_parent = private_parent
|
||||
self.private_children = private_children
|
||||
self.marker = marker
|
||||
self.format_all = format_all
|
||||
self.validate_all = validate_all
|
||||
if not callable(disabled):
|
||||
self.disabled = lambda context: disabled
|
||||
else:
|
||||
self.disabled = disabled
|
||||
self._log_level = log_level
|
||||
self._properties = properties
|
||||
self.defined_at = debug.defined_at()
|
||||
if not callable(post_processor):
|
||||
self.post_processor = None
|
||||
else:
|
||||
self.post_processor = post_processor
|
||||
if not callable(pre_match_processor):
|
||||
self.pre_match_processor = None
|
||||
else:
|
||||
self.pre_match_processor = pre_match_processor
|
||||
if not callable(post_match_processor):
|
||||
self.post_match_processor = None
|
||||
else:
|
||||
self.post_match_processor = post_match_processor
|
||||
|
||||
@property
|
||||
def log_level(self):
|
||||
"""
|
||||
Log level for this pattern.
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self._log_level if self._log_level is not None else debug.LOG_LEVEL
|
||||
|
||||
def matches(self, input_string, context=None, with_raw_matches=False):
|
||||
"""
|
||||
Computes all matches for a given input
|
||||
|
||||
:param input_string: the string to parse
|
||||
:type input_string: str
|
||||
:param context: the context
|
||||
:type context: dict
|
||||
:param with_raw_matches: if True, also return the raw (unprocessed) matches
|
||||
:type with_raw_matches: bool
|
||||
:return: matches based on input_string for this pattern
|
||||
:rtype: iterator[Match]
|
||||
"""
|
||||
# pylint: disable=too-many-branches
|
||||
|
||||
matches = []
|
||||
raw_matches = []
|
||||
|
||||
for pattern in self.patterns:
|
||||
match_index = 0
|
||||
for match in self._match(pattern, input_string, context):
|
||||
raw_matches.append(match)
|
||||
matches.extend(self._process_matches(match, match_index))
|
||||
match_index += 1
|
||||
|
||||
matches = self._post_process_matches(matches)
|
||||
|
||||
if with_raw_matches:
|
||||
return matches, raw_matches
|
||||
return matches
|
||||
|
||||
@property
|
||||
def _should_include_children(self):
|
||||
"""
|
||||
Check if children matches from this pattern should be included in matches results.
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return self.children or self.every
|
||||
|
||||
@property
|
||||
def _should_include_parent(self):
|
||||
"""
|
||||
Check if a match from this pattern should be included in matches results.
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return not self.children or self.every
|
||||
|
||||
@staticmethod
|
||||
def _match_config_property_keys(match, child=False):
|
||||
if match.name:
|
||||
yield match.name
|
||||
if child:
|
||||
yield '__children__'
|
||||
else:
|
||||
yield '__parent__'
|
||||
yield None
|
||||
|
||||
@staticmethod
|
||||
def _process_match_index(match, match_index):
|
||||
"""
|
||||
Process match index from this pattern process state.
|
||||
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
match.match_index = match_index
|
||||
|
||||
def _process_match_private(self, match, child=False):
|
||||
"""
|
||||
Process match privacy from this pattern configuration.
|
||||
|
||||
:param match:
|
||||
:param child:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if match.name and match.name in self.private_names or \
|
||||
not child and self.private_parent or \
|
||||
child and self.private_children:
|
||||
match.private = True
|
||||
|
||||
def _process_match_value(self, match, child=False):
|
||||
"""
|
||||
Process match value from this pattern configuration.
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
keys = self._match_config_property_keys(match, child=child)
|
||||
pattern_value = get_first_defined(self.values, keys, self._default_value)
|
||||
if pattern_value:
|
||||
match.value = pattern_value
|
||||
|
||||
def _process_match_formatter(self, match, child=False):
|
||||
"""
|
||||
Process match formatter from this pattern configuration.
|
||||
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
included = self._should_include_children if child else self._should_include_parent
|
||||
if included or self.format_all:
|
||||
keys = self._match_config_property_keys(match, child=child)
|
||||
match.formatter = get_first_defined(self.formatters, keys, self._default_formatter)
|
||||
|
||||
def _process_match_validator(self, match, child=False):
|
||||
"""
|
||||
Process match validation from this pattern configuration.
|
||||
|
||||
:param match:
|
||||
:return: True if match is validated by the configured validator, False otherwise.
|
||||
"""
|
||||
included = self._should_include_children if child else self._should_include_parent
|
||||
if included or self.validate_all:
|
||||
keys = self._match_config_property_keys(match, child=child)
|
||||
validator = get_first_defined(self.validators, keys, self._default_validator)
|
||||
if validator and not validator(match):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _process_match(self, match, match_index, child=False):
|
||||
"""
|
||||
Process match from this pattern by setting all properties from defined configuration
|
||||
(index, private, value, formatter, validator, ...).
|
||||
|
||||
:param match:
|
||||
:type match:
|
||||
:return: True if match is validated by the configured validator, False otherwise.
|
||||
:rtype:
|
||||
"""
|
||||
self._process_match_index(match, match_index)
|
||||
self._process_match_private(match, child)
|
||||
self._process_match_value(match, child)
|
||||
self._process_match_formatter(match, child)
|
||||
return self._process_match_validator(match, child)
|
||||
|
||||
@staticmethod
|
||||
def _process_match_processor(match, processor):
|
||||
if processor:
|
||||
ret = processor(match)
|
||||
if ret is not None:
|
||||
return ret
|
||||
return match
|
||||
|
||||
def _process_matches(self, match, match_index):
|
||||
"""
|
||||
Process and generate all matches for the given unprocessed match.
|
||||
:param match:
|
||||
:param match_index:
|
||||
:return: Processed and dispatched matches.
|
||||
"""
|
||||
match = self._process_match_processor(match, self.pre_match_processor)
|
||||
if not match:
|
||||
return
|
||||
|
||||
if not self._process_match(match, match_index):
|
||||
return
|
||||
|
||||
for child in match.children:
|
||||
if not self._process_match(child, match_index, child=True):
|
||||
return
|
||||
|
||||
match = self._process_match_processor(match, self.post_match_processor)
|
||||
if not match:
|
||||
return
|
||||
|
||||
if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
|
||||
yield match
|
||||
if self._should_include_children or self.private_children:
|
||||
children = [x for x in match.children if x.name not in self.ignore_names]
|
||||
for child in children:
|
||||
yield child
|
||||
|
||||
def _post_process_matches(self, matches):
|
||||
"""
|
||||
Post process matches with user defined function
|
||||
:param matches:
|
||||
:type matches:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self.post_processor:
|
||||
return self.post_processor(matches, self)
|
||||
return matches
|
||||
|
||||
@abstractproperty
|
||||
def patterns(self): # pragma: no cover
|
||||
"""
|
||||
List of base patterns defined
|
||||
|
||||
:return: A list of base patterns
|
||||
:rtype: list
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
def properties(self):
|
||||
"""
|
||||
Property names and values that can be retrieved by this pattern.
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if self._properties:
|
||||
return self._properties
|
||||
return {}
|
||||
|
||||
@abstractproperty
|
||||
def match_options(self): # pragma: no cover
|
||||
"""
|
||||
dict of default options for generated Match objects
|
||||
|
||||
:return: **options to pass to Match constructor
|
||||
:rtype: dict
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def _match(self, pattern, input_string, context=None): # pragma: no cover
|
||||
"""
|
||||
Computes all unprocessed matches for a given pattern and input.
|
||||
|
||||
:param pattern: the pattern to use
|
||||
:param input_string: the string to parse
|
||||
:type input_string: str
|
||||
:param context: the context
|
||||
:type context: dict
|
||||
:return: matches based on input_string for this pattern
|
||||
:rtype: iterator[Match]
|
||||
"""
|
||||
pass
|
||||
|
||||
def __repr__(self):
|
||||
defined = ""
|
||||
if self.defined_at:
|
||||
defined = "@%s" % (self.defined_at,)
|
||||
return "<%s%s:%s>" % (self.__class__.__name__, defined, self.__repr__patterns__)
|
||||
|
||||
@property
|
||||
def __repr__patterns__(self):
|
||||
return self.patterns
|
||||
|
||||
|
||||
class StringPattern(Pattern):
|
||||
"""
|
||||
Definition of one or many strings to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, *patterns, **kwargs):
|
||||
super(StringPattern, self).__init__(**kwargs)
|
||||
self._patterns = patterns
|
||||
self._kwargs = kwargs
|
||||
self._match_kwargs = filter_match_kwargs(kwargs)
|
||||
|
||||
@property
|
||||
def patterns(self):
|
||||
return self._patterns
|
||||
|
||||
@property
|
||||
def match_options(self):
|
||||
return self._match_kwargs
|
||||
|
||||
def _match(self, pattern, input_string, context=None):
|
||||
for index in find_all(input_string, pattern, **self._kwargs):
|
||||
match = Match(index, index + len(pattern), pattern=self, input_string=input_string, **self._match_kwargs)
|
||||
if match:
|
||||
yield match
|
||||
|
||||
|
||||
class RePattern(Pattern):
|
||||
"""
|
||||
Definition of one or many regular expression patterns to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, *patterns, **kwargs):
|
||||
super(RePattern, self).__init__(**kwargs)
|
||||
self.repeated_captures = REGEX_AVAILABLE
|
||||
if 'repeated_captures' in kwargs:
|
||||
self.repeated_captures = kwargs.get('repeated_captures')
|
||||
if self.repeated_captures and not REGEX_AVAILABLE: # pragma: no cover
|
||||
raise NotImplementedError("repeated_capture is available only with regex module.")
|
||||
self.abbreviations = kwargs.get('abbreviations', [])
|
||||
self._kwargs = kwargs
|
||||
self._match_kwargs = filter_match_kwargs(kwargs)
|
||||
self._children_match_kwargs = filter_match_kwargs(kwargs, children=True)
|
||||
self._patterns = []
|
||||
for pattern in patterns:
|
||||
if isinstance(pattern, six.string_types):
|
||||
if self.abbreviations and pattern:
|
||||
for key, replacement in self.abbreviations:
|
||||
pattern = pattern.replace(key, replacement)
|
||||
pattern = call(re.compile, pattern, **self._kwargs)
|
||||
elif isinstance(pattern, dict):
|
||||
if self.abbreviations and 'pattern' in pattern:
|
||||
for key, replacement in self.abbreviations:
|
||||
pattern['pattern'] = pattern['pattern'].replace(key, replacement)
|
||||
pattern = re.compile(**pattern)
|
||||
elif hasattr(pattern, '__iter__'):
|
||||
pattern = re.compile(*pattern)
|
||||
self._patterns.append(pattern)
|
||||
|
||||
@property
|
||||
def patterns(self):
|
||||
return self._patterns
|
||||
|
||||
@property
|
||||
def __repr__patterns__(self):
|
||||
return [pattern.pattern for pattern in self.patterns]
|
||||
|
||||
@property
|
||||
def match_options(self):
|
||||
return self._match_kwargs
|
||||
|
||||
def _match(self, pattern, input_string, context=None):
|
||||
names = dict((v, k) for k, v in pattern.groupindex.items())
|
||||
for match_object in pattern.finditer(input_string):
|
||||
start = match_object.start()
|
||||
end = match_object.end()
|
||||
main_match = Match(start, end, pattern=self, input_string=input_string, **self._match_kwargs)
|
||||
|
||||
if pattern.groups:
|
||||
for i in range(1, pattern.groups + 1):
|
||||
name = names.get(i, main_match.name)
|
||||
if self.repeated_captures:
|
||||
for start, end in match_object.spans(i):
|
||||
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
|
||||
input_string=input_string, **self._children_match_kwargs)
|
||||
if child_match:
|
||||
main_match.children.append(child_match)
|
||||
else:
|
||||
start, end = match_object.span(i)
|
||||
if start > -1 and end > -1:
|
||||
child_match = Match(start, end, name=name, parent=main_match, pattern=self,
|
||||
input_string=input_string, **self._children_match_kwargs)
|
||||
if child_match:
|
||||
main_match.children.append(child_match)
|
||||
|
||||
if main_match:
|
||||
yield main_match
|
||||
|
||||
|
||||
class FunctionalPattern(Pattern):
|
||||
"""
|
||||
Definition of one or many functional patterns to search for.
|
||||
"""
|
||||
|
||||
def __init__(self, *patterns, **kwargs):
|
||||
super(FunctionalPattern, self).__init__(**kwargs)
|
||||
self._patterns = patterns
|
||||
self._kwargs = kwargs
|
||||
self._match_kwargs = filter_match_kwargs(kwargs)
|
||||
|
||||
@property
|
||||
def patterns(self):
|
||||
return self._patterns
|
||||
|
||||
@property
|
||||
def match_options(self):
|
||||
return self._match_kwargs
|
||||
|
||||
def _match(self, pattern, input_string, context=None):
|
||||
ret = call(pattern, input_string, context, **self._kwargs)
|
||||
if ret:
|
||||
if not is_iterable(ret) or isinstance(ret, dict) \
|
||||
or (is_iterable(ret) and hasattr(ret, '__getitem__') and isinstance(ret[0], int)):
|
||||
args_iterable = [ret]
|
||||
else:
|
||||
args_iterable = ret
|
||||
for args in args_iterable:
|
||||
if isinstance(args, dict):
|
||||
options = args
|
||||
options.pop('input_string', None)
|
||||
options.pop('pattern', None)
|
||||
if self._match_kwargs:
|
||||
options = self._match_kwargs.copy()
|
||||
options.update(args)
|
||||
match = Match(pattern=self, input_string=input_string, **options)
|
||||
if match:
|
||||
yield match
|
||||
else:
|
||||
kwargs = self._match_kwargs
|
||||
if isinstance(args[-1], dict):
|
||||
kwargs = dict(kwargs)
|
||||
kwargs.update(args[-1])
|
||||
args = args[:-1]
|
||||
match = Match(*args, pattern=self, input_string=input_string, **kwargs)
|
||||
if match:
|
||||
yield match
|
||||
|
||||
|
||||
def filter_match_kwargs(kwargs, children=False):
|
||||
"""
|
||||
Filters out kwargs for Match construction
|
||||
|
||||
:param kwargs:
|
||||
:type kwargs: dict
|
||||
:param children: Flag to filter children matches
|
||||
:type children: bool
|
||||
:return: A filtered dict
|
||||
:rtype: dict
|
||||
"""
|
||||
kwargs = kwargs.copy()
|
||||
for key in ('pattern', 'start', 'end', 'parent', 'formatter', 'value'):
|
||||
if key in kwargs:
|
||||
del kwargs[key]
|
||||
if children:
|
||||
for key in ('name',):
|
||||
if key in kwargs:
|
||||
del kwargs[key]
|
||||
return kwargs
|
||||
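Illustrative sketch (not part of the committed file): the three pattern implementations above can also be used standalone; the input string and match name here are invented for the example.

from rebulk.pattern import StringPattern, RePattern, FunctionalPattern

input_string = "The quick brown fox"
print(StringPattern("brown").matches(input_string))            # [<brown:(10, 15)>]
print(RePattern(r"qu\w+", name="word").matches(input_string))  # [<quick:(4, 9)+name=word>]
print(FunctionalPattern(lambda s: (16, 19)).matches(input_string))  # [<fox:(16, 19)>]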
107
lib/rebulk/processors.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Processor functions
|
||||
"""
|
||||
from logging import getLogger
|
||||
|
||||
from .utils import IdentitySet
|
||||
|
||||
from .rules import Rule, RemoveMatch
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
DEFAULT = '__default__'
|
||||
|
||||
POST_PROCESS = -2048
|
||||
PRE_PROCESS = 2048
|
||||
|
||||
|
||||
def _default_conflict_solver(match, conflicting_match):
|
||||
"""
|
||||
Default conflict solver for matches: removes shorter matches if they conflict with longer ones.
|
||||
|
||||
:param conflicting_match:
|
||||
:type conflicting_match:
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if len(conflicting_match.initiator) < len(match.initiator):
|
||||
return conflicting_match
|
||||
if len(match.initiator) < len(conflicting_match.initiator):
|
||||
return match
|
||||
return None
|
||||
|
||||
|
||||
class ConflictSolver(Rule):
|
||||
"""
|
||||
Remove conflicting matches.
|
||||
"""
|
||||
priority = PRE_PROCESS
|
||||
|
||||
consequence = RemoveMatch
|
||||
|
||||
@property
|
||||
def default_conflict_solver(self): # pylint:disable=no-self-use
|
||||
"""
|
||||
Default conflict solver to use.
|
||||
"""
|
||||
return _default_conflict_solver
|
||||
|
||||
def when(self, matches, context):
|
||||
# pylint:disable=too-many-nested-blocks
|
||||
to_remove_matches = IdentitySet()
|
||||
|
||||
public_matches = [match for match in matches if not match.private]
|
||||
public_matches.sort(key=len)
|
||||
|
||||
for match in public_matches:
|
||||
conflicting_matches = matches.conflicting(match)
|
||||
|
||||
if conflicting_matches:
|
||||
# keep the match only if it's the longest
|
||||
conflicting_matches = [conflicting_match for conflicting_match in conflicting_matches if
|
||||
not conflicting_match.private]
|
||||
conflicting_matches.sort(key=len)
|
||||
|
||||
for conflicting_match in conflicting_matches:
|
||||
conflict_solvers = [(self.default_conflict_solver, False)]
|
||||
|
||||
if match.conflict_solver:
|
||||
conflict_solvers.append((match.conflict_solver, False))
|
||||
if conflicting_match.conflict_solver:
|
||||
conflict_solvers.append((conflicting_match.conflict_solver, True))
|
||||
|
||||
for conflict_solver, reverse in reversed(conflict_solvers):
|
||||
if reverse:
|
||||
to_remove = conflict_solver(conflicting_match, match)
|
||||
else:
|
||||
to_remove = conflict_solver(match, conflicting_match)
|
||||
if to_remove == DEFAULT:
|
||||
continue
|
||||
if to_remove and to_remove not in to_remove_matches:
|
||||
both_matches = [match, conflicting_match]
|
||||
both_matches.remove(to_remove)
|
||||
to_keep = both_matches[0]
|
||||
|
||||
if to_keep not in to_remove_matches:
|
||||
log(self.log_level, "Conflicting match %s will be removed in favor of match %s",
|
||||
to_remove, to_keep)
|
||||
|
||||
to_remove_matches.add(to_remove)
|
||||
break
|
||||
return to_remove_matches
|
||||
|
||||
|
||||
class PrivateRemover(Rule):
|
||||
"""
|
||||
Rule that removes private matches.
|
||||
"""
|
||||
priority = POST_PROCESS
|
||||
|
||||
consequence = RemoveMatch
|
||||
|
||||
def when(self, matches, context):
|
||||
return [match for match in matches if match.private]
|
||||
190
lib/rebulk/rebulk.py
Normal file
@@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Entry point functions and classes for Rebulk
|
||||
"""
|
||||
from logging import getLogger
|
||||
|
||||
from .builder import Builder
|
||||
from .match import Matches
|
||||
from .processors import ConflictSolver, PrivateRemover
|
||||
from .rules import Rules
|
||||
from .utils import extend_safe
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
class Rebulk(Builder):
|
||||
r"""
|
||||
Regular expression, string and function based patterns are declared in a ``Rebulk`` object. It uses a fluent API to
|
||||
chain ``string``, ``regex``, and ``functional`` methods to define various pattern types.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> from rebulk import Rebulk
|
||||
>>> bulk = Rebulk().string('brown').regex(r'qu\w+').functional(lambda s: (20, 25))
|
||||
|
||||
When the ``Rebulk`` object is fully configured, you can call the ``matches`` method with an input string to retrieve all
|
||||
``Match`` objects found by registered patterns.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> bulk.matches("The quick brown fox jumps over the lazy dog")
|
||||
[<brown:(10, 15)>, <quick:(4, 9)>, <jumps:(20, 25)>]
|
||||
|
||||
If multiple ``Match`` objects are found at the same position, only the longest one is kept.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> bulk = Rebulk().string('lakers').string('la')
|
||||
>>> bulk.matches("the lakers are from la")
|
||||
[<lakers:(4, 10)>, <la:(20, 22)>]
|
||||
"""
|
||||
|
||||
# pylint:disable=protected-access
|
||||
|
||||
def __init__(self, disabled=lambda context: False, default_rules=True):
|
||||
"""
|
||||
Creates a new Rebulk object.
|
||||
:param disabled: if True, this pattern is disabled. Can also be a function(context).
|
||||
:type disabled: bool|function
|
||||
:param default_rules: use default rules
|
||||
:type default_rules:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
super(Rebulk, self).__init__()
|
||||
if not callable(disabled):
|
||||
self.disabled = lambda context: disabled
|
||||
else:
|
||||
self.disabled = disabled
|
||||
self._patterns = []
|
||||
self._rules = Rules()
|
||||
if default_rules:
|
||||
self.rules(ConflictSolver, PrivateRemover)
|
||||
self._rebulks = []
|
||||
|
||||
def pattern(self, *pattern):
|
||||
"""
|
||||
Add pattern objects
|
||||
|
||||
:param pattern:
|
||||
:type pattern: rebulk.pattern.Pattern
|
||||
:return: self
|
||||
:rtype: Rebulk
|
||||
"""
|
||||
self._patterns.extend(pattern)
|
||||
return self
|
||||
|
||||
def rules(self, *rules):
|
||||
"""
|
||||
Add rules as a module, class or instance.
|
||||
:param rules:
|
||||
:type rules: list[Rule]
|
||||
:return:
|
||||
"""
|
||||
self._rules.load(*rules)
|
||||
return self
|
||||
|
||||
def rebulk(self, *rebulks):
|
||||
"""
|
||||
Add children rebulk objects
|
||||
:param rebulks:
|
||||
:type rebulks: Rebulk
|
||||
:return:
|
||||
"""
|
||||
self._rebulks.extend(rebulks)
|
||||
return self
|
||||
|
||||
def matches(self, string, context=None):
|
||||
"""
|
||||
Search for all matches with current configuration against input_string
|
||||
:param string: string to search into
|
||||
:type string: str
|
||||
:param context: context to use
|
||||
:type context: dict
|
||||
:return: A custom list of matches
|
||||
:rtype: Matches
|
||||
"""
|
||||
matches = Matches(input_string=string)
|
||||
if context is None:
|
||||
context = {}
|
||||
|
||||
self._matches_patterns(matches, context)
|
||||
|
||||
self._execute_rules(matches, context)
|
||||
|
||||
return matches
|
||||
|
||||
def effective_rules(self, context=None):
|
||||
"""
|
||||
Get effective rules for this rebulk object and its children.
|
||||
:param context:
|
||||
:type context:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
rules = Rules()
|
||||
rules.extend(self._rules)
|
||||
for rebulk in self._rebulks:
|
||||
if not rebulk.disabled(context):
|
||||
extend_safe(rules, rebulk._rules)
|
||||
return rules
|
||||
|
||||
def _execute_rules(self, matches, context):
|
||||
"""
|
||||
Execute rules for this rebulk and children.
|
||||
:param matches:
|
||||
:type matches:
|
||||
:param context:
|
||||
:type context:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not self.disabled(context):
|
||||
rules = self.effective_rules(context)
|
||||
rules.execute_all_rules(matches, context)
|
||||
|
||||
def effective_patterns(self, context=None):
|
||||
"""
|
||||
Get effective patterns for this rebulk object and its children.
|
||||
:param context:
|
||||
:type context:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
patterns = list(self._patterns)
|
||||
for rebulk in self._rebulks:
|
||||
if not rebulk.disabled(context):
|
||||
extend_safe(patterns, rebulk._patterns)
|
||||
return patterns
|
||||
|
||||
def _matches_patterns(self, matches, context):
|
||||
"""
|
||||
Search for all matches with current patterns against the input string
|
||||
:param matches: matches list
|
||||
:type matches: Matches
|
||||
:param context: context to use
|
||||
:type context: dict
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if not self.disabled(context):
|
||||
patterns = self.effective_patterns(context)
|
||||
for pattern in patterns:
|
||||
if not pattern.disabled(context):
|
||||
pattern_matches = pattern.matches(matches.input_string, context)
|
||||
if pattern_matches:
|
||||
log(pattern.log_level, "Pattern has %s match(es). (%s)", len(pattern_matches), pattern)
|
||||
else:
|
||||
pass
|
||||
# log(pattern.log_level, "Pattern doesn't match. (%s)" % (pattern,))
|
||||
for match in pattern_matches:
|
||||
if match.marker:
|
||||
log(pattern.log_level, "Marker found. (%s)", match)
|
||||
matches.markers.append(match)
|
||||
else:
|
||||
log(pattern.log_level, "Match found. (%s)", match)
|
||||
matches.append(match)
|
||||
else:
|
||||
log(pattern.log_level, "Pattern is disabled. (%s)", pattern)
|
||||
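Illustrative sketch (not part of the committed file): composing ``Rebulk`` objects through the ``rebulk()`` method shown above; the ``string``/``regex`` builder methods are assumed from the ``Builder`` base class.

from rebulk import Rebulk

child = Rebulk().regex(r"\d{4}", name="year")
parent = Rebulk().string("brown").rebulk(child)
print(parent.matches("The quick brown fox (2015)"))
# [<brown:(10, 15)>, <2015:(21, 25)+name=year>]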
17
lib/rebulk/remodule.py
Normal file
@@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Uniform re module
|
||||
"""
|
||||
# pylint: disable-all
|
||||
import os
|
||||
|
||||
REGEX_AVAILABLE = False
|
||||
if os.environ.get('REGEX_DISABLED') in ["1", "true", "True", "Y"]:
|
||||
import re
|
||||
else:
|
||||
try:
|
||||
import regex as re
|
||||
REGEX_AVAILABLE = True
|
||||
except ImportError:
|
||||
import re
|
||||
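Illustrative sketch (not part of the committed file): importing the uniform ``re`` interface provided above.

from rebulk.remodule import re, REGEX_AVAILABLE

# Backed by the third-party `regex` module when it is installed (and not disabled through
# the REGEX_DISABLED environment variable), otherwise by the standard `re` module.
print(REGEX_AVAILABLE, re.compile(r"qu\w+").findall("The quick brown fox"))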
373
lib/rebulk/rules.py
Normal file
@@ -0,0 +1,373 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Abstract rule class definition and rule engine implementation
|
||||
"""
|
||||
from abc import ABCMeta, abstractmethod
|
||||
import inspect
|
||||
from itertools import groupby
|
||||
from logging import getLogger
|
||||
|
||||
import six
|
||||
from .utils import is_iterable
|
||||
|
||||
from .toposort import toposort
|
||||
|
||||
from . import debug
|
||||
|
||||
log = getLogger(__name__).log
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Consequence(object):
|
||||
"""
|
||||
Definition of a consequence to apply.
|
||||
"""
|
||||
@abstractmethod
|
||||
def then(self, matches, when_response, context): # pragma: no cover
|
||||
"""
|
||||
Action implementation.
|
||||
|
||||
:param matches:
|
||||
:type matches: rebulk.match.Matches
|
||||
:param context:
|
||||
:type context:
|
||||
:param when_response: return object from when call.
|
||||
:type when_response: object
|
||||
:return: True if the action was run, False if it wasn't.
|
||||
:rtype: bool
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class Condition(object):
|
||||
"""
|
||||
Definition of a condition to check.
|
||||
"""
|
||||
@abstractmethod
|
||||
def when(self, matches, context): # pragma: no cover
|
||||
"""
|
||||
Condition implementation.
|
||||
|
||||
:param matches:
|
||||
:type matches: rebulk.match.Matches
|
||||
:param context:
|
||||
:type context:
|
||||
:return: truthy if the rule should be triggered and its then action executed, falsy otherwise.
|
||||
:rtype: object
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class CustomRule(Condition, Consequence):
|
||||
"""
|
||||
Definition of a rule to apply
|
||||
"""
|
||||
# pylint: disable=no-self-use, unused-argument, abstract-method
|
||||
priority = 0
|
||||
name = None
|
||||
dependency = None
|
||||
properties = {}
|
||||
|
||||
def __init__(self, log_level=None):
|
||||
self.defined_at = debug.defined_at()
|
||||
if log_level is None and not hasattr(self, 'log_level'):
|
||||
self.log_level = debug.LOG_LEVEL
|
||||
|
||||
def enabled(self, context):
|
||||
"""
|
||||
Check whether this rule is enabled for the given context.
|
||||
|
||||
:param context:
|
||||
:type context:
|
||||
:return: True if rule is enabled, False if disabled
|
||||
:rtype: bool
|
||||
"""
|
||||
return True
|
||||
|
||||
def __lt__(self, other):
|
||||
return self.priority > other.priority
|
||||
|
||||
def __repr__(self):
|
||||
defined = ""
|
||||
if self.defined_at:
|
||||
defined = "@%s" % (self.defined_at,)
|
||||
return "<%s%s>" % (self.name if self.name else self.__class__.__name__, defined)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.__class__ == other.__class__
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.__class__)
|
||||
|
||||
|
||||
class Rule(CustomRule):
|
||||
"""
|
||||
Definition of a rule to apply
|
||||
"""
|
||||
# pylint:disable=abstract-method
|
||||
consequence = None
|
||||
|
||||
def then(self, matches, when_response, context):
|
||||
assert self.consequence
|
||||
if is_iterable(self.consequence):
|
||||
if not is_iterable(when_response):
|
||||
when_response = [when_response]
|
||||
iterator = iter(when_response)
|
||||
for cons in self.consequence: #pylint: disable=not-an-iterable
|
||||
if inspect.isclass(cons):
|
||||
cons = cons()
|
||||
cons.then(matches, next(iterator), context)
|
||||
else:
|
||||
cons = self.consequence
|
||||
if inspect.isclass(cons):
|
||||
cons = cons() # pylint:disable=not-callable
|
||||
cons.then(matches, when_response, context)
|
||||
|
||||
|
||||
class RemoveMatch(Consequence): # pylint: disable=abstract-method
|
||||
"""
|
||||
Remove matches returned by then
|
||||
"""
|
||||
def then(self, matches, when_response, context):
|
||||
if is_iterable(when_response):
|
||||
ret = []
|
||||
when_response = list(when_response)
|
||||
for match in when_response:
|
||||
if match in matches:
|
||||
matches.remove(match)
|
||||
ret.append(match)
|
||||
return ret
|
||||
if when_response in matches:
|
||||
matches.remove(when_response)
|
||||
return when_response
|
||||
|
||||
|
||||
class AppendMatch(Consequence): # pylint: disable=abstract-method
|
||||
"""
|
||||
Append matches returned by then
|
||||
"""
|
||||
def __init__(self, match_name=None):
|
||||
self.match_name = match_name
|
||||
|
||||
def then(self, matches, when_response, context):
|
||||
if is_iterable(when_response):
|
||||
ret = []
|
||||
when_response = list(when_response)
|
||||
for match in when_response:
|
||||
if match not in matches:
|
||||
if self.match_name:
|
||||
match.name = self.match_name
|
||||
matches.append(match)
|
||||
ret.append(match)
|
||||
return ret
|
||||
if self.match_name:
|
||||
when_response.name = self.match_name
|
||||
if when_response not in matches:
|
||||
matches.append(when_response)
|
||||
return when_response
|
||||
|
||||
|
||||
class RenameMatch(Consequence): # pylint: disable=abstract-method
|
||||
"""
|
||||
Rename matches returned by then
|
||||
"""
|
||||
def __init__(self, match_name):
|
||||
self.match_name = match_name
|
||||
self.remove = RemoveMatch()
|
||||
self.append = AppendMatch()
|
||||
|
||||
def then(self, matches, when_response, context):
|
||||
removed = self.remove.then(matches, when_response, context)
|
||||
if is_iterable(removed):
|
||||
removed = list(removed)
|
||||
for match in removed:
|
||||
match.name = self.match_name
|
||||
elif removed:
|
||||
removed.name = self.match_name
|
||||
if removed:
|
||||
self.append.then(matches, removed, context)
|
||||
|
||||
|
||||
class AppendTags(Consequence): # pylint: disable=abstract-method
|
||||
"""
|
||||
Add tags to returned matches
|
||||
"""
|
||||
def __init__(self, tags):
|
||||
self.tags = tags
|
||||
self.remove = RemoveMatch()
|
||||
self.append = AppendMatch()
|
||||
|
||||
def then(self, matches, when_response, context):
|
||||
removed = self.remove.then(matches, when_response, context)
|
||||
if is_iterable(removed):
|
||||
removed = list(removed)
|
||||
for match in removed:
|
||||
match.tags.extend(self.tags)
|
||||
elif removed:
|
||||
removed.tags.extend(self.tags) # pylint: disable=no-member
|
||||
if removed:
|
||||
self.append.then(matches, removed, context)
|
||||
|
||||
|
||||
class RemoveTags(Consequence): # pylint: disable=abstract-method
|
||||
"""
|
||||
Remove tags from returned matches
|
||||
"""
|
||||
def __init__(self, tags):
|
||||
self.tags = tags
|
||||
self.remove = RemoveMatch()
|
||||
self.append = AppendMatch()
|
||||
|
||||
def then(self, matches, when_response, context):
|
||||
removed = self.remove.then(matches, when_response, context)
|
||||
if is_iterable(removed):
|
||||
removed = list(removed)
|
||||
for match in removed:
|
||||
for tag in self.tags:
|
||||
if tag in match.tags:
|
||||
match.tags.remove(tag)
|
||||
elif removed:
|
||||
for tag in self.tags:
|
||||
if tag in removed.tags: # pylint: disable=no-member
|
||||
removed.tags.remove(tag) # pylint: disable=no-member
|
||||
if removed:
|
||||
self.append.then(matches, removed, context)
|
||||
|
||||
|
||||
class Rules(list):
|
||||
"""
|
||||
list of rules ready to execute.
|
||||
"""
|
||||
|
||||
def __init__(self, *rules):
|
||||
super(Rules, self).__init__()
|
||||
self.load(*rules)
|
||||
|
||||
def load(self, *rules):
|
||||
"""
|
||||
Load rules from a Rule module, class or instance
|
||||
|
||||
:param rules:
|
||||
:type rules:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
for rule in rules:
|
||||
if inspect.ismodule(rule):
|
||||
self.load_module(rule)
|
||||
elif inspect.isclass(rule):
|
||||
self.load_class(rule)
|
||||
else:
|
||||
self.append(rule)
|
||||
|
||||
def load_module(self, module):
|
||||
"""
|
||||
Load a rules module
|
||||
|
||||
:param module:
|
||||
:type module:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
# pylint: disable=unused-variable
|
||||
for name, obj in inspect.getmembers(module,
|
||||
lambda member: hasattr(member, '__module__')
|
||||
and member.__module__ == module.__name__
|
||||
and inspect.isclass(member)):
|
||||
self.load_class(obj)
|
||||
|
||||
def load_class(self, class_):
|
||||
"""
|
||||
Load a Rule class.
|
||||
|
||||
:param class_:
|
||||
:type class_:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
self.append(class_())
|
||||
|
||||
def execute_all_rules(self, matches, context):
|
||||
"""
|
||||
Execute all rules from this rules list. All when conditions with the same priority will be performed before
|
||||
calling their then actions.
|
||||
|
||||
:param matches:
|
||||
:type matches:
|
||||
:param context:
|
||||
:type context:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
ret = []
|
||||
for priority, priority_rules in groupby(sorted(self), lambda rule: rule.priority):
|
||||
sorted_rules = toposort_rules(list(priority_rules)) # Group by dependency graph toposort
|
||||
for rules_group in sorted_rules:
|
||||
rules_group = list(sorted(rules_group, key=self.index)) # Sort rules group based on initial ordering.
|
||||
group_log_level = None
|
||||
for rule in rules_group:
|
||||
if group_log_level is None or group_log_level < rule.log_level:
|
||||
group_log_level = rule.log_level
|
||||
log(group_log_level, "%s independent rule(s) at priority %s.", len(rules_group), priority)
|
||||
for rule in rules_group:
|
||||
when_response = execute_rule(rule, matches, context)
|
||||
if when_response is not None:
|
||||
ret.append((rule, when_response))
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
def execute_rule(rule, matches, context):
|
||||
"""
|
||||
Execute the given rule.
|
||||
:param rule:
|
||||
:type rule:
|
||||
:param matches:
|
||||
:type matches:
|
||||
:param context:
|
||||
:type context:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if rule.enabled(context):
|
||||
log(rule.log_level, "Checking rule condition: %s", rule)
|
||||
when_response = rule.when(matches, context)
|
||||
if when_response:
|
||||
log(rule.log_level, "Rule was triggered: %s", when_response)
|
||||
log(rule.log_level, "Running rule consequence: %s %s", rule, when_response)
|
||||
rule.then(matches, when_response, context)
|
||||
return when_response
|
||||
else:
|
||||
log(rule.log_level, "Rule is disabled: %s", rule)
|
||||
|
||||
def toposort_rules(rules):
|
||||
"""
|
||||
Sort given rules using toposort with dependency parameter.
|
||||
:param rules:
|
||||
:type rules:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
graph = {}
|
||||
class_dict = {}
|
||||
for rule in rules:
|
||||
if rule.__class__ in class_dict:
|
||||
raise ValueError("Duplicate class rules are not allowed: %s" % rule.__class__)
|
||||
class_dict[rule.__class__] = rule
|
||||
for rule in rules:
|
||||
if not is_iterable(rule.dependency) and rule.dependency:
|
||||
rule_dependencies = [rule.dependency]
|
||||
else:
|
||||
rule_dependencies = rule.dependency
|
||||
dependencies = set()
|
||||
if rule_dependencies:
|
||||
for dependency in rule_dependencies:
|
||||
if inspect.isclass(dependency):
|
||||
dependency = class_dict.get(dependency)
|
||||
if dependency:
|
||||
dependencies.add(dependency)
|
||||
graph[rule] = dependencies
|
||||
return toposort(graph)
|
||||
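Illustrative sketch (not part of the committed file): a minimal ``Rule`` built from the classes above; the 'strong'/'weak' tags and pattern strings are invented for the example.

from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch

class RemoveWeakWhenStrong(Rule):
    """Remove every match tagged 'weak' when at least one 'strong' match exists."""
    consequence = RemoveMatch

    def when(self, matches, context):
        if any(m.tagged('strong') for m in matches):
            return [m for m in matches if m.tagged('weak')]
        return []

bulk = Rebulk().string('brown', tags=['strong']).string('fox', tags=['weak'])
bulk.rules(RemoveWeakWhenStrong)
print(bulk.matches("The quick brown fox"))  # only the 'strong' match remains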
84
lib/rebulk/toposort.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2014 True Blade Systems, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Original:
|
||||
# - https://bitbucket.org/ericvsmith/toposort (1.4)
|
||||
# Modifications:
|
||||
# - merged Pull request #2 for CyclicDependency error
|
||||
# - import reduce as original name
|
||||
# - support python 2.6 dict comprehension
|
||||
|
||||
# pylint: skip-file
|
||||
from functools import reduce
|
||||
|
||||
|
||||
class CyclicDependency(ValueError):
|
||||
def __init__(self, cyclic):
|
||||
s = 'Cyclic dependencies exist among these items: {0}'.format(', '.join(repr(x) for x in cyclic.items()))
|
||||
super(CyclicDependency, self).__init__(s)
|
||||
self.cyclic = cyclic
|
||||
|
||||
|
||||
def toposort(data):
|
||||
"""
|
||||
Dependencies are expressed as a dictionary whose keys are items
|
||||
and whose values are a set of dependent items. Output is a list of
|
||||
sets in topological order. The first set consists of items with no
|
||||
dependencies, each subsequent set consists of items that depend upon
|
||||
items in the preceding sets.
|
||||
:param data:
|
||||
:type data:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
|
||||
# Special case empty input.
|
||||
if len(data) == 0:
|
||||
return
|
||||
|
||||
# Copy the input so as to leave it unmodified.
|
||||
data = data.copy()
|
||||
|
||||
# Ignore self dependencies.
|
||||
for k, v in data.items():
|
||||
v.discard(k)
|
||||
# Find all items that don't depend on anything.
|
||||
extra_items_in_deps = reduce(set.union, data.values()) - set(data.keys())
|
||||
# Add empty dependencies where needed.
|
||||
data.update(dict((item, set()) for item in extra_items_in_deps))
|
||||
while True:
|
||||
ordered = set(item for item, dep in data.items() if len(dep) == 0)
|
||||
if not ordered:
|
||||
break
|
||||
yield ordered
|
||||
data = dict((item, (dep - ordered))
|
||||
for item, dep in data.items()
|
||||
if item not in ordered)
|
||||
if len(data) != 0:
|
||||
raise CyclicDependency(data)
|
||||
|
||||
|
||||
def toposort_flatten(data, sort=True):
|
||||
"""
|
||||
Returns a single list of dependencies. For any set returned by
|
||||
toposort(), those items are sorted and appended to the result (just to
|
||||
make the results deterministic).
|
||||
:param data:
|
||||
:type data:
|
||||
:param sort:
|
||||
:type sort:
|
||||
:return: Single list of dependencies.
|
||||
:rtype: list
|
||||
"""
|
||||
|
||||
result = []
|
||||
for d in toposort(data):
|
||||
result.extend((sorted if sort else list)(d))
|
||||
return result
|
||||
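Illustrative sketch (not part of the committed file): the dependency-dict format expected by ``toposort`` above, where each value is the set of items its key depends on.

from rebulk.toposort import toposort, toposort_flatten

data = {'b': {'a'}, 'c': {'a', 'b'}, 'd': set()}
print(list(toposort(data)))    # [{'a', 'd'}, {'b'}, {'c'}]
print(toposort_flatten(data))  # ['a', 'd', 'b', 'c']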
156
lib/rebulk/utils.py
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Various utilities functions
|
||||
"""
|
||||
try:
|
||||
from collections.abc import MutableSet
|
||||
except ImportError:
|
||||
from collections import MutableSet
|
||||
|
||||
from types import GeneratorType
|
||||
|
||||
|
||||
def find_all(string, sub, start=None, end=None, ignore_case=False, **kwargs):
|
||||
"""
|
||||
Return all indices in string s where substring sub is
|
||||
found, such that sub is contained in the slice s[start:end].
|
||||
|
||||
>>> list(find_all('The quick brown fox jumps over the lazy dog', 'fox'))
|
||||
[16]
|
||||
|
||||
>>> list(find_all('The quick brown fox jumps over the lazy dog', 'mountain'))
|
||||
[]
|
||||
|
||||
>>> list(find_all('The quick brown fox jumps over the lazy dog', 'The'))
|
||||
[0]
|
||||
|
||||
>>> list(find_all(
|
||||
... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
|
||||
... 'an'))
|
||||
[44, 51, 70]
|
||||
|
||||
>>> list(find_all(
|
||||
... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
|
||||
... 'an',
|
||||
... 50,
|
||||
... 60))
|
||||
[51]
|
||||
|
||||
:param string: the input string
|
||||
:type string: str
|
||||
:param sub: the substring
|
||||
:type sub: str
|
||||
:return: all indices in the input string
|
||||
:rtype: __generator[int]
|
||||
"""
|
||||
#pylint: disable=unused-argument
|
||||
if ignore_case:
|
||||
sub = sub.lower()
|
||||
string = string.lower()
|
||||
while True:
|
||||
start = string.find(sub, start, end)
|
||||
if start == -1:
|
||||
return
|
||||
yield start
|
||||
start += len(sub)
|
||||
|
||||
|
||||
def get_first_defined(data, keys, default_value=None):
|
||||
"""
|
||||
Get the value of the first key defined in data.
|
||||
:param data:
|
||||
:type data:
|
||||
:param keys:
|
||||
:type keys:
|
||||
:param default_value:
|
||||
:type default_value:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
for key in keys:
|
||||
if key in data:
|
||||
return data[key]
|
||||
return default_value
|
||||
|
||||
|
||||
def is_iterable(obj):
|
||||
"""
|
||||
Are we being asked to look up a list of things, instead of a single thing?
|
||||
We check for the `__iter__` attribute so that this can cover types that
|
||||
don't have to be known by this module, such as NumPy arrays.
|
||||
|
||||
Strings, however, should be considered as atomic values to look up, not
|
||||
iterables.
|
||||
|
||||
We don't need to check for the Python 2 `unicode` type, because it doesn't
|
||||
have an `__iter__` attribute anyway.
|
||||
"""
|
||||
# pylint: disable=consider-using-ternary
|
||||
return hasattr(obj, '__iter__') and not isinstance(obj, str) or isinstance(obj, GeneratorType)
|
||||
|
||||
|
||||
def extend_safe(target, source):
|
||||
"""
|
||||
Extends target list with elements from the source list that don't already exist in the target list.
|
||||
:param target:
|
||||
:type target: list
|
||||
:param source:
|
||||
:type source: list
|
||||
"""
|
||||
for elt in source:
|
||||
if elt not in target:
|
||||
target.append(elt)
|
||||
|
||||
|
||||
class _Ref(object):
|
||||
"""
|
||||
Reference for IdentitySet
|
||||
"""
|
||||
def __init__(self, value):
|
||||
self.value = value
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.value is other.value
|
||||
|
||||
def __hash__(self):
|
||||
return id(self.value)
|
||||
|
||||
|
||||
class IdentitySet(MutableSet): # pragma: no cover
|
||||
"""
|
||||
Set based on identity
|
||||
"""
|
||||
def __init__(self, items=None): # pylint: disable=super-init-not-called
|
||||
if items is None:
|
||||
items = []
|
||||
self.refs = set(map(_Ref, items))
|
||||
|
||||
def __contains__(self, elem):
|
||||
return _Ref(elem) in self.refs
|
||||
|
||||
def __iter__(self):
|
||||
return (ref.value for ref in self.refs)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.refs)
|
||||
|
||||
def add(self, value):
|
||||
self.refs.add(_Ref(value))
|
||||
|
||||
def discard(self, value):
|
||||
self.refs.discard(_Ref(value))
|
||||
|
||||
def update(self, iterable):
|
||||
"""
|
||||
Update set with iterable
|
||||
:param iterable:
|
||||
:type iterable:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
for elem in iterable:
|
||||
self.add(elem)
|
||||
|
||||
def __repr__(self): # pragma: no cover
|
||||
return "%s(%s)" % (type(self).__name__, list(self))
|
||||
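Illustrative sketch (not part of the committed file): ``IdentitySet`` membership follows object identity rather than equality.

from rebulk.utils import IdentitySet

a, b = [1, 2], [1, 2]    # equal values, distinct objects
s = IdentitySet([a])
print(a in s, b in s)    # True False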
81
lib/rebulk/validators.py
Normal file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Validator functions to use in patterns.
|
||||
|
||||
All these functions take the match as their last argument, so it's possible to use functools.partial to bind the previous arguments.
|
||||
"""
|
||||
|
||||
|
||||
def chars_before(chars, match):
|
||||
"""
|
||||
Validate the match if left character is in a given sequence.
|
||||
|
||||
:param chars:
|
||||
:type chars:
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if match.start <= 0:
|
||||
return True
|
||||
return match.input_string[match.start - 1] in chars
|
||||
|
||||
|
||||
def chars_after(chars, match):
|
||||
"""
|
||||
Validate the match if right character is in a given sequence.
|
||||
|
||||
:param chars:
|
||||
:type chars:
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
if match.end >= len(match.input_string):
|
||||
return True
|
||||
return match.input_string[match.end] in chars
|
||||
|
||||
|
||||
def chars_surround(chars, match):
|
||||
"""
|
||||
Validate the match if surrounding characters are in a given sequence.
|
||||
|
||||
:param chars:
|
||||
:type chars:
|
||||
:param match:
|
||||
:type match:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
return chars_before(chars, match) and chars_after(chars, match)
|
||||
|
||||
|
||||
def validators(*chained_validators):
|
||||
"""
|
||||
Creates a validator chain from several validator functions.
|
||||
|
||||
:param chained_validators:
|
||||
:type chained_validators:
|
||||
:return:
|
||||
:rtype:
|
||||
"""
|
||||
|
||||
def validator_chain(match): # pylint:disable=missing-docstring
|
||||
for chained_validator in chained_validators:
|
||||
if not chained_validator(match):
|
||||
return False
|
||||
return True
|
||||
|
||||
return validator_chain
|
||||
|
||||
|
||||
def allways_true(match): # pylint:disable=unused-argument
|
||||
"""
|
||||
A validator which is always true
|
||||
:param match:
|
||||
:return:
|
||||
"""
|
||||
return True
|
||||
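Illustrative sketch (not part of the committed file): binding the helpers above with ``functools.partial`` and chaining them, as the module docstring suggests; the separator characters and the regex are invented for the example.

from functools import partial
from rebulk.validators import chars_before, chars_after, validators

seps = ' .-_'
validator = validators(partial(chars_before, seps), partial(chars_after, seps))
# The chained validator could then be passed to a pattern,
# e.g. Rebulk().regex(r'\d{4}', validator=validator)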