This commit is contained in:
marco
2020-05-01 18:03:37 +02:00
parent 1152befcf1
commit e60525fee1
95 changed files with 306 additions and 21444 deletions
-34
View File
@@ -1,34 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Words utils
"""
from collections import namedtuple
from . import seps
_Word = namedtuple('_Word', ['span', 'value'])
def iter_words(string):
"""
Iterate on all words in a string
:param string:
:type string:
:return:
:rtype: iterable[str]
"""
i = 0
last_sep_index = -1
inside_word = False
for char in string:
if ord(char) < 128 and char in seps: # Make sure we don't exclude unicode characters.
if inside_word:
yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
inside_word = False
last_sep_index = i
else:
inside_word = True
i += 1
if inside_word:
yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])