from typing import Any, List, Optional, Union
from easytxt import parse_string, text
from pyquery import PyQuery
from easydata.parsers.base import BaseData
# Names exported by ``from <this module> import *``.
__all__ = ("Text", "Str")
class Text(BaseData):
    """Parser that turns an extracted value into a processed text string.

    The raw value is first obtained from ``BaseData.parse_value``. A ``list``
    or ``tuple`` result is collapsed into a single string (``PyQuery`` objects
    are explicitly excluded from that step), and the result is then handed to
    ``easytxt.parse_string`` together with the options given here.

    Args:
        *args: Positional arguments passed on to ``BaseData.__init__``.
        normalize, capitalize, title, uppercase, lowercase, replace_keys,
        remove_keys, split_key, split_keys, take, skip, text_num_to_numeric,
        fix_spaces, escape_new_lines, new_line_replacement, add_stop:
            Stored and forwarded unchanged, under the same keyword names, to
            ``easytxt.parse_string``; see that function for their semantics.
        language: Forwarded to ``parse_string`` as ``language``. When falsy,
            ``self.config["ED_LANGUAGE"]`` is used instead.
        separator: String used to join the items of a list/tuple value when
            ``index`` is ``None``.
        index: When not ``None``, the single item picked from a list/tuple
            value instead of joining all items.
        strip: When true, a ``str`` value is stripped of surrounding
            whitespace before it is passed to ``parse_string``.
        **kwargs: Keyword arguments passed on to ``BaseData.__init__``.
    """

    def __init__(
        self,
        *args,
        normalize: bool = True,
        capitalize: bool = False,
        title: bool = False,
        uppercase: bool = False,
        lowercase: bool = False,
        replace_keys: Optional[list] = None,
        remove_keys: Optional[list] = None,
        split_key: Optional[Union[str, tuple]] = None,
        split_keys: Optional[List[Union[str, tuple]]] = None,
        take: Optional[int] = None,
        skip: Optional[int] = None,
        text_num_to_numeric: bool = False,
        language: Optional[str] = None,
        fix_spaces: bool = True,
        escape_new_lines: bool = True,
        new_line_replacement: str = " ",
        add_stop: Optional[Union[bool, str]] = None,
        separator: str = " ",
        index: Optional[int] = None,
        strip: bool = False,
        **kwargs,
    ):
        # Options forwarded verbatim to ``parse_string`` in ``parse_value``.
        self._normalize = normalize
        self._capitalize = capitalize
        self._title = title
        self._uppercase = uppercase
        self._lowercase = lowercase
        self._replace_keys = replace_keys
        self._remove_keys = remove_keys
        self._split_key = split_key
        self._split_keys = split_keys
        self._take = take
        self._skip = skip
        self._text_num_to_numeric = text_num_to_numeric
        self._fix_spaces = fix_spaces
        self._escape_new_lines = escape_new_lines
        self._new_line_replacement = new_line_replacement
        self._add_stop = add_stop

        # Options handled locally in ``parse_value``.
        self._separator = separator
        self._index = index
        self._strip = strip

        # Kept under a name-mangled attribute; read it through ``_language``
        # so the config fallback is applied.
        self.__language = language

        super().__init__(
            *args,
            **kwargs,
        )

    @property
    def _language(self):
        """Return the explicit language, or ``config["ED_LANGUAGE"]`` if unset."""
        return self.__language or self.config["ED_LANGUAGE"]

    def parse_value(
        self,
        value: Any,
        data: Any,
    ):
        """Return the processed text for ``value``, or ``None`` if it is empty.

        Args:
            value: Raw value, passed to ``BaseData.parse_value`` first.
            data: Source data, passed to ``BaseData.parse_value`` unchanged.

        Returns:
            The result of ``easytxt.parse_string`` when it is truthy,
            otherwise ``None``. ``None`` is also returned when the base
            parser yields ``None`` or a list/tuple with no non-``None`` items.

        Raises:
            IndexError: If ``index`` was given and is out of range for the
                list of non-``None`` items.
        """
        value = super().parse_value(
            value=value,
            data=data,
        )

        # PyQuery results are deliberately not treated as plain sequences.
        if not isinstance(value, PyQuery) and isinstance(value, (list, tuple)):
            value = [text.to_str(v) for v in value if v is not None]

            if not value:
                return None

            if self._index is not None:
                value = value[self._index]
            else:
                value = self._separator.join(value)

        if value is None:
            return value

        if isinstance(value, str) and self._strip:
            value = value.strip()

        value = parse_string(
            raw_text=value,
            normalize=self._normalize,
            title=self._title,
            capitalize=self._capitalize,
            uppercase=self._uppercase,
            lowercase=self._lowercase,
            replace_keys=self._replace_keys,
            remove_keys=self._remove_keys,
            split_key=self._split_key,
            split_keys=self._split_keys,
            take=self._take,
            skip=self._skip,
            text_num_to_numeric=self._text_num_to_numeric,
            language=self._language,
            fix_spaces=self._fix_spaces,
            escape_new_lines=self._escape_new_lines,
            new_line_replacement=self._new_line_replacement,
            add_stop=self._add_stop,
        )

        # Collapse any falsy result (e.g. an empty string) to ``None``.
        return value if value else None
class Str(Text):
    """``Text`` parser whose ``normalize`` and ``escape_new_lines`` default to ``False``.

    All other options are accepted through ``*args`` / ``**kwargs`` and
    passed on to ``Text.__init__`` unchanged.

    Args:
        *args: Positional arguments passed on to ``Text.__init__``.
        normalize: Passed to ``Text.__init__`` as ``normalize``.
        escape_new_lines: Passed to ``Text.__init__`` as ``escape_new_lines``.
        **kwargs: Remaining keyword arguments passed on to ``Text.__init__``.
    """

    def __init__(
        self,
        *args,
        normalize: bool = False,
        escape_new_lines: bool = False,
        **kwargs,
    ):
        super().__init__(
            *args,
            normalize=normalize,
            escape_new_lines=escape_new_lines,
            **kwargs,
        )