from functools import cached_property
from typing import Any, Callable
from typing import List as ListType
from typing import Optional, Union
from easytxt import sentences
from easydata.parsers.base import Base, BaseData
from easydata.parsers.data import Data
from easydata.parsers.text import Text
from easydata.parsers.url import Url
from easydata.queries.base import QuerySearchBase
from easydata.utils import email, mix
# Public API of this module: the names exported by ``from <module> import *``.
# Each entry is the name of a list parser class defined below.
__all__ = (
"List",
"TextList",
"UrlList",
"EmailSearchList",
)
class List(BaseData):
    """Parse a value into a list, sending every item through an item parser.

    The pipeline implemented by :meth:`parse_value` is: normalize the raw value
    into a list, optionally pre-filter it, gate each item through the optional
    allow/deny parsers, parse each surviving item, then post-process, filter
    and optionally post-filter the parsed items.

    Args:
        query: Query object, forwarded to ``BaseData.__init__`` as ``query``.
        parser: Parser applied to every list item. When omitted, the object
            returned by ``_default_parser_obj`` is used instead.
        unique: When true, a non-empty result is passed through
            ``mix.unique_list``.
        max_num: When set, at most this many leading items are returned.
        split_key: When set, the list is passed through
            ``mix.multiply_list_values_by_split_key`` with this key.
        allow_parser: Optional gate; an item is kept only when this parser
            returns a truthy value for it.
        deny_parser: Optional gate; an item is dropped when this parser
            returns a truthy value for it.
        preprocess_allow: Optional callable handed to
            ``mix.filter_list_by_bool_callable`` before items are parsed.
        process_allow: Optional callable handed to
            ``mix.filter_list_by_bool_callable`` after items are parsed.
        **kwargs: Forwarded unchanged to ``BaseData.__init__``.
    """

    def __init__(
        self,
        query: Optional[QuerySearchBase] = None,
        parser: Optional[Base] = None,
        unique: bool = True,
        max_num: Optional[int] = None,
        split_key: Optional[Union[ListType[str], str]] = None,
        allow_parser: Optional[Base] = None,
        deny_parser: Optional[Base] = None,
        preprocess_allow: Optional[Callable] = None,
        process_allow: Optional[Callable] = None,
        **kwargs,
    ):
        # Raw (not yet configured) parsers; the configured versions are built
        # lazily by the cached properties below.
        self.__parser = parser
        self.__parser_obj = None
        self.__allow_parser = allow_parser
        self.__deny_parser = deny_parser

        # List shaping options.
        self._split_key = split_key
        self._unique = unique
        self._max_num = max_num

        # Optional boolean callables applied before / after item parsing.
        self._preprocess_allow = preprocess_allow
        self._process_allow = process_allow

        # ``query`` is a named parameter, so it can never already be present
        # in ``kwargs``; it is passed last, as in a trailing dict assignment.
        super().__init__(**kwargs, query=query)

    @cached_property
    def _parser(self):
        """Return the validated item parser, configured with ``self.config``."""
        chosen = self.__parser or self._default_parser_obj
        mix.validate_parser(chosen)
        return chosen.init_config(self.config)

    @cached_property
    def _allow_parser(self):
        """Return the validated allow parser, configured with ``self.config``."""
        gate = self.__allow_parser
        mix.validate_parser(gate)
        return gate.init_config(self.config)

    @cached_property
    def _deny_parser(self):
        """Return the validated deny parser, configured with ``self.config``."""
        gate = self.__deny_parser
        mix.validate_parser(gate)
        return gate.init_config(self.config)

    @property
    def _default_parser_obj(self):
        """Return the fallback item parser used when ``parser`` was not given."""
        return Data()

    def parse_value(self, value: Any, data: Any) -> list:
        """Turn ``value`` into a list of parsed, filtered items.

        Args:
            value: Raw value to convert; ``None`` yields an empty list.
            data: Source data handed to every parser and filter callable.

        Returns:
            The parsed list after processing and filtering.
        """
        if value is None:
            return []

        candidates = self._preprocess_list_values(value)

        pre_filter = self._preprocess_allow
        if pre_filter:
            candidates = mix.filter_list_by_bool_callable(
                list_values=candidates,
                data=data,
                callable_param=pre_filter,
            )

        parsed_items = []
        for item in candidates:
            # Keep the item unless an allow parser exists and rejects it.
            allowed = not self.__allow_parser or self._allow_parser.parse(
                data, item, True
            )
            if not allowed:
                continue

            # Drop the item when a deny parser exists and matches it.
            denied = self.__deny_parser and self._deny_parser.parse(
                data, item, True
            )
            if denied:
                continue

            parsed_item = self._parser.parse(data, item, True)  # type: ignore
            parsed_items.append(parsed_item)

        result = self._filter_list_values(
            self._process_list_values(parsed_items)
        )

        post_filter = self._process_allow
        if not post_filter:
            return result

        return mix.filter_list_by_bool_callable(
            list_values=result,
            data=data,
            callable_param=post_filter,
        )

    def _preprocess_list_values(self, list_values: Any) -> ListType[Any]:
        """Wrap a single dict/str/int/float in a list and apply ``split_key``."""
        single_value_types = (dict, str, int, float)
        if isinstance(list_values, single_value_types):
            list_values = [list_values]

        if not self._split_key:
            return list_values

        return mix.multiply_list_values_by_split_key(
            list_values=list_values,
            split_key=self._split_key,
        )

    def _process_list_values(self, list_values: Any) -> ListType[Any]:
        """Hook for subclasses; the base implementation returns the input."""
        return list_values

    def _filter_list_values(self, list_values: ListType[str]) -> ListType[Any]:
        """Apply the ``unique`` and ``max_num`` options to ``list_values``."""
        if self._unique:
            if list_values:
                list_values = mix.unique_list(list_values)

        limit = self._max_num
        if not limit or len(list_values) < limit:
            return list_values

        return list_values[:limit]
class TextList(List):
    """List parser whose default item parser is a ``Text`` parser.

    The text options below are collected into a dict and passed as keyword
    arguments to ``Text`` when no explicit ``parser`` is supplied. The
    ``allow`` / ``deny`` families filter the parsed items through the
    ``easytxt.sentences`` helpers; the variants prefixed with ``c`` are applied
    with ``case_sensitive=True`` and take precedence over their plain
    counterparts when both are given.

    Args:
        *args: Positional arguments forwarded to ``List.__init__``.
        normalize, capitalize, title, uppercase, lowercase, replace_keys,
            remove_keys, take, skip, text_num_to_numeric, language,
            fix_spaces, escape_new_lines, new_line_replacement, add_stop:
            Forwarded under the same name to the default ``Text`` parser.
        split_text_key: Forwarded to ``Text`` as ``split_key``.
        split_text_keys: Forwarded to ``Text`` as ``split_keys``.
        allow / callow: Keys passed to ``sentences.allow_contains``.
        from_allow / from_callow: Keys passed to
            ``sentences.from_allow_contains``.
        to_allow / to_callow: Keys passed to ``sentences.to_allow_contains``.
        deny / cdeny: Keys passed to ``sentences.deny_contains``.
        multiply_keys: When set, parsed items are passed through
            ``mix.multiply_list_values`` with these keys.
        **kwargs: Keyword arguments forwarded to ``List.__init__``.
    """

    def __init__(
        self,
        *args,
        normalize: bool = True,
        capitalize: bool = False,
        title: bool = False,
        uppercase: bool = False,
        lowercase: bool = False,
        replace_keys: Optional[list] = None,
        remove_keys: Optional[list] = None,
        split_text_key: Optional[Union[str, tuple]] = None,
        split_text_keys: Optional[ListType[Union[str, tuple]]] = None,
        take: Optional[int] = None,
        skip: Optional[int] = None,
        text_num_to_numeric: bool = False,
        language: Optional[str] = None,
        fix_spaces: bool = True,
        escape_new_lines: bool = True,
        new_line_replacement: str = " ",
        add_stop: Optional[Union[bool, str]] = None,
        allow: Optional[Union[str, ListType[str]]] = None,
        callow: Optional[Union[str, ListType[str]]] = None,
        from_allow: Optional[Union[str, ListType[str]]] = None,
        from_callow: Optional[Union[str, ListType[str]]] = None,
        to_allow: Optional[Union[str, ListType[str]]] = None,
        to_callow: Optional[Union[str, ListType[str]]] = None,
        deny: Optional[Union[str, ListType[str]]] = None,
        cdeny: Optional[Union[str, ListType[str]]] = None,
        multiply_keys: Optional[Union[list, tuple]] = None,
        **kwargs,
    ):
        # Keyword arguments for the default ``Text`` item parser. Note the two
        # renamed keys: ``split_text_key(s)`` become ``split_key(s)``.
        self._text_parser_properties = {
            "normalize": normalize,
            "capitalize": capitalize,
            "title": title,
            "uppercase": uppercase,
            "lowercase": lowercase,
            "replace_keys": replace_keys,
            "remove_keys": remove_keys,
            "split_key": split_text_key,
            "split_keys": split_text_keys,
            "take": take,
            "skip": skip,
            "text_num_to_numeric": text_num_to_numeric,
            "language": language,
            "fix_spaces": fix_spaces,
            "escape_new_lines": escape_new_lines,
            "new_line_replacement": new_line_replacement,
            "add_stop": add_stop,
        }

        # Sentence-level filters applied to the parsed items.
        self._allow = allow
        self._callow = callow
        self._from_allow = from_allow
        self._from_callow = from_callow
        self._to_allow = to_allow
        self._to_callow = to_callow
        self._deny = deny
        self._cdeny = cdeny

        self._multiply_keys = multiply_keys

        super().__init__(
            *args,
            **kwargs,
        )

    @cached_property
    def _default_parser_obj(self):
        """Return a ``Text`` parser built from the collected text options."""
        return Text(**self._text_parser_properties)

    def _process_list_values(
        self,
        list_values: Any,
    ) -> ListType[Any]:
        """Pass the items through ``mix.multiply_list_values`` when configured."""
        if self._multiply_keys:
            list_values = mix.multiply_list_values(
                list_values=list_values,
                multiply_keys=self._multiply_keys,
            )

        return list_values

    def _filter_list_values(
        self,
        list_values: ListType[str],
    ) -> ListType[str]:
        """Apply the allow / from-allow / to-allow / deny filters in that order.

        For every filter family the case-sensitive (``c``-prefixed) keys win
        over the plain ones, so the keys used and the ``case_sensitive`` flag
        always originate from the same argument. The base class filtering
        (``unique`` / ``max_num``) runs last.
        """
        allow_keys = self._callow or self._allow
        if allow_keys:
            list_values = sentences.allow_contains(
                sentences=list_values,
                keys=allow_keys,
                case_sensitive=bool(self._callow),
            )

        # Case-sensitive keys first: previously the plain keys were selected
        # while ``case_sensitive`` was derived from ``from_callow``, which
        # matched the wrong key set case-sensitively when both were supplied.
        from_allow_keys = self._from_callow or self._from_allow
        if from_allow_keys:
            list_values = sentences.from_allow_contains(
                sentences=list_values,
                keys=from_allow_keys,
                case_sensitive=bool(self._from_callow),
            )

        # Same precedence rule as above for the ``to_*`` family.
        to_allow_keys = self._to_callow or self._to_allow
        if to_allow_keys:
            list_values = sentences.to_allow_contains(
                sentences=list_values,
                keys=to_allow_keys,
                case_sensitive=bool(self._to_callow),
            )

        deny_keys = self._cdeny or self._deny
        if deny_keys:
            list_values = sentences.deny_contains(
                sentences=list_values,
                keys=deny_keys,
                case_sensitive=bool(self._cdeny),
            )

        return super()._filter_list_values(list_values)
class UrlList(TextList):
    """Text list whose default item parser is a ``Url`` parser.

    Args:
        *args: Positional arguments forwarded to ``TextList.__init__``.
        from_text, remove_qs, qs, domain, protocol: Forwarded under the same
            name to the default ``Url`` parser.
        **kwargs: Keyword arguments forwarded to ``TextList.__init__``.
    """

    def __init__(
        self,
        *args,
        from_text: bool = False,
        remove_qs: Optional[Union[str, list, bool]] = None,
        qs: Optional[dict] = None,
        domain: Optional[str] = None,
        protocol: Optional[str] = None,
        **kwargs,
    ):
        # URL-specific keyword arguments for the default ``Url`` parser.
        self._url_parser_properties = dict(
            from_text=from_text,
            remove_qs=remove_qs,
            qs=qs,
            domain=domain,
            protocol=protocol,
        )

        super().__init__(*args, **kwargs)

    @cached_property
    def _default_parser_obj(self):
        """Return a ``Url`` parser built from the URL and text options."""
        url_options = self._url_parser_properties
        text_options = self._text_parser_properties
        return Url(**url_options, **text_options)
class EmailSearchList(TextList):
    """Text list that replaces every parsed item with the emails found in it."""

    def _process_list_values(
        self,
        list_values: Any,
    ) -> ListType[Any]:
        """Run the parent processing, then flatten ``email.search`` results.

        Every value yielded by ``email.search`` for each processed item is
        collected, in order, into a single flat list.
        """
        processed = super()._process_list_values(list_values)

        return [
            found_email
            for item in processed
            for found_email in email.search(item)
        ]