aisquared.config.preprocessing.text package

Submodules

aisquared.config.preprocessing.text.Steps module

class aisquared.config.preprocessing.text.Steps.ConvertToCase(lowercase: bool = True)[source]

Bases: BaseObject

Text preprocessing object to convert inputs to all lowercase or all uppercase

Example usage:

>>> import aisquared
>>> preprocesser = aisquared.config.preprocessing.text.TextPreprocesser()
>>> preprocesser.add_step(
...     aisquared.config.preprocessing.text.ConvertToCase()
... )
property lowercase
to_dict() -> dict[source]

Get the configuration object as a dictionary

class aisquared.config.preprocessing.text.Steps.ConvertToVocabulary(vocabulary: dict, start_character: int = 1, oov_character: int = 2, max_vocab: int | None = None)[source]

Bases: BaseObject

Text preprocessing object to convert tokens to integers using a vocabulary mapping

Example usage:

>>> import aisquared
>>> preprocesser = aisquared.config.preprocessing.text.TextPreprocesser()
>>> preprocesser.add_step(
...     aisquared.config.preprocessing.text.ConvertToVocabulary(
...         {
...             'test' : 3,
...             'vocabulary' : 4
...         }
...     )
... )
property max_vocab
property oov_character
property start_character
to_dict() -> dict[source]

Get the configuration object as a dictionary

property vocabulary
class aisquared.config.preprocessing.text.Steps.PadSequences(pad_character: int = 0, length: int = 128, pad_location: str = 'post', truncate_location: str = 'post')[source]

Bases: BaseObject

Text preprocessing object to pad sequences

Example usage:

>>> import aisquared
>>> preprocesser = aisquared.config.preprocessing.text.TextPreprocesser()
>>> preprocesser.add_step(
...     aisquared.config.preprocessing.text.PadSequences()
... )
property length
property pad_character
property pad_location
to_dict() -> dict[source]

Get the configuration object as a dictionary

property truncate_location
class aisquared.config.preprocessing.text.Steps.RemoveCharacters(remove_digits: bool = True, remove_punctuation: bool = True)[source]

Bases: BaseObject

Preprocessing step to remove characters from text

Example usage:

>>> import aisquared
>>> preprocesser = aisquared.config.preprocessing.text.TextPreprocesser()
>>> preprocesser.add_step(
...     aisquared.config.preprocessing.text.RemoveCharacters()
... )
property remove_digits
property remove_punctuation
to_dict() -> dict[source]

Get the configuration object as a dictionary

class aisquared.config.preprocessing.text.Steps.Tokenize(split_sentences: bool = False, split_words: bool = True, token_pattern: str = '\x08\\w\\w+\x08')[source]

Bases: BaseObject

Preprocessing step to tokenize text

Example usage:

>>> import aisquared
>>> preprocesser = aisquared.config.preprocessing.text.TextPreprocesser()
>>> preprocesser.add_step(
...     aisquared.config.preprocessing.text.Tokenize()
... )
property split_sentences
property split_words
to_dict() -> dict[source]

Get the configuration object as a dictionary

property token_pattern
class aisquared.config.preprocessing.text.Steps.Trim[source]

Bases: BaseObject

Text preprocessing class to trim whitespace from text

Example usage:

>>> import aisquared
>>> preprocesser = aisquared.config.preprocessing.text.TextPreprocesser()
>>> preprocesser.add_step(
...     aisquared.config.preprocessing.text.Trim()
... )
to_dict() -> dict[source]

Get the configuration object as a dictionary

aisquared.config.preprocessing.text.TextPreprocessing module

class aisquared.config.preprocessing.text.TextPreprocessing.TextPreprocesser(steps: list | None = None)[source]

Bases: BaseObject

Preprocesser object for natural language

Example usage:

>>> import aisquared
>>> preprocesser = aisquared.config.preprocessing.text.TextPreprocesser()
>>> preprocesser.add_step(
...     aisquared.config.preprocessing.text.Tokenize()
... )
add_step(step)[source]

Add a step to the preprocesser object

property step_dict
to_dict() -> dict[source]

Get the configuration object as a dictionary

Module contents

The aisquared.config.preprocessing.text subpackage contains objects for preprocessing text data.