Character classes

See:

`Anchor`

Bases: RegexComponent

Static constants defining useful anchors.

Source code in src/pyregexbuilder/character_classes.py

class Anchor(RegexComponent):
    """
    Static constants defining useful anchors.

    Each constant is a string holding a regex pattern wrapped in forward
    slashes (for example ``/^/``).
    """

    # The `^` anchor.
    START_OF_STRING = r"/^/"
    # The `$` anchor.
    END_OF_STRING = r"/$/"
    # The `\b` word-boundary assertion.
    WORD_BOUNDARY = r"/\b/"

`Character`

Bases: RegexComponent

Static constants defining useful characters.

Source code in src/pyregexbuilder/character_classes.py

class Character(RegexComponent):
    """
    Static constants defining useful characters.

    Each constant is a string holding a single regex token wrapped in
    forward slashes (for example ``/./``).
    """

    # The `.` wildcard.
    ANY = r"/./"
    # The `\d` escape and its negation `\D`.
    DIGIT = r"/\d/"
    NOT_DIGIT = r"/\D/"
    # The `\s` escape and its negation `\S`.
    WHITESPACE = r"/\s/"
    NOT_WHITESPACE = r"/\S/"
    # The `\w` escape and its negation `\W`.
    WORD = r"/\w/"
    NOT_WORD = r"/\W/"
    # The `\X` escape. NOTE(review): the stdlib `re` module does not support
    # `\X`; presumably the backend is the third-party `regex` module -- confirm.
    GRAPHEME = r"/\X/"

`CharacterClass`

Bases: SupportsBracketExpression

Creates a general character class.

Source code in src/pyregexbuilder/character_classes.py

class CharacterClass(SupportsBracketExpression):
    """
    Creates a general character class.

    Each argument is either a regex-literal string of the form ``/[...]/``
    or another bracket-expression component. Every argument is passed through
    ``self.parse`` and the results are joined with ``||`` inside a single
    outer pair of brackets.

    Raises:
        RegexBuilderException: If any string argument is not a ``/[...]/``
            literal.
    """

    def __init__(self, *character_set: "str | SupportsBracketExpression") -> None:
        # DOTALL lets `.` span newlines, so a class literal that contains an
        # escaped newline (which is what `any_of("\n")` builds) still passes
        # validation instead of being rejected as malformed.
        if any(
            isinstance(arg, str) and not re.match(r"^/\[.*\]/$", arg, re.DOTALL)
            for arg in character_set
        ):
            raise RegexBuilderException(
                "Strings passed to `CharacterClass` must be regex literals "
                "that create character classes (e.g., '/[A-Z]/'). "
                "To create a character class that matches A, -, and Z, "
                "use `CharacterClass.any_of('A-Z')`."
            )

        char_sets = [self.parse(component) for component in character_set]

        # `||` separates the member sets inside the combined bracket expression.
        self._regex = rf"[{'||'.join(char_sets)}]"

    @staticmethod
    def any_of(character_sequence: Sequence) -> "CharacterClass":
        """
        Returns a character class that matches any of the characters in a sequence.

        The items of `character_sequence` are concatenated and escaped with
        `re.escape`, so regex metacharacters are taken literally:
        `any_of('A-Z')` matches `A`, `-`, and `Z` rather than a range.
        """
        return CharacterClass(rf"/[{re.escape(''.join(character_sequence))}]/")

`any_of(character_sequence)` `staticmethod`

Returns a character class that matches any of the characters in a sequence.

Source code in src/pyregexbuilder/character_classes.py

@staticmethod
def any_of(character_sequence: Sequence) -> "CharacterClass":
    """
    Builds a character class matching any single character from a sequence.

    The items are concatenated, escaped with `re.escape`, and wrapped in a
    `/[...]/` literal that is handed to `CharacterClass`.
    """
    escaped_members = re.escape("".join(character_sequence))
    return CharacterClass(rf"/[{escaped_members}]/")

`NamedCharacter`

Bases: RegexComponent

Creates a regex component that matches a named character.

Regex: `\N{...}`

Source code in src/pyregexbuilder/character_classes.py

class NamedCharacter(RegexComponent):
    r"""
    Regex component that matches a character referenced by its name.

    Regex: `\N{...}`
    """

    def __init__(self, name: str) -> None:
        # Doubled braces in the f-string are literal braces around `name`.
        named_escape = rf"\N{{{name}}}"
        self._regex = named_escape

`PosixClass`

Bases: SupportsBracketExpression

Creates a POSIX character class.

Regex: `[[:...:]]`

Source code in src/pyregexbuilder/character_classes.py

class PosixClass(SupportsBracketExpression):
    """
    Regex component for a POSIX character class.

    Regex: `[[:...:]]`
    """

    def __init__(self, posix_class: str) -> None:
        bracketed = rf"[[:{posix_class}:]]"
        self._regex = bracketed

    def _get_regex_complement(self) -> str:
        """
        Return `self._regex` with the `^` right after the leading `[[:` toggled.
        """

        def toggle_caret(match):
            # An empty match means no caret is present, so insert one;
            # a matched caret is removed instead.
            return "" if match.group() else "^"

        return re.sub(r"(?<=^\[\[:)(?|\^|)", toggle_caret, self._regex)

`SupportsBracketExpression`

Bases: RegexComponent, Protocol

A protocol for classes that support bracket expressions.

Source code in src/pyregexbuilder/character_classes.py

class SupportsBracketExpression(RegexComponent, Protocol):
    """
    A protocol for classes that support bracket expressions.

    Provides complementing (`inverted`) and the set operations
    `intersection`, `subtracting`, `symmetric_difference`, and `union`,
    each of which returns a new `CharacterClass`.
    """

    def _get_regex_complement(self) -> str:
        """
        Return `self._regex` with the `^` right after the leading `[` toggled:
        inserted when absent, removed when present.
        """
        # NOTE(review): `(?|...)` is branch-reset syntax, which the stdlib `re`
        # module rejects; presumably `re` is bound to the third-party `regex`
        # module in this file -- confirm against the imports.
        return re.sub(
            r"(?<=^\[)(?|\^|)", lambda m: "^" if m.group() == "" else "", self._regex
        )

    @property
    def inverted(self) -> "SupportsBracketExpression":
        """
        A class that matches any character that does NOT match this character class.
        """
        inverted_regex = self._get_regex_complement()
        # `CharacterClass` only accepts `/[...]/` literals, but a subclass may
        # override `_get_regex_complement` to return something that is not
        # bracket-delimited (`UnicodeClass` yields `\P{...}`). Wrap such a
        # complement in brackets so it is not rejected.
        if not (inverted_regex.startswith("[") and inverted_regex.endswith("]")):
            inverted_regex = f"[{inverted_regex}]"
        return CharacterClass(rf"/{inverted_regex}/")

    def intersection(
        self, other: "SupportsBracketExpression"
    ) -> "SupportsBracketExpression":
        """
        Returns a class that is the intersection of `self` and `other`.

        Built by joining both regexes with `&&` inside one bracket expression.
        """
        return CharacterClass(rf"/[{self.regex}&&{other.regex}]/")

    def subtracting(
        self, other: "SupportsBracketExpression"
    ) -> "SupportsBracketExpression":
        """
        Returns a class that is the result of subtracting `other` from `self`.

        Built by joining both regexes with `--` inside one bracket expression.
        """
        return CharacterClass(rf"/[{self.regex}--{other.regex}]/")

    def symmetric_difference(
        self, other: "SupportsBracketExpression"
    ) -> "SupportsBracketExpression":
        """
        Returns a class that is the symmetric difference of `self` and `other`.

        Built by joining both regexes with `~~` inside one bracket expression.
        """
        return CharacterClass(rf"/[{self.regex}~~{other.regex}]/")

    def union(self, other: "SupportsBracketExpression") -> "SupportsBracketExpression":
        """
        Returns a class that is the union of `self` and `other`.

        Built by joining both regexes with `||` inside one bracket expression.
        """
        return CharacterClass(rf"/[{self.regex}||{other.regex}]/")

`inverted` `property`

A class that matches any character that does NOT match this character class.

`intersection(other)`

Returns a class that is the intersection of `self` and `other`.

Source code in src/pyregexbuilder/character_classes.py

def intersection(
    self, other: "SupportsBracketExpression"
) -> "SupportsBracketExpression":
    """
    Builds the character class containing only what `self` and `other` share.

    Both regexes are joined with `&&` inside one bracket expression.
    """
    combined_literal = rf"/[{self.regex}&&{other.regex}]/"
    return CharacterClass(combined_literal)

`subtracting(other)`

Returns a class that is the result of subtracting `other` from `self`.

Source code in src/pyregexbuilder/character_classes.py

def subtracting(
    self, other: "SupportsBracketExpression"
) -> "SupportsBracketExpression":
    """
    Builds the character class of `self` with the members of `other` removed.

    Both regexes are joined with `--` inside one bracket expression.
    """
    combined_literal = rf"/[{self.regex}--{other.regex}]/"
    return CharacterClass(combined_literal)

`symmetric_difference(other)`

Returns a class that is the symmetric difference of `self` and `other`.

Source code in src/pyregexbuilder/character_classes.py

def symmetric_difference(
    self, other: "SupportsBracketExpression"
) -> "SupportsBracketExpression":
    """
    Builds the character class of members found in exactly one of `self`
    and `other`.

    Both regexes are joined with `~~` inside one bracket expression.
    """
    combined_literal = rf"/[{self.regex}~~{other.regex}]/"
    return CharacterClass(combined_literal)

`union(other)`

Returns a class that is the union of `self` and `other`.

Source code in src/pyregexbuilder/character_classes.py

def union(self, other: "SupportsBracketExpression") -> "SupportsBracketExpression":
    """
    Builds the character class containing the members of `self` or `other`.

    Both regexes are joined with `||` inside one bracket expression.
    """
    combined_literal = rf"/[{self.regex}||{other.regex}]/"
    return CharacterClass(combined_literal)

`UnicodeClass`

Bases: SupportsBracketExpression

Creates a Unicode character class.

Regex: `\p{...}`

Source code in src/pyregexbuilder/character_classes.py

class UnicodeClass(SupportsBracketExpression):
    r"""
    Creates a Unicode character class.

    Regex: `\p{...}`

    Accepted call forms:

    * `UnicodeClass(value)` or `UnicodeClass(value=value)` produces
      `\p{value}`.
    * `UnicodeClass(key=key, value=value)` produces `\p{key=value}`.

    Raises:
        RegexBuilderException: If neither call form is satisfied.
    """

    @overload
    def __init__(self, *, key: str, value: str) -> None: ...

    @overload
    def __init__(self, value: str) -> None: ...

    def __init__(self, *args, **kwargs) -> None:
        if len(args) == 1:
            # Positional form: the single argument is the property value.
            self._regex = rf"\p{{{args[0]}}}"
        elif "value" in kwargs:
            # Pull the values into locals so the f-strings below do not reuse
            # the enclosing quote character, which only parses on 3.12+.
            value = kwargs["value"]
            if "key" in kwargs:
                key = kwargs["key"]
                self._regex = rf"\p{{{key}={value}}}"
            else:
                # The `(value: str)` overload also permits `value=` by keyword.
                self._regex = rf"\p{{{value}}}"
        else:
            raise RegexBuilderException(
                "`UnicodeClass` expects either a single value "
                "(e.g., `UnicodeClass('Lu')`) or both `key` and `value` "
                "keyword arguments "
                "(e.g., `UnicodeClass(key='Script', value='Latin')`)."
            )

    def _get_regex_complement(self) -> str:
        r"""
        Return `self._regex` with the leading `\p` swapped for `\P`, or the
        reverse.
        """
        return re.sub(
            r"(?<=^\\)[pP]", lambda m: "P" if m.group() == "p" else "p", self._regex
        )

Character classes

Anchor

Character

CharacterClass

any_of(character_sequence) staticmethod

NamedCharacter

PosixClass

SupportsBracketExpression

inverted property

intersection(other)

subtracting(other)

symmetric_difference(other)

union(other)

UnicodeClass

`Anchor`

`Character`

`CharacterClass`

`any_of(character_sequence)` `staticmethod`

`NamedCharacter`

`PosixClass`

`SupportsBracketExpression`

`inverted` `property`

`intersection(other)`

`subtracting(other)`

`symmetric_difference(other)`

`union(other)`

`UnicodeClass`