Skip to content

Character classes

See:

Anchor

Bases: RegexComponent

Static constants defining useful anchors.

Source code in src/pyregexbuilder/character_classes.py
14
15
16
17
18
19
20
21
class Anchor(RegexComponent):
    """
    Static constants defining useful anchors.

    Each constant is a slash-delimited regex literal string (``/.../``)
    wrapping one zero-width assertion token.
    """

    # NOTE(review): what `^` and `$` match exactly depends on the flags the
    # final pattern is compiled with, which is not visible in this class.

    # `^` anchor.
    START_OF_STRING = r"/^/"
    # `$` anchor.
    END_OF_STRING = r"/$/"
    # `\b` word-boundary assertion.
    WORD_BOUNDARY = r"/\b/"

Character

Bases: RegexComponent

Static constants defining useful characters.

Source code in src/pyregexbuilder/character_classes.py
24
25
26
27
28
29
30
31
32
33
34
35
36
class Character(RegexComponent):
    """
    Static constants defining useful characters.

    Each constant is a slash-delimited regex literal string (``/.../``)
    wrapping one regex token. The upper-case escapes are the complements
    of their lower-case counterparts.
    """

    # `.` wildcard (whether it also matches newlines depends on flags set
    # elsewhere).
    ANY = r"/./"
    # `\d` / `\D`: digit / non-digit.
    DIGIT = r"/\d/"
    NOT_DIGIT = r"/\D/"
    # `\s` / `\S`: whitespace / non-whitespace.
    WHITESPACE = r"/\s/"
    NOT_WHITESPACE = r"/\S/"
    # `\w` / `\W`: word character / non-word character.
    WORD = r"/\w/"
    NOT_WORD = r"/\W/"
    # `\X`: grapheme cluster.
    # NOTE(review): `\X` is not understood by the stdlib `re` engine —
    # presumably patterns are compiled with the third-party `regex`
    # module; confirm.
    GRAPHEME = r"/\X/"

CharacterClass

Bases: SupportsBracketExpression

Creates a general character class.

Source code in src/pyregexbuilder/character_classes.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
class CharacterClass(SupportsBracketExpression):
    """
    Creates a general character class.

    Every argument is either a regex-literal string wrapping a bracket
    expression (e.g. ``'/[A-Z]/'``) or another bracket-expression component.
    Each argument is passed through ``self.parse`` and the results are
    joined with ``||`` inside one enclosing pair of brackets.

    Raises:
        RegexBuilderException: If any string argument is not exactly of
            the form ``/[...]/``.
    """

    def __init__(self, *character_set: "str | SupportsBracketExpression") -> None:
        # `fullmatch` + DOTALL is deliberate:
        #   * DOTALL lets the bracket body contain a newline, which is what
        #     `any_of` produces for a sequence containing a newline
        #     (escaping keeps the newline character itself in the literal);
        #   * `fullmatch` rejects a stray trailing newline after the closing
        #     slash, which a `$`-terminated `match` would let through.
        has_invalid_literal = any(
            isinstance(arg, str)
            and re.fullmatch(r"/\[.*\]/", arg, flags=re.DOTALL) is None
            for arg in character_set
        )
        if has_invalid_literal:
            raise RegexBuilderException(
                "Strings passed to `CharacterClass` must be regex literals "
                "that create character classes (e.g., '/[A-Z]/'). "
                "To create a character class that matches A, -, and Z, "
                "use `CharacterClass.any_of('A-Z')`."
            )

        # `parse` is not defined in this class; it is presumably inherited
        # from the component base class — verify there what it returns for
        # literals versus components.
        char_sets = [self.parse(component) for component in character_set]

        self._regex = rf"[{'||'.join(char_sets)}]"

    @staticmethod
    def any_of(character_sequence: Sequence[str]) -> "CharacterClass":
        """
        Returns a character class that matches any of the characters in a sequence.

        The items are concatenated and escaped before being placed inside
        the brackets, so metacharacters such as ``-`` are matched literally:
        ``CharacterClass.any_of('A-Z')`` matches ``A``, ``-`` and ``Z``.
        """
        return CharacterClass(rf"/[{re.escape(''.join(character_sequence))}]/")

any_of(character_sequence) staticmethod

Returns a character class that matches any of the characters in a sequence.

Source code in src/pyregexbuilder/character_classes.py
107
108
109
110
111
112
@staticmethod
def any_of(character_sequence: Sequence) -> "CharacterClass":
    """
    Builds a character class matching any one character of a sequence.

    The items of the sequence are concatenated and escaped, so every
    character is matched literally inside the resulting brackets.
    """
    joined = "".join(character_sequence)
    escaped = re.escape(joined)
    return CharacterClass(rf"/[{escaped}]/")

NamedCharacter

Bases: RegexComponent

Creates a regex component that matches a named character.

Regex: \N{...}

Source code in src/pyregexbuilder/character_classes.py
158
159
160
161
162
163
164
165
166
class NamedCharacter(RegexComponent):
    r"""
    Regex component matching a single character identified by its name.

    Regex: `\N{...}`
    """

    def __init__(self, name: str) -> None:
        """Stores the `name` wrapped in a named-character escape."""
        # Doubled braces are literal braces in the f-string; only the inner
        # pair interpolates `name`.
        named_escape = rf"\N{{{name}}}"
        self._regex = named_escape

PosixClass

Bases: SupportsBracketExpression

Creates a POSIX character class.

Regex: [[:...:]]

Source code in src/pyregexbuilder/character_classes.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
class PosixClass(SupportsBracketExpression):
    """
    Regex component for a POSIX character class.

    Regex: `[[:...:]]`
    """

    def __init__(self, posix_class: str) -> None:
        """Stores `posix_class` wrapped in the POSIX bracket syntax."""
        wrapped = rf"[[:{posix_class}:]]"
        self._regex = wrapped

    def _get_regex_complement(self) -> str:
        """Returns the stored regex with the `^` after the leading `[[:` toggled."""

        def toggle_caret(match) -> str:
            # A non-empty match is an existing caret, which is removed; an
            # empty match means there is none yet, so one is inserted.
            return "" if match.group() else "^"

        # NOTE(review): `(?|...)` is branch-reset syntax, which the stdlib
        # `re` engine rejects — presumably `re` is bound to the third-party
        # `regex` module in this file; confirm.
        return re.sub(r"(?<=^\[\[:)(?|\^|)", toggle_caret, self._regex)

SupportsBracketExpression

Bases: RegexComponent, Protocol

A protocol for classes that support bracket expressions.

Source code in src/pyregexbuilder/character_classes.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
class SupportsBracketExpression(RegexComponent, Protocol):
    """
    A protocol for classes that support bracket expressions.

    Provides complementing (`inverted`) and the binary set operations
    `intersection`, `subtracting`, `symmetric_difference` and `union`.
    Every operation returns a new `CharacterClass` built from a
    slash-delimited regex literal.
    """

    def _get_regex_complement(self) -> str:
        """
        Returns the stored regex with the `^` after its leading `[` toggled.

        An existing caret is removed; otherwise one is inserted. Subclasses
        whose syntax is not a plain bracket expression override this.
        """
        # NOTE(review): `(?|...)` is branch-reset syntax, which the stdlib
        # `re` engine rejects — presumably `re` is bound to the third-party
        # `regex` module in this file; confirm.
        return re.sub(
            r"(?<=^\[)(?|\^|)", lambda m: "^" if m.group() == "" else "", self._regex
        )

    @property
    def inverted(self) -> "SupportsBracketExpression":
        """
        A class that matches any character that does NOT match this character class.
        """
        inverted_regex = self._get_regex_complement()
        # `CharacterClass` only accepts literals shaped like `/[...]/`. An
        # overriding `_get_regex_complement` may return something that is not
        # itself bracketed (e.g. a `\P{...}` property escape), which would be
        # rejected, so such a complement is wrapped in brackets first.
        if not (inverted_regex.startswith("[") and inverted_regex.endswith("]")):
            inverted_regex = f"[{inverted_regex}]"
        return CharacterClass(rf"/{inverted_regex}/")

    def intersection(
        self, other: "SupportsBracketExpression"
    ) -> "SupportsBracketExpression":
        """
        Returns a class that is the intersection of `self` and `other`.

        Built by joining both regexes with `&&` inside one bracket pair.
        """
        return CharacterClass(rf"/[{self.regex}&&{other.regex}]/")

    def subtracting(
        self, other: "SupportsBracketExpression"
    ) -> "SupportsBracketExpression":
        """
        Returns a class that is the result of subtracting `other` from `self`.

        Built by joining both regexes with `--` inside one bracket pair.
        """
        return CharacterClass(rf"/[{self.regex}--{other.regex}]/")

    def symmetric_difference(
        self, other: "SupportsBracketExpression"
    ) -> "SupportsBracketExpression":
        """
        Returns a class that is the symmetric difference of `self` and `other`.

        Built by joining both regexes with `~~` inside one bracket pair.
        """
        return CharacterClass(rf"/[{self.regex}~~{other.regex}]/")

    def union(self, other: "SupportsBracketExpression") -> "SupportsBracketExpression":
        """
        Returns a class that is the union of `self` and `other`.

        Built by joining both regexes with `||` inside one bracket pair.
        """
        return CharacterClass(rf"/[{self.regex}||{other.regex}]/")

inverted property

A class that matches any character that does NOT match this character class.

intersection(other)

Returns a class that is the intersection of self and other.

Source code in src/pyregexbuilder/character_classes.py
57
58
59
60
61
62
63
def intersection(
    self, other: "SupportsBracketExpression"
) -> "SupportsBracketExpression":
    """
    Builds the class of characters matched by both `self` and `other`.

    The two regexes are joined with `&&` inside one bracket pair.
    """
    left, right = self.regex, other.regex
    literal = rf"/[{left}&&{right}]/"
    return CharacterClass(literal)

subtracting(other)

Returns a class that is the result of subtracting other from self.

Source code in src/pyregexbuilder/character_classes.py
65
66
67
68
69
70
71
def subtracting(
    self, other: "SupportsBracketExpression"
) -> "SupportsBracketExpression":
    """
    Builds the class of characters matched by `self` but not by `other`.

    The two regexes are joined with `--` inside one bracket pair.
    """
    left, right = self.regex, other.regex
    literal = rf"/[{left}--{right}]/"
    return CharacterClass(literal)

symmetric_difference(other)

Returns a class that is the symmetric difference of self and other.

Source code in src/pyregexbuilder/character_classes.py
73
74
75
76
77
78
79
def symmetric_difference(
    self, other: "SupportsBracketExpression"
) -> "SupportsBracketExpression":
    """
    Builds the class of characters matched by exactly one of `self` and `other`.

    The two regexes are joined with `~~` inside one bracket pair.
    """
    left, right = self.regex, other.regex
    literal = rf"/[{left}~~{right}]/"
    return CharacterClass(literal)

union(other)

Returns a class that is the union of self and other.

Source code in src/pyregexbuilder/character_classes.py
81
82
83
84
85
def union(self, other: "SupportsBracketExpression") -> "SupportsBracketExpression":
    """
    Builds the class of characters matched by `self`, by `other`, or by both.

    The two regexes are joined with `||` inside one bracket pair.
    """
    left, right = self.regex, other.regex
    literal = rf"/[{left}||{right}]/"
    return CharacterClass(literal)

UnicodeClass

Bases: SupportsBracketExpression

Creates a Unicode character class.

Regex: \p{...}

Source code in src/pyregexbuilder/character_classes.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
class UnicodeClass(SupportsBracketExpression):
    r"""
    Creates a Unicode character class.

    Regex: `\p{...}`

    Two call forms are supported:

    * ``UnicodeClass(value)`` produces ``\p{value}``.
    * ``UnicodeClass(key=..., value=...)`` produces ``\p{key=value}``.

    A single positional argument takes precedence: any keyword arguments
    passed alongside it are ignored.

    Raises:
        RegexBuilderException: If neither call form is satisfied.
    """

    @overload
    def __init__(self, *, key: str, value: str) -> None: ...

    @overload
    def __init__(self, value: str) -> None: ...

    def __init__(self, *args, **kwargs) -> None:
        if len(args) == 1:
            self._regex = rf"\p{{{args[0]}}}"
        elif "key" in kwargs and "value" in kwargs:
            # Bound to locals so the f-string needs no nested quotes.
            key, value = kwargs["key"], kwargs["value"]
            self._regex = rf"\p{{{key}={value}}}"
        else:
            raise RegexBuilderException(
                "`UnicodeClass` takes either a single positional property value "
                "(e.g., `UnicodeClass('L')`) or both the `key` and `value` "
                "keyword arguments "
                "(e.g., `UnicodeClass(key='Script', value='Latin')`)."
            )

    def _get_regex_complement(self) -> str:
        r"""
        Returns the stored regex with the case of its leading escape flipped.

        `\p{...}` becomes `\P{...}` and vice versa; only the letter that
        directly follows the backslash at the start of the string is touched.
        """
        return re.sub(
            r"(?<=^\\)[pP]", lambda m: "P" if m.group() == "p" else "p", self._regex
        )