Skip to content

Common

ChoiceOf

Bases: RegexComponent

A regex component that matches any of the supplied regex components.

Regex: |

Source code in src/pyregexbuilder/common.py
129
130
131
132
133
134
135
136
137
138
139
class ChoiceOf(RegexComponent):
    """
    An alternation: matches whichever of the supplied regex components matches.

    Regex: `|`
    """

    def __init__(self, *components: str | RegexComponent) -> None:
        # Each component is parsed on its own, then the alternatives are
        # joined with `|` inside a non-capturing group.
        alternatives = [self.parse(component) for component in components]
        self._regex = "(?:" + "|".join(alternatives) + ")"

Regex

Bases: RegexComponent

The entry point for building a regular expression.

Source code in src/pyregexbuilder/common.py
117
118
119
120
121
122
123
124
125
126
class Regex(RegexComponent):
    """
    The starting point for assembling a regular expression from components.
    """

    def __init__(self, *components: str | RegexComponent) -> None:
        # All components are parsed together into a single pattern string.
        pattern = self.parse(*components)
        self._regex = pattern

    def compile(self, *args, **kwargs):
        """
        Compile the built pattern.

        Positional and keyword arguments are forwarded unchanged to
        `re.compile` after the pattern string.
        """
        pattern = self._regex
        return re.compile(pattern, *args, **kwargs)

RegexBuilderException

Bases: Exception

A general exception thrown when problems arise during the building of a regular expression.

Source code in src/pyregexbuilder/common.py
 5
 6
 7
 8
 9
10
class RegexBuilderException(Exception):
    """
    Generic error raised when something goes wrong while a regular expression is being built.
    """

RegexComponent

Bases: Protocol

A protocol for classes that can be used as components in the regex builder.

Source code in src/pyregexbuilder/common.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
class RegexComponent(Protocol):
    """
    A protocol for classes that can be used as components in the regex builder.

    Implementations store their pattern text in `_regex`; the methods below
    expose it through `regex` or wrap it in an inline flag group.
    """

    # Pattern text of this component; assigned by the implementing class.
    _regex: str

    @property
    def regex(self) -> str:
        """
        The string corresponding to the regular expression returned by a RegexComponent.
        """
        return self._regex

    def parse(self, *components: "str | RegexComponent") -> str:
        """
        Convert the given components to pattern text and concatenate them.

        A string delimited by slashes (`/.../`) is taken as raw regex with the
        two delimiters removed; any other string is escaped with `re.escape`.
        Non-string components contribute their `regex` property.
        """
        patterns = []

        for component in components:
            if not isinstance(component, str):
                patterns.append(component.regex)
            elif (
                len(component) >= 2
                and component.startswith("/")
                and component.endswith("/")
            ):
                # Plain string checks instead of a `.`-based regex match, so
                # that raw patterns containing newlines are still recognised
                # and a trailing newline is never mistaken for the delimiter.
                patterns.append(component[1:-1])
            else:
                patterns.append(re.escape(component))

        return "".join(patterns)

    def with_flags(self, flags: RegexFlagsDict) -> "Regex":
        """
        Returns a new `Regex` wrapping this component's pattern in a scoped flag group.

        Flags mapped to a truthy value are switched on; flags mapped to a
        falsy value are switched off (emitted after a `-`). Raises `KeyError`
        for a flag name that is not in the shorthand table.
        """
        flags_shorthand = {
            "ASCII": "a",
            "FULLCASE": "f",
            "IGNORECASE": "i",
            "LOCALE": "L",
            "MULTILINE": "m",
            "DOTALL": "s",
            "UNICODE": "u",
            "VERBOSE": "x",
            "WORD": "w",
        }

        # Lists built in dict order keep the generated pattern deterministic;
        # iterating a set of strings can change order between interpreter runs.
        flags_to_set = [
            flags_shorthand[name] for name, enabled in flags.items() if enabled
        ]
        flags_to_remove = [
            flags_shorthand[name] for name, enabled in flags.items() if not enabled
        ]

        # Computed outside the f-string: reusing the enclosing quote character
        # inside a replacement field is a syntax error before Python 3.12.
        removal = "-" + "".join(flags_to_remove) if flags_to_remove else ""

        return Regex(rf"/(?{''.join(flags_to_set)}{removal}:{self._regex})/")

    def with_global_flags(self, flags: RegexGlobalFlagsDict) -> "Regex":
        """
        Returns a new `Regex` with the corresponding global flags prepended to this pattern.

        Every key present in `flags` is emitted. When `flags` is empty the
        pattern is returned without a flag group. Raises `KeyError` for a flag
        name that is not in the shorthand table.
        """
        flags_shorthand = {
            "BESTMATCH": "b",
            "ENHANCEMATCH": "e",
            "POSIX": "p",
            "REVERSE": "r",
            "VERSION0": "V0",
            "VERSION1": "V1",
        }

        letters = "".join(flags_shorthand[flag] for flag in flags)
        if not letters:
            # An empty group `(?)` is not valid pattern syntax, so omit it.
            return Regex(rf"/{self._regex}/")

        return Regex(rf"/(?{letters}){self._regex}/")

regex property

The string corresponding to the regular expression returned by a RegexComponent.

with_flags(flags)

Returns a copy of the Regex object with the corresponding scoped flags set.

Source code in src/pyregexbuilder/common.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def with_flags(self, flags: RegexFlagsDict) -> "Regex":
    """
    Returns a new `Regex` wrapping this component's pattern in a scoped flag group.

    Flags mapped to a truthy value are switched on; flags mapped to a
    falsy value are switched off (emitted after a `-`). Raises `KeyError`
    for a flag name that is not in the shorthand table.
    """
    flags_shorthand = {
        "ASCII": "a",
        "FULLCASE": "f",
        "IGNORECASE": "i",
        "LOCALE": "L",
        "MULTILINE": "m",
        "DOTALL": "s",
        "UNICODE": "u",
        "VERBOSE": "x",
        "WORD": "w",
    }

    # Lists built in dict order keep the generated pattern deterministic;
    # iterating a set of strings can change order between interpreter runs.
    flags_to_set = [
        flags_shorthand[name] for name, enabled in flags.items() if enabled
    ]
    flags_to_remove = [
        flags_shorthand[name] for name, enabled in flags.items() if not enabled
    ]

    # Computed outside the f-string: reusing the enclosing quote character
    # inside a replacement field is a syntax error before Python 3.12.
    removal = "-" + "".join(flags_to_remove) if flags_to_remove else ""

    return Regex(rf"/(?{''.join(flags_to_set)}{removal}:{self._regex})/")

with_global_flags(flags)

Returns a copy of the Regex object with the corresponding global flags set.

Source code in src/pyregexbuilder/common.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def with_global_flags(self, flags: RegexGlobalFlagsDict) -> "Regex":
    """
    Returns a new `Regex` with the corresponding global flags prepended to this pattern.

    Every key present in `flags` is emitted. When `flags` is empty the
    pattern is returned without a flag group. Raises `KeyError` for a flag
    name that is not in the shorthand table.
    """
    flags_shorthand = {
        "BESTMATCH": "b",
        "ENHANCEMATCH": "e",
        "POSIX": "p",
        "REVERSE": "r",
        "VERSION0": "V0",
        "VERSION1": "V1",
    }

    letters = "".join(flags_shorthand[flag] for flag in flags)
    if not letters:
        # An empty group `(?)` is not valid pattern syntax, so omit it.
        return Regex(rf"/{self._regex}/")

    return Regex(rf"/(?{letters}){self._regex}/")

RegexFlagsDict

Bases: TypedDict

A mapping of scoped flags that can apply to only part of a regular expression.

Source code in src/pyregexbuilder/common.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class RegexFlagsDict(TypedDict, total=False):
    """
    A mapping of scoped flags that can apply to only part of a regular expression.

    All keys are optional (`total=False`). `with_flags` switches a flag on
    when its value is truthy and off when it is falsy.
    """

    # Typed as Literal[True]: these flags can only be switched on.
    # The remaining `bool` flags may be switched on or off.
    ASCII: Literal[True]
    FULLCASE: bool
    IGNORECASE: bool
    LOCALE: Literal[True]
    MULTILINE: bool
    DOTALL: bool
    UNICODE: Literal[True]
    VERBOSE: bool
    WORD: bool

RegexGlobalFlagsDict

Bases: TypedDict

A mapping of flags that apply to the entire regular expression.

Source code in src/pyregexbuilder/common.py
29
30
31
32
33
34
35
36
37
38
39
class RegexGlobalFlagsDict(TypedDict, total=False):
    """
    A mapping of flags that apply to the entire regular expression.

    All keys are optional (`total=False`) and every value is typed as
    `Literal[True]`, so a flag is enabled by including its key.
    `with_global_flags` emits one shorthand per key present in the mapping.
    """

    BESTMATCH: Literal[True]
    ENHANCEMATCH: Literal[True]
    POSIX: Literal[True]
    REVERSE: Literal[True]
    VERSION0: Literal[True]
    VERSION1: Literal[True]
    VERSION1: Literal[True]