#tek2023
"A regular expression... is a sequence of characters that specifies a match pattern in text." Wikipedia
PCRE (`preg`) is generally the one you want. In particular, check out its Pattern Syntax manual section.
POSIX (`ereg`) was deprecated in PHP 5.3 and removed in PHP 7.
`preg_match()` finds the first match.
`preg_match_all()` finds all matches.
Both take a `$pattern`, a `$subject`, and a `$matches` array passed by reference to store found matches.
Return value: 0 or 1 for `preg_match()`, 0+ for `preg_match_all()`.
See also `preg_grep()`.
$present = (strpos($string, 'foo') !== false);
$present = (preg_match('/foo/', $string) === 1);
Use string functions (e.g. `strpos`) for static substrings, ctype functions for common simple patterns.
In `/foo/`, `/` is the pattern delimiter. More on that later.
$present = (strpos($string, 'foo') === 0);
// or in PHP 8
$present = str_starts_with($string, 'foo');
$present = (preg_match('/^foo/', $string) === 1);
A caret (`^`) before a pattern denotes the start of `$string`.
$present = (substr($string, -1 * strlen('foo')) === 'foo');
// or in PHP 8
$present = str_ends_with($string, 'foo');
$present = (preg_match('/foo$/', $string) === 1);
A dollar sign (`$`) after a pattern denotes the end of `$string`.
$present = ($string === 'foo');
$present = (preg_match('/^foo$/', $string) === 1);
`^` and `$` together mean the pattern must match the entirety of `$string`.
$present = (strpos($string, 'foo') !== false
|| strpos($string, 'bar') !== false
|| strpos($string, 'baz') !== false);
$present = (preg_match('/foo|bar|baz/', $string) === 1);
A pipe (`|`) can be used to delimit multiple possible patterns to match.
$result = preg_match('/^foo|bar/', 'abar'); // 1
$result = preg_match('/^foo|^bar/', 'abar'); // 0
$present = (preg_match('/a{0,1}/', $string) === 1);
$present = (preg_match('/a?/', $string) === 1);
$present = (preg_match('/a{0,}/', $string) === 1);
$present = (preg_match('/a*/', $string) === 1);
$present = (preg_match('/a{1,}/', $string) === 1);
$present = (preg_match('/a+/', $string) === 1);
$present = (preg_match('/a{2}/', $string) === 1);
// a followed by 1+ instances of b
$present = (preg_match('/ab+/', $string) === 1);
// 1+ instances of ab (ab, abab, ababab, etc.)
$present = (preg_match('/(ab)+/', $string) === 1);
// foo or foobar
$present = (preg_match('/foo(bar)?/', $string) === 1);
// ab or ac
$present = (preg_match('/a(b|c)/', $string) === 1);
// ab, ac, abb, abc, acb, acc, etc.
$present = (preg_match('/a(b|c)+/', $string) === 1);
$present = preg_match(
'/^(?P<area>[0-9]{3})'
. '-(?P<prefix>[0-9]{3})'
. '-(?P<line>[0-9]{4})$/',
'123-456-7890',
$match
);
print_r($match);
Array
(
[0] => 123-456-7890
[area] => 123
[1] => 123
[prefix] => 456
[2] => 456
[line] => 7890
[3] => 7890
)
if (preg_match('/foo(bar)?(baz)?/',
'foo', $match) === 1) {
print_r($match);
}
Array
(
[0] => foo
)
if (preg_match('/foo(bar)?(baz)?/',
'foobar', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobar
[1] => bar
)
if (preg_match('/foo(bar)?(baz)?/',
'foobarbaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobarbaz
[1] => bar
[2] => baz
)
if (preg_match('/foo(bar)?(baz)?/',
'foobaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobaz
[1] =>
[2] => baz
)
if (preg_match('/foo(ba(r|z))?/',
'foobar', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobar
[1] => bar
[2] => r
)
if (preg_match('/foo(?:bar)?(baz)?/',
'foobarbaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobarbaz
[1] => baz
)
Use `(?:` to denote the start of non-captured subpatterns.
Three ways to match a single character from a range of possible characters:
The dot (`.`) metacharacter
if (preg_match('/.+/', 'foobarbaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobarbaz
)
`.` matches any character except a line feed (`"\n"`).
Sequence | Description | Inverse |
---|---|---|
\d | Digit, 0 through 9. | \D |
\h | Horizontal whitespace, e.g. " ", "\t" | \H |
\v | Vertical whitespace, e.g. "\r", "\n" | \V |
\s | Any whitespace, i.e. any from \h or \v | \S |
\w | "Word character", i.e. any letter, digit, or underscore | \W |
if (preg_match('/\d+/', '0123456789', $match) === 1) {
print_r($match);
}
Array
(
[0] => 0123456789
)
Character ranges (enclosed in `[` and `]`)
Ranges (e.g. `a-z`) are ordered according to ASCII
// [0-9] is equivalent to \d
if (preg_match('/[0-9]+/', '0123456789', $match) === 1) {
print_r($match);
}
Array
(
[0] => 0123456789
)
// [a-zA-Z0-9_] is equivalent to \w
if (preg_match('/[a-zA-Z0-9_]+/', 'FOObar_123', $match) === 1) {
print_r($match);
}
Array
(
[0] => FOObar_123
)
// Matches hex strings
if (preg_match('/[0-9a-fA-F]+/',
'7c0319169c4aba498d441ca91c6c4f1d', $match) === 1) {
print_r($match);
}
Array
(
[0] => 7c0319169c4aba498d441ca91c6c4f1d
)
// Out-of-order ASCII range
if (preg_match('/[F-A]+/', 'ABCDEF', $match) === 1) {
print_r($match);
}
Warning: preg_match(): Compilation failed: range out of order in character
class at offset 3
if (preg_match('/[^a]+/', 'abc', $match) === 1) {
print_r($match);
}
Array
(
[0] => bc
)
`^` at the start of a character range negates it.
// Matching ]
preg_match('/[\\]]/', ']', $match);
// Matching ^
preg_match('/[\\^]/', '^', $match); // or
preg_match('/[a^]/', '^', $match);
Remember how `/` was the pattern delimiter?
The delimiter doesn't have to be `/` — it can be any character that isn't alphanumeric, a backslash, or whitespace.
The examples here use `/`.
if (preg_match('/[a-z]+/i', 'ABCDEF', $match) === 1) {
print_r($match);
}
Array
(
[0] => ABCDEF
)
`/i` makes any letters match both upper/lowercase.
if (preg_match('/^bar/m', "foo\nbar", $match) === 1) {
print_r($match);
}
Array
(
[0] => bar
)
`/m` makes `^` and `$` match line (versus string) starts/ends.
if (preg_match('/.+/s', "foo\nbar", $match) === 1) {
print_r($match);
}
Array
(
[0] => foo
bar
)
`/s` makes `.` match `\n`.
for ($i = 0; $i < 10_000; $i++) {
if (preg_match('/[0-9a-f]+/S', md5($i), $match) === 1) {
print_r($match);
}
}
`/S` analyzes a pattern for better performance.
No effect in PHP 7.3+ due to PCRE2 migration.
if (preg_match('/p.*/', 'php', $match) === 1) {
print_r($match);
}
Array
(
[0] => php
)
if (preg_match('/p.*/U', 'php', $match) === 1) {
print_r($match);
}
Array
(
[0] => p
)
if (preg_match('/p.*?/', 'php', $match) === 1) {
print_r($match);
}
Array
(
[0] => p
)
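A `?` after a quantifier makes it lazy. In patterns that use `/U` the meaning is inverted: quantifiers are lazy by default and a trailing `?` makes them greedy.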
"If this modifier is set, whitespace data characters in the pattern are totally ignored except when escaped or inside a character class, and characters between an unescaped # outside a character class and the next newline character, inclusive, are also ignored. This is equivalent to Perl's /x modifier, and makes it possible to include commentary inside complicated patterns. Note, however, that this applies only to data characters. Whitespace characters may never appear within special character sequences in a pattern, for example within the sequence (?( which introduces a conditional subpattern."
More trouble than it's worth IMO. YMMV.
$result = preg_replace(
'/([0-9]{3})-([0-9]{3})-([0-9]{4})/',
'($1) $2-$3',
'123-456-7890'
);
echo $result;
(123) 456-7890
See the `preg_replace()` documentation.
$result = preg_split('/\s*,\s*/', '3,4 , 5 , 6');
print_r($result);
Array
(
[0] => 3
[1] => 4
[2] => 5
[3] => 6
)
$result = preg_grep('/^[0-9]+$/', ['1', 'a', '1a', '2', 'b3']);
print_r($result);
Array
(
[0] => 1
[3] => 2
)
/\bRegular\s+expressions\!+/i