
matthewturland.com/presentations

#tek2023
"A regular expression... is a sequence of characters that specifies a match pattern in text." Wikipedia
The PCRE extension (preg) is generally the one you want. In particular, check out its Pattern Syntax manual section.
The POSIX extension (ereg) was deprecated in PHP 5.3 and removed in PHP 7.
preg_match() finds the first match.
preg_match_all() finds all matches.
Both take a $pattern, a $subject, and a $matches array passed by reference to store found matches.
Both return the number of matches found: 0 or 1 for preg_match(), 0+ for preg_match_all().
To filter an array of strings by a pattern, see preg_grep().
$present = (strpos($string, 'foo') !== false);
$present = (preg_match('/foo/', $string) === 1);
Prefer string functions (e.g. strpos) for static substrings, ctype functions for common simple patterns.
In /foo/, / is the pattern delimiter. More on that later.
$present = (strpos($string, 'foo') === 0);
// or in PHP 8
$present = str_starts_with($string, 'foo');
$present = (preg_match('/^foo/', $string) === 1);
A caret (^) before a pattern denotes the start of $string.
$present = (substr($string, -1 * strlen('foo')) === 'foo');
// or in PHP 8
$present = str_ends_with($string, 'foo');
$present = (preg_match('/foo$/', $string) === 1);
A dollar sign ($) after a pattern denotes the end of $string.
$present = ($string === 'foo');
$present = (preg_match('/^foo$/', $string) === 1);
Using ^ and $ together means the pattern must match the entirety of $string.
$present = (strpos($string, 'foo') !== false
|| strpos($string, 'bar') !== false
|| strpos($string, 'baz') !== false);
$present = (preg_match('/foo|bar|baz/', $string) === 1);
A pipe (|) can be used to delimit multiple possible patterns to match.
$result = preg_match('/^foo|bar/', 'abar'); // 1
$result = preg_match('/^foo|^bar/', 'abar'); // 0
$present = (preg_match('/a{0,1}/', $string) === 1);
$present = (preg_match('/a?/', $string) === 1);
$present = (preg_match('/a{0,}/', $string) === 1);
$present = (preg_match('/a*/', $string) === 1);
$present = (preg_match('/a{1,}/', $string) === 1);
$present = (preg_match('/a+/', $string) === 1);
$present = (preg_match('/a{2}/', $string) === 1);
// a followed by 1+ instances of b
$present = (preg_match('/ab+/', $string) === 1);
// 1+ instances of ab (ab, abab, ababab, etc.)
$present = (preg_match('/(ab)+/', $string) === 1);
// foo or foobar
$present = (preg_match('/foo(bar)?/', $string) === 1);
// ab or ac
$present = (preg_match('/a(b|c)/', $string) === 1);
// ab, ac, abb, abc, acb, acc, etc.
$present = (preg_match('/a(b|c)+/', $string) === 1);
$present = preg_match(
'/^(?P<area>[0-9]{3})'
. '-(?P<prefix>[0-9]{3})'
. '-(?P<line>[0-9]{4})$/',
'123-456-7890',
$match
);
print_r($match);
Array
(
[0] => 123-456-7890
[area] => 123
[1] => 123
[prefix] => 456
[2] => 456
[line] => 7890
[3] => 7890
)
if (preg_match('/foo(bar)?(baz)?/',
'foo', $match) === 1) {
print_r($match);
}
Array
(
[0] => foo
)
if (preg_match('/foo(bar)?(baz)?/',
'foobar', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobar
[1] => bar
)
if (preg_match('/foo(bar)?(baz)?/',
'foobarbaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobarbaz
[1] => bar
[2] => baz
)
if (preg_match('/foo(bar)?(baz)?/',
'foobaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobaz
[1] =>
[2] => baz
)
if (preg_match('/foo(ba(r|z))?/',
'foobar', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobar
[1] => bar
[2] => r
)
if (preg_match('/foo(?:bar)?(baz)?/',
'foobarbaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobarbaz
[1] => baz
)
Use (?: to denote the start of non-captured subpatterns.
Three ways to match a single character from a range of possible characters:
The dot (.) metacharacter
if (preg_match('/.+/', 'foobarbaz', $match) === 1) {
print_r($match);
}
Array
(
[0] => foobarbaz
)
. matches any character except a line feed ("\n").
| Sequence | Description | Inverse |
|---|---|---|
| \d | Digit, 0 through 9. | \D |
| \h | Horizontal whitespace, e.g. " ", "\t" | \H |
| \v | Vertical whitespace, e.g. "\r", "\n" | \V |
| \s | Any whitespace, i.e. any from \h or \v | \S |
| \w | "Word character", i.e. any letter, digit, or underscore | \W |
if (preg_match('/\d+/', '0123456789', $match) === 1) {
print_r($match);
}
Array
(
[0] => 0123456789
)
Character ranges are enclosed in square brackets ([ and ]).
Ranges of characters (e.g. a-z) are respective to ASCII ordering.
// [0-9] is equivalent to \d
if (preg_match('/[0-9]+/', '0123456789', $match) === 1) {
print_r($match);
}
Array
(
[0] => 0123456789
)
// [a-zA-Z0-9_] is equivalent to \w
if (preg_match('/[a-zA-Z0-9_]+/', 'FOObar_123', $match) === 1) {
print_r($match);
}
Array
(
[0] => FOObar_123
)
// Matches hex strings
if (preg_match('/[0-9a-fA-F]+/',
'7c0319169c4aba498d441ca91c6c4f1d', $match) === 1) {
print_r($match);
}
Array
(
[0] => 7c0319169c4aba498d441ca91c6c4f1d
)
// Out-of-order ASCII range
if (preg_match('/[F-A]+/', 'ABCDEF', $match) === 1) {
print_r($match);
}
Warning: preg_match(): Compilation failed: range out of order in character
class at offset 3
if (preg_match('/[^a]+/', 'abc', $match) === 1) {
print_r($match);
}
Array
(
[0] => bc
)
^ as the first character inside a character range negates it; anywhere else in the range it is a literal ^.
// Matching ]
preg_match('/[\\]]/', ']', $match);
// Matching ^
preg_match('/[\\^]/', '^', $match); // or
preg_match('/[a^]/', '^', $match);
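Remember that / was the pattern delimiter?
It doesn't have to be / — it can be any character that isn't alphanumeric, a backslash, or whitespace.
Picking a different delimiter avoids having to escape / in patterns that contain /.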
if (preg_match('/[a-z]+/i', 'ABCDEF', $match) === 1) {
print_r($match);
}
Array
(
[0] => ABCDEF
)
/i makes any letters match both upper/lowercase.
if (preg_match('/^bar/m', "foo\nbar", $match) === 1) {
print_r($match);
}
Array
(
[0] => bar
)
/m makes ^ and $ match line (versus string) starts/ends.
if (preg_match('/.+/s', "foo\nbar", $match) === 1) {
print_r($match);
}
Array
(
[0] => foo
bar
)
/s makes . match \n.
for ($i = 0; $i < 10_000; $i++) {
if (preg_match('/[0-9a-f]+/S', md5($i), $match) === 1) {
print_r($match);
}
}
/S analyzes a pattern for better performance.
No effect in PHP 7.3+ due to PCRE2 migration.
if (preg_match('/p.*/', 'php', $match) === 1) {
print_r($match);
}
Array
(
[0] => php
)
if (preg_match('/p.*/U', 'php', $match) === 1) {
print_r($match);
}
Array
(
[0] => p
)
if (preg_match('/p.*?/', 'php', $match) === 1) {
print_r($match);
}
Array
(
[0] => p
)
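A ? after a quantifier makes it ungreedy (lazy). This works in reverse in patterns that use /U: there, quantifiers are ungreedy by default and a trailing ? makes them greedy.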
"If this modifier is set, whitespace data characters in the pattern are totally ignored except when escaped or inside a character class, and characters between an unescaped # outside a character class and the next newline character, inclusive, are also ignored. This is equivalent to Perl's /x modifier, and makes it possible to include commentary inside complicated patterns. Note, however, that this applies only to data characters. Whitespace characters may never appear within special character sequences in a pattern, for example within the sequence (?( which introduces a conditional subpattern."
More trouble than it's worth IMO. YMMV.
$result = preg_replace(
'/([0-9]{3})-([0-9]{3})-([0-9]{4})/',
'($1) $2-$3',
'123-456-7890'
);
echo $result;
(123) 456-7890
See preg_replace() documentation.
$result = preg_split('/\s*,\s*/', '3,4 , 5 , 6');
print_r($result);
Array
(
[0] => 3
[1] => 4
[2] => 5
[3] => 6
)
$result = preg_grep('/^[0-9]+$/', ['1', 'a', '1a', '2', 'b3']);
print_r($result);
Array
(
[0] => 1
[3] => 2
)
/\bRegular\s+expressions\!+/i