-
Swift Regex: Beyond the basics
Go beyond the basics of string processing with Swift Regex. We'll share an overview of Regex and how it works, explore Foundation's rich data parsers and discover how to integrate your own, and delve into captures. We'll also provide best practices for matching strings and wielding Regex-powered algorithms with ease.
Ressources
- SE-0357: Regex-powered algorithms
- SE-0355: Regex syntax
- SE-0354: Regex literals
- SE-0351: Regex builder DSL
- SE-0350: Regex type and overview
Vidéos connexes
WWDC22
WWDC21
-
Rechercher dans cette vidéo…
-
-
0:39 - Regex matching "Hi, WWDC22!"
Regex { "Hi, WWDC" Repeat(.digit, count: 2) "!" } -
1:06 - Simple Regex from a string
let input = "name: John Appleseed, user_id: 100" let regex = try Regex(#"user_id:\s*(\d+)"#) if let match = input.firstMatch(of: regex) { print("Matched: \(match[0])") print("User ID: \(match[1])") } -
1:56 - Simple Regex from a literal
let input = "name: John Appleseed, user_id: 100" let regex = /user_id:\s*(\d+)/ if let match = input.firstMatch(of: regex) { print("Matched: \(match.0)") print("User ID: \(match.1)") } -
2:08 - Simple regex builder
import RegexBuilder let input = "name: John Appleseed, user_id: 100" let regex = Regex { "user_id:" OneOrMore(.whitespace) Capture(.localizedInteger) } if let match = input.firstMatch(of: regex) { print("Matched: \(match.0)") print("User ID: \(match.1)") } -
2:38 - A trivial Regex interpreted by the Regex engine
let regex = Regex { OneOrMore("a") OneOrMore(.digit) } let match = "aaa12".wholeMatch(of: regex) -
3:49 - Regex-powered algorithms
let input = "name: John Appleseed, user_id: 100" let regex = /user_id:\s*(\d+)/ input.firstMatch(of: regex) // Regex.Match<(Substring, Substring)> input.wholeMatch(of: regex) // nil input.prefixMatch(of: regex) // nil input.starts(with: regex) // false input.replacing(regex, with: "456") // "name: John Appleseed, 456" input.trimmingPrefix(regex) // "name: John Appleseed, user_id: 100" input.split(separator: /\s*,\s*/) // ["name: John Appleseed", "user_id: 100"] switch "abc" { case /\w+/: print("It's a word!") } -
5:14 - Use Foundation parsers in regex builder
let statement = """ DSLIP 04/06/20 Paypal $3,020.85 CREDIT 04/03/20 Payroll $69.73 DEBIT 04/02/20 Rent ($38.25) DEBIT 03/31/20 Grocery ($27.44) DEBIT 03/24/20 IRS ($52,249.98) """ let regex = Regex { Capture(.date(format: "\(month: .twoDigits)/\(day: .twoDigits)/\(year: .twoDigits)")) OneOrMore(.whitespace) OneOrMore(.word) OneOrMore(.whitespace) Capture(.currency(code: "USD").sign(strategy: .accounting)) } -
6:24 - XCTest log regex (version 1)
import RegexBuilder let regex = Regex { "Test Suite '" /[a-zA-Z][a-zA-Z0-9]*/ "' " ChoiceOf { "started" "passed" "failed" } " at " OneOrMore(.any) Optionally(".") } -
6:25 - Test our Regex against some inputs
let testSuiteTestInputs = [ "Test Suite 'RegexDSLTests' started at 2022-06-06 09:41:00.001", "Test Suite 'RegexDSLTests' failed at 2022-06-06 09:41:00.001.", "Test Suite 'RegexDSLTests' passed at 2022-06-06 09:41:00.001." ] for line in testSuiteTestInputs { if let match = line.wholeMatch(of: regex) { print("Matched: \(match.output)") } } -
10:28 - Example of capture
let regex = Regex { "a" Capture("b") "c" /d(e)f/ } if let match = "abcdef".wholeMatch(of: regex) { let (wholeMatch, b, e) = match.output } -
11:10 - XCTest log regex (version 2, with captures)
import RegexBuilder let regex = Regex { "Test Suite '" Capture(/[a-zA-Z][a-zA-Z0-9]*/) "' " Capture { ChoiceOf { "started" "passed" "failed" } } " at " Capture(OneOrMore(.any)) Optionally(".") } -
11:21 - Test our Regex (version 2) against some inputs
let testSuiteTestInputs = [ "Test Suite 'RegexDSLTests' started at 2022-06-06 09:41:00.001", "Test Suite 'RegexDSLTests' failed at 2022-06-06 09:41:00.001.", "Test Suite 'RegexDSLTests' passed at 2022-06-06 09:41:00.001." ] for line in testSuiteTestInputs { if let (whole, name, status, dateTime) = line.wholeMatch(of: regex)?.output { print("Matched: \"\(name)\", \"\(status)\", \"\(dateTime)\"") } } -
11:51 - XCTest log regex (version 3, with reluctant repetition)
import RegexBuilder let regex = Regex { "Test Suite '" Capture(/[a-zA-Z][a-zA-Z0-9]*/) "' " Capture { ChoiceOf { "started" "passed" "failed" } } " at " Capture(OneOrMore(.any, .reluctant)) Optionally(".") } -
15:20 - Example of transforming capture
Regex { Capture { OneOrMore(.digit) } transform: { Int($0) // Int.init?(_: some StringProtocol) } } // Regex<(Substring, Int?)> -
15:55 - Example of transforming capture and removing optionality
Regex { TryCapture { OneOrMore(.digit) } transform: { Int($0) // Int.init?(_: some StringProtocol) } } // Regex<(Substring, Int)> -
16:21 - XCTest log regex (version 4, with transforming capture)
enum TestStatus: String { case started = "started" case passed = "passed" case failed = "failed" } let regex = Regex { "Test Suite '" Capture(/[a-zA-Z][a-zA-Z0-9]*/) "' " TryCapture { ChoiceOf { "started" "passed" "failed" } } transform: { TestStatus(rawValue: String($0)) } " at " Capture(OneOrMore(.any, .reluctant)) Optionally(".") } // Regex<(Substring, Substring, TestStatus, Substring)> -
17:23 - XCTest log regex (version 5, with Foundation ISO 8601 date parser)
let regex = Regex { "Test Suite '" Capture(/[a-zA-Z][a-zA-Z0-9]*/) "' " TryCapture { ChoiceOf { "started" "passed" "failed" } } transform: { TestStatus(rawValue: String($0)) } " at " Capture(.iso8601( timeZone: .current, includingFractionalSeconds: true, dateTimeSeparator: .space)) Optionally(".") } // Regex<(Substring, Substring, TestStatus, Date)> -
18:19 - XCTest log duration parser
let input = "Test Case '-[RegexDSLTests testCharacterClass]' passed (0.001 seconds)." let regex = Regex { "Test Case " OneOrMore(.any, .reluctant) "(" Capture { .localizedDouble } " seconds)." } if let match = input.wholeMatch(of: regex) { print("Time: \(match.output)") } -
19:16 - CDoubleParser definition
import Darwin struct CDoubleParser: CustomConsumingRegexComponent { typealias RegexOutput = Double func consuming( _ input: String, startingAt index: String.Index, in bounds: Range<String.Index> ) throws -> (upperBound: String.Index, output: Double)? { input[index...].withCString { startAddress in var endAddress: UnsafeMutablePointer<CChar>! let output = strtod(startAddress, &endAddress) guard endAddress > startAddress else { return nil } let parsedLength = startAddress.distance(to: endAddress) let upperBound = input.utf8.index(index, offsetBy: parsedLength) return (upperBound, output) } } } -
20:13 - Use CDoubleParser in regex builder
let input = "Test Case '-[RegexDSLTests testCharacterClass]' passed (0.001 seconds)." let regex = Regex { "Test Case " OneOrMore(.any, .reluctant) "(" Capture { CDoubleParser() } " seconds)." } // Regex<(Substring, Double)> if let match = input.wholeMatch(of: regex) { print("Time: \(match.1)") }
-