-
Meet Swift Regex
Learn how you can process strings more effectively when you take advantage of Swift Regex. Come for concise literals but stay for Regex builders — a new, declarative approach to string processing. We'll also explore the Unicode models in String and share how Swift Regex can make Unicode-correct processing easy.
Resources
Related Videos
WWDC22
-
Search this video…
-
-
1:35 - Processing collections
// A single transaction line from the ledger.
let transaction = "DEBIT 03/05/2022 Doug's Dugout Dogs $33.27"

// Naive approach: split on every whitespace character.
// This also breaks the account name apart, because it contains spaces.
let fragments = transaction.split(whereSeparator: \.isWhitespace)
// ["DEBIT", "03/05/2022", "Doug\'s", "Dugout", "Dogs", "$33.27"]
1:49 - Low-level index manipulation
// The not-yet-consumed remainder of the transaction line.
var slice = transaction[...]

/// Extracts the next field, advancing `slice` to the start of the field after it.
///
/// A field ends at the first tab, or at the first whitespace character that is
/// itself followed by more whitespace (or by the end of `slice`). A lone space
/// is treated as part of the field.
///
/// - Returns: The field's text, without the separator that follows it.
func extractField() -> Substring {
  let endIdx = {
    var start = slice.startIndex
    while true {
      // Position of next whitespace (including tabs)
      guard let spaceIdx = slice[start...].firstIndex(where: \.isWhitespace) else {
        return slice.endIndex
      }

      // Tab suffices
      if slice[spaceIdx] == "\t" {
        return spaceIdx
      }

      // Otherwise check for a second whitespace character
      let afterSpaceIdx = slice.index(after: spaceIdx)
      if afterSpaceIdx == slice.endIndex || slice[afterSpaceIdx].isWhitespace {
        return spaceIdx
      }

      // Skip over the single space and try again
      start = afterSpaceIdx
    }
  }()
  // Consume the field and the whitespace run that separates it from the next one.
  defer { slice = slice[endIdx...].drop(while: \.isWhitespace) }
  return slice[..<endIdx]
}

// Fields are consumed strictly in order: kind, date, account, amount.
let kind = extractField()
let date = try Date(String(extractField()), strategy: Date.FormatStyle(date: .numeric))
let account = extractField()
let amount = try Decimal(String(extractField()), format: .currency(code: "USD"))
2:47 - Regex literals
// Regex literals
// Matches a run of one or more digits.
let digits = /\d+/
// digits: Regex<Substring>
3:20 - Regex created at run-time
// Run-time construction
// The pattern is an ordinary (raw) string, so it is parsed when this line runs
// and `Regex.init` throws if the pattern is invalid.
let runtimeString = #"\d+"#
let digits = try Regex(runtimeString)
// digits: Regex<AnyRegexOutput>
3:44 - Regex builder
// Regex builders
// Declarative equivalent of the `\d+` pattern.
let digits = OneOrMore(.digit)
// digits: Regex<Substring>
3:56 - Split approach with a regex literal
// Fields are separated by two or more whitespace characters (or a tab);
// single spaces inside a field, as in the account name, are part of the field.
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"

// Split only on a field separator: 2+ whitespace characters, or a single tab.
let fragments = transaction.split(separator: /\s{2,}|\t/)
// ["DEBIT", "03/05/2022", "Doug's Dugout Dogs", "$33.27"]
4:36 - Normalize field separators
// Fields are separated by two or more whitespace characters (or a tab);
// single spaces inside a field, as in the account name, are part of the field.
let transaction = "DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27"

// Collapse every field separator into exactly one tab.
let normalized = transaction.replacing(/\s{2,}|\t/, with: "\t")
// DEBIT»03/05/2022»Doug's Dugout Dogs»$33.27   (» marks a tab)
6:55 - Create a Regex builder
// CREDIT    03/02/2022    Payroll from employer         $200.23
// CREDIT    03/03/2022    Suspect A                     $2,000,000.00
// DEBIT     03/03/2022    Ted's Pet Rock Sanctuary      $2,000,000.00
// DEBIT     03/05/2022    Doug's Dugout Dogs            $33.27

import RegexBuilder

// Two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/

// Matches one transaction line: kind, date, account name, amount.
let transactionMatcher = Regex {
  /CREDIT|DEBIT/
  fieldSeparator

  // Date parsed by Foundation using the en_US numeric style.
  One(.date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt))
  fieldSeparator

  // Account name: any characters up to (but not including) the next separator.
  OneOrMore {
    NegativeLookahead { fieldSeparator }
    CharacterClass.any
  }
  fieldSeparator

  // Amount parsed by Foundation as en_US-formatted US dollars.
  One(.localizedCurrency(code: "USD").locale(Locale(identifier: "en_US")))
}
9:04 - Use Captures to extract portions of input
// Two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/

// Matches one transaction line and captures each of its four fields.
let transactionMatcher = Regex {
  // Transaction kind, captured as text.
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  // Date, captured as a parsed `Date`.
  Capture { One(.date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt)) }
  fieldSeparator

  // Account name: any characters up to (but not including) the next separator.
  Capture {
    OneOrMore {
      NegativeLookahead { fieldSeparator }
      CharacterClass.any
    }
  }
  fieldSeparator

  // Amount, captured as a parsed `Decimal`.
  Capture { One(.localizedCurrency(code: "USD").locale(Locale(identifier: "en_US"))) }
}
// transactionMatcher: Regex<(Substring, Substring, Date, Substring, Decimal)>
10:31 - Plot twist!
// Sample ledger: a header row, a rule, then one transaction per line.
// Columns are separated by two or more spaces. The last row uses a different
// currency symbol (£) than the others ($).
private let ledger = """
KIND      DATE          INSTITUTION                AMOUNT
----------------------------------------------------------------
CREDIT    03/01/2022    Payroll from employer      $200.23
CREDIT    03/03/2022    Suspect A                  $2,000,000.00
DEBIT     03/03/2022    Ted's Pet Rock Sanctuary   $2,000,000.00
DEBIT     03/05/2022    Doug's Dugout Dogs         $33.27
DEBIT     06/03/2022    Oxford Comma Supply Ltd.   £57.33
"""
// 😱
10:53 - Use named captures
// Extended (multi-line) regex literal: whitespace inside the pattern is
// ignored, so the three named captures can be laid out one per line.
//   date     - two digits / two digits / four digits
//   middle   - everything up to the first currency symbol
//   currency - the currency symbol itself
let regex = #/
  (?<date>     \d{2} / \d{2} / \d{4})
  (?<middle>   \P{currencySymbol}+)
  (?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>
11:33 - Use Foundation's date parser
// Extended (multi-line) regex literal: whitespace inside the pattern is ignored.
let regex = #/
  (?<date>     \d{2} / \d{2} / \d{4})
  (?<middle>   \P{currencySymbol}+)
  (?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>

/// Chooses the date-parsing strategy that goes with a transaction's currency symbol.
///
/// - Parameter currency: The currency symbol captured from the transaction line.
/// - Returns: A numeric date strategy for the `en_US` locale when `currency` is `$`,
///   or for the `en_GB` locale when it is `£`, both in GMT.
/// - Note: Traps with `fatalError` on any other symbol.
func pickStrategy(_ currency: Substring) -> Date.ParseStrategy {
  switch currency {
  case "$": return .date(.numeric, locale: Locale(identifier: "en_US"), timeZone: .gmt)
  case "£": return .date(.numeric, locale: Locale(identifier: "en_GB"), timeZone: .gmt)
  default: fatalError("We found another one!")
  }
}
11:48 - Find and replace
// Extended (multi-line) regex literal: whitespace inside the pattern is ignored.
let regex = #/
  (?<date>     \d{2} / \d{2} / \d{4})
  (?<middle>   \P{currencySymbol}+)
  (?<currency> \p{currencySymbol})
/#
// Regex<(Substring, date: Substring, middle: Substring, currency: Substring)>

func pickStrategy(_ currency: Substring) -> Date.ParseStrategy { … }

// Rewrite every matched date in place. The currency symbol found later on the
// same line decides which parse strategy is used for the date.
ledger.replace(regex) { match -> String in
  let date = try! Date(String(match.date), strategy: pickStrategy(match.currency))

  // ISO 8601, it's the only way to be sure
  let newDate = date.formatted(.iso8601.year().month().day())

  // Re-emit the untouched remainder of the match after the new date.
  return newDate + match.middle + match.currency
}
12:45 - A zombie love story
// 🧟‍♀️💖🧠, spelled with explicit scalar escapes so that the invisible
// zero-width joiner (U+200D) and variation selector (U+FE0F) inside the first
// character cannot be lost when the text is copied.
let aZombieLoveStory = "\u{1F9DF}\u{200D}\u{2640}\u{FE0F}\u{1F496}\u{1F9E0}"
// Characters: 🧟‍♀️, 💖, 🧠
13:01 - A zombie love story in Unicode scalars
// The same string viewed as a collection of Unicode scalar values.
aZombieLoveStory.unicodeScalars
// Unicode scalar values: U+1F9DF, U+200D, U+2640, U+FE0F, U+1F496, U+1F9E0
13:44 - A zombie love story in UTF-8
// The same string viewed as a collection of UTF-8 code units.
aZombieLoveStory.utf8
// UTF-8 code units: F0 9F A7 9F E2 80 8D E2 99 80 EF B8 8F F0 9F 92 96 F0 9F A7 A0
14:12 - Unicode canonical equivalence
// "cafe\u{301}" spells the accent as a separate combining scalar (U+0301).
// Comparing the strings character by character treats canonically
// equivalent text as equal.
"café".elementsEqual("cafe\u{301}")
// true
14:49 - String's views are compared at binary level
// Character-by-character comparison honors Unicode canonical equivalence.
"café".elementsEqual("cafe\u{301}")
// true

// The scalar and UTF-8 views compare their raw elements, so the two
// different spellings of the accent are not equal there.
"café".unicodeScalars.elementsEqual("cafe\u{301}".unicodeScalars)
// false

"café".utf8.elementsEqual("cafe\u{301}".utf8)
// false
15:14 - Unicode processing
// Regexes used as `case` patterns: the first must match the emoji string, and
// the second (case-insensitively) the text whose accent is written as a
// separate combining scalar.
switch ("🧟‍♀️💖🧠", "The Brain Cafe\u{301}") {
case (/.\N{SPARKLING HEART}./, /.*café/.ignoresCase()):
  print("Oh no! 🧟‍♀️💖🧠, but 🧠💖☕️!")
default:
  print("No conflicts found")
}
15:54 - Complex scalar processing
let input = "Oh no! 🧟‍♀️💖🧠, but 🧠💖☕️!"

// Default semantics: `.` matches a whole character.
input.firstMatch(of: /.\N{SPARKLING HEART}./)
// 🧟‍♀️💖🧠

// Unicode-scalar semantics: `.` matches a single scalar, so the match starts
// at the last scalar of the first emoji (the variation selector).
input.firstMatch(of: /.\N{SPARKLING HEART}./.matchingSemantics(.unicodeScalar))
// ️💖🧠
17:56 - Live transaction matcher
let timestamp = Regex { ... } // proprietary
// Built at run time from a string supplied elsewhere.
let details = try Regex(inputString)
let amountMatcher = /[\d.]+/

// CREDIT    <proprietary>      <redacted>        200.23        A1B34EFF     ...
// Two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/
let transactionMatcher = Regex {
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  Capture { timestamp }
  fieldSeparator

  Capture { details }
  fieldSeparator

  // ...
}
18:26 - Replace field separator
// One field: one or more characters, stopping before the next field separator.
let field = OneOrMore {
  // Refuse to consume a character at a position where a separator begins.
  NegativeLookahead { fieldSeparator }
  CharacterClass.any
}
18:55 - Use TryCapture
// CREDIT    <proprietary>      <redacted>        200.23        A1B34EFF     ...
// Two or more whitespace characters, or a single tab.
let fieldSeparator = /\s{2,}|\t/

// One field: one or more characters, stopping before the next field separator.
let field = OneOrMore {
  NegativeLookahead { fieldSeparator }
  CharacterClass.any
}

let transactionMatcher = Regex {
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  // Match the field's extent first, then accept it only if the field-specific
  // regex matches that text; returning nil makes the capture fail.
  TryCapture(field) { timestamp ~= $0 ? $0 : nil }
  fieldSeparator

  TryCapture(field) { details ~= $0 ? $0 : nil }
  fieldSeparator

  // ...
}
21:45 - Fixing the scaling issues
// CREDIT    <proprietary>      <redacted>        200.23        A1B34EFF     ...
// `Local` makes the separator an atomic unit: once it has matched, the regex
// engine does not backtrack into it to try a shorter run of whitespace.
let fieldSeparator = Local { /\s{2,}|\t/ }

// One field: one or more characters, stopping before the next field separator.
let field = OneOrMore {
  NegativeLookahead { fieldSeparator }
  CharacterClass.any
}

let transactionMatcher = Regex {
  Capture { /CREDIT|DEBIT/ }
  fieldSeparator

  // Match the field's extent first, then accept it only if the field-specific
  // regex matches that text; returning nil makes the capture fail.
  TryCapture(field) { timestamp ~= $0 ? $0 : nil }
  fieldSeparator

  TryCapture(field) { details ~= $0 ? $0 : nil }
  fieldSeparator

  // ...
}
-