diff --git a/spacy/lang/hu/tokenizer_exceptions.py b/spacy/lang/hu/tokenizer_exceptions.py index 8637e378f..c18a2cec2 100644 --- a/spacy/lang/hu/tokenizer_exceptions.py +++ b/spacy/lang/hu/tokenizer_exceptions.py @@ -636,17 +636,17 @@ for orth in [ _exc[orth] = [{ORTH: orth}] -_ord_num_or_date = "([A-Z0-9]+[./-])*(\d+\.?)" -_num = "[+\-]?\d+([,.]\d+)*" -_ops = "[=<>+\-\*/^()÷%²]" -_suffixes = "-[{al}]+".format(al=ALPHA_LOWER) -_numeric_exp = "({n})(({o})({n}))*[%]?".format(n=_num, o=_ops) -_time_exp = "\d+(:\d+)*(\.\d+)?" +_ord_num_or_date = r"([A-Z0-9]+[./-])*(\d+\.?)" +_num = r"[+\-]?\d+([,.]\d+)*" +_ops = r"[=<>+\-\*/^()÷%²]" +_suffixes = r"-[{al}]+".format(al=ALPHA_LOWER) +_numeric_exp = r"({n})(({o})({n}))*[%]?".format(n=_num, o=_ops) +_time_exp = r"\d+(:\d+)*(\.\d+)?" -_nums = "(({ne})|({t})|({on})|({c}))({s})?".format( +_nums = r"(({ne})|({t})|({on})|({c}))({s})?".format( ne=_numeric_exp, t=_time_exp, on=_ord_num_or_date, c=CURRENCY, s=_suffixes ) TOKENIZER_EXCEPTIONS = _exc -TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=URL_PATTERN, n=_nums)).match +TOKEN_MATCH = re.compile(r"^({u})|({n})$".format(u=URL_PATTERN, n=_nums)).match diff --git a/spacy/tests/lang/en/test_customized_tokenizer.py b/spacy/tests/lang/en/test_customized_tokenizer.py index ad371bf47..fdac32a90 100644 --- a/spacy/tests/lang/en/test_customized_tokenizer.py +++ b/spacy/tests/lang/en/test_customized_tokenizer.py @@ -13,10 +13,10 @@ def custom_en_tokenizer(en_vocab): prefix_re = compile_prefix_regex(English.Defaults.prefixes) suffix_re = compile_suffix_regex(English.Defaults.suffixes) custom_infixes = [ - "\.\.\.+", - "(?<=[0-9])-(?=[0-9])", - "[0-9]+(,[0-9]+)+", - "[\[\]!&:,()\*—–\/-]", + r"\.\.\.+", + r"(?<=[0-9])-(?=[0-9])", + r"[0-9]+(,[0-9]+)+", + r"[\[\]!&:,()\*—–\/-]", ] infix_re = compile_infix_regex(custom_infixes) return Tokenizer( diff --git a/spacy/tests/tokenizer/test_naughty_strings.py b/spacy/tests/tokenizer/test_naughty_strings.py index b383c8fc8..36c69611e 100644 --- a/spacy/tests/tokenizer/test_naughty_strings.py +++ b/spacy/tests/tokenizer/test_naughty_strings.py @@ -9,104 +9,104 @@ import pytest NAUGHTY_STRINGS = [ # ASCII punctuation - ",./;'[]\-=", - '<>?:"{}|_+', - '!@#$%^&*()`~"', + r",./;'[]\-=", + r'<>?:"{}|_+', + r'!@#$%^&*()`~"', # Unicode additional control characters, byte order marks - "­؀؁؂؃؄؅؜۝܏᠎​‌‍‎‏‪", - "￾", + r"­؀؁؂؃؄؅؜۝܏᠎​‌‍‎‏‪", + r"￾", # Unicode Symbols - "Ω≈ç√∫˜µ≤≥÷", - "åß∂ƒ©˙∆˚¬…æ", + r"Ω≈ç√∫˜µ≤≥÷", + r"åß∂ƒ©˙∆˚¬…æ", "œ∑´®†¥¨ˆøπ“‘", - "¡™£¢∞§¶•ªº–≠", - "¸˛Ç◊ı˜Â¯˘¿", - "ÅÍÎÏ˝ÓÔÒÚÆ☃", - "Œ„´‰ˇÁ¨ˆØ∏”’", - "`⁄€‹›fifl‡°·‚—±", - "⅛⅜⅝⅞", - "ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя", - "٠١٢٣٤٥٦٧٨٩", + r"¡™£¢∞§¶•ªº–≠", + r"¸˛Ç◊ı˜Â¯˘¿", + r"ÅÍÎÏ˝ÓÔÒÚÆ☃", + r"Œ„´‰ˇÁ¨ˆØ∏”’", + r"`⁄€‹›fifl‡°·‚—±", + r"⅛⅜⅝⅞", + r"ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя", + r"٠١٢٣٤٥٦٧٨٩", # Unicode Subscript/Superscript/Accents - "⁰⁴⁵", - "₀₁₂", - "⁰⁴⁵₀₁₂", - "ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็", + r"⁰⁴⁵", + r"₀₁₂", + r"⁰⁴⁵₀₁₂", + r"ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็", # Two-Byte Characters - "田中さんにあげて下さい", - "パーティーへ行かないか", - "和製漢語", - "部落格", - "사회과학원 어학연구소", - "찦차를 타고 온 펲시맨과 쑛다리 똠방각하", - "社會科學院語學研究所", - "울란바토르", - "𠜎𠜱𠝹𠱓𠱸𠲖𠳏", + r"田中さんにあげて下さい", + r"パーティーへ行かないか", + r"和製漢語", + r"部落格", + r"사회과학원 어학연구소", + r"찦차를 타고 온 펲시맨과 쑛다리 똠방각하", + r"社會科學院語學研究所", + r"울란바토르", + r"𠜎𠜱𠝹𠱓𠱸𠲖𠳏", # Japanese Emoticons - "ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ", - "(。◕ ∀ ◕。)", - "`ィ(´∀`∩", - "__ロ(,_,*)", - "・( ̄∀ ̄)・:*:", - "゚・✿ヾ╲(。◕‿◕。)╱✿・゚", - ",。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’", - "(╯°□°)╯︵ ┻━┻)" "(ノಥ益ಥ)ノ ┻━┻", - "┬─┬ノ( º _ ºノ)", - "( ͡° ͜ʖ ͡°)", + r"ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ", + r"(。◕ ∀ ◕。)", + r"`ィ(´∀`∩", + r"__ロ(,_,*)", + r"・( ̄∀ ̄)・:*:", + r"゚・✿ヾ╲(。◕‿◕。)╱✿・゚", + r",。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’", + r"(╯°□°)╯︵ ┻━┻)" "(ノಥ益ಥ)ノ ┻━┻", + r"┬─┬ノ( º _ ºノ)", + r"( ͡° ͜ʖ ͡°)", # Emoji - "😍", - "👩🏽", - "👾 🙇 💁 🙅 🙆 🙋 🙎 🙍", - "🐵 🙈 🙉 🙊", - "❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙", - "✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿", - "🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧", - "0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟", + r"😍", + r"👩🏽", + r"👾 🙇 💁 🙅 🙆 🙋 🙎 🙍", + r"🐵 🙈 🙉 🙊", + r"❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙", + r"✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿", + r"🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧", + r"0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟", # Regional Indicator Symbols - "🇺🇸🇷🇺🇸 🇦🇫🇦🇲🇸", - "🇺🇸🇷🇺🇸🇦🇫🇦🇲", - "🇺🇸🇷🇺🇸🇦", + r"🇺🇸🇷🇺🇸 🇦🇫🇦🇲🇸", + r"🇺🇸🇷🇺🇸🇦🇫🇦🇲", + r"🇺🇸🇷🇺🇸🇦", # Unicode Numbers - "123", - "١٢٣", + r"123", + r"١٢٣", # Right-To-Left Strings - "ثم نفس سقطت وبالتحديد،, جزيرتي باستخدام أن دنو. إذ هنا؟ الستار وتنصيب كان. أهّل ايطاليا، بريطانيا-فرنسا قد أخذ. سليمان، إتفاقية بين ما, يذكر الحدود أي بعد, معاملة بولندا، الإطلاق عل إيو.", - "إيو.", - "בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ", - "הָיְתָהtestالصفحات التّحول", - "﷽", - "ﷺ", - "مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،", + r"ثم نفس سقطت وبالتحديد،, جزيرتي باستخدام أن دنو. إذ هنا؟ الستار وتنصيب كان. أهّل ايطاليا، بريطانيا-فرنسا قد أخذ. سليمان، إتفاقية بين ما, يذكر الحدود أي بعد, معاملة بولندا، الإطلاق عل إيو.", + r"إيو.", + r"בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ", + r"הָיְתָהtestالصفحات التّحول", + r"﷽", + r"ﷺ", + r"مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،", # Trick Unicode - "‪‪test‪", - "‫test", - "
test
", - "test⁠test", - "⁦test⁧", + r"‪‪test‪", + r"‫test", + r"
test
", + r"test⁠test", + r"⁦test⁧", # Zalgo Text - "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", - "̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰", - "̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟", - "̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕", - "Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮", + r"Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", + r"̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰", + r"̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟", + r"̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕", + r"Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮", # Unicode Upsidedown - "˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", - "00˙Ɩ$-", + r"˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", + r"00˙Ɩ$-", # Unicode font - "The quick brown fox jumps over the lazy dog", - "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", - "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", - "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", - "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", - "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", - "𝚃𝚑𝚎 𝚚𝚞𝚒𝚌𝚔 𝚋𝚛𝚘𝚠𝚗 𝚏𝚘𝚡 𝚓𝚞𝚖𝚙𝚜 𝚘𝚟𝚎𝚛 𝚝𝚑𝚎 𝚕𝚊𝚣𝚢 𝚍𝚘𝚐", - "⒯⒣⒠ ⒬⒰⒤⒞⒦ ⒝⒭⒪⒲⒩ ⒡⒪⒳ ⒥⒰⒨⒫⒮ ⒪⒱⒠⒭ ⒯⒣⒠ ⒧⒜⒵⒴ ⒟⒪⒢", + r"The quick brown fox jumps over the lazy dog", + r"𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", + r"𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", + r"𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", + r"𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", + r"𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", + r"𝚃𝚑𝚎 𝚚𝚞𝚒𝚌𝚔 𝚋𝚛𝚘𝚠𝚗 𝚏𝚘𝚡 𝚓𝚞𝚖𝚙𝚜 𝚘𝚟𝚎𝚛 𝚝𝚑𝚎 𝚕𝚊𝚣𝚢 𝚍𝚘𝚐", + r"⒯⒣⒠ ⒬⒰⒤⒞⒦ ⒝⒭⒪⒲⒩ ⒡⒪⒳ ⒥⒰⒨⒫⒮ ⒪⒱⒠⒭ ⒯⒣⒠ ⒧⒜⒵⒴ ⒟⒪⒢", # File paths - "../../../../../../../../../../../etc/passwd%00", - "../../../../../../../../../../../etc/hosts", + r"../../../../../../../../../../../etc/passwd%00", + r"../../../../../../../../../../../etc/hosts", # iOS Vulnerabilities - "Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗", - "🏳0🌈️", + r"Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗", + r"🏳0🌈️", ]