<?php

declare(strict_types=1);

namespace Phpml\Tokenization;

use Phpml\Exception\InvalidArgumentException;

/**
 * Tokenizer that cuts a string at every run of characters belonging to the
 * Unicode "Separator" (\pZ) or "Other" (\pC) property classes.
 */
class WhitespaceTokenizer implements Tokenizer
{
    /**
     * Break $text into its non-empty pieces.
     *
     * The pattern is applied in UTF-8 mode (the "u" modifier), and
     * PREG_SPLIT_NO_EMPTY discards empty pieces, so leading, trailing and
     * consecutive delimiters never yield empty tokens.
     *
     * @return string[] the pieces in their original order
     *
     * @throws InvalidArgumentException when preg_split() reports failure
     *                                  (presumably for input that is not valid UTF-8)
     */
    public function tokenize(string $text): array
    {
        $tokens = preg_split('/[\pZ\pC]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() yields either an array or false, so an array means success.
        if (is_array($tokens)) {
            return $tokens;
        }

        throw new InvalidArgumentException('preg_split failed on: '.$text);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body