Hoa central
Ustring.php
Go to the documentation of this file.
1 <?php
2 
37 namespace Hoa\Ustring;
38 
39 use Hoa\Core;
40 
/**
 * Class Ustring.
 *
 * Mutable Unicode (UTF-8) string. Almost every mutator returns `$this`, so
 * calls can be chained. Character-level operations rely on the mbstring
 * extension and on PCRE with the `u` option; collation, transliteration and
 * normalization use the intl extension when it is available.
 *
 * The string can be accessed like an array (one offset per character, offsets
 * wrap around the string length), counted and iterated character by character.
 */
class Ustring implements \ArrayAccess, \Countable, \IteratorAggregate
{
    /**
     * Left-to-right direction.
     *
     * @const int
     */
    const LTR = 0;

    /**
     * Right-to-left direction.
     *
     * @const int
     */
    const RTL = 1;

    /**
     * ZERO WIDTH NO-BREAK SPACE (byte order mark).
     *
     * @const int
     */
    const BOM = 0xfeff;

    /**
     * LEFT-TO-RIGHT MARK.
     *
     * @const int
     */
    const LRM = 0x200e;

    /**
     * RIGHT-TO-LEFT MARK.
     *
     * @const int
     */
    const RLM = 0x200f;

    /**
     * LEFT-TO-RIGHT EMBEDDING.
     *
     * @const int
     */
    const LRE = 0x202a;

    /**
     * RIGHT-TO-LEFT EMBEDDING.
     *
     * @const int
     */
    const RLE = 0x202b;

    /**
     * POP DIRECTIONAL FORMATTING.
     *
     * @const int
     */
    const PDF = 0x202c;

    /**
     * LEFT-TO-RIGHT OVERRIDE.
     *
     * @const int
     */
    const LRO = 0x202d;

    /**
     * RIGHT-TO-LEFT OVERRIDE.
     *
     * @const int
     */
    const RLO = 0x202e;

    /**
     * Side flag: beginning of the string (see pad() and trim()).
     *
     * @const int
     */
    const BEGINNING = 1;

    /**
     * Side flag: end of the string (see pad() and trim()).
     *
     * @const int
     */
    const END = 2;

    /**
     * Split flag: drop empty pieces.
     *
     * @const int
     */
    const WITHOUT_EMPTY = PREG_SPLIT_NO_EMPTY;

    /**
     * Split flag: also return the parenthesized delimiters.
     *
     * @const int
     */
    const WITH_DELIMITERS = PREG_SPLIT_DELIM_CAPTURE;

    /**
     * Match/split flag: also return the offset of each result.
     * 260 is PREG_OFFSET_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE, so that the
     * same constant can be given to match() and to split().
     *
     * @const int
     */
    const WITH_OFFSET = 260;

    /**
     * Global match flag: group results by pattern.
     *
     * @const int
     */
    const GROUP_BY_PATTERN = PREG_PATTERN_ORDER;

    /**
     * Global match flag: group results by tuple (set of matches).
     *
     * @const int
     */
    const GROUP_BY_TUPLE = PREG_SET_ORDER;

    /**
     * Current string; null until something has been appended.
     *
     * @var string
     */
    protected $_string = null;

    /**
     * Cached direction of the string (LTR or RTL); null when it has to be
     * (re)computed by getDirection().
     *
     * @var int
     */
    protected $_direction = null;

    /**
     * Collator shared by all the instances (see getCollator()).
     *
     * @var \Collator
     */
    protected static $_collator = null;



    /**
     * Construct a string.
     *
     * @param   string  $string    Initial content (optional).
     * @throws  Exception          If the mbstring extension is missing.
     */
    public function __construct($string = null)
    {
        if (false === function_exists('mb_substr')) {
            throw new Exception(
                '%s needs the mbstring extension.',
                0,
                get_class($this)
            );
        }

        if (null !== $string) {
            $this->append($string);
        }
    }

    /**
     * Append a substring to the current string.
     *
     * @param   string  $substring    Substring to append.
     * @return  static
     */
    public function append($substring)
    {
        $this->_string .= $substring;
        // The first character changes when the string was empty.
        $this->_direction = null;

        return $this;
    }

    /**
     * Prepend a substring to the current string.
     *
     * @param   string  $substring    Substring to prepend.
     * @return  static
     */
    public function prepend($substring)
    {
        $this->_string = $substring . $this->_string;
        // The first character, hence the direction, may have changed.
        $this->_direction = null;

        return $this;
    }

    /**
     * Pad the current string up to a given length (in characters).
     *
     * The piece is repeated as many times as needed and truncated so that the
     * final length is exactly `$length`. Nothing happens when the string is
     * already long enough or when the piece is empty.
     *
     * @param   int     $length    Length to reach.
     * @param   string  $piece     Piece used for the padding.
     * @param   int     $side      static::END to pad on the right, anything
     *                             else (e.g. static::BEGINNING) on the left.
     * @return  static
     */
    public function pad($length, $piece, $side = self::END)
    {
        $difference = (int) ($length - $this->count());

        if (0 >= $difference) {
            return $this;
        }

        $piece       = (string) $piece;
        $pieceLength = mb_strlen($piece);

        // An empty piece can never fill the gap (and would divide by zero).
        if (0 === $pieceLength) {
            return $this;
        }

        $handle =
            str_repeat($piece, (int) floor($difference / $pieceLength)) .
            mb_substr($piece, 0, $difference % $pieceLength);

        return
            static::END === $side
                ? $this->append($handle)
                : $this->prepend($handle);
    }

    /**
     * Compare the current string to another one.
     *
     * Uses the shared collator when the intl `Collator` class exists, and a
     * byte-wise `strcmp` otherwise.
     *
     * @param   mixed  $string    String (or stringable value) to compare to.
     * @return  int               < 0 if the current string is lower, 0 if
     *                            equal, > 0 if greater.
     */
    public function compare($string)
    {
        $left  = (string) $this->_string;
        $right = (string) $string;

        if (null === $collator = static::getCollator()) {
            return strcmp($left, $right);
        }

        return $collator->compare($left, $right);
    }

    /**
     * Get the collator shared by all the instances.
     *
     * It is created on first use from the value returned by
     * `setlocale(LC_COLLATE, null)`.
     *
     * @return  \Collator    Or null when the `Collator` class does not exist.
     */
    public static function getCollator()
    {
        if (false === class_exists('Collator')) {
            return null;
        }

        if (null === static::$_collator) {
            static::$_collator = new \Collator(setlocale(LC_COLLATE, null));
        }

        return static::$_collator;
    }

    /**
     * Make sure a pattern carries the `u` (UTF-8) option.
     *
     * The options are what follows the last closing delimiter. For
     * bracket-style delimiters (`(…)`, `[…]`, `{…}`, `<…>`) the closing
     * delimiter differs from the opening one.
     *
     * @param   string  $pattern    Delimited PCRE pattern.
     * @return  string              Pattern with the `u` option.
     */
    public static function safePattern($pattern)
    {
        $pattern = (string) $pattern;
        $opening = mb_substr($pattern, 0, 1);
        $options = '';

        if ('' !== $opening) {
            // Only ASCII brackets are mapped; any other delimiter is its own
            // closing delimiter.
            $closing = strtr($opening, '([{<', ')]}>');
            $tail    = mb_strrchr($pattern, $closing, false);

            if (false !== $tail) {
                $options = mb_substr($tail, mb_strlen($closing));
            }
        }

        if (false === strpos($options, 'u')) {
            $pattern .= 'u';
        }

        return $pattern;
    }

    /**
     * Perform a regular expression match on the current string.
     *
     * @param   string  $pattern    Pattern (the `u` option is added if
     *                              missing).
     * @param   array   &$matches   Receives the matches.
     * @param   int     $flags      Combination of static::WITH_OFFSET,
     *                              static::GROUP_BY_PATTERN and
     *                              static::GROUP_BY_TUPLE.
     * @param   int     $offset     Character (not byte) offset to start from.
     * @param   bool    $global     Whether to look for all the matches.
     * @return  int                 Result of `preg_match` or `preg_match_all`.
     */
    public function match(
        $pattern,
        &$matches = null,
        $flags    = 0,
        $offset   = 0,
        $global   = false
    ) {
        $pattern = static::safePattern($pattern);
        $subject = (string) $this->_string;

        if (0 === $flags) {
            if (true === $global) {
                $flags = static::GROUP_BY_PATTERN;
            }
        } else {
            // WITH_OFFSET also carries the split-only bit: remove it.
            $flags &= ~PREG_SPLIT_OFFSET_CAPTURE;
        }

        // PCRE expects a byte offset: measure the bytes of the skipped
        // characters.
        $offset = strlen(mb_substr($subject, 0, $offset));

        if (true === $global) {
            return preg_match_all(
                $pattern,
                $subject,
                $matches,
                $flags,
                $offset
            );
        }

        return preg_match($pattern, $subject, $matches, $flags, $offset);
    }

    /**
     * Perform a regular expression search and replace on the current string.
     *
     * NOTE: the replacement is used as a callback as soon as `is_callable`
     * accepts it, which includes strings naming an existing function.
     *
     * @param   mixed  $pattern        Pattern(s) (the `u` option is added if
     *                                 missing).
     * @param   mixed  $replacement    Replacement string(s) or callback.
     * @param   int    $limit          Maximum number of replacements (-1 for
     *                                 no limit).
     * @return  static
     */
    public function replace($pattern, $replacement, $limit = -1)
    {
        $pattern = static::safePattern($pattern);

        if (false === is_callable($replacement)) {
            $this->_string = preg_replace(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        } else {
            $this->_string = preg_replace_callback(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        }

        $this->_direction = null;

        return $this;
    }

    /**
     * Split the current string according to a pattern.
     *
     * @param   string  $pattern    Pattern (the `u` option is added if
     *                              missing).
     * @param   int     $limit      Maximum number of pieces (-1 for no limit).
     * @param   int     $flags      Combination of static::WITHOUT_EMPTY,
     *                              static::WITH_DELIMITERS and
     *                              static::WITH_OFFSET.
     * @return  array
     */
    public function split(
        $pattern,
        $limit = -1,
        $flags = self::WITHOUT_EMPTY
    ) {
        return preg_split(
            static::safePattern($pattern),
            $this->_string,
            $limit,
            $flags
        );
    }

    /**
     * Iterate over the characters of the string.
     *
     * @return  \ArrayIterator    One element per character; no element for an
     *                            empty string.
     */
    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        // An empty pattern matches between every character; empty pieces (at
        // both ends, or the whole of an empty string) are dropped.
        return new \ArrayIterator(
            preg_split(
                '//u',
                (string) $this->_string,
                -1,
                PREG_SPLIT_NO_EMPTY
            )
        );
    }

    /**
     * Convert the current string to lower case.
     *
     * @return  static
     */
    public function toLowerCase()
    {
        $this->_string    = mb_strtolower((string) $this->_string);
        $this->_direction = null;

        return $this;
    }

    /**
     * Convert the current string to upper case.
     *
     * @return  static
     */
    public function toUpperCase()
    {
        $this->_string    = mb_strtoupper((string) $this->_string);
        $this->_direction = null;

        return $this;
    }

    /**
     * Transform the current string into an ASCII string.
     *
     * Strategies, by order of preference: the intl `Transliterator` class,
     * then the intl `Normalizer` class followed by `iconv`, then (only if
     * `$try` is true) `iconv` alone.
     *
     * @param   bool  $try    Whether to try `iconv` alone when `Normalizer`
     *                        does not exist.
     * @return  static
     * @throws  Exception     If `Normalizer` does not exist and `$try` is
     *                        false.
     */
    public function toAscii($try = false)
    {
        // Already ASCII: nothing to do.
        if (0 === preg_match('#[\x80-\xff]#', (string) $this->_string)) {
            return $this;
        }

        $string  = $this->_string;
        $transId =
            'Any-Latin; ' .
            '[\p{S}] Name; ' .
            'Latin-ASCII';

        if (null !== $transliterator = static::getTransliterator($transId)) {
            // The `Name` transform turns symbols into `\N{NAME}`: rewrite
            // them as `(name)`.
            $this->_string = preg_replace_callback(
                '#\\\N\{([A-Z ]+)\}#u',
                function (array $matches) {
                    return '(' . strtolower($matches[1]) . ')';
                },
                $transliterator->transliterate($string)
            );
            $this->_direction = null;

            return $this;
        }

        if (false === class_exists('Normalizer')) {
            if (false === $try) {
                throw new Exception(
                    '%s needs the class Normalizer to work properly, ' .
                    'or you can force a try by using %1$s(true).',
                    1,
                    __METHOD__
                );
            }

            $string           = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');
            $this->_string    = preg_replace('#(?:[\'"`^](\w))#u', '\1', $string);
            $this->_direction = null;

            return $this;
        }

        // Decompose, drop the non-spacing marks, then transcode what is left.
        $string           = \Normalizer::normalize($string, \Normalizer::NFKD);
        $string           = preg_replace('#\p{Mn}+#u', '', $string);
        $this->_string    = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');
        $this->_direction = null;

        return $this;
    }

    /**
     * Transliterate the current string (or a part of it).
     *
     * @param   string  $identifier    Transliterator identifier.
     * @param   int     $start         Start index (in UTF-16 code units).
     * @param   int     $end           End index (in UTF-16 code units); null
     *                                 means up to the end of the string.
     * @return  static
     * @throws  Exception              If no transliterator can be obtained.
     */
    public function transliterate($identifier, $start = 0, $end = null)
    {
        if (null === $transliterator = static::getTransliterator($identifier)) {
            throw new Exception(
                '%s needs the class Transliterator to work properly.',
                2,
                __METHOD__
            );
        }

        // intl uses -1 for "end of the string"; a null would be coerced to 0
        // and nothing would be transliterated.
        if (null === $end) {
            $end = -1;
        }

        $this->_string    = $transliterator->transliterate($this->_string, $start, $end);
        $this->_direction = null;

        return $this;
    }

    /**
     * Get a transliterator.
     *
     * @param   string  $identifier    Transliterator identifier.
     * @return  \Transliterator        Or null when the `Transliterator` class
     *                                 does not exist; otherwise whatever
     *                                 `\Transliterator::create` returns.
     */
    public static function getTransliterator($identifier)
    {
        if (false === class_exists('Transliterator')) {
            return null;
        }

        return \Transliterator::create($identifier);
    }

    /**
     * Strip characters (default \s) at the beginning and/or the end of the
     * current string.
     *
     * @param   string  $regex    Characters to remove, as a regular expression
     *                            fragment (`#` is the delimiter in use).
     * @param   int     $side     Combination of static::BEGINNING and
     *                            static::END (both by default).
     * @return  static
     */
    public function trim($regex = '\s', $side = 3 /* static::BEGINNING | static::END */)
    {
        $regex  = '(?:' . $regex . ')+';
        $handle = null;

        if (0 !== ($side & static::BEGINNING)) {
            $handle .= '(^' . $regex . ')';
        }

        if (0 !== ($side & static::END)) {
            if (null !== $handle) {
                $handle .= '|';
            }

            $handle .= '(' . $regex . '$)';
        }

        $this->_string    = preg_replace('#' . $handle . '#u', '', $this->_string);
        $this->_direction = null;

        return $this;
    }

    /**
     * Compute a character offset: negative offsets count from the end and
     * out-of-range offsets wrap around the length of the string.
     *
     * @param   int  $offset    Offset.
     * @return  int             Offset in [0, length[, or 0 for an empty
     *                          string.
     */
    protected function computeOffset($offset)
    {
        return self::wrapOffset($offset, mb_strlen((string) $this->_string));
    }

    /**
     * Wrap an offset into [0, $length[ (modulo the length).
     *
     * @param   int  $offset    Offset, possibly negative or too large.
     * @param   int  $length    Length to wrap around.
     * @return  int             Wrapped offset; 0 when the length is 0.
     */
    private static function wrapOffset($offset, $length)
    {
        $offset = (int) $offset;

        // Nothing to wrap around: avoid a modulo by zero.
        if (0 >= $length) {
            return 0;
        }

        if (0 > $offset) {
            $offset = -$offset % $length;

            if (0 !== $offset) {
                $offset = $length - $offset;
            }
        } elseif ($offset >= $length) {
            $offset %= $length;
        }

        return $offset;
    }

    /**
     * Get a specific character of the current string.
     *
     * @param   int  $offset    Offset (can be negative and unbounded).
     * @return  string
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return mb_substr(
            (string) $this->_string,
            $this->computeOffset($offset),
            1
        );
    }

    /**
     * Set a specific character of the current string.
     *
     * A null offset (i.e. `$string[] = $value`) appends the value.
     *
     * @param   int     $offset    Offset (can be negative and unbounded).
     * @param   string  $value     Value.
     * @return  static
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        if (null === $offset) {
            return $this->append($value);
        }

        $string = (string) $this->_string;
        $offset = $this->computeOffset($offset);
        $head   = mb_substr($string, 0, $offset);
        $tail   = mb_substr($string, $offset + 1);

        $this->_string    = $head . $value . $tail;
        $this->_direction = null;

        return $this;
    }

    /**
     * Delete a specific character of the current string.
     *
     * @param   int  $offset    Offset (can be negative and unbounded).
     * @return  static
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        return $this->offsetSet($offset, null);
    }

    /**
     * Check if a specific offset exists.
     *
     * Always true: offsets wrap around the length of the string.
     *
     * @param   int  $offset    Offset (unused).
     * @return  bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return true;
    }

    /**
     * Reduce the current string to a substring.
     *
     * @param   int  $start     Position of the first character.
     * @param   int  $length    Maximum number of characters (null for "up to
     *                          the end").
     * @return  static
     */
    public function reduce($start, $length = null)
    {
        $this->_string    = mb_substr((string) $this->_string, $start, $length);
        $this->_direction = null;

        return $this;
    }

    /**
     * Count the number of characters of the current string.
     *
     * @return  int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        return mb_strlen((string) $this->_string);
    }

    /**
     * Get the byte (not the character) at a specific offset.
     *
     * @param   int  $offset    Byte offset (can be negative and unbounded).
     * @return  string          One byte, or an empty string when the current
     *                          string is empty.
     */
    public function getByteAt($offset)
    {
        $string = (string) $this->_string;
        $length = strlen($string);

        if (0 === $length) {
            return '';
        }

        return $string[self::wrapOffset($offset, $length)];
    }

    /**
     * Count the number of bytes of the current string.
     *
     * @return  int
     */
    public function getBytesLength()
    {
        return strlen((string) $this->_string);
    }

    /**
     * Get the width of the current string, as computed by `mb_strwidth`.
     *
     * @return  int
     */
    public function getWidth()
    {
        return mb_strwidth((string) $this->_string);
    }

    /**
     * Get the direction of the current string.
     *
     * The direction is the one of the first character; it is LTR for an empty
     * string. The result is cached until the string is modified.
     *
     * @return  int    static::LTR or static::RTL.
     */
    public function getDirection()
    {
        if (null === $this->_direction) {
            if (null === $this->_string) {
                $this->_direction = static::LTR;
            } else {
                $this->_direction = static::getCharDirection(
                    mb_substr($this->_string, 0, 1)
                );
            }
        }

        return $this->_direction;
    }

    /**
     * Get the direction of a character.
     *
     * The code point is looked up in a hard-coded list of right-to-left
     * ranges; everything else is left-to-right.
     *
     * @param   string  $char    Character.
     * @return  int              static::LTR or static::RTL.
     */
    public static function getCharDirection($char)
    {
        $c = static::toCode($char);

        // Outside the span covered by the tables below.
        if (!(0x5be <= $c && 0x10b7f >= $c)) {
            return static::LTR;
        }

        if (0x85e >= $c) {
            if (0x5be === $c ||
                0x5c0 === $c ||
                0x5c3 === $c ||
                0x5c6 === $c ||
                (0x5d0 <= $c && 0x5ea >= $c) ||
                (0x5f0 <= $c && 0x5f4 >= $c) ||
                0x608 === $c ||
                0x60b === $c ||
                0x60d === $c ||
                0x61b === $c ||
                (0x61e <= $c && 0x64a >= $c) ||
                (0x66d <= $c && 0x66f >= $c) ||
                (0x671 <= $c && 0x6d5 >= $c) ||
                (0x6e5 <= $c && 0x6e6 >= $c) ||
                (0x6ee <= $c && 0x6ef >= $c) ||
                (0x6fa <= $c && 0x70d >= $c) ||
                0x710 === $c ||
                (0x712 <= $c && 0x72f >= $c) ||
                (0x74d <= $c && 0x7a5 >= $c) ||
                0x7b1 === $c ||
                (0x7c0 <= $c && 0x7ea >= $c) ||
                (0x7f4 <= $c && 0x7f5 >= $c) ||
                0x7fa === $c ||
                (0x800 <= $c && 0x815 >= $c) ||
                0x81a === $c ||
                0x824 === $c ||
                0x828 === $c ||
                (0x830 <= $c && 0x83e >= $c) ||
                (0x840 <= $c && 0x858 >= $c) ||
                0x85e === $c) {
                return static::RTL;
            }
        } elseif (0x200f === $c) {
            // RIGHT-TO-LEFT MARK.
            return static::RTL;
        } elseif (0xfb1d <= $c) {
            if (0xfb1d === $c ||
                (0xfb1f <= $c && 0xfb28 >= $c) ||
                (0xfb2a <= $c && 0xfb36 >= $c) ||
                (0xfb38 <= $c && 0xfb3c >= $c) ||
                0xfb3e === $c ||
                (0xfb40 <= $c && 0xfb41 >= $c) ||
                (0xfb43 <= $c && 0xfb44 >= $c) ||
                (0xfb46 <= $c && 0xfbc1 >= $c) ||
                (0xfbd3 <= $c && 0xfd3d >= $c) ||
                (0xfd50 <= $c && 0xfd8f >= $c) ||
                (0xfd92 <= $c && 0xfdc7 >= $c) ||
                (0xfdf0 <= $c && 0xfdfc >= $c) ||
                (0xfe70 <= $c && 0xfe74 >= $c) ||
                (0xfe76 <= $c && 0xfefc >= $c) ||
                (0x10800 <= $c && 0x10805 >= $c) ||
                0x10808 === $c ||
                (0x1080a <= $c && 0x10835 >= $c) ||
                (0x10837 <= $c && 0x10838 >= $c) ||
                0x1083c === $c ||
                (0x1083f <= $c && 0x10855 >= $c) ||
                (0x10857 <= $c && 0x1085f >= $c) ||
                (0x10900 <= $c && 0x1091b >= $c) ||
                (0x10920 <= $c && 0x10939 >= $c) ||
                0x1093f === $c ||
                0x10a00 === $c ||
                (0x10a10 <= $c && 0x10a13 >= $c) ||
                (0x10a15 <= $c && 0x10a17 >= $c) ||
                (0x10a19 <= $c && 0x10a33 >= $c) ||
                (0x10a40 <= $c && 0x10a47 >= $c) ||
                (0x10a50 <= $c && 0x10a58 >= $c) ||
                (0x10a60 <= $c && 0x10a7f >= $c) ||
                (0x10b00 <= $c && 0x10b35 >= $c) ||
                (0x10b40 <= $c && 0x10b55 >= $c) ||
                (0x10b58 <= $c && 0x10b72 >= $c) ||
                (0x10b78 <= $c && 0x10b7f >= $c)) {
                return static::RTL;
            }
        }

        return static::LTR;
    }

    /**
     * Get the number of column positions of a character.
     *
     * @param   string  $char    Character.
     * @return  int              -1 for a control character, 0 for the null
     *                           character and for non-spacing characters, 2
     *                           for wide (mostly East Asian) characters, 1
     *                           otherwise.
     */
    public static function getCharWidth($char)
    {
        $char = (string) $char;
        $c    = static::toCode($char);

        // Null character.
        if (0x0 === $c) {
            return 0;
        }

        // C0 and C1 control characters.
        if (0x20 > $c || (0x7f <= $c && $c < 0xa0)) {
            return -1;
        }

        // Non-spacing characters (the soft hyphen, 0xad, is excluded).
        if (0xad !== $c &&
            0 !== preg_match('#^[\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11ff}\x{200b}]#u', $char)) {
            return 0;
        }

        // If we arrive here, $c is neither a control character nor a
        // non-spacing one: it takes 2 columns if it is wide, 1 otherwise.
        $isWide =
            0x1100 <= $c &&
            (0x115f >= $c || // Hangul Jamo init. consonants
             0x2329 === $c || 0x232a === $c ||
             (0x2e80 <= $c && 0xa4cf >= $c &&
              0x303f !== $c) || // CJK…Yi
             (0xac00 <= $c && 0xd7a3 >= $c) || // Hangul Syllables
             (0xf900 <= $c && 0xfaff >= $c) || // CJK Compatibility Ideographs
             (0xfe10 <= $c && 0xfe19 >= $c) || // Vertical forms
             (0xfe30 <= $c && 0xfe6f >= $c) || // CJK Compatibility Forms
             (0xff00 <= $c && 0xff60 >= $c) || // Fullwidth Forms
             (0xffe0 <= $c && 0xffe6 >= $c) ||
             (0x20000 <= $c && 0x2fffd >= $c) ||
             (0x30000 <= $c && 0x3fffd >= $c));

        return true === $isWide ? 2 : 1;
    }

    /**
     * Check whether a character is printable, i.e. takes at least one column.
     *
     * @param   string  $char    Character.
     * @return  bool
     */
    public static function isCharPrintable($char)
    {
        return 1 <= static::getCharWidth($char);
    }

    /**
     * Get the character of a specific code point.
     *
     * @param   int  $code    Code point.
     * @return  string        UTF-8 encoded character.
     */
    public static function fromCode($code)
    {
        // Preferred when available: the HTML-ENTITIES encoding of mbstring
        // is deprecated in recent PHP versions.
        if (true === function_exists('mb_chr')) {
            $char = mb_chr((int) $code, 'UTF-8');

            if (false !== $char) {
                return $char;
            }
        }

        return mb_convert_encoding(
            '&#x' . dechex($code) . ';',
            'UTF-8',
            'HTML-ENTITIES'
        );
    }

    /**
     * Get the code point of a UTF-8 encoded character.
     *
     * Only the first character of the string is decoded. The input is not
     * validated; an empty string gives 0.
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function toCode($char)
    {
        $char = (string) $char;

        if ('' === $char) {
            return 0;
        }

        $code  = ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) { // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) { // 110xxxxx
            $bytes = 2;
            $code  = $code & ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) { // 1110xxxx
            $bytes = 3;
            $code  = $code & ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) { // 11110xxx
            $bytes = 4;
            $code  = $code & ~0xf0;
        }

        for ($i = 2; $i <= $bytes; $i++) { // 10xxxxxx
            // A missing continuation byte (truncated input) counts as 0.
            $byte = isset($char[$i - 1]) ? ord($char[$i - 1]) : 0;
            $code = ($code << 6) + ($byte & ~0x80);
        }

        return $code;
    }

    /**
     * Get the binary representation of a character, 8 bits per byte.
     *
     * @param   string  $char    Character (every byte is dumped).
     * @return  string           E.g. `01000001` for `A`; null for an empty
     *                           string.
     */
    public static function toBinaryCode($char)
    {
        $char = (string) $char;
        $out  = null;

        for ($i = 0, $max = strlen($char); $i < $max; ++$i) {
            $out .= sprintf('%08b', ord($char[$i]));
        }

        return $out;
    }

    /**
     * Transcode a string with `iconv`.
     *
     * @param   string  $string    String to transcode.
     * @param   string  $from      Original encoding.
     * @param   string  $to        Final encoding.
     * @return  string             Result of `iconv`.
     */
    public static function transcode($string, $from, $to = 'UTF-8')
    {
        return iconv($from, $to, $string);
    }

    /**
     * Check whether a string is encoded in UTF-8.
     *
     * @param   string  $string    String.
     * @return  bool
     */
    public static function isUtf8($string)
    {
        // With the `u` option, PCRE refuses a subject that is not valid
        // UTF-8; an empty pattern matches anything else.
        return (bool) preg_match('##u', $string);
    }

    /**
     * Copy the current object (shortcut for `clone`).
     *
     * @return  static
     */
    public function copy()
    {
        return clone $this;
    }

    /**
     * Transform the object into a string.
     *
     * @return  string    Current string; empty if nothing has been appended.
     */
    public function __toString()
    {
        // The cast is required: the property is null for an empty instance
        // and __toString() must return a string.
        return (string) $this->_string;
    }
}
1033 
// Hand the fully-qualified name of the class declared in this file to
// Hoa\Core\Consistency::flexEntity().
// NOTE(review): presumably this registers a shorter alias for the class;
// confirm against Hoa\Core\Consistency.
1037 Core\Consistency::flexEntity('Hoa\Ustring\Ustring');
static toCode($char)
Definition: Ustring.php:943
static getCharWidth($char)
Definition: Ustring.php:874
static getCollator()
Definition: Ustring.php:301
toAscii($try=false)
Definition: Ustring.php:478
split($pattern, $limit=-1, $flags=self::WITHOUT_EMPTY)
Definition: Ustring.php:422
pad($length, $piece, $side=self::END)
Definition: Ustring.php:257
reduce($start, $length=null)
Definition: Ustring.php:682
offsetSet($offset, $value)
Definition: Ustring.php:638
static isUtf8($string)
Definition: Ustring.php:1008
static getTransliterator($identifier)
Definition: Ustring.php:557
static fromCode($code)
Definition: Ustring.php:928
offsetUnset($offset)
Definition: Ustring.php:660
replace($pattern, $replacement, $limit=-1)
Definition: Ustring.php:390
append($substring)
Definition: Ustring.php:228
match($pattern, &$matches=null, $flags=0, $offset=0, $global=false)
Definition: Ustring.php:348
offsetGet($offset)
Definition: Ustring.php:626
const GROUP_BY_PATTERN
Definition: Ustring.php:167
offsetExists($offset)
Definition: Ustring.php:670
trim($regex= '\s', $side=3)
Definition: Ustring.php:574
computeOffset($offset)
Definition: Ustring.php:603
static transcode($string, $from, $to= 'UTF-8')
Definition: Ustring.php:997
getByteAt($offset)
Definition: Ustring.php:705
transliterate($identifier, $start=0, $end=null)
Definition: Ustring.php:535
static safePattern($pattern)
Definition: Ustring.php:320
compare($string)
Definition: Ustring.php:287
static isCharPrintable($char)
Definition: Ustring.php:917
__construct($string=null)
Definition: Ustring.php:205
prepend($substring)
Definition: Ustring.php:241
static toBinaryCode($char)
Definition: Ustring.php:977
static getCharDirection($char)
Definition: Ustring.php:773