Data Repair Attempted by setting constructor "$repair" to TRUE
1...5...10...15..
The Æ AE Ligature
5662c244246667776
485060150c9714525
1...5...10...15...
The Æ AE Ligature
5662c8244246667776
4850360150c9714525
UTF8 Object
(
[error] =>
[bytes] => 18
[chars] => 17
[str] => The Æ AE Ligature
)
1...5...10...15...20...25...3
Accented "a" à in this string
4666676622622e266276672777666
1335e45402120009e0489303429e7
1...5...10...15...20...25...30
Accented "a" Ã in this string
4666676622622ca266276672777666
1335e454021203009e0489303429e7
UTF8 Object
(
[error] =>
[bytes] => 30
[chars] => 29
[str] => Accented "a" Ã in this string
)
1...5...10...15
Euro € in text
247762826627677
0552f0009e04584
1...5...10...15..
Euro € in text
247762e8a26627677
0552f022c09e04584
UTF8 Object
(
[error] =>
[bytes] => 17
[chars] => 15
[str] => Euro € in text
)
<?php // classes/demo_UTF8_renee.php
/**
* See: https://www.phpclasses.org/post_forum_message.html?package=11059&message=40244
*
* This script uses class_UTF8 to determine if a string is UTF-8 compatible.
*
* The constructor receives a string and returns an object containing the
* string and a validity indicator. If the string fails UTF-8 validation,
* the offset location of the failures will be provided in an array in the
* "error" property.
*
* The class can also attempt to repair damaged encodings, but the outcome
* of repairs is less certain. PHP converts extended ASCII into UTF-8 by
* putting hex C0 in front of the extended ASCII characters, thus not all
* conversions will be correct (eg: The UTF8 Euro symbol is three bytes).
*
*/
error_reporting(E_ALL);
require_once 'class_UTF8.php';

// Declare the page encoding and keep the column alignment of the dumps intact
echo '<meta charset="utf-8" />', '<pre>';

// Sample strings: each one embeds a single byte above 0x7F (built with chr())
$samples = [
    sprintf('The %s AE Ligature', chr(0xC6)),
    sprintf('Accented "a" %s in this string', chr(0xE0)),
    sprintf(' Euro %s in text', chr(0x80)),
];

echo '<h3>Data Repair Attempted by setting constructor "$repair" to TRUE</h3>';

foreach ($samples as $sample) {
    // Dump the raw input bytes first ...
    hexdump($sample);
    echo PHP_EOL;

    // ... then build the object with the second constructor argument set to TRUE
    $repaired = new UTF8($sample, true);

    // Dump the string held by the object, followed by the object itself
    hexdump($repaired->str);
    print_r($repaired);
    echo PHP_EOL;
}

// Finish by showing this script's own source
highlight_file(__FILE__);
// Unrelated utility function to show us the hex byte values
/**
 * Echo a four-line dump of a string: a column ruler, the raw string, and the
 * hexadecimal value of every byte split into a row of high nibbles and a row
 * of low nibbles, so that each byte can be read top-to-bottom under its column.
 *
 * Every line is preceded by $br and one trailing $br is written at the end.
 * The string is echoed unescaped, exactly as received.
 *
 * @param string $str The bytes to dump
 * @param string $br  Line separator written before each row (default PHP_EOL)
 * @return false|null FALSE when there is nothing to dump, otherwise no value
 */
function hexdump($str, $br=PHP_EOL)
{
    // Skip empty input, but not the one-byte string "0": empty("0") is TRUE in PHP,
    // so a plain empty() test would silently refuse to dump valid data
    if ($str === '' || (!is_string($str) && empty($str))) return FALSE;

    $len = strlen($str);

    // Allocate bytes into hi and lo nibbles: bin2hex() yields two hex digits per byte
    $hi = '';
    $lo = '';
    foreach (str_split(bin2hex($str), 2) as $byte) {
        $hi .= $byte[0];
        $lo .= $byte[1];
    }

    // Build the ruler "1...5...10...15..." where each label ends on the column it names.
    // Generating it (instead of using a fixed literal) keeps it as long as the data.
    $num = '1';
    for ($mark = 5; strlen($num) < $len; $mark += 5) {
        $num .= str_pad((string) $mark, $mark - strlen($num), '.', STR_PAD_LEFT);
    }
    $num = substr($num, 0, $len);

    // Show the scale, the string and the hex
    echo $br . $num;
    echo $br . $str;
    echo $br . $hi;
    echo $br . $lo;
    echo $br;
}