PHPで文字列のエンコーディング(UTF-8/ISO-8859-1/CP1252)を判定し、CP1252からUTF-8へ変換するサンプル

phpコード:
<?php

/**
 * Checks whether a byte string consists only of well-formed UTF-8 sequences.
 *
 * Accepted characters are tab, LF, CR, printable ASCII (0x20-0x7E) and
 * non-overlong multi-byte sequences up to U+10FFFF, excluding surrogates.
 * Other ASCII control characters, DEL (0x7F) and the C1 controls
 * U+0080-U+009F (bytes \xC2\x80-\xC2\x9F) are rejected.
 *
 * @param string $str Raw bytes to test.
 * @return int|false 1 when every byte belongs to an accepted sequence (the
 *                   empty string included), 0 otherwise, false if PCRE fails.
 */
function isUTF8($str) {
    // The pattern is matched byte-wise (no "u" modifier) on purpose: it has to
    // be able to inspect input that is NOT valid UTF-8.
    return preg_match(
        '/^('
        . '[\x09\x0A\x0D\x20-\x7E]'            // tab, LF, CR, printable ASCII
        . '|[\xC2][\xA0-\xBF]'                 // U+00A0-U+00BF (C1 controls excluded)
        . '|[\xC3-\xDF][\x80-\xBF]'            // remaining 2-byte sequences
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'        // 3-byte, excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' // straight 3-byte
        . '|\xED[\x80-\x9F][\x80-\xBF]'        // 3-byte, excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'     // planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}'         // planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'     // plane 16
        . ')*$/',
        $str
    );
}

/**
 * Checks whether a byte string contains only bytes accepted as ISO-8859-1 text.
 *
 * Accepted bytes are tab, LF, CR, printable ASCII (0x20-0x7E) and 0xA0-0xFF.
 * Other ASCII control characters, DEL (0x7F) and the range 0x80-0x9F are
 * rejected.
 *
 * @param string $str Raw bytes to test.
 * @return int|false 1 when all bytes are accepted (the empty string
 *                   included), 0 otherwise, false if PCRE fails.
 */
function isISO88591($str) {
    return preg_match('/^([\x09\x0A\x0D\x20-\x7E\xA0-\xFF])*$/', $str);
}

/**
 * Checks whether a byte string contains only bytes accepted as Windows-1252
 * (CP1252) text.
 *
 * Accepted bytes are tab, LF, CR, printable ASCII (0x20-0x7E) and 0x80-0xFF
 * except the five values 0x81, 0x8D, 0x8F, 0x90 and 0x9D, which CP1252 leaves
 * unassigned. Other ASCII control characters and DEL (0x7F) are rejected.
 *
 * @param string $str Raw bytes to test.
 * @return int|false 1 when all bytes are accepted (the empty string
 *                   included), 0 otherwise, false if PCRE fails.
 */
function isCP1252($str) {
    return preg_match('/^([\x09\x0A\x0D\x20-\x7E\x80\x82-\x8C\x8E\x91-\x9C\x9E-\xFF])*$/', $str);
}

/**
 * Builds the UTF-8 byte sequence for a single Unicode code point.
 *
 * @param int $codePoint Unicode code point (numeric strings are accepted and
 *                       converted to int).
 * @return string|false The 1-4 byte UTF-8 encoding; an empty string for
 *                      U+FEFF (the byte order mark is dropped on purpose);
 *                      false for non-numeric input, negative values,
 *                      surrogates (U+D800-U+DFFF) and values above U+10FFFF.
 */
function utf8Char($codePoint) {
    // Reject null / '' / other non-numeric input up front; left to PHP's type
    // juggling such values would be encoded as a NUL byte.
    if (!is_numeric($codePoint)) {
        return false;
    }
    $codePoint = (int) $codePoint;

    $char = '';
    if ($codePoint < 0) {
        return false;
    } elseif ($codePoint <= 0x007f) {
        // 1 byte: 0xxxxxxx
        $char .= chr($codePoint);
    } elseif ($codePoint <= 0x07ff) {
        // 2 bytes: 110xxxxx 10xxxxxx
        $char .= chr(0xc0 | ($codePoint >> 6));
        $char .= chr(0x80 | ($codePoint & 0x003f));
    } elseif ($codePoint == 0xFEFF) {
        // nop: the BOM is deliberately swallowed, $char stays empty
    } elseif ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
        // surrogates are not valid scalar values and must not be encoded
        return false;
    } elseif ($codePoint <= 0xffff) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        $char .= chr(0xe0 | ($codePoint >> 12));
        $char .= chr(0x80 | (($codePoint >> 6) & 0x003f));
        $char .= chr(0x80 | ($codePoint & 0x003f));
    } elseif ($codePoint <= 0x10ffff) {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $char .= chr(0xf0 | ($codePoint >> 18));
        $char .= chr(0x80 | (($codePoint >> 12) & 0x3f));
        $char .= chr(0x80 | (($codePoint >> 6) & 0x3f));
        $char .= chr(0x80 | ($codePoint & 0x3f));
    } else {
        // beyond the Unicode range
        return false;
    }
    return $char;
}

/**
 * Converts one Windows-1252 byte to its UTF-8 encoding.
 *
 * Used by utf8FromCP1252() for the bytes 0x80-0x9F, where CP1252 differs from
 * ISO-8859-1.
 *
 * @param string $char Single CP1252 byte (only the first byte is examined).
 * @return string|false UTF-8 encoding of the character; an empty string for
 *                      the five bytes CP1252 leaves unassigned (0x81, 0x8D,
 *                      0x8F, 0x90, 0x9D); otherwise whatever utf8Char()
 *                      returns for the mapped code point.
 */
function utf8FromCP1252Char($char) {
    // Unicode code points for the CP1252 bytes 0x80-0x9F. The unassigned
    // bytes 129, 141, 143, 144 and 157 are intentionally absent.
    $utf8CodePoint = array(
        128 => 0x20AC,
        130 => 0x201A,
        131 => 0x0192,
        132 => 0x201E,
        133 => 0x2026,
        134 => 0x2020,
        135 => 0x2021,
        136 => 0x02C6,
        137 => 0x2030,
        138 => 0x0160,
        139 => 0x2039,
        140 => 0x0152,
        142 => 0x017D,
        145 => 0x2018,
        146 => 0x2019,
        147 => 0x201C,
        148 => 0x201D,
        149 => 0x2022,
        150 => 0x2013,
        151 => 0x2014,
        152 => 0x02DC,
        153 => 0x2122,
        154 => 0x0161,
        155 => 0x203A,
        156 => 0x0153,
        158 => 0x017E,
        159 => 0x0178,
    );

    $cp1252CodePoint = ord($char);

    if (isset($utf8CodePoint[$cp1252CodePoint])) {
        return utf8Char($utf8CodePoint[$cp1252CodePoint]);
    }

    // Unassigned slot inside 0x80-0x9F: there is no character to emit.
    if ($cp1252CodePoint >= 0x80 && $cp1252CodePoint <= 0x9F) {
        return '';
    }

    // Everywhere else CP1252 agrees with ISO-8859-1, i.e. the byte value is
    // the Unicode code point.
    return utf8Char($cp1252CodePoint);
}

/**
 * Converts a Windows-1252 (CP1252) string to UTF-8.
 *
 * @param string $string Bytes expected to be CP1252 text.
 * @return string|null The UTF-8 encoded text, or an empty string when
 *                     isCP1252() does not accept the input. null if
 *                     preg_replace_callback() fails.
 */
function utf8FromCP1252($string) {
    if (!isCP1252($string)) {
        return '';
    }

    // ASCII bytes are already valid UTF-8, so only the high bytes are touched.
    // 0xA0-0xFF share their code point with ISO-8859-1; 0x80-0x9F go through
    // the CP1252-specific table. This yields the same bytes as the former
    // utf8_encode() + fix-up pass, without utf8_encode() (deprecated in
    // PHP 8.2) or create_function() (removed in PHP 8.0).
    return preg_replace_callback(
        '/[\x80-\xFF]/',
        function ($match) {
            $byte = ord($match[0]);
            return $byte >= 0xA0 ? utf8Char($byte) : utf8FromCP1252Char($match[0]);
        },
        $string
    );
}
?>

PHP

Posted by arkgame