-- Lookup table of the 50 U.S. states plus the District of Columbia.
-- `id` is a small surrogate key; `abbreviation` is the two-letter postal code.
-- AUTO_INCREMENT=52 makes the next generated id follow the 51 seeded rows.
CREATE TABLE `state` (
    `id` TINYINT(4) NOT NULL AUTO_INCREMENT,
    `name` CHAR(50) NOT NULL DEFAULT '',
    `abbreviation` CHAR(2) NOT NULL DEFAULT '',
    PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=52 DEFAULT CHARSET=utf8;

-- Seed data, in alphabetical order by name. Ids are integer literals so the
-- load does not depend on implicit string-to-integer coercion.
INSERT INTO `state` (`id`, `name`, `abbreviation`) VALUES
    (1, 'Alabama', 'AL'),
    (2, 'Alaska', 'AK'),
    (3, 'Arizona', 'AZ'),
    (4, 'Arkansas', 'AR'),
    (5, 'California', 'CA'),
    (6, 'Colorado', 'CO'),
    (7, 'Connecticut', 'CT'),
    (8, 'Delaware', 'DE'),
    (9, 'District of Columbia', 'DC'),
    (10, 'Florida', 'FL'),
    (11, 'Georgia', 'GA'),
    (12, 'Hawaii', 'HI'),
    (13, 'Idaho', 'ID'),
    (14, 'Illinois', 'IL'),
    (15, 'Indiana', 'IN'),
    (16, 'Iowa', 'IA'),
    (17, 'Kansas', 'KS'),
    (18, 'Kentucky', 'KY'),
    (19, 'Louisiana', 'LA'),
    (20, 'Maine', 'ME'),
    (21, 'Maryland', 'MD'),
    (22, 'Massachusetts', 'MA'),
    (23, 'Michigan', 'MI'),
    (24, 'Minnesota', 'MN'),
    (25, 'Mississippi', 'MS'),
    (26, 'Missouri', 'MO'),
    (27, 'Montana', 'MT'),
    (28, 'Nebraska', 'NE'),
    (29, 'Nevada', 'NV'),
    (30, 'New Hampshire', 'NH'),
    (31, 'New Jersey', 'NJ'),
    (32, 'New Mexico', 'NM'),
    (33, 'New York', 'NY'),
    (34, 'North Carolina', 'NC'),
    (35, 'North Dakota', 'ND'),
    (36, 'Ohio', 'OH'),
    (37, 'Oklahoma', 'OK'),
    (38, 'Oregon', 'OR'),
    (39, 'Pennsylvania', 'PA'),
    (40, 'Rhode Island', 'RI'),
    (41, 'South Carolina', 'SC'),
    (42, 'South Dakota', 'SD'),
    (43, 'Tennessee', 'TN'),
    (44, 'Texas', 'TX'),
    (45, 'Utah', 'UT'),
    (46, 'Vermont', 'VT'),
    (47, 'Virginia', 'VA'),
    (48, 'Washington', 'WA'),
    (49, 'West Virginia', 'WV'),
    (50, 'Wisconsin', 'WI'),
    (51, 'Wyoming', 'WY');
Tag Archive for utf-8
50 States & Washington D.C.
Convert text ISO to UTF-8
# Re-encode iso.txt from ISO-8859-1 to UTF-8, writing the result to utf.txt.
iconv -f ISO-8859-1 -t UTF-8 iso.txt > utf.txt
Category: Uncategorized |
Tags: utf-8
Cut a Long String Down to a Moderate Display String Without Splitting UTF-8 Characters
// Return the leading part of a UTF-8 string, cut to a display width, without
// ever splitting a multi-byte character in half.
//
// $str    UTF-8 encoded string (for example, text read straight from a database).
// $strlen Maximum display width. A single-byte (ASCII) character counts as 1;
//         every multi-byte character counts as 2.
//
// Returns the truncated string. Because the width check runs before each
// character is appended, the result can exceed $strlen by one when a wide
// character lands on the last column. If nothing was collected (for example
// $strlen <= 0), the whole string is returned instead, with non-breaking
// spaces already normalised.
function CuttingStr($str, $strlen) {
    // Turn non-breaking spaces into plain spaces first, both the HTML entity
    // and the raw UTF-8 byte pair for U+00A0.
    $str = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $str);

    $total_bytes = strlen($str); // byte length, computed once instead of per iteration
    $output_str_len = 0;         // display width collected so far
    $output_str = '';            // truncated string being built

    $i = 0;
    while ($i < $total_bytes && $output_str_len < $strlen) {
        // The lead byte tells how many bytes the current character occupies.
        $lead = ord($str[$i]);

        if ($lead < 128) {
            // 0-127: single-byte ASCII character, width 1.
            $char_bytes = 1;
            $char_width = 1;
        } elseif ($lead > 191 && $lead < 224) {
            // 192-223: lead byte of a 2-byte sequence, width 2.
            $char_bytes = 2;
            $char_width = 2;
        } elseif ($lead > 223 && $lead < 240) {
            // 224-239: lead byte of a 3-byte sequence, width 2.
            $char_bytes = 3;
            $char_width = 2;
        } elseif ($lead > 239 && $lead < 248) {
            // 240-247: lead byte of a 4-byte sequence, width 2.
            $char_bytes = 4;
            $char_width = 2;
        } else {
            // 128-191 or 248-255 in lead position: not a valid start byte.
            // It is dropped and adds nothing to the width.
            $i++;
            continue;
        }

        $output_str .= substr($str, $i, $char_bytes);
        $output_str_len += $char_width;
        $i += $char_bytes;
    }

    // Fall back to the (normalised) input when nothing was collected.
    return ($output_str === '') ? $str : $output_str;
}
PHP 文字コード判定 (PHP character-encoding detection)
// Guess the character encoding of a byte string that may hold Japanese text.
//
// mb_detect_encoding() is asked first; when its answer is eucJP-win or
// anything other than ASCII / JIS / UTF-8, round-trip conversions and
// byte-range patterns are used to choose between eucJP-win, SJIS-win and UTF-8.
//
// $str  Byte string to inspect.
//
// Returns the first mb_detect_encoding() result unchanged when it is FALSE,
// 'ASCII', 'JIS' or 'UTF-8'; otherwise 'eucJP-win', 'SJIS-win' or 'UTF-8'.
//
// Side effect: mb_regex_encoding() is switched to 'EUC-JP' and/or 'UTF-8'
// along the way and is not restored.
function detect_encoding_ja( $str )
{
    $enc = @mb_detect_encoding( $str, 'ASCII,JIS,eucJP-win,SJIS-win,UTF-8' );
    switch ( $enc ) {
    case FALSE :
    case 'ASCII' :
    case 'JIS' :
    case 'UTF-8' :
        break;
    case 'eucJP-win' :
        // If eucJP-win is still detected with this candidate order, accept it.
        if ( @mb_detect_encoding( $str, 'SJIS-win,UTF-8,eucJP-win' ) === 'eucJP-win' ) {
            break;
        }
        $_hint = "\xbf\xfd" . $str; // "\xbf\xfd" : EUC-JP "雀"
        // Remove characters whose mapping changes in an EUC-JP -> UTF-8
        // conversion (such as ≒ ≡ ∫) so they cannot spoil the round trip.
        mb_regex_encoding( 'EUC-JP' );
        $_hint = mb_ereg_replace( "\xad(?:\xe2|\xf5|\xf6|\xf7|\xfa|\xfb|\xfc|\xf0|\xf1|\xf2)", '', $_hint );
        $_tmp = mb_convert_encoding( $_hint, 'UTF-8', 'eucJP-win' );
        $_tmp2 = mb_convert_encoding( $_tmp, 'eucJP-win', 'UTF-8' );
        if ( $_tmp2 === $_hint ) {
            // Exception handling: byte ranges that should be recognised as
            // something other than EUC-JP.
            if (
                // Range overlapping SJIS
                // (2-byte | 3-byte | i-mode pictograms | 1-byte characters).
                // NOTE(review): the \xF9[...] class was truncated after "\x80"
                // in the copy this was restored from; its tail
                // ("-\xB0\xB1-\xFC") is filled in from the i-mode pictogram
                // Shift_JIS ranges. Confirm against the original snippet.
                ! preg_match( '/^(?:'
                    . '[\x8E\xE0-\xE9][\x80-\xFC]|\xEA[\x80-\xA4]|'
                    . '\x8F[\xB0-\xEF][\xE0-\xEF][\x40-\x7F]|'
                    . '\xF8[\x9F-\xFC]|\xF9[\x40-\x49\x50-\x52\x55-\x57\x5B-\x5E\x72-\x7E\x80-\xB0\xB1-\xFC]|'
                    . '[\x00-\x7E]'
                    . ')+$/', $str ) &&
                // Range overlapping UTF-8
                // (full-width alphanumerics | kanji | 1-byte characters).
                ! preg_match( '/^(?:'
                    . '\xEF\xBC[\xA1-\xBA]|[\x00-\x7E]|'
                    . '[\xE4-\xE9][\x8E-\x8F\xA1-\xBF][\x8F\xA0-\xEF]|'
                    . '[\x00-\x7E]'
                    . ')+$/', $str )
            ) {
                // Outside both overlap ranges: detect as eucJP-win.
                break;
            }
            // Exception handling 2: a few compound words that are presumably
            // frequent are judged to be eucJP-win.
            // (珈琲|琥珀|瑪瑙|癇癪|碼碯|耄碌|膀胱|蒟蒻|薔薇|蜻蛉)
            if ( mb_ereg( '^(?:'
                    . '\xE0\xDD\xE0\xEA|\xE0\xE8\xE0\xE1|\xE0\xF5\xE0\xEF|\xE1\xF2\xE1\xFB|'
                    . '\xE2\xFB\xE2\xF5|\xE6\xCE\xE2\xF1|\xE7\xAF\xE6\xF9|\xE8\xE7\xE8\xEA|'
                    . '\xE9\xAC\xE9\xAF|\xE9\xF1\xE9\xD9|[\x00-\x7E]'
                    . ')+$', $str )
            ) {
                break;
            }
        }
        // No break: an unconfirmed eucJP-win guess deliberately falls through
        // to the SJIS-win / UTF-8 decision below.
    default :
        // If SJIS-win is detected here, the encoding is judged to be SJIS-win.
        $enc = @mb_detect_encoding( $str, 'UTF-8,SJIS-win' );
        if ( $enc === 'SJIS-win' ) {
            break;
        }
        // Use SJIS-win as the default.
        $enc = 'SJIS-win';
        $_hint = "\xe9\x9b\x80" . $str; // "\xe9\x9b\x80" : UTF-8 "雀"
        // Adjust characters whose mapping changes during conversion so the
        // UTF-8 -> SJIS-win -> UTF-8 round trip below can compare equal.
        mb_regex_encoding( 'UTF-8' );
        $_hint = mb_ereg_replace( "\xe3\x80\x9c", "\xef\xbd\x9e", $_hint ); // U+301C -> U+FF5E
        $_hint = mb_ereg_replace( "\xe2\x88\x92", "\xe3\x83\xbc", $_hint ); // U+2212 -> U+30FC
        $_hint = mb_ereg_replace( "\xe2\x80\x96", "\xe2\x88\xa5", $_hint ); // U+2016 -> U+2225
        $_tmp = mb_convert_encoding( $_hint, 'SJIS-win', 'UTF-8' );
        $_tmp2 = mb_convert_encoding( $_tmp, 'UTF-8', 'SJIS-win' );
        if ( $_tmp2 === $_hint ) {
            $enc = 'UTF-8';
        }
        // Where UTF-8 overlaps a pair of SJIS characters, SJIS takes priority.
        if ( preg_match( '/^(?:[\xE4-\xE9][\x80-\xBF][\x80-\x9F][\x00-\x7F])+/', $str ) ) {
            $enc = 'SJIS-win';
        }
    }
    return $enc;
}
iconv conversion
require 'iconv'
# Builds URL-friendly permalink strings and offers a macro that fills a
# permalink attribute on a record after validation.
module PermalinkFu
  class << self
    # Target and source encodings handed to Iconv.iconv by .escape.
    attr_accessor :translation_to
    attr_accessor :translation_from

    # Convert +str+ into a lowercase, dash-separated slug.
    #
    # The text is first transcoded from +translation_from+ to +translation_to+,
    # then every run of non-word characters becomes a single separator.
    # Returns a new String; +str+ itself is not modified.
    def escape(str)
      # Iconv.iconv returns an Array of converted strings; join it back into
      # one String (Array#to_s would yield the inspect form on Ruby 1.9+).
      s = Iconv.iconv(translation_to, translation_from, str).join
      s.gsub!(/\W+/, ' ') # all non-word chars to spaces
      s.strip!            # drop leading/trailing separators
      s.downcase!
      s.gsub!(/ +/, '-')  # spaces to dashes, preferred separator char everywhere
      s
    end
  end

  # Register an after_validation callback that sets +permalink_field+
  # (default 'permalink') to the escaped value of +attr_name+, but only when
  # the permalink is currently blank.
  #
  # NOTE(review): after_validation is not defined in this module; presumably
  # it comes from the class that extends PermalinkFu - confirm.
  def has_permalink(attr_name, permalink_field = nil)
    permalink_field ||= 'permalink'
    after_validation do |record|
      if record.send(permalink_field).to_s.empty?
        record.send("#{permalink_field}=", PermalinkFu.escape(record.send(attr_name).to_s))
      end
    end
  end
end

# Default transcoding: UTF-8 input, ASCII output.
PermalinkFu.translation_to = 'ascii//ignore//translit'
PermalinkFu.translation_from = 'utf-8'