My favorites
▼
|
Sign in
sean
Sean's File
Project Home
Downloads
Wiki
Issues
Source
Checkout
Browse
Changes
Source path:
svn
/
trunk
/
Perl
/
Win32Encode.pm
‹r25
r26
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
package Win32Encode;
use Encode qw(encode decode);
use Win32::API;
use utf8;
use strict;
use Exporter;
use vars qw(
$VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS
%cp $API_MultiByteToWideChar $API_WideCharToMultiByte $API_LCMapString
);
$VERSION = '0.01';
@ISA = qw(Exporter);
@EXPORT = qw(ToUnicode ToANSI ToCHS ToCHT ToHTML);
@EXPORT_OK = qw(ToUnicode ToANSI ToCHS ToCHT ToHTML);
use constant LOCALE_SYSTEM_DEFAULT => 0x0800;
use constant LANG_CHINESE_SIMPLIFIED => 0x0804;
use constant LANG_CHINESE_TRADITIONAL => 0x0404;
use constant LCMAP_SIMPLIFIED_CHINESE => 0x02000000;
use constant LCMAP_TRADITIONAL_CHINESE => 0x04000000;
%cp = qw( EBCDICCPUS 37 437 437 CP437 437 CSPC8 437 CODEPAGE437 437 IBM437 437 ASMO708 708 DOS720 720 IBM737 737 CP500 775 IBM775 775 IBM850 850 IBM852 852 CP852 852 IBM857 857 IBM861 861 DOS862 862 CP866 866 IBM866 866 IBM869 869 CP870 870 ISO885911 874 DOS874 874 WINDOWS874 874 TIS620 874 XEBCDICGREEKMODERN 875 MSKANJI 932 CSSHIFTJIS 932 CSWINDOWS31J 932 XMSCP932 932 SHIFTJIS 932 XSJIS 932 CSISO58GB231280 936 GBK 936 GB231280 936 CSGB2312 936 CHINESE 936 CNGB 936 ISOIR58 936 GB2312 936 CSGB231280 936 KSC5601 949 KOREAN 949 KSC56011989 949 CSKSC56011987 949 KSC56011987 949 ISOIR149 949 CSBIG5 950 BIG5 950 XXBIG5 950 CNBIG5 950 CP1026 1026 XEBCDICCPUSEURO 1140 XEBCDICGERMANYEURO 1141 XEBCDICDENMARKNORWAYEURO 1142 XEBCDICFINLANDSWEDENEURO 1143 XEBCDICFRANCE 1143 XEBCDICITALYEURO 1144 XEBCDICSPAINEURO 1145 XEBCDICUKEURO 1146 XEBCDICFRANCEEURO 1147 XEBCDICINTERNATIONALEURO 1148 XEBCDICICELANDICEURO 1149 UNICODE 1200 UTF16 1200 UNICODEFFFE 1201 WINDOWS1250 1250 XCP1250 1250 WINDOWS1251 1251 XCP1251 1251 ISO646US 1252 WINDOWS1252 1252 ISO646IRV1991 1252 USASCII 1252 IBM819 1252 LATIN1 1252 CP819 1252 XANSI 1252 ANSIX341986 1252 ISOIR6 1252 ISO885911987 1252 ASCII 1252 ISOIR100 1252 US 1252 ANSIX341968 1252 CSASCII 1252 CP367 1252 ISO88591 1252 IBM367 1252 WINDOWS1253 1253 WINDOWS1254 1254 ISOIR148 1254 ISO88599 1254 LATIN5 1254 ISO885991989 1254 ISO88598I 1255 VISUAL 1255 WINDOWS1255 1255 ISO88598 1255 WINDOWS1256 1256 CP1256 1256 WINDOWS1257 1257 WINDOWS1258 1258 JOHAB 1361 MACINTOSH 10000 XMACJAPANESE 10001 XMACCHINESETRAD 10002 XMACKOREAN 10003 XMACARABIC 10004 XMACHEBREW 10005 XMACGREEK 10006 XMACCYRILLIC 10007 XMACCHINESESIMP 10008 XMACCE 10029 XMACICELANDIC 10079 XMACTURKISH 10081 XCHINESECNS 20000 XCHINESEETEN 20002 XIA5 20105 XIA5GERMAN 20106 XIA5SWEDISH 20107 XIA5NORWEGIAN 20108 ISOIR6US 20127 XEBCDICGERMANY 20273 XEBCDICDENMARKNORWAY 20277 XEBCDICFINLANDSWEDEN 20278 XEBCDICITALY 20280 XEBCDICSPAIN 20284 XEBCDICUK 20285 XEBCDICJAPANESEKATAKANA 20290 XEBCDICARABIC 20420 XEBCDICGREEK 20423 XEBCDICHEBREW 20424 XEBCDICKOREANEXTENDED 20833 XEBCDICTHAI 20838 CSKOI8R 20866 KOI 20866 KOI8 20866 KOI8R 20866 XEBCDICICELANDIC 20871 XEBCDICCYRILLICRUSSIAN 20880 XEBCDICTURKISH 20905 XEBCDICCYRILLICSERBIANBULGARIAN 21025 KOI8U 21866 KOI8RU 21866 L1 28591 CSISO 28591 L2 28592 ISOIR101 28592 ISO88592 28592 CSISOLATIN2 28592 LATIN2 28592 ISO885921987 28592 ISO88593 28593 ISO885931988 28593 ISOIR109 28593 LATIN3 28593 L3 28593 LATIN4 28594 ISO885941988 28594 ISOIR110 28594 L4 28594 ISO88594 28594 CSISOLATIN4 28594 ISO88595 28595 CYRILLIC 28595 ISOIR144 28595 CSISOLATIN5 28595 CSISOLATINCYRILLIC 28595 ISO885951988 28595 ECMA114 28596 ISO885961987 28596 ARABIC 28596 ISOIR127 28596 ISO88596 28596 CSISOLATINARABIC 28596 GREEK8 28597 ELOT928 28597 ISO885971987 28597 CSISOLATINGREEK 28597 ISO88597 28597 ECMA118 28597 ISOIR126 28597 GREEK 28597 HEBREW 28598 ISO885981988 28598 CSISOLATINHEBREW 28598 ISOIR138 28598 L5 28599 L9 28605 LATIN9 28605 ISO885915 28605 XEUROPA 29001 LOGICAL 38598 ISO2022JP 50221 CSISO2022JP 50221 CSISO2022KR 50225 ISO2022KR 50225 XEBCDICJAPANESEANDKANA 50930 XEBCDICJAPANESEANDUSCANADA 50931 XEBCDICKOREANANDKOREANEXTENDED 50933 XEBCDICSIMPLIFIEDCHINESE 50935 XEBCDICTRADITIONALCHINESE 50937 XEBCDICJAPANESEANDJAPANESELATIN 50939 XEUC 51932 EUCJP 51932 XEUCJP 51932 EXTENDEDUNIXCODEPACKEDFORMATFORJAPANESE 51932 CSEUCPKDFMTJAPANESE 51932 EUCCN 51936 XEUCCN 51936 EUCKR 51949 CSEUCKR 51949 HZGB2312 52936 XISCIIDE 57002 XISCIIBE 57003 XISCIITA 57004 XISCIITE 57005 XISCIIAS 57006 XISCIIOR 57007 XISCIIKA 57008 XISCIIMA 57009 XISCIIGU 57010 XISCIIPA 57011 CSUNICODE11UTF7 65000 XUNICODE20UTF7 65000 UNICODE11UTF7 65000 UTF7 65000 XUNICODE20UTF8 65001 UTF8 65001 UNICODE20UTF8 65001 UNICODE11UTF8 65001);
# 使用 Windows API 互轉 ANSI 字元及 Perl Unicode 字串
#
#
# 原始碼以 utf-8 儲存時
# $ansi = ToANSI("中文字ABC\n"); # 相當於 encode("Big5","中文ABC") 傳回的 Big5 byte 型式
# $unicode = ToUnicode($ansi); # 轉成 Perl Unicode 字串
#
# 字元集參數預設為 CP_ACP(0) 即系統預設語言
# Perl 的 Unicode 字串正體中文轉簡體中文
sub ToCHS {
my $source = ToANSI(shift,'GBK');
my $len = length($source);
my $target = "\0" x $len;
$API_LCMapString = new Win32::API('kernel32','LCMapString','NNPNPN','N') unless defined $API_LCMapString;
my $ret = $API_LCMapString->Call(LANG_CHINESE_SIMPLIFIED,LCMAP_SIMPLIFIED_CHINESE,$source,$len,$target,$len);
return undef unless $ret;
return(ToUnicode($target,'GBK'));
}
# Perl 的 Unicode 字串簡體中文轉正體中文
sub ToCHT {
my $source = ToANSI(shift,'GBK');
my $len = length($source);
my $target = "\0" x $len;
$API_LCMapString = new Win32::API('kernel32','LCMapString','NNPNPN','N') unless defined $API_LCMapString;
my $ret = $API_LCMapString->Call(LANG_CHINESE_SIMPLIFIED,LCMAP_TRADITIONAL_CHINESE,$source,$len,$target,$len);
return undef unless $ret;
return(ToUnicode($target,'GBK'));
}
# 轉成多位元字元 (Perl 的 Unicode字串,[字元集])
sub ToHTML {
my($u8,$codepage) = @_;
$codepage ||=0;
unless($codepage =~ /^\d+$/) {
$codepage = uc($codepage);
$codepage =~ s/[^A-Z0-9]//g;
$codepage = (exists $cp{$codepage})?$cp{$codepage}:0;
}
my($u_len,$i,$u,$a);
$u_len = length($u8);
my($output) = '';
for($i=0;$i<$u_len;$i++) {
$u = substr($u8,$i,1);
if($u=~/^[\x20-\x7e]$/) {
$output .= $u;
}
else {
$a = ToANSI($u,$codepage);
if($a eq '?'){
$output .= '&#'.ord($u).';';
}
else {
$output .= $a;
}
}
}
return($output);
}
# 轉成多位元字元 (Perl 的 Unicode字串,[字元集])
sub ToANSI {
my($u8,$codepage) = @_;
$codepage ||=0;
unless($codepage =~ /^\d+$/) {
$codepage = uc($codepage);
$codepage =~ s/[^A-Z0-9]//g;
$codepage = (exists $cp{$codepage})?$cp{$codepage}:0;
}
my $u_len = length($u8);
my $uni_str = encode("UCS-2LE",$u8);
$API_WideCharToMultiByte = new Win32::API('kernel32','WideCharToMultiByte','NNPNPNPP','N') unless defined $API_WideCharToMultiByte;
my $len = $API_WideCharToMultiByte->Call($codepage,0,$uni_str,$u_len,0,0,0,0);
return undef unless $len;
my $string = "\0" x $len;
my $result = $API_WideCharToMultiByte->Call($codepage,0,$uni_str,$u_len,$string,$len,0,0);
return undef unless $result;
return($string);
}
# 轉成 Perl 的 Unicode 字串( ANSI字串,[字元集] )
sub ToUnicode {
my($string,$codepage) = @_;
$codepage ||=0;
unless($codepage =~ /^\d+$/) {
$codepage = uc($codepage);
$codepage =~ s/[^A-Z0-9]//g;
$codepage = (exists $cp{$codepage})?$cp{$codepage}:0;
}
$API_MultiByteToWideChar = new Win32::API('kernel32','MultiByteToWideChar','NNPNPN','N') unless defined $API_MultiByteToWideChar;
my $len = $API_MultiByteToWideChar->Call($codepage,0,$string,length($string),0,0);
return undef unless $len;
my $ustring = "\0" x ($len*2);
my $result = $API_MultiByteToWideChar->Call($codepage,0,$string,length($string),$ustring,$len);
return undef unless $result;
return decode("UCS-2LE",$ustring);
}
1;
Show details
Hide details
Change log
r26
by sean.tw on Nov 30, 2008
Diff
新增 ToHTML
Go to:
/trunk/Perl/Win32Encode.pm
Sign in
to write a code review
Older revisions
r25
by sean.tw on Nov 25, 2008
Diff
加入簡繁轉換
r24
by sean.tw on Nov 19, 2008
Diff
用 Windows API 來轉碼
All revisions of this file
File info
Size: 8078 bytes, 163 lines
View raw file
Powered by
Google Project Hosting