gb2312_unicode对照表
最近一直在学习和编写FAT32文件系统的代码,读到长文件名时发现微软的这个长文件名支持真是要命:普通的8.3文件名用的是系统默认的编码,比如中文Windows是cp936,但长文件名却用了Unicode。于是为了处理目录文件中的项目,你非得做一个cp936与Unicode的对照表不可。我的嵌入式系统不得不额外地增加了100多KB的大小。
下面这个Python程序是我用来生成GB2312与Unicode汉字对照表的,GB2312中没有的Unicode汉字我都用“口”代替了。
#-*- coding: gb2312 -*-
#GB2312-UNICODE CONVERTER
#This program generates a C header file to convert GB2312 to Unicode, and Unicode to GB2312.
import struct
# Generator for two C lookup tables, written to standard output:
#   gk2un[] - code point for every (lead, trail) byte pair in the block
#             lead 0xb0..0xf7 x trail 0xa1..0xfe, in row-major order
#   un2gb[] - two-byte code for every code point from 0x4e00 up to the
#             largest code point that occurs in gk2un[]
# The code runs unchanged on Python 2 and Python 3.
import sys

# Table geometry, taken from the loops of the original script.
LEAD_FIRST, LEAD_LAST = 0xb0, 0xf7
TRAIL_FIRST, TRAIL_LAST = 0xa1, 0xfe
# Row 0xd7 is only decoded up to trail byte 0xf9; its last five cells are
# padded so that every row keeps the same width.
SHORT_ROW, SHORT_ROW_LAST = 0xd7, 0xf9
# Code point stored in the padded cells (U+53E3).  Spelled as a number so the
# result does not depend on the encoding this source file is saved in.
FILLER_CODE_POINT = 0x53e3
# First code point covered by un2gb[].
UNICODE_FIRST = 0x4e00
# Entry emitted for code points whose GBK bytes fall outside the block above.
NOT_GB2312_ENTRY = u"0xe0ed,"

try:
    _unichr = unichr        # Python 2
except NameError:
    _unichr = chr           # Python 3


def build_gb_to_unicode():
    """Return the cells of the gk2un table in row-major order.

    Each cell is a tuple ``(lead, trail, code_point, assigned)``.
    ``assigned`` is False for the five padded cells of row 0xd7, whose
    ``code_point`` is FILLER_CODE_POINT.
    """
    cells = []
    for lead in range(LEAD_FIRST, LEAD_LAST + 1):
        for trail in range(TRAIL_FIRST, TRAIL_LAST + 1):
            if lead == SHORT_ROW and trail > SHORT_ROW_LAST:
                cells.append((lead, trail, FILLER_CODE_POINT, False))
                continue
            # "BB" packs the two bytes explicitly, so the result does not
            # depend on the byte order of the machine running the script.
            char = struct.pack("BB", lead, trail).decode("gbk")
            cells.append((lead, trail, ord(char), True))
    return cells


def build_unicode_to_gb(last_code_point):
    """Return the cells of the un2gb table for 0x4e00..last_code_point.

    Each cell is a tuple ``(code_point, lead, trail, in_gb2312)`` where
    ``lead``/``trail`` are the GBK bytes of the character and ``in_gb2312``
    tells whether they lie inside the block covered by gk2un.

    Raises UnicodeEncodeError if a code point in the range has no GBK
    encoding, and struct.error if its encoding is not exactly two bytes.
    """
    cells = []
    for code_point in range(UNICODE_FIRST, last_code_point + 1):
        encoded = _unichr(code_point).encode("gbk")
        lead, trail = struct.unpack("BB", encoded)
        in_gb2312 = (LEAD_FIRST <= lead <= LEAD_LAST
                     and TRAIL_FIRST <= trail <= TRAIL_LAST)
        cells.append((code_point, lead, trail, in_gb2312))
    return cells


def format_gb_to_unicode(cells):
    """Return the C source lines of the gk2un array for *cells*."""
    # The array size is the number of emitted initialisers; the padded cells
    # count too, otherwise the declaration is smaller than its initialiser.
    lines = [u"//convering gk2312 to unicode",
             u"U16 gk2un[%d]={" % len(cells)]
    for lead, trail, code_point, assigned in cells:
        if assigned:
            lines.append(u"0x%x,  // %s : 0x%x%x"
                         % (code_point, _unichr(code_point), lead, trail))
        else:
            # Lead byte first, like every other comment in the table.
            lines.append(u"0x%x, //blank, : 0x%x%x"
                         % (code_point, lead, trail))
    lines.append(u"};")
    return lines


def format_unicode_to_gb(cells):
    """Return the C source lines of the un2gb array for *cells*."""
    lines = [u"//conver unicode to gb2312",
             u"U16 un2gb[%d]={" % len(cells)]
    for code_point, lead, trail, in_gb2312 in cells:
        glyph = _unichr(code_point)
        if in_gb2312:
            lines.append(u"0x%x%x,  // %s : 0x%x"
                         % (lead, trail, glyph, code_point))
        else:
            lines.append(u"%s  // %s : 0x%x  not gb2312"
                         % (NOT_GB2312_ENTRY, glyph, code_point))
    lines.append(u"};")
    return lines


def _emit(lines):
    """Write *lines* to standard output as GBK-encoded bytes.

    The original script printed the raw GBK byte strings, so the generated
    header was GBK-encoded regardless of the console; this keeps that.
    """
    text = u"\n".join(lines) + u"\n"
    sys.stdout.flush()
    raw = getattr(sys.stdout, "buffer", None)
    if raw is not None:
        raw.write(text.encode("gbk"))           # Python 3 binary layer
        raw.flush()
    elif str is bytes:
        sys.stdout.write(text.encode("gbk"))    # Python 2 byte stream
    else:
        sys.stdout.write(text)                  # text-only replacement stream


def main():
    """Generate both tables and the trailing summary comment."""
    gb_cells = build_gb_to_unicode()
    assigned_points = [cell[2] for cell in gb_cells if cell[3]]
    # un2gb only needs to reach the largest code point present in gk2un.
    uni_cells = build_unicode_to_gb(max(assigned_points))
    matched = sum(1 for cell in uni_cells if cell[3])

    lines = format_gb_to_unicode(gb_cells)
    lines.extend(format_unicode_to_gb(uni_cells))
    lines.append(u"//gb2312 count= %d  unicode count= %d un2gb count= %d"
                 % (len(assigned_points), len(uni_cells), matched))
    _emit(lines)


# Executed unconditionally, as the original flat script was.
main()