gb2312_unicode对照表

    最近一直在学习和编写FAT32文件系统的代码,读到长文件名时发现微软的这个长文件名支持真是要命:普通的8.3文件名用的是系统默认的编码,比如中文Windows用的是cp936,但长文件名却用了Unicode。于是为了处理目录文件项目,你非得做一个cp936与Unicode的对照表不可,我的嵌入式系统因此不得不额外增加了100多KB的大小。
    下面的这个python程序我用来生成一个gb2312与unicode的汉字对照表,gb2312没有的unicode汉字我都用“口”代替了。
               
               
                #-*- coding: gb2312 -*-
#GB2312-UNICODE CONVERTER
#This program generates a C header file to convert GB2312 to Unicode, and Unicode to GB2312.
import struct
# Generator for the two lookup tables of the C header.
#
# The code below is pure ASCII and runs unchanged on Python 2.6+ and
# Python 3.3+.  Table data is built by small functions so it can be checked
# without printing anything; main() writes the header to standard output.

# Inclusive bounds of the GB2312 hanzi area: lead bytes (rows) 0xB0-0xF7 and
# trail bytes (columns) 0xA1-0xFE, i.e. 72 rows of 94 cells.
FIRST_ROW = 0xB0
LAST_ROW = 0xF7
FIRST_COL = 0xA1
LAST_COL = 0xFE

# Row 0xD7 is only decoded for columns 0xA1-0xF9; its last five cells are
# emitted as blank placeholders so that every row keeps 94 entries.
SHORT_ROW = 0xD7
SHORT_ROW_LAST_COL = 0xF9

# Unicode value stored in gk2un for the blank cells: U+53E3, the "mouth"
# character.  Using the code point avoids depending on the encoding this
# source file happens to be saved in.
BLANK_UNICODE = 0x53E3

# GB code stored in un2gb for Unicode hanzi that GB2312 does not contain.
NOT_GB2312_CODE = 0xE0ED

# First Unicode code point covered by un2gb.
FIRST_UNICODE = 0x4E00

try:
    to_text = unichr  # Python 2: chr() only covers 0-255
except NameError:
    to_text = chr  # Python 3: chr() already returns text


def is_gb2312_hanzi(lead, trail):
    """Return True when the byte pair lies inside the GB2312 hanzi area."""
    return FIRST_ROW <= lead <= LAST_ROW and FIRST_COL <= trail <= LAST_COL


def build_gb2un():
    """Build the GB2312 -> Unicode table in row-major order.

    Returns a list of ``(gb_code, code_point, assigned)`` tuples where
    ``gb_code`` is ``(lead << 8) | trail``.  Blank cells of row 0xD7 carry
    ``BLANK_UNICODE`` and ``assigned`` is False.
    """
    table = []
    for lead in range(FIRST_ROW, LAST_ROW + 1):
        for trail in range(FIRST_COL, LAST_COL + 1):
            gb_code = (lead << 8) | trail
            if lead == SHORT_ROW and trail > SHORT_ROW_LAST_COL:
                table.append((gb_code, BLANK_UNICODE, False))
                continue
            # Build the two bytes explicitly (lead byte first) instead of
            # packing a native-endian short, so the result does not depend
            # on the byte order of the machine running the generator.
            glyph = bytearray((lead, trail)).decode("gbk")
            table.append((gb_code, ord(glyph), True))
    return table


def build_un2gb(last_code_point):
    """Build the Unicode -> GB2312 table for U+4E00..last_code_point.

    Returns a list of ``(code_point, gb_code, in_gb2312)`` tuples.  Code
    points whose GBK encoding falls outside the GB2312 hanzi area get
    ``NOT_GB2312_CODE`` and ``in_gb2312`` is False.
    """
    table = []
    for code_point in range(FIRST_UNICODE, last_code_point + 1):
        lead, trail = bytearray(to_text(code_point).encode("gbk"))
        if is_gb2312_hanzi(lead, trail):
            table.append((code_point, (lead << 8) | trail, True))
        else:
            table.append((code_point, NOT_GB2312_CODE, False))
    return table


def render_header():
    """Return the complete C header as one text string.

    The array sizes in the declarations are taken from the generated tables,
    so they always match the number of initialisers that follow.
    """
    gb2un = build_gb2un()
    # un2gb only needs to reach the largest code point GB2312 actually uses.
    biggest = max(cp for (_code, cp, assigned) in gb2un if assigned)
    un2gb = build_un2gb(biggest)

    lines = [
        u"//convering gk2312 to unicode\n",
        u"U16 gk2un[%d]={\n" % len(gb2un),
    ]
    gb2312_count = 0
    for gb_code, code_point, assigned in gb2un:
        if assigned:
            lines.append(u"0x%x,     // %s : 0x%x\n"
                         % (code_point, to_text(code_point), gb_code))
            gb2312_count += 1
        else:
            lines.append(u"0x%x,    //blank, : 0x%x\n" % (code_point, gb_code))
    lines.append(u"};\n")

    lines.append(u"//conver unicode to gb2312\n")
    lines.append(u"U16 un2gb[%d]={\n" % len(un2gb))
    un2gb_count = 0
    for code_point, gb_code, in_gb2312 in un2gb:
        glyph = to_text(code_point)
        if in_gb2312:
            lines.append(u"0x%x,     // %s : 0x%x\n"
                         % (gb_code, glyph, code_point))
            un2gb_count += 1
        else:
            lines.append(u"0x%x,     // %s : 0x%x  not gb2312\n"
                         % (gb_code, glyph, code_point))
    lines.append(u"};\n")

    lines.append(u"//gb2312 count= %d  unicode count= %d un2gb count= %d\n"
                 % (gb2312_count, len(un2gb), un2gb_count))
    return u"".join(lines)


def main():
    """Write the generated header to standard output, encoded as GBK."""
    import sys

    # The glyphs in the trailing comments are emitted as GBK bytes.  Python 3
    # exposes the byte stream as sys.stdout.buffer; Python 2 writes bytes to
    # sys.stdout directly.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(render_header().encode("gbk"))


if __name__ == "__main__":
    main()