QQ消息记录统计程序
wibrst
|
1#
wibrst 发表于 2007-07-27 15:40
# QQ message-log statistics program.
#
# Current feature: count how many times each speaker posted and list the
# speakers sorted by that count.  The switch ``sInputType`` selects whether the
# input file is parsed as a copied chat history ('history') or as an exported
# chat log ('export').
#
# Main file:   organizeQQMsg.py
# Sort module: sortDict.py
#
# NOTE: the code targets Python 2 (``raw_input``, byte-string
# ``decode``/``encode``); the syntax is kept parseable by Python 3 as well.
# ---------------------------------------------------------------------------
# organizeQQMsg.py
import sys
import os
import re


def file2List(_uf):
    """Read the text file ``_uf`` and return ``(lines, bUTF8)``.

    ``lines`` is the result of ``readlines()``.  ``bUTF8`` is true when the
    first line started with the UTF-8 byte-order mark ``'\\xef\\xbb\\xbf'``;
    in that case the mark is removed from the first line.  An empty file
    yields ``([], False)`` instead of raising ``IndexError``.
    """
    with open(_uf, 'r') as f:
        l = f.readlines()
    bUTF8 = False
    if l and l[0][:3] == '\xef\xbb\xbf':
        bUTF8 = True
        l[0] = l[0][3:]
    return l, bUTF8


def getFilePath():
    """Prompt repeatedly until the user enters the path of an existing file."""
    while True:
        uInput = raw_input("input source file path:")
        if os.path.isfile(uInput):
            return uInput


def getMsgsDictExport(aRecordRaw):
    """Count messages per author in an exported log.

    A line of the form ``YYYY-MM-DD HH:MM:SS <author>`` is a message title;
    everything after the timestamp (up to the end of the line) is the author.

    Returns a dict mapping author -> number of title lines for that author.
    """
    cpTitle = re.compile(r'\d{4}-\d{2}-\d{2}\ \d{2}\:\d{2}\:\d{2}\ (.*)')
    dAuthorWords = {}
    for sLine in aRecordRaw:
        m = cpTitle.match(sLine)
        if m:
            sAuthor = m.group(1)
            # Start from 0 so the first message counts as 1 (the posted code
            # seeded the entry with 1 and then incremented it again).
            dAuthorWords[sAuthor] = dAuthorWords.get(sAuthor, 0) + 1
    return dAuthorWords


def getMsgsDictHistory(aRecordRaw):
    """Group messages per author in a copied chat history.

    A line starting with ``<author> HH:MM:SS`` is a message title.  The line
    right after a title is always taken as message content and is never
    tested against the title pattern.

    Returns a dict mapping author -> list of ``[iIdx, iStart, iEnd]`` where
    ``iIdx`` is the 1-based running message number, ``iStart`` is the index of
    the first content line and ``iEnd`` is the index of the line preceding the
    next title (or the last line index for the final message).  The caller
    treats ``iEnd`` as an exclusive bound.  Input without any title line
    (including empty input) yields an empty dict.
    """
    cpTitle = re.compile(r'(.*)\ \d{2}\:\d{2}\:\d{2}')
    dAuthorWords = {}
    aWords = None        # record of the message currently being read
    bSkipNext = False    # true for the line that directly follows a title
    iIdx = 0
    for i, sLine in enumerate(aRecordRaw):
        if bSkipNext:
            # first content line of a message: never a title
            bSkipNext = False
            continue
        m = cpTitle.match(sLine)
        if not m:
            # still inside the current message
            continue
        # A new title closes the previous message.
        if aWords is not None:
            aWords.append(i - 1)
        iIdx += 1
        aWords = [iIdx, i + 1]
        dAuthorWords.setdefault(m.group(1), []).append(aWords)
        bSkipNext = True
    # Close the last message, if there was one at all.
    if aWords is not None:
        aWords.append(len(aRecordRaw) - 1)
    return dAuthorWords


def appendElement(e):
    """Append ``e`` to the module-level ``aResult`` list.

    When the module-level flag ``bUTF8`` is set, ``e`` is first re-encoded
    from UTF-8 to GBK.  Surrounding whitespace is stripped.

    NOTE(review): ``strip()`` also removes the leading tab and the trailing
    ``os.linesep`` that the main script adds to some elements - confirm
    whether that is intended.
    """
    if bUTF8:
        e = e.decode('utf8').encode('gbk')
    aResult.append(e.strip())


def writeResult(aResult, uInput):
    """Write ``aResult`` (joined with newlines) next to the input file.

    The output path is the absolute input path with ``_stat`` inserted before
    the extension, e.g. ``sample.txt`` -> ``sample_stat.txt``.
    """
    b, e = os.path.splitext(os.path.abspath(uInput))
    uOutput = b + '_stat' + e
    with open(uOutput, 'w') as f:
        f.write('\n'.join(aResult))


if __name__ == "__main__":
    # The companion sort module (second file below) is only needed when this
    # file runs as a script.
    import sortDict

    bModeOnlyTitle = 1      # do not display msgs
    sInputType = 'export'   # one of: 'export', 'history'
    # Call getFilePath() instead to prompt for the path interactively.
    uInput = 'sample.txt'
    aRecordRaw, bUTF8 = file2List(uInput)
    aResult = []
    if sInputType == 'export':
        dAuthorCounts = getMsgsDictExport(aRecordRaw)
        aSorted = sortDict.getListSortDict(dAuthorCounts, 1)
        for item in aSorted:
            appendElement('%-20s [%d]' % (item[0], item[1]))
    elif sInputType == 'history':
        dAuthorWords = getMsgsDictHistory(aRecordRaw)
        aSorted = sortDict.getListSortDict(dAuthorWords, 2)
        for item in aSorted:
            appendElement('%-20s [%d]' % (item[0], item[2])
                          + ('' if bModeOnlyTitle else ':'))
            if bModeOnlyTitle:
                continue
            for r in item[1]:
                # r is [message number, first content line, end line]
                appendElement('%02d:\t%s' % (r[0], aRecordRaw[r[1]]))
                for j in range(r[1] + 1, r[2]):
                    appendElement('\t' + aRecordRaw[j])
            # NOTE(review): separator placed once per author; the original
            # indentation was lost in the post - confirm.
            appendElement('-------------------' + os.linesep)
    writeResult(aResult, uInput)

# ---------------------------------------------------------------------------
# sortDict.py
import random


def getRdmDict():
    """Return a test dict mapping the letters 'a'..'m' to random ints 1..100."""
    d = {}
    iCodeBase = ord('a')
    for i in range(13):
        d[chr(iCodeBase + i)] = random.randint(1, 100)
    return d


def getListSortDict(d, iMethod=1):
    """Return the entries of ``d`` as a list sorted in ascending order.

    iMethod == 1: values are counts; entries are ``[key, count]``.
    iMethod == 2: values are lists; entries are ``[key, list, len(list)]``
                  and are ordered by ``len(list)``.

    Entries with equal sort values keep the iteration order of ``d``.
    Raises ValueError for any other ``iMethod``.
    """
    if iMethod == 1:
        insertElement = insertElement1
    elif iMethod == 2:
        insertElement = insertElement2
    else:
        raise ValueError('unknown iMethod: %r' % (iMethod,))
    aSort = []
    for k in d:
        insertElement(aSort, k, d[k])
    return aSort


def insertElement1(aSort, sAuthor, iTimes):
    """Insert ``[sAuthor, iTimes]`` into ``aSort``, kept ascending by count.

    The new entry goes before the first existing entry with a strictly
    greater count, i.e. after any entries with an equal count.
    """
    # [sAuthor,iTimes] format
    for i, aEntry in enumerate(aSort):
        if iTimes < aEntry[1]:
            aSort.insert(i, [sAuthor, iTimes])
            return
    aSort.append([sAuthor, iTimes])


def insertElement2(aSort, sAuthor, aWords):
    """Insert ``[sAuthor, aWords, len(aWords)]`` into ``aSort``.

    ``aSort`` is kept ascending by the stored length; the new entry goes
    before the first existing entry with a strictly greater length.
    """
    # [sAuthor,aWords,iTimes] format
    iTimes = len(aWords)
    for i, aEntry in enumerate(aSort):
        if iTimes < aEntry[2]:
            aSort.insert(i, [sAuthor, aWords, iTimes])
            return
    aSort.append([sAuthor, aWords, iTimes])


if __name__ == '__main__':
    d = getRdmDict()
    a = getListSortDict(d, 1)
    print(d)
    print(a)