c++去掉汉语文本中的某些不明空白符
本人对汉语文本进行分词,由于中间夹杂了某些不明的空白符,无法按照去掉空格、换行、回车这些方式去除,这样分离出的有些汉语词语前面总是留有空白。注:处理的文本为ANSI编码方式,求大侠帮忙……
作者: ccnunlp 发布时间: 2011-06-16
这是分词后一行显示一个词的结果:
版权
局
号
权
字
通知
国家
关于
图书
合同
著作权
实施
试行
“国家”前面就有不明的空白
版权
局
号
权
字
通知
国家
关于
图书
合同
著作权
实施
试行
“国家”前面就有不明的空白
作者: ccnunlp 发布时间: 2011-06-16
C/C++ code
#include <windows.h>
#include <stdio.h>

/*
 * Draws a chart of every two-byte sequence whose first byte is 0x80..0xFF
 * and whose second byte is 0x00..0xFF, as rendered by GDI with the font
 * requested in main(), into a 1-bit-per-pixel DIB section, and saves the
 * result as "goal.bmp" in the current directory.
 *
 * Layout: a grid of CELL x CELL pixel cells.  Row 0 carries the hex value of
 * the second byte, column 0 carries the hex value of the first byte, and the
 * cell at (row, column) shows the glyph drawn for that byte pair.
 *
 * NOTE(review): the font face name literal is passed to the ANSI API as raw
 * bytes, so this source file presumably has to be saved in the system ANSI
 * code page for the face name to match - confirm for your build setup.
 */

enum {
    CELL            = 17,   /* pixel pitch of one grid cell (incl. 1px line) */
    LEAD_FIRST      = 128,  /* first value of the first byte that is charted */
    LEAD_COUNT      = 128,  /* number of first-byte values (0x80..0xFF)      */
    TRAIL_COUNT     = 256,  /* number of second-byte values (0x00..0xFF)     */
    PALETTE_ENTRIES = 2     /* monochrome bitmap: black and white            */
};

/* Writes the two upper-case hex digits of value (0..255) into out[0..1]. */
static void hex_label (int value, char out[2])
{
    static const char digits[] = "0123456789ABCDEF";

    out[0] = digits[(value >> 4) & 0x0F];
    out[1] = digits[value & 0x0F];
}

/* Paints the white background, both hex headers, all glyphs and the grid. */
static void draw_chart (HDC hDC, int width, int height)
{
    char c[2];
    int i, j;

    BitBlt (hDC, 0, 0, width, height, NULL, 0, 0, WHITENESS);

    /* Left header column: hex value of the first byte, one per row. */
    for (i = LEAD_FIRST; i < LEAD_FIRST + LEAD_COUNT; i++) {
        hex_label (i, c);
        TextOutA (hDC, 1, (i - LEAD_FIRST + 1) * CELL + 1, c, 2);
    }

    /* Top header row: hex value of the second byte, one per column. */
    for (j = 0; j < TRAIL_COUNT; j++) {
        hex_label (j, c);
        TextOutA (hDC, (j + 1) * CELL + 1, 1, c, 2);
    }

    /* One cell per byte pair; the explicit length 2 lets a 0x00 second
       byte be passed to TextOut like any other value. */
    for (i = LEAD_FIRST; i < LEAD_FIRST + LEAD_COUNT; i++) {
        for (j = 0; j < TRAIL_COUNT; j++) {
            c[0] = (char) i;
            c[1] = (char) j;
            TextOutA (hDC, (j + 1) * CELL + 1,
                      (i - LEAD_FIRST + 1) * CELL + 1, c, 2);
        }
    }

    /* Horizontal grid lines: one above every row plus the closing line. */
    for (i = 0; i < LEAD_COUNT + 2; i++) {
        MoveToEx (hDC, 0, i * CELL, NULL);
        LineTo (hDC, width, i * CELL);
    }

    /* Vertical grid lines: one left of every column plus the closing line. */
    for (j = 0; j < TRAIL_COUNT + 2; j++) {
        MoveToEx (hDC, j * CELL, 0, NULL);
        LineTo (hDC, j * CELL, height);
    }
}

/* Writes exactly size bytes to hFile; returns FALSE on error or short write. */
static BOOL write_all (HANDLE hFile, const void *data, DWORD size)
{
    DWORD dwWritten = 0;

    return WriteFile (hFile, data, size, &dwWritten, NULL)
        && dwWritten == size;
}

/*
 * Saves the bitmap described by pbmi (header plus palette, cbHeader bytes)
 * and pvBits (biSizeImage bytes) as a .bmp file at path.
 * Returns TRUE only if the file was created, fully written and closed.
 */
static BOOL save_bitmap (const char *path, const BITMAPINFO *pbmi,
                         DWORD cbHeader, const void *pvBits)
{
    BITMAPFILEHEADER bmfh;
    HANDLE hFile;
    BOOL ok;

    bmfh.bfType = 0x4D42;   /* the bytes 'B','M' read as a little-endian WORD */
    bmfh.bfReserved1 = 0;
    bmfh.bfReserved2 = 0;
    bmfh.bfOffBits = (DWORD) sizeof (BITMAPFILEHEADER) + cbHeader;
    bmfh.bfSize = bmfh.bfOffBits + pbmi->bmiHeader.biSizeImage;

    hFile = CreateFileA (path, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
    if (hFile == INVALID_HANDLE_VALUE) {
        return FALSE;
    }

    ok = write_all (hFile, &bmfh, (DWORD) sizeof (BITMAPFILEHEADER))
      && write_all (hFile, pbmi, cbHeader)
      && write_all (hFile, pvBits, pbmi->bmiHeader.biSizeImage);

    /* Close unconditionally, but still report a failed close as an error. */
    if (!CloseHandle (hFile)) {
        ok = FALSE;
    }
    return ok;
}

/*
 * Entry point.  Returns 0 when "goal.bmp" was written successfully and 1 if
 * any allocation, GDI call or file operation failed (a message is printed to
 * stderr in that case).
 */
int main (void)
{
    /* All locals are declared before the first goto so the cleanup jumps are
       valid whether this file is compiled as C or as C++. */
    const DWORD uWidth = CELL * (TRAIL_COUNT + 1) + 1;
    const DWORD uHeight = CELL * (LEAD_COUNT + 1) + 1;
    const DWORD cbHeader = (DWORD) (sizeof (BITMAPINFOHEADER)
                                    + sizeof (RGBQUAD) * PALETTE_ENTRIES);
    PBITMAPINFO pbmi = NULL;
    HDC hDC = NULL;
    HBITMAP hBitmap = NULL;
    HFONT hFont = NULL;
    HGDIOBJ hOldBitmap = NULL;
    HGDIOBJ hOldFont = NULL;
    void *pvBits = NULL;
    int rc = 1;

    /* LPTR zero-fills the block, so header fields not set below are 0. */
    pbmi = (PBITMAPINFO) LocalAlloc (LPTR, cbHeader);
    if (pbmi == NULL) {
        fprintf (stderr, "LocalAlloc failed (error %lu)\n",
                 (unsigned long) GetLastError ());
        goto cleanup;
    }

    pbmi->bmiHeader.biSize = sizeof (BITMAPINFOHEADER);
    pbmi->bmiHeader.biWidth = (LONG) uWidth;
    pbmi->bmiHeader.biHeight = (LONG) uHeight;
    pbmi->bmiHeader.biPlanes = 1;
    pbmi->bmiHeader.biBitCount = 1;
    /* Each scan line is padded to a multiple of 32 bits. */
    pbmi->bmiHeader.biSizeImage = ((uWidth + 31) & ~31) / 8 * uHeight;
    pbmi->bmiColors[0].rgbBlue = 0;
    pbmi->bmiColors[0].rgbGreen = 0;
    pbmi->bmiColors[0].rgbRed = 0;
    pbmi->bmiColors[1].rgbBlue = 255;
    pbmi->bmiColors[1].rgbGreen = 255;
    pbmi->bmiColors[1].rgbRed = 255;

    hDC = CreateCompatibleDC (NULL);
    if (hDC == NULL) {
        fprintf (stderr, "CreateCompatibleDC failed\n");
        goto cleanup;
    }

    hBitmap = CreateDIBSection (hDC, pbmi, 0, &pvBits, NULL, 0);
    if (hBitmap == NULL || pvBits == NULL) {
        fprintf (stderr, "CreateDIBSection failed (error %lu)\n",
                 (unsigned long) GetLastError ());
        goto cleanup;
    }
    hOldBitmap = SelectObject (hDC, hBitmap);

    hFont = CreateFontA (16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "宋体");
    if (hFont == NULL) {
        fprintf (stderr, "CreateFont failed\n");
        goto cleanup;
    }
    hOldFont = SelectObject (hDC, hFont);

    draw_chart (hDC, (int) uWidth, (int) uHeight);

    /* GDI may batch drawing calls; flush before reading the bits directly. */
    GdiFlush ();

    if (!save_bitmap ("goal.bmp", pbmi, cbHeader, pvBits)) {
        fprintf (stderr, "writing goal.bmp failed (error %lu)\n",
                 (unsigned long) GetLastError ());
        goto cleanup;
    }

    rc = 0;

cleanup:
    /* Deselect our objects first: a GDI object should not be deleted while
       it is still selected into a device context. */
    if (hOldFont != NULL) {
        SelectObject (hDC, hOldFont);
    }
    if (hOldBitmap != NULL) {
        SelectObject (hDC, hOldBitmap);
    }
    if (hFont != NULL) {
        DeleteObject (hFont);
    }
    if (hBitmap != NULL) {
        DeleteObject (hBitmap);
    }
    if (hDC != NULL) {
        DeleteDC (hDC);
    }
    if (pbmi != NULL) {
        LocalFree (pbmi);
    }
    return rc;
}
作者: zhao4zhong1 发布时间: 2011-06-16