读书人

求高效的字符串转换为UTF-8编码的方法

发布时间: 2012-01-13 22:43:29 作者: rapoo

求高效的字符串转换为UTF-8编码的方法
这里有一个函数可以实现 http://bjhv.xfblog.com.cn/user2/cjmud/archives/2006/92883.html

但是占用CPU太高了,如果的次性转换一篇几千次的文章,等上大半天也没反应过来,有没有高效一些的方法?

[解决办法]
Public Declare Function WideCharToMultiByte Lib "kernel32 " Alias "WideCharToMultiByte " (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As String, ByVal cchWideChar As Long, ByVal lpMultiByteStr As String, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long

Public Const CP_UTF8 = 65001

Public Function GetUTF8Code(ByVal sUnicode As String) As Byte()
Dim i As Long
Dim lLength As Long
Dim lBufferSize As Long
Dim lResult As Long
Dim bUTF8() As Byte

GetUTF8Code = " "
lLength = Len(sUnicode)
If lLength = 0 Then Exit Function


lBufferSize = lLength * 3 + 1
ReDim bUTF8(lBufferSize - 1)

lResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(sUnicode), lLength, bUTF8(0), lBufferSize, vbNullString, 0)
MsgBox lResult
If lResult <> 0 Then
lResult = lResult - 1
ReDim Preserve bUTF8(lResult)
GetUTF8Code = bUTF8
End If
End Function
[解决办法]
函数没问题,是你用的有毛病。

Private Declare Function WideCharToMultiByte Lib "kernel32 " (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Private Declare Function MultiByteToWideChar Lib "kernel32 " (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Const CP_UTF8 = 65001

Public Function UTF8_Encode(ByVal strUnicode As String) As Byte()
'UTF-8 编码

Dim TLen As Long
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte

TLen = Len(strUnicode)
If TLen = 0 Then Exit Function

lngBufferSize = TLen * 3 + 1
ReDim bytUtf8(lngBufferSize - 1)

lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), TLen, bytUtf8(0), lngBufferSize, vbNullString, 0)

If lngResult <> 0 Then
lngResult = lngResult - 1
ReDim Preserve bytUtf8(lngResult)
End If

UTF8_Encode = bytUtf8
End Function

Public Function UTF8_Decode(ByRef bUTF8() As Byte) As String
'UTF-8 解码
Dim lRet As Long
Dim lLen As Long
Dim lBufferSize As Long
Dim sBuffer As String
Dim bBuffer() As Byte

lLen = UBound(bUTF8) + 1

If lLen = 0 Then Exit Function

lBufferSize = lLen * 2

sBuffer = String$(lBufferSize, Chr(0))

lRet = MultiByteToWideChar(CP_UTF8, 0, VarPtr(bUTF8(0)), lLen, StrPtr(sBuffer), lBufferSize)

If lRet <> 0 Then
sBuffer = Left(sBuffer, lRet)
End If

UTF8_Decode = sBuffer
End Function


测试:

dim mBuff() As Byte,mIndex as long
dim str as string
str= "我根本不爱你 "
mBuff=UTF8_Encode(str)
for mIndex =0 to ubound(mBuff)
debug.print mbuff(mIndex)
next
msgbox UTF8_Decode(mbuff)
------解决方案--------------------


比较完整的代码如下:

使用方法:
convertFile "c:\test.txt ", "c:\test2.txt "

我转换一个1.5M的文件(100万字?),用时0.8s

计算机配置:C2.66+512M winXP+vb6

特别说明:以上代码综合本版许多高手之代码而成,其中HEX变换代码出自 小吉 之手,我只是稍做改动。

'-----------------------------------------------------
Private Declare Function WideCharToMultiByte Lib "kernel32 " (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Private Declare Function MultiByteToWideChar Lib "kernel32 " (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Const CP_UTF8 = 65001

Private Declare Sub MoveMemory Lib "kernel32 " Alias "RtlMoveMemory " (pDest As Any, pSource As Any, ByVal dwLength As Long) '前面已定义
Private StrTable() As String '公用Byte To String转换表

Sub ConvertFile(ByVal fromfile As String, ByVal tofile As String)

Dim tOnTimer As Double
tOnTimer = Timer

Dim fh1 As Integer, fh2 As Integer, l As Long

Dim buffBytes() As Byte
Dim buffString As String, buffHex As String

fh1 = FreeFile()
Open fromfile For Binary As #fh1
l = LOF(fh1)

ReDim buffBytes(l - 1)
Get #fh1, , buffBytes

Close #fh1

buffString = CreateStringFromByte(buffBytes, l)

Dim utfBytes() As Byte
utfBytes = UTF8_Encode(buffString)

StrTable() = StringTable

buffHex = HEXStringGetByBytes1(utfBytes, StrTable)

fh2 = FreeFile()
Open tofile For Binary As #fh2
Put #fh2, , buffHex

Close #fh2

Debug.Print "完成!用时: " & Abs(tOnTimer - Timer) & " s "
End Sub

Private Function UTF8_Encode(ByVal strUnicode As String) As Byte()
'UTF-8 编码

Dim TLen As Long
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte

TLen = Len(strUnicode)
If TLen = 0 Then Exit Function

lngBufferSize = TLen * 3 + 1
ReDim bytUtf8(lngBufferSize - 1)

lngResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(strUnicode), TLen, bytUtf8(0), lngBufferSize, vbNullString, 0)

If lngResult <> 0 Then
lngResult = lngResult - 1
ReDim Preserve bytUtf8(lngResult)
End If

UTF8_Encode = bytUtf8
End Function

Private Function CreateStringFromByte(ByRef byteArray() As Byte, ByVal ByteLength As Long) As String
'字节数组中的数据连接成字符串

Dim StringData As String

'** 分配字符串空间
StringData = Space(ByteLength)
'** 复制字符数组地址内容到字符串地址
MoveMemory ByVal StringData, ByVal VarPtr(byteArray(0)), ByteLength

'** 返回字符串
CreateStringFromByte = StringData
End Function

Private Function HEXStringGetByBytes1(ByRef pBytes() As Byte, ByRef pOutTable() As String) As String
Dim tOutString As String
Dim tOutBytes() As String
Dim tInBytes_Length As Long
Dim tIndex As Long
tInBytes_Length = UBound(pBytes())
tOutString = String$((tInBytes_Length + 1) * 3 - 1, "% ")
For tIndex = 0 To tInBytes_Length
Mid$(tOutString, tIndex * 3 + 1, 2) = pOutTable(pBytes(tIndex))


Next
HEXStringGetByBytes1 = tOutString
End Function

Private Function StringTable() As String()
'初始化表
Dim tOutBytes() As String
ReDim tOutBytes(255)
Dim tIndex As Long
For tIndex = 0 To 255
tOutBytes(tIndex) = Hex$(tIndex)
If Len(tOutBytes(tIndex)) = 1 Then tOutBytes(tIndex) = "0 " & tOutBytes(tIndex)
Next
StringTable = tOutBytes()
End Function


读书人网 >VB

热点推荐