高分要求 UTF-8 编码 解码 例子(90分)

  • 主题发起人 主题发起人 hunzean
  • 开始时间 开始时间
H

hunzean

Unregistered / Unconfirmed
GUEST, unregistered user!
// Decode
/*
 * Utf8Decode - convert a NUL-terminated UTF-8 string, in place, to the
 * system ANSI code page (CP_ACP).
 *
 *   str      buffer holding the UTF-8 text on entry; overwritten with the
 *            converted text. NULL is ignored.
 *   maxSize  size of the buffer behind str, in bytes. 0 means "use the
 *            space the original string occupied" (strlen + terminator).
 *
 * Strings shorter than 2 bytes are left untouched. Only 1-, 2- and 3-byte
 * sequences are decoded; any other byte is copied through as a single
 * UTF-16 code unit of the same value.
 *
 * The temporary UTF-16 buffer lives on the stack (alloca), so the stack
 * usage grows with the length of str.
 */
void __stdcall Utf8Decode( char* str, int maxSize )
{
    wchar_t* tempBuf;
    wchar_t* d;
    const BYTE* s;
    int len;

    if ( str == NULL )
        return;

    len = ( int )strlen( str );
    if ( len < 2 )
        return;

    /* Every UTF-8 sequence yields at most one UTF-16 unit, so len+1 is enough. */
    tempBuf = ( wchar_t* )alloca(( len + 1 ) * sizeof( wchar_t ));
    d = tempBuf;
    s = ( const BYTE* )str;

    while ( *s )
    {
        /* 0xxxxxxx: plain ASCII */
        if (( *s & 0x80 ) == 0 )
        {
            *d++ = *s++;
            continue;
        }

        /*
         * 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence.
         * The lead byte is masked with 0xF0 so that 4-byte lead bytes
         * (0xF0..0xFF) are not mistaken for 3-byte ones. The && chain stops
         * at the first mismatch, so the terminator is never read past.
         */
        if (( s[0] & 0xF0 ) == 0xE0 && ( s[1] & 0xC0 ) == 0x80 && ( s[2] & 0xC0 ) == 0x80 )
        {
            *d++ = ( wchar_t )((( s[0] & 0x0F ) << 12 ) | (( s[1] & 0x3F ) << 6 ) | ( s[2] & 0x3F ));
            s += 3;
            continue;
        }

        /* 110xxxxx 10xxxxxx: two-byte sequence */
        if (( s[0] & 0xE0 ) == 0xC0 && ( s[1] & 0xC0 ) == 0x80 )
        {
            *d++ = ( wchar_t )((( s[0] & 0x1F ) << 6 ) | ( s[1] & 0x3F ));
            s += 2;
            continue;
        }

        /* Anything else (malformed or 4-byte sequence): pass the byte through. */
        *d++ = *s++;
    }

    *d = 0;

    /*
     * The source length passed below is -1, so the terminator is converted
     * too; the default size must therefore include room for it.
     */
    if ( maxSize == 0 )
        maxSize = len + 1;

    if ( WideCharToMultiByte( CP_ACP, 0, tempBuf, -1, str, maxSize, NULL, NULL ) == 0 )
    {
        /* Conversion failed (e.g. buffer too small): keep str terminated. */
        if ( maxSize > 0 )
            str[ maxSize - 1 ] = '\0';
    }
}

// Encode
/*
 * Utf8Encode - convert a NUL-terminated string in the system ANSI code page
 * (CP_ACP) to a newly allocated UTF-8 string.
 *
 *   src      text to convert.
 *
 * Returns a malloc'ed, NUL-terminated UTF-8 string that the caller must
 * free(), or NULL if src is NULL, the allocation fails, or the code page
 * conversion fails.
 *
 * Each UTF-16 code unit is encoded on its own as 1 to 3 bytes; surrogate
 * pairs are not combined into 4-byte sequences.
 *
 * The temporary UTF-16 buffer lives on the stack (alloca), so the stack
 * usage grows with the length of src.
 */
char* __stdcall Utf8Encode( const char* src )
{
    wchar_t* tempBuf;
    const wchar_t* s;
    BYTE* d;
    char* result;
    int len;
    int converted;

    if ( src == NULL )
        return NULL;

    len = ( int )strlen( src );

    /* At most one UTF-16 unit per source byte, at most 3 UTF-8 bytes per unit. */
    result = ( char* )malloc( len * 3 + 1 );
    if ( result == NULL )
        return NULL;

    tempBuf = ( wchar_t* )alloca(( len + 1 ) * sizeof( wchar_t ));

    /*
     * The source length is -1, so the terminator is converted as well;
     * the destination size must be the full len+1 units of the buffer.
     */
    converted = MultiByteToWideChar( CP_ACP, 0, src, -1, tempBuf, len + 1 );
    if ( converted == 0 )
    {
        free( result );
        return NULL;
    }
    tempBuf[ len ] = 0;

    s = tempBuf;
    d = ( BYTE* )result;

    while ( *s )
    {
        int U = *s++;

        if ( U < 0x80 )
        {
            /* 0xxxxxxx */
            *d++ = ( BYTE )U;
        }
        else if ( U < 0x800 )
        {
            /* 110xxxxx 10xxxxxx */
            *d++ = ( BYTE )( 0xC0 | (( U >> 6 ) & 0x1F ));
            *d++ = ( BYTE )( 0x80 | ( U & 0x3F ));
        }
        else
        {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            *d++ = ( BYTE )( 0xE0 | (( U >> 12 ) & 0x0F ));
            *d++ = ( BYTE )( 0x80 | (( U >> 6 ) & 0x3F ));
            *d++ = ( BYTE )( 0x80 | ( U & 0x3F ));
        }
    }

    *d = 0;

    return result;
}
 
谁有时间将它转成DELPHI的?
 
我需要的是DELPHI的,C++的我也有
 
[:D]

{ Encodes a WideString as UTF-8 and returns the bytes as an AnsiString.

  Each element of S is encoded on its own; surrogate pairs are not combined.
  A value above MaximumUCS4 is replaced by ReplacementCharacter before its
  byte count is chosen, so the replacement always gets the number of bytes
  its own value requires.

  Relies on FirstByteMark, MaximumUCS4 and ReplacementCharacter, which are
  declared elsewhere. }
function WideStringToUTF8(S: WideString): AnsiString;

var
  Ch: UCS4;
  L, J, T,
  BytesToWrite: Cardinal;
  ByteMask: UCS4;
  ByteMark: UCS4;

begin
  if Length(S) = 0 then
    Result := ''
  else
  begin
    SetLength(Result, Length(S) * 6); // assume worst case
    T := 1;           // next write position in Result (1-based)
    ByteMask := $BF;  // keeps the low six bits plus the continuation marker
    ByteMark := $80;  // continuation byte marker (10xxxxxx)

    for J := 1 to Length(S) do
    begin
      Ch := UCS4(S[J]);

      // Substitute out-of-range values first, then size whatever is left.
      if Ch > MaximumUCS4 then
        Ch := ReplacementCharacter;

      if Ch < $80 then
        BytesToWrite := 1
      else
        if Ch < $800 then
          BytesToWrite := 2
        else
          if Ch < $10000 then
            BytesToWrite := 3
          else
            if Ch < $200000 then
              BytesToWrite := 4
            else
              if Ch < $4000000 then
                BytesToWrite := 5
              else
                BytesToWrite := 6;

      // Fill the continuation bytes from the last one backwards,
      // consuming six bits of Ch per byte.
      for L := BytesToWrite downto 2 do
      begin
        Result[T + L - 1] := AnsiChar((Ch or ByteMark) and ByteMask);
        Ch := Ch shr 6;
      end;
      // The remaining high bits go into the lead byte together with its length marker.
      Result[T] := AnsiChar(Ch or FirstByteMark[BytesToWrite]);
      Inc(T, BytesToWrite);
    end;
    SetLength(Result, T - 1); // set to actual length
  end;
end;
 
这是编码,解码呢?
最好是把上面 C 的翻译一下
 
Delphi自带的,何苦呢?

AnsiToUtf8 function
Converts a string encoded in Ansi to UTF-8.

PUCS4Chars function
Converts a UCS4 string to a null-terminated array of UCS4 characters.

StringToWideChar function
Returns a UNICODE string from an AnsiString.

UCS4StringToWideString function
Converts a string of 4-byte Unicode characters into a WideString value.

UnicodeToUtf8 function
Converts a string of Unicode characters into a UTF-8 string.

UTF8Decode function
Converts a UTF8 string to a Unicode string (WideString).

UTF8Encode function
Converts a WideString value to a UTF8 string.

Utf8ToAnsi function
Converts a UTF8 string to a string of Ansi characters.

Utf8ToUnicode function
Converts a string of UTF-8 characters to a Unicode string.

WideCharLenToString function
Converts Unicode characters to single or multi byte character data.

WideCharLenToStrVar procedure
WideCharLenToStrVar converts Unicode characters to single or multi byte character data.

WideCharToString function
Converts null-terminated Unicode string to single or multi byte character data.

WideCharToStrVar procedure
Converts Unicode string to a single or multi byte character data.

WideStringToUCS4String function
Converts a WideString value to a string of 4-byte Unicode characters.
 
我发给你一个Email
 
接受答案了.
 
后退
顶部