世界上最快的替换函数(完全中文兼容版)(0分)

  • 主题发起人 DreamTiger
  • 开始时间
目前的最新版本,我已经尽力了......
另外,中间的注释部分我都删除了,这样清楚一些,如果需要,
请参考以前的版本!

// FReplace: substring search helpers written in 32-bit inline assembly plus a
// search-and-replace routine (FastReplace) built on top of them.
unit FReplace;

interface

Type
// Common signature of the search routines declared below, so that a caller
// can select one of them at run time through a procedural variable
// (FastReplace does this to switch between case-sensitive and
// case-insensitive searching).
TFastPosProc = function(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;

// Replaces occurrences of aFindString in aSourceString with aReplaceString
// and returns the new string. CaseSensitive defaults to False.
function FastReplace(
var aSourceString : String;
const aFindString, aReplaceString : String;
CaseSensitive : Boolean = False) : String;

// Case-sensitive search. aSourceLen and aFindLen are the lengths of the two
// strings, supplied by the caller; the result is a 1-based position in
// aSourceString, or 0 when nothing is found.
function FastPos(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;

// Case-insensitive counterpart of FastPos. Only the ASCII letters 'a'..'z'
// are folded to upper case before comparing.
function FastPosNoCase(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;

// Like FastPosNoCase, but the bytes of aFindString are compared as they are
// (only the source bytes are folded), so the caller is expected to pass an
// already upper-cased aFindString. FastReplace upper-cases its find string
// before using this routine.
function FastPosNoCaseNoUpcaseFindString(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;

implementation


Function FastPos(
  Const aSourceString, aFindString : String;
  Const aSourceLen, aFindLen, StartPos : Integer
  ) : Integer;
// Case-sensitive, double-byte-aware substring search.
//
// Returns the 1-based position of the first occurrence of aFindString in
// aSourceString at or after StartPos, or 0 when there is none. 0 is also
// returned when aFindLen < 1, StartPos < 1, or the find string does not fit
// between StartPos and the end of the source.
//
// Any source byte >= $80 is treated as the lead byte of a two-byte character,
// so the scan never starts a comparison on the second byte of such a
// character. StartPos is expected to point at the start of a character.
//
// Register usage on entry:
//   EAX is aSourceString
//   EDX is aFindString
//   ECX is aSourceLen
// The result is returned in EAX.
asm
        Push  ESI
        Push  EDI
        Push  EBX

        // ECX := number of candidate start positions
        //      = aSourceLen - aFindLen - (StartPos - 1) + 1
        // Signed checks (not overflow checks) reject every out-of-range input.
        Mov   EBX, aFindLen
        Test  EBX, EBX
        Jle   @Result0              // empty find string
        Sub   ECX, EBX
        Jl    @Result0              // find string longer than the source
        Mov   EBX, StartPos
        Dec   EBX
        Jl    @Result0              // StartPos < 1
        Sub   ECX, EBX
        Jl    @Result0              // not enough room left after StartPos
        Inc   ECX

        Mov   ESI, EDX              // ESI = find string
        Mov   EDX, EAX              // EDX = start of source (for the result)
        Lea   EDI, [EAX+EBX]        // EDI = source + StartPos - 1
        Mov   AL, [ESI]             // AL  = first byte of the find string

@ScaSB:
        Mov   AH, [EDI]
        Cmp   AH, AL
        Jne   @NextChar

        // First byte matches: compare the rest from the last byte backwards.
        Mov   EBX, aFindLen
        Dec   EBX
        Jz    @EndOfMatch
@CompareNext:
        Mov   AL, [ESI+EBX]
        Cmp   AL, [EDI+EBX]
        Jne   @NextChar1
        Dec   EBX
        Jnz   @CompareNext

@EndOfMatch:
        Mov   EAX, EDI
        Sub   EAX, EDX
        Inc   EAX                   // 1-based position
        Jmp   @TheEnd

@NextChar1:
        // AL was overwritten by the tail comparison: restore the first find
        // byte, and reload AH so that the lead-byte test below looks at the
        // byte at the current scan position.
        Mov   AL, [ESI]
        Mov   AH, [EDI]
@NextChar:
        Inc   EDI
        Dec   ECX
        Jz    @Result0
        Cmp   AH, $80
        Jb    @ScaSB
        // The byte just passed was a lead byte: skip its trail byte as well.
        Inc   EDI
        Dec   ECX
        Jnz   @ScaSB

@Result0:
        Xor   EAX, EAX
@TheEnd:
        Pop   EBX
        Pop   EDI
        Pop   ESI
End;
Function FastPosNoCase(
  Const aSourceString, aFindString : String;
  Const aSourceLen, aFindLen, StartPos : Integer
  ) : Integer;
// Case-insensitive, double-byte-aware substring search.
//
// Returns the 1-based position of the first occurrence of aFindString in
// aSourceString at or after StartPos, or 0 when there is none. 0 is also
// returned when aFindLen < 1, StartPos < 1, or the find string does not fit
// between StartPos and the end of the source.
//
// Only bytes in 'a'..'z' ($61..$7A) are folded to upper case. Any source
// byte >= $80 is treated as the lead byte of a two-byte character and the
// scan steps over its trail byte.
// NOTE(review): the tail comparison folds every byte in $61..$7A, including
// one that is the trail byte of a two-byte character.
//
// Register usage on entry:
//   EAX is aSourceString
//   EDX is aFindString
//   ECX is aSourceLen
// The result is returned in EAX.
asm
        Push  ESI
        Push  EDI
        Push  EBX

        // ECX := number of candidate start positions. It must stay intact
        // for the whole scan.
        Mov   EBX, aFindLen
        Test  EBX, EBX
        Jle   @Result0              // empty find string
        Sub   ECX, EBX
        Jl    @Result0              // find string longer than the source
        Mov   EBX, StartPos
        Dec   EBX
        Jl    @Result0              // StartPos < 1
        Sub   ECX, EBX
        Jl    @Result0              // not enough room left after StartPos
        Inc   ECX

        Mov   ESI, EDX              // ESI = find string
        Mov   EDX, EAX              // EDX = start of source (for the result)
        Lea   EDI, [EAX+EBX]        // EDI = source + StartPos - 1

        // AL = upper-cased first byte of the find string
        Mov   AL, [ESI]
        Cmp   AL, $7A
        Ja    @ScaSB
        Cmp   AL, $61
        Jb    @ScaSB
        And   AL, $DF

@ScaSB:
        Mov   AH, [EDI]
        Cmp   AH, $7A
        Ja    @CompareChar
        Cmp   AH, $61
        Jb    @CompareChar
        And   AH, $DF
@CompareChar:
        Cmp   AH, AL
        Jne   @NextChar

        // First byte matches: compare the rest from the last byte backwards.
        Mov   EBX, aFindLen
        Dec   EBX
        Jz    @EndOfMatch
@CompareNext:
        Mov   AL, [ESI+EBX]
        Mov   AH, [EDI+EBX]
        Cmp   AL, $7A
        Ja    @CompareChar2         // AL is not a letter: compare raw bytes
        Cmp   AL, $61
        Jb    @NoUpCaseAL
        // AL is lower case: fold both. Masking AH without a range check is
        // safe because it can only equal the folded AL if AH is that letter.
        And   AL, $DF
        Jmp   @UpCaseAH
@NoUpCaseAL:
        Cmp   AH, $7A
        Ja    @CompareChar2
        Cmp   AH, $61
        Jb    @CompareChar2
@UpCaseAH:
        And   AH, $DF
@CompareChar2:
        Cmp   AL, AH
        Jnz   @NextChar1
        Dec   EBX
        Jnz   @CompareNext

@EndOfMatch:
        Mov   EAX, EDI
        Sub   EAX, EDX
        Inc   EAX                   // 1-based position
        Jmp   @TheEnd

@NextChar1:
        // Reload AH so that the lead-byte test below looks at the byte at
        // the current scan position, then restore the folded first find byte.
        Mov   AH, [EDI]
        Mov   AL, [ESI]
        Cmp   AL, $7A
        Ja    @NextChar
        Cmp   AL, $61
        Jb    @NextChar
        And   AL, $DF
@NextChar:
        Inc   EDI
        Dec   ECX
        Jz    @Result0
        Cmp   AH, $80
        Jb    @ScaSB
        // The byte just passed was a lead byte: skip its trail byte as well.
        Inc   EDI
        Dec   ECX
        Jnz   @ScaSB

@Result0:
        Xor   EAX, EAX
@TheEnd:
        Pop   EBX
        Pop   EDI
        Pop   ESI
End;
Function FastPosNoCaseNoUpCaseFindString(
  Const aSourceString, aFindString : String;
  Const aSourceLen, aFindLen, StartPos : Integer
  ) : Integer;
// Case-insensitive, double-byte-aware substring search for a find string
// that the caller has ALREADY converted to upper case. Only the source bytes
// are folded here; the find-string bytes are compared as they are.
//
// Returns the 1-based position of the first occurrence of aFindString in
// aSourceString at or after StartPos, or 0 when there is none. 0 is also
// returned when aFindLen < 1, StartPos < 1, or the find string does not fit
// between StartPos and the end of the source.
//
// Only source bytes in 'a'..'z' ($61..$7A) are folded to upper case. Any
// source byte >= $80 is treated as the lead byte of a two-byte character and
// the scan steps over its trail byte.
// NOTE(review): the tail comparison folds every source byte in $61..$7A,
// including one that is the trail byte of a two-byte character.
//
// Register usage on entry:
//   EAX is aSourceString
//   EDX is aFindString
//   ECX is aSourceLen
// The result is returned in EAX.
asm
        Push  ESI
        Push  EDI
        Push  EBX

        // ECX := number of candidate start positions. It must stay intact
        // for the whole scan.
        Mov   EBX, aFindLen
        Test  EBX, EBX
        Jle   @Result0              // empty find string
        Sub   ECX, EBX
        Jl    @Result0              // find string longer than the source
        Mov   EBX, StartPos
        Dec   EBX
        Jl    @Result0              // StartPos < 1
        Sub   ECX, EBX
        Jl    @Result0              // not enough room left after StartPos
        Inc   ECX

        Mov   ESI, EDX              // ESI = find string (already upper case)
        Mov   EDX, EAX              // EDX = start of source (for the result)
        Lea   EDI, [EAX+EBX]        // EDI = source + StartPos - 1
        Mov   AL, [ESI]             // AL  = first byte of the find string

@ScaSB:
        Mov   AH, [EDI]
        Cmp   AH, $7A
        Ja    @CompareChar
        Cmp   AH, $61
        Jb    @CompareChar
        And   AH, $DF
@CompareChar:
        Cmp   AH, AL
        Jne   @NextChar

        // First byte matches: compare the rest from the last byte backwards.
        Mov   EBX, aFindLen
        Dec   EBX
        Jz    @EndOfMatch
@CompareNext:
        Mov   AL, [ESI+EBX]
        Mov   AH, [EDI+EBX]
        Cmp   AH, $7A
        Ja    @CompareChar2
        Cmp   AH, $61
        Jb    @CompareChar2
        And   AH, $DF
@CompareChar2:
        Cmp   AL, AH
        Jnz   @NextChar1
        Dec   EBX
        Jnz   @CompareNext

@EndOfMatch:
        Mov   EAX, EDI
        Sub   EAX, EDX
        Inc   EAX                   // 1-based position
        Jmp   @TheEnd

@NextChar1:
        // AL was overwritten by the tail comparison: restore the first find
        // byte, and reload AH so that the lead-byte test below looks at the
        // byte at the current scan position.
        Mov   AL, [ESI]
        Mov   AH, [EDI]
@NextChar:
        Inc   EDI
        Dec   ECX
        Jz    @Result0
        Cmp   AH, $80
        Jb    @ScaSB
        // The byte just passed was a lead byte: skip its trail byte as well.
        Inc   EDI
        Dec   ECX
        Jnz   @ScaSB

@Result0:
        Xor   EAX, EAX
@TheEnd:
        Pop   EBX
        Pop   EDI
        Pop   ESI
End;

Procedure MyMove(
  Const Source;
  Var Dest;
  Count : Integer);
// Copies Count bytes from Source to Dest, lowest address first.
// Nothing is copied when Count <= 0. The copy always runs forward, so it is
// not suitable for overlapping blocks where Dest lies inside Source.
//
// Register usage on entry:
//   EAX is the address of Source
//   EDX is the address of Dest
//   ECX is Count
Asm
        Test  ECX, ECX
        Jle   @JustQuit             // nothing to do for zero or negative counts
        Push  ESI
        Push  EDI
        Mov   ESI, EAX
        Mov   EDI, EDX
        Cld                         // make sure the copy runs forward
        Rep   MovsB
        Pop   EDI
        Pop   ESI
@JustQuit:
End;
Procedure MyUpCase(
  Var UpString;
  Count : Integer);
// Upper-cases, in place, the ASCII letters 'a'..'z' in the Count bytes that
// start at UpString. Nothing is done when Count <= 0.
//
// The buffer is walked from the first byte to the last. A byte >= $80 is
// taken as the lead byte of a two-byte character, and both it and the byte
// after it are left untouched, so a trail byte that happens to fall in
// $61..$7A is not modified.
//
// UpString must be the first byte of the data (for a string S, pass S[1]).
//
// Register usage on entry:
//   EAX is the address of UpString
//   EDX is Count
Asm
        Test  EDX, EDX
        Jle   @JustQuit
@NextByte:
        Mov   CL, [EAX]
        Cmp   CL, $80
        Jae   @LeadByte
        Cmp   CL, $61
        Jb    @Advance
        Cmp   CL, $7A
        Ja    @Advance
        And   CL, $DF               // 'a'..'z' -> 'A'..'Z'
        Mov   [EAX], CL
@Advance:
        Inc   EAX
        Dec   EDX
        Jnz   @NextByte
        Jmp   @JustQuit
@LeadByte:
        // Step over the lead byte and its trail byte without changing them.
        Add   EAX, 2
        Sub   EDX, 2
        Jg    @NextByte             // also stops when a lead byte is the last byte
@JustQuit:
End;
function FastReplace(
  var aSourceString : String;
  const aFindString, aReplaceString : String;
  CaseSensitive : Boolean = False) : String;
// Returns a copy of aSourceString in which every non-overlapping occurrence
// of aFindString, found left to right, is replaced by aReplaceString.
//
//   aSourceString   text to search; it is only read, never modified
//   aFindString     text to look for; when it is empty, or longer than the
//                   source, the source is returned unchanged
//   aReplaceString  replacement text; may be empty (occurrences are removed)
//   CaseSensitive   True:  search with FastPos
//                   False: search with FastPosNoCaseNoUpCaseFindString on an
//                          upper-cased private copy of aFindString
var
  MaxResultLen,
  CurrentPos,
  LastPos,
  BytesToCopy,
  ResultLen,
  FindLen,
  ReplaceLen,
  SourceLen : Integer;
  FastPosProc : TFastPosProc;
  theFindString : String;
begin
  FindLen := Length(aFindString);
  ReplaceLen := Length(aReplaceString);
  SourceLen := Length(aSourceString);

  // No match is possible. This also keeps FindLen out of the division below
  // when it is zero.
  if (FindLen = 0) or (SourceLen < FindLen) then
  begin
    Result := aSourceString;
    Exit;
  end;

  theFindString := aFindString;
  if CaseSensitive then
    FastPosProc := FastPos
  else
  begin
    FastPosProc := FastPosNoCaseNoUpCaseFindString;
    // theFindString still shares its buffer with the const argument; take a
    // private copy before upper-casing it in place, and hand MyUpCase the
    // first character rather than the string variable itself.
    UniqueString(theFindString);
    MyUpCase(theFindString[1], FindLen);
  end;

  // Worst-case result size: at most (SourceLen div FindLen) matches, each of
  // which grows the text by (ReplaceLen - FindLen) bytes.
  if ReplaceLen <= FindLen then
    MaxResultLen := SourceLen
  else
    MaxResultLen := SourceLen + (SourceLen div FindLen) * (ReplaceLen - FindLen);
  Result := '';
  SetLength(Result, MaxResultLen);

  CurrentPos := 1;
  LastPos := 1;
  ResultLen := 0;
  // Keep searching while a whole find string still fits at CurrentPos; this
  // includes a match that ends exactly on the last byte of the source.
  while CurrentPos <= SourceLen - FindLen + 1 do
  begin
    CurrentPos := FastPosProc(aSourceString, theFindString,
      SourceLen, FindLen, CurrentPos);
    if CurrentPos = 0 then
      Break;

    // Copy the unmatched text between the previous match and this one ...
    BytesToCopy := CurrentPos - LastPos;
    if BytesToCopy > 0 then
    begin
      MyMove(aSourceString[LastPos], Result[ResultLen + 1], BytesToCopy);
      Inc(ResultLen, BytesToCopy);
    end;
    // ... followed by the replacement text.
    if ReplaceLen > 0 then
    begin
      MyMove(aReplaceString[1], Result[ResultLen + 1], ReplaceLen);
      Inc(ResultLen, ReplaceLen);
    end;

    CurrentPos := CurrentPos + FindLen;
    LastPos := CurrentPos;
  end;

  // Trim the buffer to its final size and append whatever follows the last
  // match (the whole source when there was no match at all).
  BytesToCopy := SourceLen - LastPos + 1;
  SetLength(Result, ResultLen + BytesToCopy);
  if BytesToCopy > 0 then
    MyMove(aSourceString[LastPos], Result[ResultLen + 1], BytesToCopy);
end;

end.
 
cartersun:我发现你的版本中有些问题和隐患,还需要再改动:
1、MyMove函数我编译通不过
Rep Movs Source, Dest
这一句通不过。
2、MyUpcase函数没有考虑中文,你的MyUpcase是从UpString的
末尾向前移动进行的,但是中文字符,有个别汉字的前半个字
节>=128($80),而后半个字节会落在'a'..'z'之间,这样就会出错。
如果从前向后移动的话,可以通过像FastPosNoCase中:
Cmp AH, $80
Jb @ScaSB
Inc EDI
Dec ECX
Jnz @ScaSB
来跳过所有汉字符,从后向前就不行了。
3、就是我前面提到的在FastPos、FastPosNoCase、
FastPosNoCaseNoUpcaseFindString中应该对输入的
aSourceLen和StartPos的大小进行判断,如果aSourceLen
小于StartPos的话,就直接退出。这样就不用在FastReplace
中对CurrentPos是否大于SourceLen进行判断,而且
FastPos系列更健壮。

我对汇编真的懂得很少,能看懂就很不错了,要优化就没辙了。
所以只能提提意见,呵呵。

 
最新版本已经出来,大家更新一下吧,修正了两个Bug。
 
顶部