⊙⊙比世界上最快的替换函数还要快的替换函数⊙⊙(0分)

  • 主题发起人 delphidoc
  • 开始时间
D

delphidoc

Unregistered / Unconfirmed
GUEST, unregistred user!
//不是我写的,摘自fastcode的代码
//速度比当初DreamTiger写的世界上最快的替换函数速度更快,而这个不是汇编的,并且同delphi的StringReplace参数和功能上完全兼容,

unit FastcodeAnsiStringReplace;

interface

{$UNDEF RangeCheck}
{$IFOPT R+}
{$R-}
{$DEFINE RangeCheck}
{$ENDIF}
{$UNDEF OverflowCheck}
{$IFOPT Q+}
{$Q-}
{$DEFINE OverflowCheck}
{$ENDIF}

{$DEFINE AllowLengthShortcut}

uses
SysUtils;
function AnsiStringReplaceFastcodePascal(const S, OldPattern, NewPattern: AnsiString;
Flags: TReplaceFlags): AnsiString;

implementation

var
UppercaseLookUp : array[Char] of Char;

procedure SetUppercaseLookUp;
var
S : AnsiString;
I : Integer;
begin
SetLength(S, 256);
for I := 0 to 255 do
PChar(Integer(S)+I)^ := Char(I);
S := AnsiUpperCase(S);
Move(S[1], UppercaseLookUp, 256);
end; {SetUppercaseLookUp}

function AnsiUpperCase(const S : AnsiString) : AnsiString;
var
Len : Integer;
PRes : PChar;
begin
{$IFDEF AllowLengthShortcut}
Len := 0;
if S <> '' then
Len := PCardinal(Cardinal(S)-4)^;
{$ELSE}
Len := Length(S);
{$ENDIF}
SetLength(Result, Len);
PRes := Pointer(Integer(Result)-1);
while Len > 0 do
begin
PRes[Len] := UppercaseLookUp[S[Len]];
Dec(Len);
end;
end; {AnsiUpperCase}

procedure Move(const Source; var Dest; Count : Integer);
var
S, D : Cardinal;
Temp, C, I : Integer;
L : PInteger;
begin
S := Cardinal(@Source);
D := Cardinal(@Dest);
if S = D then
Exit;
if Count <= 4 then
case Count of
1 : PByte(@Dest)^ := PByte(S)^;
2 : PWord(@Dest)^ := PWord(S)^;
3 : if D > S then
begin
PByte(Integer(@Dest)+2)^ := PByte(S+2)^;
PWord(@Dest)^ := PWord(S)^;
end
else
begin
PWord(@Dest)^ := PWord(S)^;
PByte(Integer(@Dest)+2)^ := PByte(S+2)^;
end;
4 : PInteger(@Dest)^ := PInteger(S)^
else Exit; {Count <= 0}
end
else
if D > S then
begin
Temp := PInteger(S)^;
I := Integer(@Dest);
C := Count - 4;
L := PInteger(Integer(@Dest) + C);
Inc(S, C);
repeat
L^ := PInteger(S)^;
if Count <= 8 then
Break;
Dec(Count, 4);
Dec(S, 4);
Dec(L);
until False;
PInteger(I)^ := Temp;
end
else
begin
C := Count - 4;
Temp := PInteger(S + Cardinal(C))^;
I := Integer(@Dest) + C;
L := @Dest;
repeat
L^ := PInteger(S)^;
if Count <= 8 then
Break;
Dec(Count, 4);
Inc(S, 4);
Inc(L);
until False;
PInteger(I)^ := Temp;
end;
end; {Move}

function Pos(const SubStr : AnsiString; const Str : AnsiString) : Integer;
var
StrLen, SubLen, Remainder : Integer;
PStr, PSub, PMax : PChar;
FirstChar : Char;
begin;
Result := 0;
{$IFDEF AllowLengthShortcut}
if Str = '' then Exit;
if SubStr = '' then Exit;
StrLen := PCardinal(Cardinal(Str ) - 4)^;
SubLen := PCardinal(Cardinal(SubStr) - 4)^;
{$ELSE}
SubLen := Length(SubStr);
StrLen := Length(Str);
{$ENDIF}
if (SubLen = 0) then
Exit;
if (SubLen > StrLen) then
Exit;
PSub := Pointer(SubStr);
PStr := Pointer(Str); {Search Start Position}
PMax := PStr + StrLen - SubLen; {Maximum Start Position}
FirstChar := PSub^;
if SubLen = 1 then
repeat {Single Character Saarch}
if PStr^ = FirstChar then
begin
Result := PStr + 1 - Pointer(Str);
Exit;
end;
if PStr[1] = FirstChar then
begin
if PStr < PMax then
Result := PStr + 2 - Pointer(Str);
Exit;
end;
Inc(PStr, 2);
until PStr > PMax
else
begin {Multi-Character Search}
Dec(SubLen, 2); {Characters to Check after Match}
repeat
if PStr^ = FirstChar then
begin
Remainder := SubLen;
while True do
begin
if (PSub[Remainder ] <> PStr[Remainder ])
or (PSub[Remainder+1] <> PStr[Remainder+1]) then
Break; {No Match}
Dec(Remainder, 2);
if Remainder < 0 then
begin
Result := PStr + 1 - Pointer(Str);
Exit;
end;
end;
end;
if PStr[1] = FirstChar then
begin
Remainder := SubLen;
while True do
begin
if (PSub[Remainder ] <> PStr[Remainder+1])
or (PSub[Remainder+1] <> PStr[Remainder+2]) then
Break; {No Match}
Dec(Remainder, 2);
if Remainder < 0 then
begin
if PStr < PMax then
Result := PStr + 2 - Pointer(Str);
Exit;
end;
end;
end;
Inc(PStr, 2);
until PStr > PMax;
end;
end; {Pos}

function PosEx(const SubStr : AnsiString; const Str : AnsiString;
const StartPos : Cardinal) : Integer;
var
StrLen, SubLen, Remainder : Integer;
PStr, PSub, PMax : PChar;
FirstChar : Char; {First Character of SubStr}
begin;
Result := 0;
{$IFDEF AllowLengthShortcut}
if Str = '' then Exit;
if SubStr = '' then Exit;
StrLen := PCardinal(Cardinal(Str ) - 4)^;
SubLen := PCardinal(Cardinal(SubStr) - 4)^;
{$ELSE}
SubLen := Length(SubStr);
StrLen := Length(Str);
{$ENDIF}
if (SubLen = 0) then
Exit;
PSub := Pointer(SubStr);
PStr := Pointer(Str);
PMax := PStr + StrLen - SubLen; {Maximum Start Position}
{The following 3 Lines are the only Difference between Pos and PosEx}
Inc(PStr, StartPos - 1);
if PStr > PMax then
Exit;
FirstChar := PSub^;
if SubLen = 1 then
repeat {Single Character Saarch}
if PStr^ = FirstChar then
begin
Result := PStr + 1 - Pointer(Str);
Exit;
end;
if PStr[1] = FirstChar then
begin
if PStr < PMax then {Within Valid Range}
Result := PStr + 2 - Pointer(Str);
Exit;
end;
Inc(PStr, 2);
until PStr > PMax
else
begin {Multi-Character Search}
Dec(SubLen, 2); {Characters to Check after Match}
repeat
if PStr^ = FirstChar then
begin
Remainder := SubLen;
while True do
begin
if (PSub[Remainder ] <> PStr[Remainder ])
or (PSub[Remainder+1] <> PStr[Remainder+1]) then
Break; {No Match}
Dec(Remainder, 2);
if Remainder < 0 then
begin {First Char already Checked}
Result := PStr + 1 - Pointer(Str);
Exit;
end;
end;
end;
if PStr[1] = FirstChar then
begin
Remainder := SubLen;
while True do
begin
if (PSub[Remainder ] <> PStr[Remainder+1])
or (PSub[Remainder+1] <> PStr[Remainder+2]) then
Break; {No Match}
Dec(Remainder, 2);
if Remainder < 0 then
begin {First Char already Checked}
if PStr < PMax then {Within Valid Range}
Result := PStr + 2 - Pointer(Str);
Exit;
end;
end;
end;
Inc(PStr, 2);
until PStr > PMax;
end;
end; {PosEx}

const
MaxStrLen = Cardinal(MaxInt) - (16 * 1024) - 16;

{Replace All Occurances - Ignoring Case}
function AnsiStringReplaceAllIC(const Src, Old, New : AnsiString) : AnsiString;
var
SourceString, SearchString : AnsiString;
SrcLen, OldLen, NewLen, Found, Start, ResultLen, Count : Cardinal;
PSrc, PNew, PRes : PChar;
LengthCanGrow : Boolean;
begin
{$IFDEF AllowLengthShortcut}
SrcLen := 0;
if (Src <> '') then
SrcLen := PCardinal(Cardinal(Src)-4)^;
OldLen := 0;
if (Old <> '') then
OldLen := PCardinal(Cardinal(Old)-4)^;
{$ELSE}
SrcLen := Length(Src);
OldLen := Length(Old);
{$ENDIF}
if (OldLen = 0) or (SrcLen < OldLen) then
begin
if SrcLen = 0 then
Result := '' {Needed for Non-Nil Zero Length Strings}
else
Result := Src
end
else
begin
SourceString := AnsiUpperCase(Src);
SearchString := AnsiUpperCase(Old);
Found := Pos(SearchString, SourceString);
if Found <> 0 then
begin {First Match Found}
{$IFDEF AllowLengthShortcut}
NewLen := 0;
if (New <> '') then
NewLen := PCardinal(Cardinal(New)-4)^;
{$ELSE}
NewLen := Length(New);
{$ENDIF}
LengthCanGrow := False;
if NewLen > OldLen then
begin {Set Initial Result Length - May be Adjusted Later}
if (SrcLen > 8*1024) {Skip DIV and 64-Bit Math if Possible}
and (Int64(NewLen div OldLen) * Int64(SrcLen) > 64*1024*1024) then
begin {Large - Initally Allocate Space for First Replace Only}
ResultLen := SrcLen - OldLen + NewLen;
if ResultLen > MaxStrLen then
Error(reOutOfMemory);
LengthCanGrow := True;
end
else
begin {Allocate Enough Space for Maximum Possible Replacements}
ResultLen := SrcLen
+ (((SrcLen - Found + 1) div OldLen) * (NewLen - OldLen));
end;
end
else
ResultLen := SrcLen; {Final Result Length will be <= Src Length}
SetLength(Result, ResultLen);
PNew := Pointer(New);
PSrc := Pointer(Src);
PRes := Pointer(Result);
Start := 1;
repeat
if Found <> Start then
begin
Count := Found - Start;
Move(PSrc^, PRes^, Count);
Inc(PSrc, Count);
Inc(PRes, Count);
end;
if NewLen = 1 then
PRes^ := PNew^ {Optimize Single Byte Move}
else
Move(PNew^, PRes^, NewLen);
Inc(PRes, NewLen);
Inc(PSrc, OldLen);
Start := Found + OldLen;
if Start > SrcLen then
Break;
Found := PosEx(SearchString, SourceString, Start);
if Found <> 0 then
if LengthCanGrow then
begin {Grow Result Length}
Inc(ResultLen, NewLen - OldLen);
if ResultLen > MaxStrLen then
Error(reOutOfMemory);
SetLength(Result, ResultLen);
PRes := Pointer(Result);
end;
until Found = 0;
Count := SrcLen - Start + 1;
Move(PSrc^, PRes^, Count);
Inc(Count, Cardinal(PRes) - Cardinal(Result));
if Count <> ResultLen then
SetLength(Result, Count); {Correct Result Length if Necessary}
end
else {No Matches Found}
Result := Src
end;
end; {AnsiStringReplaceAllIC}

{Replace All Occurances - Case Sensitive}
function AnsiStringReplaceAllCS(const Src, Old, New : AnsiString) : AnsiString;
var
SrcLen, OldLen, NewLen, Found, Start, ResultLen, Count : Cardinal;
PSrc, PNew, PRes : PChar;
LengthCanGrow : Boolean;
begin
{$IFDEF AllowLengthShortcut}
SrcLen := 0;
if (Src <> '') then
SrcLen := PCardinal(Cardinal(Src)-4)^;
OldLen := 0;
if (Old <> '') then
OldLen := PCardinal(Cardinal(Old)-4)^;
{$ELSE}
SrcLen := Length(Src);
OldLen := Length(Old);
{$ENDIF}
if (OldLen = 0) or (SrcLen < OldLen) then
begin
if SrcLen = 0 then
Result := '' {Needed for Non-Nil Zero Length Strings}
else
Result := Src
end
else
begin
Found := Pos(Old, Src);
if Found <> 0 then
begin {First Match Found}
{$IFDEF AllowLengthShortcut}
NewLen := 0;
if (New <> '') then
NewLen := PCardinal(Cardinal(New)-4)^;
{$ELSE}
NewLen := Length(New);
{$ENDIF}
LengthCanGrow := False;
if NewLen > OldLen then
begin {Set Initial Result Length - May be Adjusted Later}
if (SrcLen > 8*1024) {Skip DIV and 64-Bit Math if Possible}
and (Int64(NewLen div OldLen) * Int64(SrcLen) > 64*1024*1024) then
begin {Large - Initally Allocate Space for First Replace Only}
ResultLen := SrcLen - OldLen + NewLen;
if ResultLen > MaxStrLen then
Error(reOutOfMemory);
LengthCanGrow := True;
end
else
begin {Allocate Enough Space for Maximum Possible Replacements}
ResultLen := SrcLen
+ (((SrcLen - Found + 1) div OldLen) * (NewLen - OldLen));
end;
end
else
ResultLen := SrcLen; {Final Result Length will be <= Src Length}
SetLength(Result, ResultLen);
PNew := Pointer(New);
PSrc := Pointer(Src);
PRes := Pointer(Result);
Start := 1;
repeat
if Found <> Start then
begin
Count := Found - Start;
Move(PSrc^, PRes^, Count);
Inc(PSrc, Count);
Inc(PRes, Count);
end;
if NewLen = 1 then
PRes^ := PNew^ {Optimize Single Byte Move}
else
Move(PNew^, PRes^, NewLen);
Inc(PRes, NewLen);
Inc(PSrc, OldLen);
Start := Found + OldLen;
if Start > SrcLen then
Break;
Found := PosEx(Old, Src, Start);
if Found <> 0 then
if LengthCanGrow then
begin {Grow Result Length}
Inc(ResultLen, NewLen - OldLen);
if ResultLen > MaxStrLen then
Error(reOutOfMemory);
SetLength(Result, ResultLen);
PRes := Pointer(Result);
end;
until Found = 0;
Count := SrcLen - Start + 1;
Move(PSrc^, PRes^, Count);
Inc(Count, Cardinal(PRes) - Cardinal(Result));
if Count <> ResultLen then
SetLength(Result, Count); {Correct Result Length if Necessary}
end
else {No Matches Found}
Result := Src
end;
end; {AnsiStringReplaceAllCS}

{Replace First Occurance Only - Ignoring Case}
function AnsiStringReplace1stIC(const Src, Old, New : AnsiString) : AnsiString;
var
SourceString, SearchString : AnsiString;
SrcLen, OldLen, NewLen, Found, ResultLen : Cardinal;
PSrc, PNew, PRes : PChar;
begin
{$IFDEF AllowLengthShortcut}
SrcLen := 0;
if (Src <> '') then
SrcLen := PCardinal(Cardinal(Src)-4)^;
OldLen := 0;
if (Old <> '') then
OldLen := PCardinal(Cardinal(Old)-4)^;
{$ELSE}
SrcLen := Length(Src);
OldLen := Length(Old);
{$ENDIF}
if (OldLen = 0) or (SrcLen < OldLen) then
begin
if SrcLen = 0 then
Result := '' {Needed for Non-Nil Zero Length Strings}
else
Result := Src
end
else
begin
SourceString := AnsiUpperCase(Src);
SearchString := AnsiUpperCase(Old);
Found := Pos(SearchString, SourceString);
if Found <> 0 then
begin {Match Found}
{$IFDEF AllowLengthShortcut}
NewLen := 0;
if (New <> '') then
NewLen := PCardinal(Cardinal(New)-4)^;
{$ELSE}
NewLen := Length(New);
{$ENDIF}
ResultLen := SrcLen - OldLen + NewLen;
if ResultLen > MaxStrLen then
Error(reOutOfMemory);
SetLength(Result, ResultLen);
Dec(Found);
PNew := Pointer(New);
PSrc := Pointer(Src);
PRes := Pointer(Result);
if NewLen = OldLen then
begin
Move(PSrc^, PRes^, SrcLen);
Inc(PRes, Found);
Move(PNew^, PRes^, NewLen);
end
else
begin
Move(PSrc^, PRes^, Found);
Inc(PRes, Found);
Inc(PSrc, Found + OldLen);
if NewLen <> 0 then
begin
Move(PNew^, PRes^, NewLen);
Inc(PRes, NewLen);
end;
Move(PSrc^, PRes^, SrcLen - Found - OldLen);
end;
end
else {No Matches Found}
Result := Src
end;
end; {AnsiStringReplace1stIC}

{Replace First Occurance Only - Case Sensitive}
function AnsiStringReplace1stCS(const Src, Old, New : AnsiString) : AnsiString;
var
SrcLen, OldLen, NewLen, Found, ResultLen : Cardinal;
PSrc, PNew, PRes : PChar;
begin
{$IFDEF AllowLengthShortcut}
SrcLen := 0;
if (Src <> '') then
SrcLen := PCardinal(Cardinal(Src)-4)^;
OldLen := 0;
if (Old <> '') then
OldLen := PCardinal(Cardinal(Old)-4)^;
{$ELSE}
SrcLen := Length(Src);
OldLen := Length(Old);
{$ENDIF}
if (OldLen = 0) or (SrcLen < OldLen) then
begin
if SrcLen = 0 then
Result := '' {Needed for Non-Nil Zero Length Strings}
else
Result := Src
end
else
begin
Found := Pos(Old, Src);
if Found <> 0 then
begin {Match Found}
{$IFDEF AllowLengthShortcut}
NewLen := 0;
if (New <> '') then
NewLen := PCardinal(Cardinal(New)-4)^;
{$ELSE}
NewLen := Length(New);
{$ENDIF}
ResultLen := SrcLen - OldLen + NewLen;
if ResultLen > MaxStrLen then
Error(reOutOfMemory);
SetLength(Result, ResultLen);
Dec(Found);
PNew := Pointer(New);
PSrc := Pointer(Src);
PRes := Pointer(Result);
if NewLen = OldLen then
begin
Move(PSrc^, PRes^, SrcLen);
Inc(PRes, Found);
Move(PNew^, PRes^, NewLen);
end
else
begin
Move(PSrc^, PRes^, Found);
Inc(PRes, Found);
Inc(PSrc, Found + OldLen);
if NewLen <> 0 then
begin
Move(PNew^, PRes^, NewLen);
Inc(PRes, NewLen);
end;
Move(PSrc^, PRes^, SrcLen - Found - OldLen);
end;
end
else {No Matches Found}
Result := Src
end;
end; {AnsiStringReplace1stCS}

//Author: John O'Harrow
//Optimized for: All
//Instructionset(s): IA32
//Original Name: AnsiStringReplaceJOH_PAS

function AnsiStringReplaceFastcodePascal(const S, OldPattern, NewPattern: AnsiString;
Flags: TReplaceFlags): AnsiString;
type
TReplaceFunction = function(const Src, Old, New : AnsiString) : AnsiString;
const
StringReplaceFunction : array[0..3] of TReplaceFunction =
(AnsiStringReplace1stCS, AnsiStringReplaceAllCS,
AnsiStringReplace1stIC, AnsiStringReplaceAllIC);
begin
Result := StringReplaceFunction[PByte(@Flags)^](S, OldPattern, NewPattern);
end;

{$IFDEF RangeCheck}
{$R+}
{$UNDEF RangeCheck}
{$ENDIF}
{$IFDEF OverflowCheck}
{$Q+}
{$UNDEF OverflowCheck}
{$ENDIF}

initialization
SetUppercaseLookUp;

end.
 
我测试了一下,替换一个20.9M的文本文件,仅需要120毫秒!
 
好东西,只能支持win32的delphi,换了.net的delphi编译也通不过,因为有地址指针等东西均不支持
 
那可以编译成dll调用吧[8D]
 
很普通的技术啊,关键点无非是:一、字符串内存预分配;二、通过PChar变量而不是字符串变量来索引字符串,从而避免“copy on write”和“确保引用计数为一的字符串备份”这两个隐藏机制。
这两项提高字符串操作性能的技术在大富翁以前的帖子中也有提及,只是没有人系统总结过而已啦。
单纯地把高级语言写的处理函数改用汇编来写,并不能有效提高程序性能。“只要是汇编写的程序,就是性能最好的程序”这种观点是片面的,是因为没有认识性能瓶颈本质而导致的一种误解。
说到如何有效提高程序性能,有一本好书值得推荐--《代码优化:有效使用内存》。
 
我也不支持“只要是汇编写的程序,就是性能最好的程序”,还要看谁写,怎样写,但是大家都尽力去写的话,或者用同一思路,汇编写的程序绝对要比高级语言性能好!高级语言难免很多冗余的代码,比如在模块入口的压栈动作,其实有些寄存器无须压栈的,但编译器是固定要压栈,我在一个VC写的dll插入自己的代码,没有空闲的位置,所以将冗余的代码简化删除,24K的dll文件,优化出3K来插入我自己的代码,还加快了速度。
字串操作之所以慢,全因为delphi不断向分配空间来进行动作剪裁,改为指针操作,直接对地址操作当然快了很多。
 
{
TRichEdit -> TRxRichEdit(RichEdit20A/RichEdit50W)
其中在FastReport中也有一個嵌套替換處理,本人把它拿出來分享一下
}
// TRxRichEdit中要考慮UNICODE
function TRxCustomRichEdit.GetTextRange(StartPos, EndPos: Longint): string;
var
TextRangeW: TTextRangeW;
TextRangeA: TTextRangeA;
w : WideString;
begin
SetLength(Result, EndPos - StartPos + 1);
if not RichEditVersionIs50 then
begin
TextRangeA.chrg.cpMin := StartPos;
TextRangeA.chrg.cpMax := EndPos;
TextRangeA.lpstrText := PChar(Result);
SetLength(Result, SendMessage(Handle, EM_GETTEXTRANGE, 0, Longint(@TextRangeA)));
end
else
begin
TextRangeW.chrg.cpMin := StartPos;
TextRangeW.chrg.cpMax := EndPos;
SetLength(w, (EndPos - StartPos + 1) * 2);
TextRangeW.lpstrText := PWideChar(w);
SetLength(Result, SendMessage(Handle, EM_GETTEXTRANGE, 0, Longint(@TextRangeW)));
WideCharToMultiByte(CP_ACP, 0, PWideChar(w), -1, PChar(Result), EndPos - StartPos + 1, nil, nil);
end;
end;

// TfrRichView
function GetBrackedVariableEx(const ASource: TRxRichEdit; var i, j: Integer): String;
var
c: Integer;
fl1, fl2: Boolean;
begin
j := i; fl1 := True; fl2 := True; c := 0;
Result := '';
if j > ASource.GetTextLen then Exit;
Dec(j);
repeat
Inc(j);
if fl1 and fl2 then
if ASource.GetTextRange(j-1, j) = '[' then
begin
if c = 0 then i := j;
Inc(c);
end
else if ASource.GetTextRange(j-1, j) = ']' then Dec(c);
if fl1 then
if ASource.GetTextRange(j-1, j) = '"' then fl2 := not fl2;
if fl2 then
if ASource.GetTextRange(j-1, j) = '''' then fl1 := not fl1;
until (c = 0) or (j >= ASource.GetTextLen);
Result := ASource.GetTextRange(i, j-1);
end;

procedure TfrRichView.GetRichData(ASource: TRxRichEdit);
var
R, S: String;
i, j: Integer;
begin
CurView := Self;
with ASource do
try
Lines.BeginUpdate;
i := FindText('[', 0, Length(Text), []) + 1;
while i > 0 do
begin
SelStart := i - 1;
R := GetBrackedVariableEx(ASource, i, j);
try
CurReport.InternalOnGetValue(R, S);
except
break;
end;
SelLength := j - i + 1;
SelText := S;
{$IFNDEF Delphi2}
SelAttributes.Charset := frCharset;
{$ENDIF}
Inc(i, Length(s) - 1);
i := FindText('[', i, Length(Text) - i, []) + 1;
end;
finally
Lines.EndUpdate;
end;
end;
 
顶部