一次性统统提取——提取文本里的数字文本(50)

小雨哥

Unregistered / Unconfirmed
GUEST, unregistred user!
呵呵,再做一回标题党徒....所谓数字文本,就是1、2、3、4这样的单个或连续数字(包括0.8、1.2这样的数字),但不包括IP地址那种有多个小数点的文字(如有需要,请自行修改)。这些数字通常隐藏在众多非数字的文本文件或者字符串中。这里可以一次性把他们全部提取出来放入列表里,列表里的Strings部分保存原始文本,Values里保存转化成数字的变量。使用起来与TStringList风格相似,留给有这种需要的人。有一个特例:对于单一的“.”符号,也会被收录(我在类里没有处理,因为这种写法,有可能就是懒人0.0的简写,练手的代码,不考虑这么多),如果不喜欢这样的处理,可以自行修改。unit NumFromStr;interfaceuses Classes, SysUtils;type PSeriesItem = ^TSeriesItem;
TSeriesItem = record FNumString: string;
FNumValue : Extended;
end;
PSeriesItemList = ^TSeriesItemList;
TSeriesItemList = array[0..Maxint div 64] of TSeriesItem;
TPickNumStringHelper = class(TObject) private FList: PSeriesItemList;
FCount: Integer;
FCapacity: Integer;
procedure Error(Index: Integer);
procedure Grow;
function GetIntegerValue(Index: Integer): Integer;
function StartNumber(const S: string):Integer;
function StopNumber(const S: string):Integer;
protected function Get(Index: Integer): string;virtual;
function GetCapacity: Integer;virtual;
function GetCount: Integer;virtual;
function GetValue(Index: Integer): Extended;
virtual;
procedure ParseTextStr(const AnyString: string);
virtual;
procedure Put(Index: Integer;
const NumString: string);
virtual;
function SepValue(const S: string;const lPos:integer;
var VarStr: string):Integer;virtual;
procedure SetValue(Index: Integer;
NumValue: Extended);
virtual;
procedure SetCapacity(NewCapacity: Integer);virtual;
procedure InsertItem(Index: Integer;const NumString: string;
NumValue: Extended);
virtual;
public destructor Destroy;
override;
procedure Clear;
function Add(const AnyString: string): Integer;
function AddNumValue(const NumString: string;
NumValue: Extended): Integer;
procedure LoadFromFile(const FileName: string);
procedure LoadFromStream(Stream: TStream);
property Count: Integer read GetCount;
property ValsToInt[Index: Integer]: Integer read GetIntegerValue;
property Values[Index: Integer]: Extended read GetValue write SetValue;
property Strings[Index: Integer]: string read Get write Put;
default;
end;
implementation// ##############################################################// # #// # <Delphi Pickup Number from String> ++ Write by 小雨哥 #// # #// ##############################################################{ TPickNumStringHelper }destructor TPickNumStringHelper.Destroy;
begin
if FCount <> 0 then
Finalize(FList^[0], FCount);
FCount := 0;
SetCapacity(0);
inherited Destroy;
end;
function TPickNumStringHelper.Add(const AnyString: string): Integer;var I, J: Integer;
VarStr,temp:string;
FNumValue: Extended;
begin
Result := 1;
temp := AnyString;
while truedo
begin
I := StartNumber(temp);
if I < Length(AnyString) then
begin
J := SepValue(temp, I, VarStr);
if J > 1 then
begin
/// FNumValue: "." value auto to 0 if TryStrToFloat(VarStr, FNumValue) then
Result := AddNumValue(VarStr, FNumValue);
end else
Break;
temp := Copy(temp, I + j - 1, MaxInt);
end else
Break;
end;
end;
function TPickNumStringHelper.AddNumValue(const NumString: string;
NumValue: Extended): Integer;
begin
Result := FCount;
/// This is a faster add action, not test it!!! add is free. InsertItem(Result, NumString, NumValue);
end;
procedure TPickNumStringHelper.Clear;
begin
if FCount <> 0 then
begin
Finalize(FList^[0], FCount);
FCount := 0;
SetCapacity(0);
end;
end;
procedure TPickNumStringHelper.Error(Index: Integer);
function ReturnAddr: Pointer;
asm MOV EAX,[EBP+4] end;
const IndexError = 'index out of bounds (%d)';
begin
raise Exception.CreateFmt(IndexError, [Index]) at ReturnAddr;
end;
function TPickNumStringHelper.Get(Index: Integer): string;
begin
if (Index < 0) or (Index >= FCount) then
Error(Index) else
Result := FList^[Index].FNumString;
end;
function TPickNumStringHelper.GetCapacity: Integer;
begin
Result := FCapacity;
end;
function TPickNumStringHelper.GetCount: Integer;
begin
Result := FCount;
end;
function TPickNumStringHelper.GetIntegerValue(Index: Integer): Integer;
begin
Result := Round(GetValue(Index));
end;
function TPickNumStringHelper.GetValue(Index: Integer): Extended;
begin
Result := 0.0;
if (Index < 0) or (Index >= FCount) then
Error(Index) else
Result := FList^[Index].FNumValue;
end;
procedure TPickNumStringHelper.Grow;var Delta: Integer;
begin
if FCapacity > 128 then
Delta := FCapacity div 4 else
if FCapacity > 32 then
Delta := 16 else
Delta := 4;
SetCapacity(FCapacity + Delta);
end;
procedure TPickNumStringHelper.InsertItem(Index: Integer;const NumString: string;
NumValue: Extended);
begin
if FCount = FCapacity then
Grow;
if Index < FCount then
System.Move(FList^[Index], FList^[Index + 1], (FCount - Index) * SizeOf(TSeriesItem));
with FList^[Index]do
begin
Pointer(FNumString) := nil;
FNumValue := NumValue;
FNumString := NumString;
end;
Inc(FCount);
end;
procedure TPickNumStringHelper.LoadFromFile(const FileName: string);var Stream: TStream;
begin
Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
try LoadFromStream(Stream);
finally Stream.Free;
end;
end;
procedure TPickNumStringHelper.LoadFromStream(Stream: TStream);var Size: Integer;
S: string;
begin
Size := Stream.Size - Stream.Position;
SetString(S, nil, Size);
Stream.Read(Pointer(S)^, Size);
ParseTextStr(S);
end;
procedure TPickNumStringHelper.Put(Index: Integer;
const NumString: string);var NumValue: Extended;
begin
if (Index < 0) or (Index >= FCount) then
Error(Index) else
begin
if TryStrToFloat(NumString, NumValue) then
begin
FList^[Index].FNumString := NumString;
FList^[Index].FNumValue := NumValue;
end;
end;
end;
function TPickNumStringHelper.StartNumber(const S: string): Integer;
begin
Result := 1;
while (Result <= Length(S)) and (not (S[Result] in ['0'..'9', '.']))do
Inc(Result);
end;
function TPickNumStringHelper.StopNumber(const S: string): Integer;
begin
Result := 1;
while (Result <= Length(S)) and (S[Result] in ['0'..'9', '.'])do
Inc(Result);
end;
function TPickNumStringHelper.SepValue(const S: string;
const lPos: integer;
var VarStr: string): Integer;
begin
VarStr := Copy(S, lPos, MaxInt);
Result := StopNumber(VarStr);
if Result > 1 then
begin
VarStr := Copy(S, lPos, Result - 1);
end else
VarStr := '';
end;
procedure TPickNumStringHelper.SetCapacity(NewCapacity: Integer);
begin
ReallocMem(FList, NewCapacity * SizeOf(TSeriesItem));
FCapacity := NewCapacity;
end;
procedure TPickNumStringHelper.ParseTextStr(const AnyString: string);var P, Start: PChar;
S: string;
begin
Clear;
P := Pointer(AnyString);
if P <> nil then
begin
while P^ <> #0do
begin
Start := P;
while not (P^ in [#0, #10, #13])do
Inc(P);
SetString(S, Start, P - Start);
Add(S);
if P^ = #13 then
Inc(P);
if P^ = #10 then
Inc(P);
end;
end;
end;
procedure TPickNumStringHelper.SetValue(Index: Integer;
NumValue: Extended);
begin
if (Index < 0) or (Index >= FCount) then
Error(Index) else
begin
FList^[Index].FNumString := FloatToStr(NumValue);
FList^[Index].FNumValue := NumValue;
end;
end;
end.
代码没有充分测试,请有心人斧正。
 

小雨哥

Unregistered / Unconfirmed
GUEST, unregistred user!
上面代码的提取核心代码如下,如果只是很少量的提取,可以直接使用:/// 定位到数字文本开头function ToNumberHelper(const S: string):Integer;
begin
Result := 1;
while (Result <= Length(S)) and (not (S[Result] in ['0'..'9', '.']))do
Inc(Result);
end;
/// 定位到数字文本结尾function ToStringHelper(const S: string):Integer;
begin
Result := 1;
while (Result <= Length(S)) and (S[Result] in ['0'..'9', '.'])do
Inc(Result);
end;
/// 找到一段提取一段function GetNumValueHelper(const S: string;const lPos:integer;
var VarStr: string):Integer;
begin
VarStr := Copy(S, lPos, MaxInt);
Result := ToStringHelper(VarStr);
if Result > 1 then
VarStr := Copy(S, lPos, Result - 1);
end;
/// 主函数,把文本的数字文本一个个取出来,中间用“|”符号分隔function ParseStrToSeparatorValue(const S: string;
FDelimiter: Char = '|'): string;var I, J: Integer;
NumStr,temp:string;
begin
temp := s;
Result := '';
while truedo
begin
I := ToNumberHelper(temp);
if I < Length(S) then
begin
J := GetNumValueHelper(temp, I, NumStr);
if J > 1 then
begin
if Result ='' then
Result := NumStr else
Result := Result + FDelimiter + NumStr;
end else
Break;
temp := Copy(temp, I + j - 1, MaxInt);
end else
Break;
end;
end;
 
D

dorry

Unregistered / Unconfirmed
GUEST, unregistred user!
还能看到小雨哥写编程手记,赞一个!
 
V

Vsun

Unregistered / Unconfirmed
GUEST, unregistred user!
这个东西比正则表达式还要高效吗?
 
S

szhcracker

Unregistered / Unconfirmed
GUEST, unregistred user!
有空测一下。
 
H

hanxiao555

Unregistered / Unconfirmed
GUEST, unregistred user!
赞一个!有空测一下。
 
Top Bottom