30分:分隔字符串 ( 积分: 0 )

  • 主题发起人 主题发起人 zqssoft
  • 开始时间 开始时间
Z

zqssoft

Unregistered / Unconfirmed
GUEST, unregistred user!
已经Memo1中的文本格式为:
SpSharedRecoContext1 CreateGrammar how
如何根据首大写字母和空格符号,把上述内容分隔显示为如下形式,在memo2中:
Sp
Shared
Reco
Context1
Create
Grammar
how
分隔规则,遇大写字母分隔,遇空格分隔.
 
提供下我的思路 遍历下字符串 查找ASC码大于z 小于等于 Z或者等于' '的字符
 
转一个类似的
问题:英文分割,求最快速度算法 ( 积分:300, 回复:44, 阅读:233 )
分类:Object Pascal ( 版主:menxin, cAkk )
来自:A delphi, 时间:2007-7-15 11:38:00, ID:3810740 [显示:小字体 | 大字体]

If love have lent you twenty thousand tongues,
And every tongue more moving than your own,
Bewitching like the wanton mermaid's songs,
Yet from mine ear the tempting tune is blown;
For know, my heart stands armed in mine ear,
And will not let a false sound enter there,
Lest the deceiving harmony should run
Into the quiet closure of my breast;
And then my little heart were quite undone,
In his bedchamber to be barred of rest.
No, lady, no; my heart longs not to groan,
But soundly sleeps, while now it sleeps alone.
What have you urged that I cannot reprove?
The path is smooth that lead on to danger;
I hate not love, but your device in love
That lends embracement unto every stranger.
You do it for increase: O strange excuse,
When reason is the bawd to lust's abuse!
Call it not love, for Love to heaven is fled
Since sweating Lust on earth usurped his name;
Under whose simple semblance he hath fed
Upon fresh beauty, blotting it with blame;
Which the hot tyrant stains and soon bereaves,
As caterpillars do the tender leaves.


大家好,如何快速的将上面一段英文分割成词,如下显示
If
love
have
lent
you
twenty
thousand
tongues
And
every
tongue
more
moving
than
your
own
Bewitching
like
the
wanton
mermaid's
songs
......

只要单词,有点像切词,要最快速度的方法,不要逐字节判断,那样太慢了。


来自:ufo!, 时间:2007-7-15 12:18:14, ID:3810747
只能逐字节判断的,找到空格,逗号,分号等标点,都替换为回车符就是了。一个循环搞定,是最快的。


来自:A delphi, 时间:2007-7-15 12:23:48, ID:3810750
我目前就是这样,可是稍大一点的文件要几秒钟致10几秒,难以接受啊


来自:ufo!, 时间:2007-7-15 12:38:57, ID:3810751
从网上找找 faststrings 单元,用里面的函数FastReplace试试。
另外,你文件多大?看看瓶颈在哪,是在屏幕显示上,还是字符拷贝,或者查找上消耗了时间,你也可以把代码贴出来让大家看看,如何优化


来自:A delphi, 时间:2007-7-15 12:58:39, ID:3810755
谢谢UFO!,
10k到1MB的文件分割起来太慢了, 以下是我的分割过程
while I < DocSize do begin
Inc(I);
Ch := sDoc;
if Ch in['a'..'z', 'A'..'Z', '0'..'9', '_', '-'] then
Key := Key + AnsiLowerCase(ch)
else if Key <> '' then // 添加一个词
begin
if (Length(Key) = 1) and (Key[1] in ['0'..'9', '_', '-']) then
begin
Key := '';
System.Continue;
end;
ParseKeyWord(Key, TempKeys);
Key := '';
end;
end;// while do


来自:ufo!, 时间:2007-7-15 13:12:07, ID:3810759
您这几个地方改改:
while I < DocSize do begin
Inc(I);
Ch := sDoc;
//if Ch in['a'..'z', 'A'..'Z', '0'..'9', '_', '-'] then
//改为
if Ch in[' ',',',';','.'] then sdoc:= #13
//这样可以减少判断,并且免去了下面的字符串组合操作,字符串重组很耗时
//下面的语句都不要了,这个循环完毕后,单词已经被分割了,可以直接赋值给memo.lines.text来显示了,或者赋值到stringlist.text 做进一步处理。
{ Key := Key + AnsiLowerCase(ch)
else if Key <> '' then // 添加一个词
begin
if (Length(Key) = 1) and (Key[1] in ['0'..'9', '_', '-']) then
begin
Key := '';
System.Continue;
end;
ParseKeyWord(Key, TempKeys);
Key := '';
end;}
end;// while do


来自:ufo!, 时间:2007-7-15 13:14:12, ID:3810761
或者直接改成这样:
for I:= 1 to DocSize do
if sDoc in[' ',',',';','.'] then sdoc:= #13;




来自:A delphi, 时间:2007-7-15 13:50:37, ID:3810769
其实,我这下面的ParseKeyWord还有其他处理,一个文档中不要重复的单词,并提取的唯一单词编号放入一个列表,所以单单替换这些非字母的符号,用正则表达式就行;
另用正则表达式,替换后,分出来的结果还是很慢。


来自:errorcode, 时间:2007-7-16 19:11:39, ID:3810771

const
MAX_BUFCOUNT = 1024;
MAX_HASHSIZE = $FFFF;

type
PStr = ^TStr;
TStr = record
buf: PChar;
len: Integer;
next: PStr;
end;

PStrArray = ^TStrArray;
TStrArray = record
Next: PStrArray;
Strs: array [0..MAX_BUFCOUNT - 1] of TStr;
end;

THashData = Cardinal;

PHash = ^THash;
THash = record
HashData: THashData;
Str: PStr;
Next: PHash;
end;

PHashArray = ^THashArray;
THashArray = record
Next: PHashArray;
Hashs: array [0..MAX_BUFCOUNT - 1] of THash;
end;

PStrHash = ^TStrHash;
TStrHash = array [0..MAX_HASHSIZE - 1] of PHash;

PDocData = ^TDocData;
TDocData = record
SourceDocData: TStr; // 保存源串数据
Word: PStr; // 保存第一个单词指针,第二个单词在next
WordCount: Integer; // 源串中有多少个单词, 通过Word字段访问
Hash: TStrHash; // Hash表
IdleStr: PStr; // 空闲单词指针,from StrBufs
StrBufCount: Integer; // 空闲单词指针数量
StrBufs: PStrArray; // 单词指针缓存,一次性申请N个指针
IdleHash: PHash; // 如上str
HashBufCount: Integer; // 如上strbufcount
HashArrays: PHashArray; // 如上strbufs
end;

TCharToByteValue = array [Char] of Byte;

var
CHAR_2_BYTE: TCharToByteValue; // 区分大小写的值
CHAR_2_BYTE_CASEINSENSITIVE: TCharToByteValue; // 不区分大小写的值
CHAR_2_BYTEs: array [Boolean] of TCharToByteValue;

procedure Init_CHAR_2_BYTEs;
var
I: Char;
begin
for I := #0 to Chr(MAXCHAR - 1) do
begin
CHAR_2_BYTE := Ord(I);
CHAR_2_BYTE_CASEINSENSITIVE := Ord(I);
end;
for I := 'a' to 'z' do
Dec(CHAR_2_BYTE_CASEINSENSITIVE, 32);
CHAR_2_BYTEs[False] := CHAR_2_BYTE;
CHAR_2_BYTEs[True] := CHAR_2_BYTE_CASEINSENSITIVE;
end;

function GetHashValue(const Buf; Len: Integer; HashSeed: Cardinal; IgnoreCase: Boolean = False): Cardinal;
var
P: PChar;
I: Integer;
CharByteArray: ^TCharToByteValue;
begin
Result := HashSeed;
P := @Buf;
CharByteArray := @CHAR_2_BYTEs[IgnoreCase];
for I := 1 to Len do
begin
// 不区分大小写
Result := Result + CharByteArray[P^] * 37;
Inc(P);
end;
end;

function GetStrHashData(AStr: PStr): THashData;
//var
// PreHash: Int64;
// H1, H2: PCardinal;
begin
//Result := HashValue(AStr.Buf^, AStr.len, $DEEDBEEF) and MAX_HASHSIZE;
//H1 := @Int64Rec(Result).Hi;
//H2 := @Int64Rec(Result).Lo;
//H1^ := GetHashValue(AStr.Buf^, AStr.len, 0);
//H2^ := GetHashValue(AStr.Buf^, AStr.len, $DEEDBEEF);
Result := GetHashValue(AStr.Buf^, AStr.len, $DEEDBEEF);
end;

function GetIdleHash(ADocData: PDocData): PHash;
var
HashArray: PHashArray;
begin
if ADocData.HashBufCount <= 0 then
begin
HashArray := AllocMem(SizeOf(THashArray));
HashArray.Next := ADocData.HashArrays;
ADocData.HashArrays := HashArray;
ADocData.HashBufCount := MAX_BUFCOUNT;
ADocData.IdleHash := @HashArray.Hashs[0];
end;
Result := ADocData.IdleHash;
Inc(ADocData.IdleHash);
Dec(ADocData.HashBufCount);
end;

// 在Hash表中检查是否已存在该WORD,存在则return False(Hash表未修改)
function AddHash(ADocData: PDocData; AStr: PStr): Boolean;
var
IsFind: Boolean;
HashData: THashData;
HashValue: Word;
Hash, NewHash: PHash;
begin
HashData := GetStrHashData(AStr);
HashValue := HashData and MAX_HASHSIZE;
//HashValue := GetHashValue(HashData, SizeOf(HashData), 0) and MAX_HASHSIZE;
Hash := ADocData.Hash[HashValue];

IsFind := False;
while Hash <> nil do
begin
IsFind := (Hash.Str.len = AStr.len) and (Hash.HashData = HashData);
if IsFind then break;
Hash := Hash.Next;
end;

Result := not IsFind;
if Result then
begin
Hash := ADocData.Hash[HashValue];
NewHash := GetIdleHash(ADocData);
Newhash.Str := AStr;
NewHash.HashData := HashData;
if Hash = nil then
NewHash.Next := nil
else
NewHash.Next := Hash;
ADocData.Hash[HashValue] := NewHash;
end;
end;

function GetIdleStr(ADocData: PDocData): PStr;
var
StrArray: PStrArray;
begin
if ADocData.StrBufCount <= 0 then
begin
StrArray := AllocMem(SizeOf(TStrArray));
StrArray.Next := ADocData.StrBufs;
ADocData.StrBufs := StrArray;
ADocData.StrBufCount := MAX_BUFCOUNT;
ADocData.IdleStr := @StrArray.Strs[0];
end;
Result := ADocData.IdleStr;
Inc(ADocData.IdleStr);
Dec(ADocData.StrBufCount);
end;

function InitDocData(const AFileName: string; var ADocData: PDocData): Boolean;
var
FileHandle: THandle;
begin
FileHandle := FileOpen(AFileName, fmShareDenyNone);
Result := FileHandle <> INVALID_HANDLE_VALUE;
if not Result then Exit;
New(ADocData);
try
ADocData.SourceDocData.len := FileSeek(FileHandle, 0, FILE_END);
FileSeek(FileHandle, 0, FILE_BEGIN);
ADocData.SourceDocData.buf := AllocMem(ADocData.SourceDocData.len);
FileRead(FileHandle, ADocData.SourceDocData.buf^, ADocData.SourceDocData.len);
finally
CloseHandle(FileHandle);
end;
ADocData.WordCount := 0;
ADocData.Word := nil;
ADocData.IdleStr := nil;
ADocData.StrBufCount := 0;
ADocData.StrBufs := nil;
FillChar(ADocData.Hash, SizeOf(ADocData.Hash), 0);
ADocData.IdleHash := nil;
ADocData.HashBufCount := 0;
ADocData.HashArrays := nil;
end;

procedure FreeDocData(var ADocData: PDocData);
var
StrArray, Next: PStrArray;
HashArray, NextHash: PHashArray;
begin
if ADocData.SourceDocData.len > 0 then
begin
FreeMem(ADocData.SourceDocData.buf);
ADocData.SourceDocData.len := 0;
end;
StrArray := ADocData.StrBufs;
while StrArray <> nil do
begin
Next := StrArray.Next;
FreeMem(StrArray);
StrArray := Next;
end;
ADocData.StrBufCount := 0;
ADocData.StrBufs := nil;
ADocData.IdleStr := nil;

HashArray := ADocData.HashArrays;
while HashArray <> nil do
begin
NextHash := HashArray.Next;
FreeMem(HashArray);
HashArray := NextHash;
end;
ADocData.HashBufCount := 0;
ADocData.HashArrays := nil;
ADocData.IdleHash := nil;
Dispose(ADocData);
ADocData := nil;
end;

procedure SplitterDocData(ADocData: PDocData);
var
DocLen: Integer;
DocPtr, StartPtr, CurrPtr: PChar;

FirstWord, LastWord, CurrWord: PStr;
begin
DocPtr := ADocData.SourceDocData.buf;
DocLen := ADocData.SourceDocData.len;
CurrPtr := DocPtr;

FirstWord := GetIdleStr(ADocData);
CurrWord := FirstWord;
ADocData.Word := FirstWord;
LastWord := nil;
while CurrPtr - DocPtr < DocLen do
begin
while not (CurrPtr^ in ['a'..'z', 'A'..'Z', '0'..'9', '_', '-', #0]) do Inc(CurrPtr);
if CurrPtr - DocPtr > DocLen then
break;
StartPtr := CurrPtr;
while CurrPtr^ in ['a'..'z', 'A'..'Z', '0'..'9', '_', '-'] do Inc(CurrPtr);

if (CurrPtr - StartPtr = 1) and (StartPtr^ in ['0'..'9', '_', '-']) then
begin
Inc(CurrPtr);
Continue;
end;

CurrWord.buf := StartPtr;
CurrWord.len := CurrPtr - StartPtr;
CurrWord.next := nil;
if AddHash(ADocData, CurrWord) then
begin
if LastWord <> nil then
LastWord.next := CurrWord;
LastWord := CurrWord;
CurrWord := GetIdleStr(ADocData);
Inc(ADocData.WordCount);
end;
Inc(CurrPtr);
end;
end;

procedure TForm1.Button1Click(Sender: TObject);
var
MyWord: PStr;
DocData: PDocData;
TempStr: string;
StartSplitterTime, EndSplitterTime, StartWorkerTime, EndWorkerTime: Cardinal;
begin
StartWorkerTime := GetTickCount;
// 初始数据
if not InitDocData('c:/test2.txt', DocData) then
raise Exception.Create(SysErrorMessage(GetLastError));

// 分隔单词
StartSplitterTime := GetTickCount;
SplitterDocData(DocData);
EndSplitterTime := GetTickCount;

// 显示
MyWord := DocData.Word;
Memo1.Lines.BeginUpdate;
while MyWord <> nil do
begin
SetString(TempStr, MyWord.buf, MyWord.len);
Memo1.Lines.Add(TempStr);
MyWord := MyWord.next;
end;
Memo1.Lines.EndUpdate;

EndWorkerTime := GetTickCount;

Caption := Format('ALL Time: %d, Splitter Time: %d, WordCount: %d', [
EndWorkerTime - StartWorkerTime, EndSplitterTime - StartSplitterTime, DocData.WordCount]);
// Free
FreeDocData(DocData);
end;

initialization
Init_CHAR_2_BYTEs;


来自:A delphi, 时间:2007-7-15 14:01:39, ID:3810773
看看楼上的代码先


来自:ufo!, 时间:2007-7-15 14:10:39, ID:3810774
直接替换比正则表达式快,处理的规则越少越快。
你可以把处理后的结果赋值到一个Tstringlist1.text
注意先设置:stringlist1.Sorted := True;
stringlist1.Duplicates:= dupIgnore;

这样,里面的内容就是没有重复的了。
字符串处理方面,delphi已经有了不少现成的东西,合理利用这些,可以做到事半功倍。


来自:ufo!, 时间:2007-7-15 15:49:30, ID:3810780
在我的赛扬1.7处理器,512M内存的笔记本上
处理一个1.77M 中英文混合的文本文件
从开始载入文件到显示完毕,耗时约 3 秒
处理后的不重复单词数是 2564个
代码如下:

procedure TForm1.Button1Click(Sender: TObject);
var ss: string;
st1: Tstringlist;
iFileHandle,i: integer;
begin
iFileHandle:=Fileopen('e:/ss2.txt',0);
try
i:=fileseek(ifilehandle,0,2);;
setlength(ss, i);
FileSeek(iFileHandle,0,0);
Fileread(iFileHandle,ss[1],i);
finally
FileClose(iFileHandle);
end;
for i:= 1 to length(ss) do
if ss in['a'..'z','A'..'Z','0'..'9','''','-','_'] then
Continue
else
ss:= #13;
st1:= Tstringlist.Create;
st1.Sorted:= true;
st1.Duplicates := dupIgnore;
st1.Text:= ss;
//显示
richedit1.Lines.BeginUpdate;
for i:= 0 to st1.Count-1 do
richedit1.Lines.Append(st1.Strings);
richedit1.Lines.EndUpdate;
st1.free;
end;


来自:hs-kill, 时间:2007-7-15 16:52:52, ID:3810794
试试我这个,测试一个7.9M的文本文件,循环查找大概用3.4秒:
type
ttst=record
l:integer; {单词长度}
s:string;
end;
ptst=^ttst;

...

procedure TForm1.Button1Click(Sender: TObject);
var
x,l,k,
i,m:integer;
fs:tfilestream;
sm:tstringstream;
c:char;
ls:tlist;
p:ptst;
bi,bs:boolean;
s:string;
tc:cardinal;
begin
ls:=tlist.Create;
fs:=tfilestream.Create('d:/test.txt',fmOpenRead );
sm:=tstringstream.Create('');
try
tc:=gettickcount;
fs.Position:=0;
sm.CopyFrom(fs,fs.Size);
sm.Position:=0;
x:=0;
i:=0;
bs:=true;
while sm.Position<sm.Size do
begin
sm.Read(c,sizeof(char));
if not (c in['a'..'z','A'..'Z','0'..'9','-','''']) then {那些字符算单词里的,自己添加}
begin
if not bs then {如果前一个字符也是无效字符,则跳过读取}
begin
l:=sm.Position-x-1;
if l>0 then
begin
setlength(s,l);
sm.Position:=x;
sm.Read(pchar(s)^,l); {读出当前字符}
sm.Position:=sm.Position+1;
x:=sm.Position;
k:=-1;
bi:=false;
for m:=0 to ls.Count-1 do {检查重复}
begin
if (ptst(ls[m]).l=l) and SameText(ptst(ls[m]).s,s) then
{这里不区分大小写,如果要区分,改用AnsiSameStr}
begin
bi:=true;
break;
end
else if ptst(ls[m]).l>l then
begin
k:=m;
break;
end;
end;

if not bi then
begin
new(p);
p.l:=l;
p.s:=s;
if k>-1 then
ls.Insert(k,p)
else
ls.Add(p);
end;
end;
end;
x:=sm.Position;
bs:=true;
end
else
bs:=false;
end;
self.Caption:=inttostr(gettickcount-tc); {显示查找占用时间}
listbox1.Items.BeginUpdate;
listbox1.Items.Clear;
for i:=ls.Count-1 downto 0 do
begin
ListBox1.Items.Add(ptst(ls).s+' ------ '+inttostr(ptst(ls).l));
dispose(ptst(ls));
ls.Delete(i);
end;
listbox1.Items.EndUpdate;
finally
fs.Free;
sm.Free;
ls.Free;
end;
end;



来自:wqhatnet, 时间:2007-7-15 17:00:35, ID:3810796
var
i:integer;
abc:String;
list:TStrings;
begin
list:=Tstringlist.Create;
abc:='If love have lent you twenty thousand tongues,';
list.text :=StringReplace(abc,' ',#13#10,[rfReplaceAll]);
for i:=0 to list.count-1 do
showMessage(list.strings);
list.Destroy;
end;



来自:hs-kill, 时间:2007-7-15 17:19:27, ID:3810802
汗......刚才搞错了,我那个文件里都是重复单词
UFO的速度比我的快多了......郁闷


来自:A delphi, 时间:2007-7-15 18:12:11, ID:3810952

哈哈,都不错都收下了,星期一测试,到时结贴
还有没有更快的


来自:ufo!, 时间:2007-7-15 18:15:45, ID:3810953
to:wqhatnet,
如果只有空格,自然好办,但文件内可能有括号,标点符号等各种复杂情况。
如果替换多次,重复工作多了,效率自然不高。

to:hs-kill,
你在循环内检测重复单词,效率很低。检查重复单词在有序表的情况下,用二分法查找才能提高速度,否则,无序的情况下需要做全部单词扫描,平均效率很低。


来自:hs-kill, 时间:2007-7-15 18:37:43, ID:3810958
恩......我刚检查了下,其实检测重复单词效率不是特别差......当然,二分法也许能快点,我没测试

实际上我的代码慢在找出单词后就已经按单词长度进行排序了,这样我在检测重复单词的时候并不是全部循环,只检测了长度相等的单词
由于我是从短向长排列,而同样长度的单词数量与单词长度是成正比的,所以实际检测部分是很快的

真正的慢是在ls.Insert(k,p)这句,看了下tlist的代码,每次insert都是把后面所有的内存块move,几万次的move效率还是很低的
可是郁闷的是,我要是想排序提高检测重复单词的速度,又必须使用插入......哎 头痛

现在正打算看看抛弃tlist,改用个双向链表会不会有所帮助


来自:hs-kill, 时间:2007-7-15 19:53:57, ID:3810967
郁闷......增加了个索引,使用双向链表........查5.8M的文件5W单词还是要18秒.....
UFO的需要7~8秒......


来自:errorcode, 时间:2007-7-15 20:17:26, ID:3810971
晕,查找在大数据量的情况下当然是用hash,一般情况下,像这种不到10M的查询都在1s以下。。。。
有时间再帮LZ写上。


来自:A delphi, 时间:2007-7-15 21:20:11, ID:3810982
好好,大家多多提建议,可以另外开贴加分


来自:A delphi, 时间:2007-7-15 21:24:48, ID:3810984
to errorcode

用hash如何做呢?给点例子


来自:ufo!, 时间:2007-7-15 22:07:23, ID:3810994
不是极度追求速度,没必要使用哈希表,哈希是用空间(内存)换时间。
如果一定要用,delphi提供了Thashedstringlist类可以使用
在IniFiles单元内,注意uses IniFiles单元。

这么小的数据量,hash表的作用不明显,因为hash的构造需要时间,而且使用hash还要考虑到你的数据类型,是否冲突多,如果冲突多,那么需要调整散列算法,否则效率也很低的。



来自:A delphi, 时间:2007-7-15 22:32:29, ID:3811000
{
测试结果:1.5M的文档,分出17475个单词,10352(ms)
因为中途处理了两次,一次只5000(ms)左右,目前是最快的了,但是这样的处理
结果还是不够理想。


procedure TForm1.Button3Click(Sender: TObject);
var
EnDiv: TPartList;
begin

EnDiv := TPartList.Create(self.Memo1.Text);
try
if EnDiv.DivList <> nil then
begin
self.Memo2.Text := EnDiv.DivList.Text;
self.Caption := Format('UseTime: %s, 总单词数: %d, 总长度:%d',
[FormatDateTime('hh:nn:ss.zzz', EnDiv.UseTime), EnDiv.DivList.Count, EnDiv.DocSize]);
end;
finally
FreeAndNil(EnDiv);
end;
end;

}
unit EN_Part;

interface

uses
Windows, SysUtils, Classes;


type
TPartList = class
private
FUseTick: Cardinal; //处理时间(毫秒)
FUseTime: TDateTime; //处理时间
FDocument: string; //文档内容
FDocSize: Integer; //大小
FDivList: TStringList; //结果

procedure LoadFile(const FileName: string);//加载文件
procedure ParseDiv; //处理分词
public
constructor Create(const sDoc: string; const FileName: string = '');
destructor Destroy; override;
//文档内容
property Document: string read FDocument;
//结果词
property DivList: TStringList read FDivList;
//文档大小
property DocSize: Integer read FDocSize;
//处理时间
property UseTick: Cardinal read FUseTick;

property UseTime: TDateTime read FUseTime;
end;



implementation

{ TPartList }

constructor TPartList.Create(const sDoc, FileName: string);
begin
inherited Create;
FDivList := nil;
FDocSize := 0;
FUseTime := 0;

FUseTime := Now;
FUseTick := Windows.GetTickCount;
if FileExists(FileName) then
LoadFile(FileName)
else if Trim(sDoc) <> '' then
begin
FDocSize := Length(sDoc);
SetLength(FDocument, FDocSize);
System.Move(sDoc[1], FDocument[1], FDocSize);
end
else raise Exception.Create('没有发现可用文档内容或文件名。');

self.FDivList := TStringList.Create;
self.ParseDiv;
FUseTick := Windows.GetTickCount - FUseTick;
FUseTime := Now - FUseTime;
end;

destructor TPartList.Destroy;
begin
SetLength(FDocument, 0); //释放内存string
if FDivList <> nil then begin
FDivList.Clear;
FreeAndNil(FDivList); //释放列表
end;
inherited Destroy;
end;

{$DEFINE NEW_PARSEDIV}

{$IFDEF NEW_PARSEDIV}
procedure TPartList.ParseDiv;
var
I: Integer;
s1: string;
begin
s1 := '';
try
for I := 1 to FDocSize do
if FDocument in ['a'..'z', 'A'..'Z', '0'..'9', '-', '_'] then
System.Continue
else FDocument := #13;

s1 := FDocument;
for I := 1 to Length(s1) do
if s1 in ['a'..'z', 'A'..'Z'] then
System.Continue
else s1 := #13;

FDivList.Sorted := True;
FDivList.Duplicates := dupIgnore;
FDivList.Text := s1 + FDocument;
finally
s1 := '';
end;
end;

{$ELSE}
procedure TPartList.ParseDiv;
function SubParse(const S: string): string;
var
I: Integer;
begin
Result := '';
for I := 1 to Length(S) do
if S in ['a'..'z'] then //到这儿只有小写了
Result := Result + S
else
Result := Result + #13;

//如果只是一个单词
if Result = S then Result := '';
if (Result <> '') and (Result[Length(Result)] <> #13)then
Result := Result + #13;
end;

var
I: Integer;
s1, s2: string;
begin
s1 := '';
s2 := '';
try

for I := 1 to FDocSize do
if FDocument in ['a'..'z', 'A'..'Z', '0'..'9', '-', '_'] then
s1 := s1 + lowercase(FDocument)
else if s1 <> '' then begin
s2 := s2 + s1 + #13;
//s2 := s2 + SubParse(s1);
s1 := '';
end;

FDivList.Sorted := True;
FDivList.Duplicates := dupIgnore;
FDivList.Text := s2;
finally
s1 := '';
s2 := '';
end;

end;
{$ENDIF}

procedure TPartList.LoadFile(const FileName: string);
var
hFile: THandle;
len: Integer;
begin
hFile := FileOpen(FileName, SysUtils.fmOpenRead);

if hFile = INVALID_HANDLE_VALUE then
//打开文件句柄失败,抛出错误
raise Exception.Create(SysUtils.SysErrorMessage(Windows.GetLastError));

try
FDocSize := FileSeek(hFile, 0, FILE_END);
FileSeek(hFile, 0 ,FILE_BEGIN);

SetLength(self.FDocument, FDocSize);
len := FileRead(hFile, Pointer(FDocument)^, FDocSize);
if len <> FDocSize then
begin
//读取文件数据失败,抛出错误
raise Exception.Create(SysUtils.SysErrorMessage(Windows.GetLastError));
end;
finally
FileClose(hFile);
end;
end;

end.
 

Similar threads

回复
0
查看
1K
不得闲
S
回复
0
查看
3K
SUNSTONE的Delphi笔记
S
S
回复
0
查看
2K
SUNSTONE的Delphi笔记
S
后退
顶部