delphi 英文分词(50分)

故居 · 2008-05-19

英文在书写上的特殊性，使得分词算法相对中文来说，简单得多。一般来说，我们可以通过单词与单词之间的空格，以及标点符号来完成这个分词过程。

兄弟想学delphi，无奈水平太次，求有心的xdjm给编一个例程，不要三方控件，把memo的txt，忽略中文，分出英文单词：

例如：

我大声说："Hello world! Hello delphi! 我要学delphi。"

为：

Hello 2
world 1
delphi 2

最好能统计出出现次数。

多谢，多谢。

onyliu · 2008-05-20

貌似没啥难的哎
先去掉所有非英文
再以空格为终点循环逐个放到数组中统计不就完了

故居 · 2008-05-20

是的，onyliu兄，兄弟就是想看看标准的写法是什么样的。我自己写的总是感觉不太顺畅

creation-zy · 2008-05-20

var
  Str, WordStr: String;
  Words: TStrings;

{ Records the word currently accumulated in WordStr and clears WordStr.
  The occurrence count of each word is kept in the matching Objects[] slot
  of Words, stored as an integer cast to TObject.
  Lookup uses Words.IndexOf, so case handling follows the list's own
  comparison rules (a plain TStringList compares case-insensitively by default). }
procedure CountWord;
var
  i: Integer;
begin
  if WordStr = '' then Exit;
  i := Words.IndexOf(WordStr); // look the word up among the words seen so far
  if i < 0 then
    Words.AddObject(WordStr, TObject(1)) // first occurrence: Objects[] holds the counter
  else // seen before: bump that word's counter
    Words.Objects[i] := TObject(Integer(Words.Objects[i]) + 1);
  WordStr := ''; // counting done, reset the current word
end;

var
  i: Integer;
begin
  Str := 'Hello world! Hello delphi! 我要学delphi。';
  WordStr := '';
  Words := TStringList.Create;
  try
    for i := 1 to Length(Str) do
    begin
      if Str[i] in ['a'..'z', 'A'..'Z'] then
        WordStr := WordStr + Str[i]
      else // any non-letter ends the current word
        CountWord;
    end;
    CountWord; // the text may end on a letter, so flush once more after the loop
    for i := 0 to Words.Count - 1 do
      Memo1.Lines.Add(Words[i] + ' ' + IntToStr(Integer(Words.Objects[i])));
  finally
    Words.Free; // release the list even if the output step raises
  end;
end;

——没有编译，大概应该是这样了

linuxping · 2008-05-24

unit treeCutWord;

interface
uses Windows ,SysUtils,StrUtils, Variants, Classes,Dialogs,DB, ADODB,untDict,RecgName;
const Hanzixm='Hanzixm.txt';
  Dict='Words.txt'; // file names of the Chinese-character fee table and of the dictionary
const Spiliter=' '; // separator written between words
  // Markup wrapped around classified text segments.
  // NOTE(review): these are empty here; presumably they held HTML tags that
  // were lost when the source was posted — confirm against the original unit.
  Red='';
  Green='';
  Yellow='';
  Grey='';
  color1='';
  color2='';
  NameColor='';
  EndColor=' ';

  MaxWordLength=8; // maximum length of a word (compared against byte lengths in GetTempWord)
//词的最大长度

{ Base class of all word segmenters. It splits the input text into runs
  (FileToSentence) and hands each run to SentenceToWords, which every
  concrete matcher overrides. Results accumulate in SResult. }
type TInterfacedCutWord=class(TRecgName)
private
  FDict:TInterfacedDict;
  SResult:string; // segmentation result
  //StatList:TList; // would hold segmentation statistics
  //strList:TStringList; // would hold the result of each sentence
  PassTime:Cardinal; // time spent segmenting (GetTickCount difference)
  ByteLen:LongWord; // number of bytes read from the file
  CharLen:LongWord; // number of characters read from the file

  function NodesCount:LongWord;
  procedure SetPassTime(V:Cardinal);
  procedure SetByteLen(V:LongWord);
  procedure SetCharLen(V:LongWord);

  procedure SetDict(ADict:TInterfacedDict);

protected
  procedure SentenceToWords(var S:string);virtual;abstract; // segment one sentence
  //function ReturnWord(var AStr:string):string;virtual;abstract; // return one word
public
  procedure FileToSentence(var S:string);virtual; // break the file text into sentences
published
  procedure ReadToFile(APath:string); // read a file into a string and segment it
  procedure SaveToFile(Apath:string); // save the segmentation result to a file
  function GetResult:string;
  property Count:LongWord read NodesCount;
  property ConsumeTime:Cardinal read PassTime {write SetPassTime default 0}; // read-only
  property ByteLenOfFile:LongWord read ByteLen {write SetByteLen default 0};
  property CharLenOfFile:LongWord read CharLen {write SetCharLen default 0};
  property Dict:TInterfacedDict read FDict write SetDict;

end;

{ Segmenter whose SentenceToWords scans the sentence from its start and
  extends each dictionary match as far as it will go. }
type TOrderMaxCutWord=class(TInterfacedCutWord)
private
  procedure SentenceToWords(var S:string);override; // segment one sentence
public
  destructor Destroy;override;

end;

{ Segmenter whose SentenceToWords reverses the sentence and matches it
  against the dictionary, extending each match as far as it will go. }
type TConverseMaxCutWord=class(TInterfacedCutWord)
private

  procedure SentenceToWords(var S:string);override; // segment one sentence
public
  destructor Destroy;override;

end;

{ Segmenter whose SentenceToWords scans the sentence from its start and
  stops each match at the first node marked as a word end. }
type TOrderMinCutWord=class(TInterfacedCutWord)
private
  procedure SentenceToWords(var S:string);override; // segment one sentence
public
  destructor Destroy;override;

end;

{ Segmenter whose SentenceToWords reverses the sentence and stops each
  match at the first node marked as a word end. }
type TReverseMinCutWord=class(TInterfacedCutWord)
private
  procedure SentenceToWords(var S:string);override; // segment one sentence
public
  destructor Destroy;override;
end;

// Denominator used by GetTempWord when turning a word frequency into a fee:
// fee = -ln((freq + 1) / MaxWordCount).
const MaxWordCount=200000;

{ One candidate word found inside a sentence, as used by the
  probability-based segmenter. }
type TInstantOfWord=record
  iOffset, // 1-based start position in the sentence
  iLength:Integer; // length of the word
  dFee, // fee (cost) of this word on its own
  dSumFee:Single; // total fee of the best path ending with this word
  iGoodPrev:Integer; // index of the best preceding candidate, -1 when there is none
end;

type TWordInSentences=array[0..99] of TInstantOfWord; // assumes at most 100 candidate words per sentence

{ Segmenter that collects every candidate word of a sentence, assigns each a
  fee derived from its dictionary frequency, and keeps the path with the
  lowest total fee. }
type TProbabilityCutWord=class(TInterfacedCutWord)
private
  FWordInSentence:TWordInSentences; // all candidate words of the current sentence

  function FindWordInDict(S: string): TCharTreeNode;virtual; // is S a dictionary word?
  function GetTempWord(S:AnsiString):Integer; // collect all candidate words
  procedure GetPrev(i:Integer); // find the best left neighbour of candidate i
  procedure SentenceToWords(var S:string);override; // segment one sentence
public
  destructor Destroy;override;
end;

// Factory for the dictionary-matching segmenters. Each function creates one
// matcher from ANameDictPath and attaches the dictionary loaded from ADictPath.
type TFactory=class
function CreateOrderMaxCutWord(ANameDictPath,ADictPath:string):TInterfacedCutWord;
function CreateOrderMinCutWord(ANameDictPath,ADictPath:string):TInterfacedCutWord;
function CreateReverseMaxCutWord(ANameDictPath,ADictPath:string):TInterfacedCutWord;
function CreateReverseMinCutWord(ANameDictPath,ADictPath:string):TInterfacedCutWord;
end;

implementation
uses untConst;

{ TCutWord }

destructor TOrderMaxCutWord.Destroy;
begin
  // Hand the dictionary back through ReleaseInstance rather than freeing it here.
  if Dict <> nil then
    TDict(Dict).ReleaseInstance;
  inherited Destroy;
end;

{ Segments S from its start, extending each dictionary match as far as it
  will go. S is consumed (emptied) in the process.
  Words longer than two bytes are appended to SResult followed by a space.
  Two-byte words are collected in sTemp and flushed through CheckName,
  wrapped in color2/EndColor, before the next long word and at the end.
  Raises Exception (StrCharsetEncodingError) when neither the current nor
  the previous lookup produced a node. }
procedure TOrderMaxCutWord.SentenceToWords(var S: string);
var
  SPart,sWord,sTemp:string;
  i:Integer;
  nd,nd1:TCharTreeNode;
begin
  sTemp:='';
  while (S<>'') do
  begin
    i:=0;
    nd:=nil;
    repeat
      SPart:=Copy(S,i*2+1,2); // take the next two-byte character
      nd1:=nd; // remember the previous node
      nd:=FDict.FindCharacter(SPart,nd);
      Inc(i,1);
    until ((nd=nil) or (i*2+2>Length(S))); // stop on a miss or when the string is used up

    if (i*2+2>Length(S)) and (nd<>nil) then // string used up
      sWord:=FDict.Trace(nd)
    else if (nd1<>nil) then // miss: backtrack from the previous node
      sWord:=FDict.Trace(nd1)
    else raise Exception.CreateFmt(StrCharsetEncodingError,[SPart]);

    if sWord='' then sWord:=Copy(S,1,2); // no word on the path: emit a single character

    Delete(S,1,Length(sWord));
    if Length(sWord)>2 then
      if sTemp='' then
        SResult:=SResult+sWord+' ' // segmentation result
      else
      begin
        SResult:=SResult+color2+CheckName(sTemp,NameColor,EndColor)+endColor+sWord+' ';
        sTemp:='';
      end
    else
    begin
      sTemp:=sTemp+sWord; // collect consecutive single characters
    end;
  end;
  if sTemp<>'' then
    SResult:=SResult+color2+CheckName(sTemp,NameColor,EndColor)+endColor+' ';
end;

{ TInterfacedCutWord }

{ Splits the text S into runs and appends the classified result to SResult
  (which is cleared first). S is consumed in the process.
    - a run starting with a byte < 128 is emitted wrapped in Yellow/EndColor;
    - a run starting with a byte in 128..175 is emitted wrapped in
      color1/EndColor, unless the (ch, sucCh) pair is 161/161;
    - a run of bytes >= 176 is passed to SentenceToWords.
  The code steps through S two bytes at a time from the second branch on;
  NOTE(review): this looks like it expects GB2312/GBK-encoded text — confirm. }
procedure TInterfacedCutWord.FileToSentence(var S: string);
var
  s1:string;
  i,dd:integer;
  Ch,preCh,sucCh:byte;
  isHZ:boolean;

  // Byte at position Index of S, or 0 when Index lies outside the string,
  // so that look-ahead never reads past the end of S.
  function ByteAt(Index:Integer):Byte;
  begin
    if (Index>=1) and (Index<=Length(S)) then
      Result:=Byte(S[Index])
    else
      Result:=0;
  end;

  // Loads preCh/sucCh from position Index and reports whether that pair
  // belongs to the 161..175 lead-byte range without hitting one of the
  // excluded pairs listed below.
  function IsSymbolPairAt(Index:Integer):Boolean;
  begin
    preCh:=ByteAt(Index);
    sucCh:=ByteAt(Index+1);
    Result:=(Index<=dd) and (preCh<176) and (preCh>=161); // not simplified Chinese: odd symbols or traditional characters
    Result:=Result and (not((preCh=161) and ((sucCh>=162) and (sucCh<=168))));
    Result:=Result and (not((preCh=161) and ((sucCh>=171) and (sucCh<=191))));
    Result:=Result and (not((preCh=163) and ((sucCh=172) or (sucCh=161)) or (sucCh=168) or (sucCh=169) or (sucCh=186) or (sucCh=187) or (sucCh=191)));
  end;

begin
  SResult:='';
  while(S<>'') do
  begin
    ch:=byte(s[1]);
    if ch<128 then // single-byte (English) character
    begin
      i:=2;
      dd:=length(S);
      // Keep accumulating while the next byte is also single-byte and is
      // neither a line feed nor a carriage return.
      while((i<=dd) and ((byte(s[i]))<128) and ((byte(s[i]))<>10) and ((byte(s[i]))<>13)) do
        inc(i);

      // NOTE(review): (ch<>10) or (ch<>13) is always true, so the else branch
      // below is never taken. Left as in the original; confirm whether
      // "and" was intended before changing the output.
      if {(ch<>32) and }(ch<>10) or (ch<>13) then
        SResult:=SResult+Yellow+leftbstr(S,i-1)+endcolor
      else // carriage return or line feed
        SResult:=SResult+leftbstr(S,i-1);
      delete(S,1,i-1);
      continue;
    end
    else // not single-byte
    begin
      if (ch<176) then
      begin
        i:=1;
        dd:=length(S);
        isHZ:=IsSymbolPairAt(i);
        while (isHZ) do
        begin
          inc(i,2);
          isHZ:=IsSymbolPairAt(i);
        end;
        if i=1 then inc(i,2); // always consume at least one two-byte unit
        if (not((ch=161) and (sucCh=161))) then SResult:=SResult+color1+leftbstr(S,i-1)+endColor;
        delete(S,1,i-1);
        continue;
      end;
    end; // end of 'if ... else ...'

    // Skip forward to the first position whose CharLength is 2 and emit
    // everything before it wrapped in Red/EndColor.
    i:=1;
    while (CharLength(S,i)<>2) do
      Inc(i,CharLength(S,i));
    if i<>1 then
    begin
      SResult:=SResult+Red+copy(S,1,i-1)+EndColor;
      Delete(S,1,i-1);
    end;

    { Chinese-character range in GBK (GBK is compatible with GB2312):
      1st byte 0x81~0xfe, 2nd byte 0x40~0x7e and 0x80~0xfe }

    // Handle the run of Chinese characters.
    i:=3;
    dd:=length(S);
    while((i<=dd) and ((byte(s[i]))>=176)) do
      // NOTE(review): inside this loop the byte is >= 176, so the =32 test
      // cannot succeed; kept unchanged.
      if ((byte(S[i]))=32) then
      begin
        S:=copy(S,1,i-1)+copy(S,i+1,length(S)-i); // remove the space
        dec(dd);
      end
      else
        inc(i,2); // accumulate one more character

    s1:=leftbstr(S,i-1);
    SentenceToWords(s1); // segment the run
    delete(S,1,i-1);
  end; // end of the outermost 'while'

end;

{ Returns the segmentation result wrapped in a complete HTML page.
  SResult itself is left untouched, so calling GetResult more than once
  returns the same page instead of nesting the header again. }
function TInterfacedCutWord.GetResult: string;
begin
  Result:='<html><head><meta http-equiv=Content-Type content="text/html; charset=gb2312"><title>分词结果</title></head> <body>'
    +'<center>本程序由胃痛抽烟制作。</center><center><a href="mailto:wangpingdejiejie@21cn.com">发邮件给我</a></center>'
    +SResult
    +'</body></html>';
end;

function TInterfacedCutWord.NodesCount: LongWord;
begin
  // Getter behind the Count property: forwards to the attached dictionary.
  Result:=Self.FDict.Count;
end;

{ Reads the text file APath line by line, records its byte and character
  counts, segments it with FileToSentence and stores the elapsed tick count.
  I/O errors from Reset/Readln propagate to the caller. }
procedure TInterfacedCutWord.ReadToFile(APath: string);
var
  S,line:string;
  F:TextFile;
  i:Cardinal;
begin
  S:='';
  ClearNames; // clear the string list that holds recognised names

  AssignFile(F,APath);
  Reset(F);
  // The file is open from here on; CloseFile must only run on an opened file,
  // otherwise a failed Reset would be masked by a second I/O error.
  try
    while (not Eof(F)) do
    begin
      Readln(F,line);
      S:=S+line+#13#10; // CR+LF, the conventional order (the original appended #10#13)
    end;
  finally
    CloseFile(F);
  end;

  SetByteLen(Length(S)); // number of bytes
  SetCharLen(ByteToCharLen(S, Length(S))); // number of characters

  i:=GetTickCount; // start timing
  FileToSentence(S);
  SetPassTime(GetTickCount-i);
end;

// Not implemented: the body is empty, so nothing is written to Apath.
procedure TInterfacedCutWord.SaveToFile(Apath: string);
begin

end;

{procedure TInterfacedCutWord.SentenceToWords(var S: string);
begin

end;}

procedure TInterfacedCutWord.SetByteLen(V: LongWord);
begin
  // Backing field of the ByteLenOfFile property.
  Self.ByteLen:=V;
end;

procedure TInterfacedCutWord.SetCharLen(V: LongWord);
begin
  // Backing field of the CharLenOfFile property.
  Self.CharLen:=V;
end;

{ Setter of the Dict property.
  Raises Exception (StrNoCreateDictError) when ADict is nil; the previously
  attached dictionary is kept in that case. }
procedure TInterfacedCutWord.SetDict(ADict: TInterfacedDict);
begin
  if not Assigned(ADict) then
    // The original followed this raise with Halt, which could never execute.
    raise Exception.Create(StrNoCreateDictError);
  FDict:=ADict;
end;

procedure TInterfacedCutWord.SetPassTime(V: Cardinal);
begin
  // Backing field of the ConsumeTime property.
  Self.PassTime:=V;
end;

{ TConverseCutWord }

destructor TConverseMaxCutWord.Destroy;
begin
  // Hand the reverse dictionary back through ReleaseInstance rather than freeing it here.
  if FDict <> nil then
    TReverseDict(FDict).ReleaseInstance;
  inherited Destroy;
end;

{ Segments S by matching its reversed form against the dictionary, extending
  each match as far as it will go. S itself is not modified.
  Matched words are reversed back and prepended to a temporary result, so
  the output keeps the original reading order; the temporary result is
  appended to SResult at the end.
  Raises Exception (StrCharsetEncodingError) when neither the current nor
  the previous lookup produced a node. }
procedure TConverseMaxCutWord.SentenceToWords(var S: string);
var
  SPart,sWord,sTemp,str,TempResult:string;
  i:Integer;
  nd,nd1:TCharTreeNode;
begin
  sTemp:='';
  TempResult:='';

  str:=AnsiReverseString(S);

  while (Str<>'') do
  begin
    i:=0;
    nd:=nil;
    repeat
      SPart:=Copy(Str,i*2+1,2); // take the next two-byte character
      nd1:=nd; // remember the previous node
      nd:=FDict.FindCharacter(SPart,nd);
      Inc(i,1);
    until ((nd=nil) or (i*2+2>Length(Str))); // stop on a miss or when the string is used up

    if (i*2+2>Length(Str)) and (nd<>nil) then // string used up
      sWord:=FDict.Trace(nd)
    else if (nd1<>nil) then // miss: backtrack from the previous node
      sWord:=FDict.Trace(nd1)
    else raise Exception.CreateFmt(StrCharsetEncodingError,[SPart]);

    if sWord='' then sWord:=Copy(Str,1,SysUtils.CharLength(Str,1)); // single character

    Delete(Str,1,Length(sWord));
    if Length(sWord)>2 then
      if sTemp='' then
      begin
        TempResult:=AnsiReverseString(sWord)+' '+TempResult; // segmentation result
      end
      else
      begin
        TempResult:=AnsiReverseString(sWord)+' '+color2+CheckName(sTemp,NameColor,EndColor)+endColor+TempResult;
        sTemp:='';
      end
    else
    begin
      sTemp:=sWord+sTemp; // collect consecutive single characters
    end;
  end;
  if sTemp<>'' then
    TempResult:=color2+CheckName(sTemp,NameColor,EndColor)+endColor+' '+TempResult;
  SResult:=SResult+TempResult;
end;
{ TOrderMinCutWord }

{ Releases this matcher's hold on the dictionary. TFactory obtains the
  dictionary from TDict.ChangeDict, i.e. the reference-counted singleton,
  so it is handed back with ReleaseInstance (as TOrderMaxCutWord does)
  instead of being freed directly while other matchers may still use it. }
destructor TOrderMinCutWord.Destroy;
begin
  if Assigned(FDict) then
    TDict(FDict).ReleaseInstance;
  inherited;
end;

{ Segments S from its start, stopping each match at the first node marked
  as a word end. S is consumed (emptied) in the process.
  Words longer than two bytes are appended to SResult followed by a space.
  Two-byte words are collected in sTemp and flushed through CheckName,
  wrapped in color2/EndColor, before the next long word and at the end.
  Raises Exception (StrCharsetEncodingError) when no node is available to
  trace from. }
procedure TOrderMinCutWord.SentenceToWords(var S: string);
var
  SPart,sWord,sTemp:string;
  i:Integer;
  nd,nd1:TCharTreeNode;
begin
  sTemp:='';
  while (S<>'') do
  begin
    i:=0;
    nd:=nil;
    repeat
      SPart:=Copy(S,i*2+1,2); // take the next two-byte character
      nd1:=nd; // remember the previous node
      nd:=FDict.FindCharacter(SPart,nd);
      Inc(i,1);
    until ((nd=nil) or (nd.IsWordEnd) or (i*2+2>Length(S))); // stop at a word, on a miss, or when the string is used up

    if (nd<>nil) and ((i*2+2>Length(S)) or (nd.IsWordEnd)) then // word found or string used up
      sWord:=FDict.Trace(nd)
    else if (nd=nil) and (nd1<>nil) then // miss: backtrack from the previous node
    begin
      sWord:=FDict.Trace(nd1);
    end
    else raise Exception.CreateFmt(StrCharsetEncodingError,[SPart]);

    if sWord='' then sWord:=Copy(S,1,2); // single character

    Delete(S,1,Length(sWord));
    if Length(sWord)>2 then
      if sTemp='' then
        SResult:=SResult+sWord+' ' // segmentation result
      else
      begin
        SResult:=SResult+color2+CheckName(sTemp,NameColor,EndColor)+endColor+sWord+' ';
        sTemp:='';
      end
    else
    begin
      sTemp:=sTemp+sWord; // collect consecutive single characters
    end;
  end;
  if sTemp<>'' then
    SResult:=SResult+color2+CheckName(sTemp,NameColor,EndColor)+endColor+' ';
end;

{ TReverseMinCutWord }

{ Releases this matcher's hold on the dictionary. TFactory attaches the
  dictionary returned by TReverseDict.ChangeDict, i.e. the reference-counted
  reverse singleton, so it is handed back with TReverseDict.ReleaseInstance
  (as TConverseMaxCutWord does) instead of being cast to TDict and freed. }
destructor TReverseMinCutWord.Destroy;
begin
  if Assigned(FDict) then
    TReverseDict(FDict).ReleaseInstance;
  inherited;
end;

{ Segments S by matching its reversed form against the dictionary, stopping
  each match at the first node marked as a word end. S itself is not modified.
  Matched words are reversed back and prepended to a temporary result, which
  is appended to SResult at the end.
  Raises Exception (StrCharsetEncodingError) when no node is available to
  trace from. }
procedure TReverseMinCutWord.SentenceToWords(var S: string);
var
  SPart,sWord,sTemp,str,TempResult:string;
  i:Integer;
  nd,nd1:TCharTreeNode;
begin
  sTemp:='';
  TempResult:='';

  str:=AnsiReverseString(S);

  while (Str<>'') do
  begin
    i:=0;
    nd:=nil;
    repeat
      SPart:=Copy(Str,i*2+1,2); // take the next two-byte character
      nd1:=nd; // remember the previous node
      nd:=FDict.FindCharacter(SPart,nd); // look SPart up
      Inc(i,1);
    until ((nd=nil) or (nd.IsWordEnd) or (i*2+2>Length(Str)));
    { stop on a miss, at a word, or when the string is used up }
    if (nd<>nil) and ((i*2+2>Length(Str)) or (nd.IsWordEnd)) then // word found or string used up
      sWord:=FDict.Trace(nd)
    else if (nd=nil) and (nd1<>nil) then // miss: backtrack from the previous node
    begin
      sWord:=FDict.Trace(nd1);
    end
    else raise Exception.CreateFmt(StrCharsetEncodingError,[SPart]);

    if sWord='' then sWord:=Copy(Str,1,SysUtils.CharLength(Str,1)); // single character

    Delete(Str,1,Length(sWord));
    if Length(sWord)>2 then
      if sTemp='' then
      begin
        TempResult:=AnsiReverseString(sWord)+' '+TempResult; // segmentation result
      end
      else
      begin
        TempResult:=AnsiReverseString(sWord)+' '+color2+CheckName(sTemp,NameColor,EndColor)+endColor+TempResult;
        sTemp:='';
      end
    else
    begin
      sTemp:=sWord+sTemp; // collect consecutive single characters
    end;
  end;
  if sTemp<>'' then
    TempResult:=color2+CheckName(sTemp,NameColor,EndColor)+endColor+' '+TempResult;
  SResult:=SResult+TempResult;
end;

{ TFactory }

{ Creates a TOrderMaxCutWord from ANameDictPath and attaches the dictionary
  obtained from TDict.ChangeDict(ADictPath).
  If obtaining or attaching the dictionary raises, the half-built matcher is
  freed before the exception is re-raised. }
function TFactory.CreateOrderMaxCutWord(ANameDictPath,ADictPath:string): TInterfacedCutWord;
var
  cutter:TInterfacedCutWord;
begin
  cutter:=TOrderMaxCutWord.Create(ANameDictPath);
  try
    cutter.Dict:=TDict.ChangeDict(ADictPath);
  except
    cutter.Free;
    raise;
  end;
  Result:=cutter;
end;

{ Creates a TOrderMinCutWord from ANameDictPath and attaches the dictionary
  obtained from TDict.ChangeDict(ADictPath).
  If obtaining or attaching the dictionary raises, the half-built matcher is
  freed before the exception is re-raised. }
function TFactory.CreateOrderMinCutWord(ANameDictPath,ADictPath:string):TInterfacedCutWord;
var
  cutter:TInterfacedCutWord;
begin
  cutter:=TOrderMinCutWord.Create(ANameDictPath);
  try
    cutter.Dict:=TDict.ChangeDict(ADictPath);
  except
    cutter.Free;
    raise;
  end;
  Result:=cutter;
end;

{ Creates a TConverseMaxCutWord from ANameDictPath and attaches the
  dictionary obtained from TReverseDict.ChangeDict(ADictPath).
  If obtaining or attaching the dictionary raises, the half-built matcher is
  freed before the exception is re-raised. }
function TFactory.CreateReverseMaxCutWord(ANameDictPath,ADictPath:string):TInterfacedCutWord;
var
  cutter:TInterfacedCutWord;
begin
  cutter:=TConverseMaxCutWord.Create(ANameDictPath);
  try
    cutter.Dict:=TReverseDict.ChangeDict(ADictPath);
  except
    cutter.Free;
    raise;
  end;
  Result:=cutter;
end;

{ Creates a TReverseMinCutWord from ANameDictPath and attaches the
  dictionary obtained from TReverseDict.ChangeDict(ADictPath).
  If obtaining or attaching the dictionary raises, the half-built matcher is
  freed before the exception is re-raised. }
function TFactory.CreateReverseMinCutWord(ANameDictPath,ADictPath:string):TInterfacedCutWord;
var
  cutter:TInterfacedCutWord;
begin
  cutter:=TReverseMinCutWord.Create(ANameDictPath);
  try
    cutter.Dict:=TReverseDict.ChangeDict(ADictPath);
  except
    cutter.Free;
    raise;
  end;
  Result:=cutter;
end;

{ TProbabilityCutWord }

destructor TProbabilityCutWord.Destroy;
begin
  // Destroys the attached dictionary directly.
  // NOTE(review): the other matchers go through ReleaseInstance; confirm
  // whether this dictionary can be one of the shared singletons.
  if FDict <> nil then
    Dict.Destroy;
  inherited Destroy;
end;

{ Walks S two bytes at a time through the dictionary tree.
  Returns the node reached when all of S was consumed and that node is
  marked as a word end; returns nil otherwise.
  Written for the probability segmenter; with a reversed dictionary S would
  have to be reversed first (not done here). }
function TProbabilityCutWord.FindWordInDict(S: AnsiString): TCharTreeNode;
var
  node: TCharTreeNode;
begin
  node:=nil;
  while S<>'' do
  begin
    node:=FDict.FindCharacter(Copy(S,1,2),node);
    if node=nil then Break;
    {$Message Hint 'Maybe Error here~'}
    Delete(S,1,2); // drop the character just matched
  end;

  if (S='') and (node<>nil) and node.IsWordEnd then
    Result:=node
  else
    Result:=nil;
end;

{ Chooses the best left neighbour of candidate i: among the earlier
  candidates that end exactly where candidate i starts, the one with the
  lowest accumulated fee. Stores its index in iGoodPrev and the accumulated
  fee in dSumFee.
  A candidate starting at offset 1 has no predecessor (iGoodPrev = -1).
  If no adjoining predecessor exists, iGoodPrev stays -1 and dSumFee is set
  to a prohibitive value instead of reading array element -1. }
procedure TProbabilityCutWord.GetPrev(i: Integer);
const
  NoPathFee = 1.0E30; // marks a candidate no path can reach
var
  iGoodID:Integer;
  j:Integer;
begin
  if FWordInSentence[i].iOffset=1 then // first word of the sentence
  begin
    FWordInSentence[i].iGoodPrev:=-1;
    FWordInSentence[i].dSumFee:=FWordInSentence[i].dFee;
    Exit;
  end;

  iGoodID:=-1;
  for j:=i-1 downto 0 do
  begin
    if FWordInSentence[j].iOffset+FWordInSentence[j].iLength=FWordInSentence[i].iOffset then
      if ((iGoodID=-1) or (FWordInSentence[j].dSumFee<=FWordInSentence[iGoodID].dSumFee)) then
        iGoodID:=j;
    // Candidates starting 8 or more bytes earlier end the search.
    if FWordInSentence[i].iOffset-FWordInSentence[j].iOffset>=8 then Break;
  end;

  FWordInSentence[i].iGoodPrev:=iGoodID;
  if iGoodID>=0 then
    FWordInSentence[i].dSumFee:=FWordInSentence[i].dFee+FWordInSentence[iGoodID].dSumFee
  else
    FWordInSentence[i].dSumFee:=NoPathFee;
end;

{ Fills FWordInSentence with every candidate word of S and returns how many
  were stored.
  For each two-byte position, substrings of 2, 4, ... MaxWordLength bytes
  are looked up. Dictionary words are stored with fee
  -ln((freq+1)/MaxWordCount); a two-byte substring is always stored, using
  frequency 0 when it is not in the dictionary.
  Raises Exception (StrOverBoundsError) when S yields more candidates than
  FWordInSentence can hold, instead of writing past the array. }
function TProbabilityCutWord.GetTempWord(S: AnsiString): Integer;
var
  i,j,k,KK:Integer;
  sPart:AnsiString;
  nd:TCharTreeNode;
  iWordCount:Integer;
begin
  KK:=0;
  for I:=0 to (Length(S) div 2)-1 do
  begin
    K:=Length(S)-I*2; // bytes remaining from this position
    J:=2;
    while (J<=MaxWordLength) and (J<=K) do
    begin
      sPart:=Copy(S,I*2+1,J);

      nd:=Self.FindWordInDict(sPart); // nil when sPart is not a word
      if nd<>nil then
        iWordCount:=nd.WordFreqs
      else if J=2 then
        iWordCount:=0 // unknown single character: keep it as a candidate
      else
      begin
        Inc(J,2); // unknown longer string: try the next length
        Continue;
      end;

      if KK>High(FWordInSentence) then
        raise Exception.Create(StrOverBoundsError);

      FWordInSentence[KK].iOffset:=I*2+1;
      FWordInSentence[KK].iLength:=J;
      FWordInSentence[KK].dFee:=-ln((iWordCount+1) / (MaxWordCount));
      FWordInSentence[KK].dSumFee:=0.0;

      Inc(KK);
      Inc(J,2);
    end;
  end;

  Result:=KK;
end;

{ Segments S with the lowest-fee path: collects all candidates, links each
  to its best predecessor, picks the cheapest candidate that ends at the end
  of S, and walks the iGoodPrev chain backwards to build the output.
  Words longer than two bytes are emitted followed by a space; two-byte
  words are collected and passed through CheckName. The result is appended
  to SResult; S is not modified. }
procedure TProbabilityCutWord.SentenceToWords(var S: string);
var
  iLen,iCount,iMinID,i:Integer;
  sTemp,sWord:string;
  sRet:string;
begin
  iMinID:=-1;
  iLen:=Length(S);
  iCount:=GetTempWord(S);

  for i:=0 to iCount-1 do
  begin
    GetPrev(i);
    // Among candidates that reach the end of the sentence keep the cheapest.
    if FWordInSentence[i].iOffset+FWordInSentence[i].iLength-1=iLen then
      if (iMinID=-1) or (FWordInSentence[i].dSumFee<=FWordInSentence[iMinID].dSumFee) then
        iMinID:=i;
  end;

  sRet:='';
  sTemp:='';
  i:=iMinID;
  while (i>=0) do
  begin
    sWord:=Copy(S,FWordInSentence[i].iOffset,FWordInSentence[i].iLength);
    if Length(sWord)>2 then
      if sTemp='' then
        sRet:=sWord+' '+sRet // segmentation result
      else
      begin
        sRet:=sWord+' '+color2+CheckName(sTemp,NameColor,EndColor)+endColor+' '+sRet;
        sTemp:='';
      end
    else
    begin
      sTemp:=sWord+sTemp; // collect consecutive single characters
    end;
    i:=FWordInSentence[i].iGoodPrev;
  end;
  if sTemp<>'' then
    sRet:=color2+CheckName(sTemp,NameColor,EndColor)+endColor+' '+sRet;

  SResult:=SResult+sRet;
end;

end.

unit untDict;

interface
uses SysUtils,StrUtils, Variants, Classes,Dialogs;

const Words='words.txt'; // dictionary file name

//链表元素
type
  PChainElem=^ChainElem;
  // One element of the singly linked child list.
  ChainElem=record
    value:Pointer; // points to a TCharTreeNode
    sucElem:PChainElem; // next element (the next sibling), nil at the end
  end;
//链表类
// Singly linked list of PChainElem with head and tail pointers.
type TChain=class
private
  head,tail:PChainElem;
published
  constructor create;
  destructor destroy;override;
  procedure ApendElem(curElem:PChainElem); // append an element at the tail
  function GetNext(curElem:PChainElem):PChainElem; // element following curElem (the sibling)
  { property ChainHead:Pointer read GetHead write SetChainHead default nil
    // a nil head means the owner is a leaf }
  function GetHead:PChainElem; // first element of the list

end;

//树节点
// Node of the dictionary tree: one two-byte character plus its children.
type TCharTreeNode=class{(TChain)}
private
  sValue:Word;// the two bytes of the character packed as (first shl 8) + second
  WordEnd:Boolean; // True when the path from the root to this node spells a word
  WordFreq:Word; // word frequency
  Chain:TChain; // linked list of child nodes
  ParentNode:TCharTreeNode; // parent node, nil for a root
  function GetCharacter:string;
  procedure SetCharacter(s:String);
  procedure SetWordEnd(V:boolean);
  procedure SetWordFreq(AFreq:Word);
published
  constructor create;
  destructor destroy;override;
  function AddChild(const character:string;wFreq:longint=0;Wordend:Boolean=false):TCharTreeNode;
  function GetFirstChild:TCharTreeNode; // first child node
  function GetFirstElem:PChainElem;
  function GetNextSibling(Elem:PChainElem):TCharTreeNode; // node held by the element after Elem
  function GetNextElem(Elem:PChainElem):PChainElem;

  property Character:string read GetCharacter write SetCharacter ;
  property IsWordEnd:Boolean read wordEnd write SetWordEnd default false;
  property WordFreqs:Word read WordFreq write SetWordFreq default 1;

  function GetParent:TCharTreeNode; // parent node
  function GetHead:PChainElem;
end;

//树
// The dictionary tree: one root node for every possible two-byte value.
type TCharTree=class
private
  root:array[Word] of TCharTreeNode;
  NodeCount:LongWord; // number of nodes
  procedure setCount(V:LongWord);
  procedure FreeAllNode(Node:TCharTreeNode);
public
  constructor create;
  destructor destroy;override;
published
  property Count:LongWord read NodeCount write SetCount;
  function GetRoot(i:Word):TCharTreeNode;
end;

//数据库的数据载入到树---字典类
// Loading order of a dictionary: dtOrder is set by TDict.LoadDict,
// dtReveser by TReverseDict.LoadDict.
type TDictType=(dtOrder,dtReveser);

// Frequency and length of one word found by TInterfacedDict.TraceAll.
type PFreqAndWord=^TFreqAndWord;
TFreqAndWord=record
iFreq,
iLen:Integer;
end;

// Fixed-size buffers (250 entries) used by TraceAll.
type TArrayString=array[0..249]of TFreqAndWord;
TArrayBool=array[0..249]of Boolean;

PArrayString=^TArrayString;
PArrayBool=^TArrayBool;

{ Base class of the dictionaries. Owns the character tree and offers lookup
  (FindCharacter) and back-tracing (Trace, TraceAll). Loading is left to
  the descendants through LoadDict. }
type TInterfacedDict=class(TInterfacedobject)
private
  FTree:TCharTree;
  FDictType:TDictType;
  FDictPath:string;

  function NodesCount:LongWord;
  procedure SetNodeCount(ACount:LongWord);
protected
  procedure SetDictType(AType:TDictType);
  procedure SetDictPath(Apath:string);

public
  class function ChangeDict(APath:string):TInterfacedDict;virtual; // switch to another dictionary
  class function PlusDict(APath:string):TInterfacedDict;virtual;

  function FindCharacter(S:AnsiString;nd:TCharTreeNode=nil):TCharTreeNode;// nd=nil: look up the root node; otherwise search the children of nd
  //function FindWordInDict(S: string): TCharTreeNode;virtual; // is S a word?
  function Trace(node1:TCharTreeNode):string;virtual; // walk up towards the root and return the word found
  function TraceAll(node1:TCharTreeNode;var FItem:TArrayString):Integer;virtual; // walk up and report every word on the path
  function GetRoot(i:word):TCharTreeNode;overload;
  function GetRoot(c:AnsiString):TCharTreeNode;overload;
  function GetFreq(var nd:TCharTreeNode):word;virtual;
  function GetFee(nd:TCharTreeNode):word;virtual;

  constructor create;
  destructor Destroy;override;

  procedure LoadDict(APath: string);virtual; // load a dictionary; implemented by descendants

  property Tree:TCharTree read FTree write FTree;
  property Count:LongWord read NodesCount write SetNodeCount;
  property DictType:TDictType read FDictType {write SetDictType}; // read-only
  property DictPath:string read FDictPath write SetDictPath;
  procedure AfterConstruction; override;
end;
{ Forward-order dictionary, managed as a reference-counted singleton held in
  DictInstance. Obtain it through ChangeDict/CreateInstance and give it back
  with ReleaseInstance; the public constructor always raises. }
type TDict=class(TInterfacedDict) // singleton
private

published
  procedure LoadDict(APath: string);override;

public
  class function GetInstance(chooce:Integer):TDict;
  constructor Create(APath: string);
  class function CreateInstance(APath:string):Tdict; // create or share the singleton
  class function ChangeDict(APath:string):TInterfacedDict;override;
  class function PlusDict(APath:string):TInterfacedDict; override;

  procedure ReleaseInstance; // give the singleton back
  destructor Destroy;override;
end;
{ Dictionary whose words are loaded reversed, for the reverse matchers.
  Managed as a reference-counted singleton held in RevDictInstance; the
  public constructor always raises. }
type TReverseDict=class(TInterfacedDict)
published
  procedure LoadDict(APath: string);override;

public
  class function GetInstance(chooce:Integer):TReverseDict;
  constructor Create(APath: string);
  class function CreateInstance(APath:string):TReverseDict; // create or share the singleton
  class function ChangeDict(APath:string):TInterfacedDict;override;
  class function PlusDict(APath:string):TInterfacedDict;override;
  procedure ReleaseInstance; // give the singleton back
  destructor Destroy;override;
end;

var
  { singleton instances of the forward and the reverse dictionary }
  DictInstance:TDict;
  RevDictInstance:TReverseDict;

implementation
uses untConst{,IniOp};
var
  { reference counts of the two dictionary singletons }
  iDictRefCount,iRevDictRefCount:Integer;

{ TChain }

{ Appends curElem at the end of the list. On an empty list it becomes both
  head and tail. }
procedure TChain.ApendElem(curElem: PChainElem);
begin
  if head<>nil then
  begin
    tail.sucElem:=curElem; // link behind the current tail
    tail:=curElem;
  end
  else
  begin
    head:=curElem;
    tail:=curElem;
  end;
end;

constructor TChain.create;
begin
  // A new list is empty: neither end points anywhere.
  tail:=nil;
  head:=nil;
end;

{ Frees every node held by the list and disposes every list element.
  An element is disposed even when its value is nil; the original skipped
  Dispose in that case and leaked the element. }
destructor TChain.destroy;
var
  Pcur,Pnext:PChainElem;
begin
  Pcur:=head;
  while (Pcur<>nil) do
  begin
    Pnext:=Pcur^.sucElem; // read the link before the element is disposed
    if Pcur^.value<>nil then
      TCharTreeNode(Pcur^.value).Free;
    Dispose(Pcur);
    Pcur:=Pnext;
  end;
  head:=nil;
  tail:=nil;
  inherited;
end;

// Returns the first element of the list, nil when the list is empty.
function TChain.GetHead:PChainElem;
begin
  Result:=head;
end;

// Returns the element linked after curElem. curElem must not be nil.
function TChain.GetNext(curElem: PChainElem): PChainElem;
begin
  Result:=curElem.sucElem;
end;

{ TCharTreeNode }

{ Creates a child node for the two-byte character in character, appends it
  to this node's child list and returns it.
  character must hold at least two bytes; wFreq is stored in a Word field. }
function TCharTreeNode.AddChild(const character: string;wFreq:longint;Wordend:Boolean):TCharTreeNode;
var
  tn:TCharTreeNode;
  ChainItem:PChainElem;
begin
  tn:=TCharTreeNode.create;
  tn.sValue:=word(byte(character[1]) shl 8) + word(character[2]);
  tn.WordEnd:=Wordend;
  tn.WordFreq:=wFreq;
  tn.ParentNode:=Self;

  New(ChainItem);
  ChainItem^.value:=pointer(tn);
  ChainItem^.sucElem:=nil;

  Self.Chain.ApendElem(ChainItem); // add to the child list
  Result:=tn;
end;

constructor TCharTreeNode.create;
begin
  // A new node has no parent and an empty child list.
  ParentNode:=nil;
  Chain:=TChain.create;
end;

{ Frees the child list. TChain.destroy frees the child nodes it holds, so
  the whole subtree goes with this node.
  Free is used instead of a direct Destroy call so that a nil Chain is
  tolerated. }
destructor TCharTreeNode.destroy;
begin
  Chain.Free;
  Chain:=nil;
  inherited;
end;

// Unpacks sValue into a two-character string: high byte first, low byte second.
function TCharTreeNode.GetCharacter: string;
var
  hiByte,loByte:Byte;
begin
  hiByte:=Byte((sValue and 65280) shr 8);
  loByte:=Byte(sValue and 255);
  SetLength(Result,2);
  Result[1]:=Char(hiByte);
  Result[2]:=Char(loByte);
end;

{ Returns the first child node, or nil when this node has no children.
  The head element is checked for nil before its value is read; the original
  dereferenced it unconditionally and failed on a leaf. }
function TCharTreeNode.GetFirstChild: TCharTreeNode;
var
  first:PChainElem;
begin
  first:=Chain.GetHead;
  if (first<>nil) and Assigned(first^.value) then
    Result:=TCharTreeNode(first^.value)
  else
    Result:=nil;
end;

// Returns the parent node; nil for a node created without a parent.
function TCharTreeNode.GetParent: TCharTreeNode;
begin
  Result:=ParentNode;
end;

{ Returns the node stored in the list element after Elem, or nil when Elem
  is the last element. Elem is the list element that holds the current node
  and must not be nil. }
function TCharTreeNode.GetNextSibling(Elem:PChainElem): TCharTreeNode;
begin
  if Elem.sucElem<>nil then
    Result:=TCharTreeNode(Elem.sucElem.value)
  else
    Result:=nil;
end;

// Packs the first two bytes of s into sValue: (first shl 8) + second.
procedure TCharTreeNode.SetCharacter(s: String);
var
  hiPart,loPart:Word;
begin
  hiPart:=word(Byte(s[1]) shl 8);
  loPart:=word(Byte(s[2]));
  sValue:=hiPart+loPart;
end;

procedure TCharTreeNode.SetWordEnd(V: boolean);
begin
  // Backing field of the IsWordEnd property.
  Self.WordEnd:=V;
end;

// Returns the list element linked after Elem. Elem must not be nil.
function TCharTreeNode.GetNextElem(Elem: PChainElem): PChainElem;
begin
  Result:=Elem^.sucElem;
end;

// Returns the first element of the child list via TChain.GetHead.
function TCharTreeNode.GetFirstElem: PChainElem;
begin
  Result:=Chain.GetHead;
end;

// Returns the head of the child list by reading the list's field directly.
function TCharTreeNode.GetHead: PChainElem;
begin
  Result:=Self.Chain.head;
end;

procedure TCharTreeNode.SetWordFreq(AFreq: Word);
begin
  // Backing field of the WordFreqs property.
  Self.WordFreq:=AFreq;
end;

{ TCharTree }

{ Creates one root node for each of the 65536 possible two-byte values.
  No root is marked as a word until a dictionary is loaded. }
constructor TCharTree.create;
var
  I:Integer; // wider than Word so that 65535 is not also the counter's maximum
begin
  inherited Create;
  for I:=0 to 65535 do // roots
  begin
    root[I]:=TCharTreeNode.create;
    root[I].sValue:=I;
    root[I].WordEnd:=False; // not a single-character word by default
  end;
  NodeCount:=65536;
end;

// Releases the tree by passing every root to FreeAllNode.
destructor TCharTree.destroy;
var
  I:Integer;
begin
  for I:=0 to 65535 do
  begin
    FreeAllNode(root[I]);
    root[I]:=nil;
  end;
  inherited;
end;

{ Frees Node together with its whole subtree.
  TCharTreeNode.destroy releases the node's child list, and TChain.destroy
  frees every child node held in that list, so freeing Node is enough.
  The original unlinked only the first child and recursed into it, never
  freed a node that had children, and could call Free through an
  uninitialised pointer. }
procedure TCharTree.FreeAllNode(Node:TCharTreeNode);
begin
  if Node=nil then Exit;
  Node.Free;
end;

// Returns the root node for the two-byte value i.
function TCharTree.GetRoot(i: Word): TCharTreeNode;
begin
  Result:=root[i];
end;

procedure TCharTree.setCount(v:LongWord);
begin
  // Backing field of the Count property.
  Self.NodeCount:=v;
end;

{ TDict }
{ Returns the forward dictionary for APath.
    - Same path already loaded: the existing singleton is returned and its
      reference count is incremented, so that the ReleaseInstance call of
      each user stays balanced. (The original returned it without counting,
      which lets the first user to finish free it under the others.)
    - Different path: the current hold is released, then the singleton is
      obtained again and APath is loaded into it.
  If loading fails the hold is released again and EAbort is raised. }
class function TDict.ChangeDict(APath: string): TInterfacedDict;
var
  current:TDict;
begin
  current:=TDict.GetInstance(0);
  if Assigned(current) then
  begin
    if current.DictPath=Trim(APath) then // same dictionary
    begin
      Result:=TDict.GetInstance(1); // share it and count the new user
      Exit;
    end
    else
      current.ReleaseInstance; // different dictionary: release the old one first
  end;
  Result:=TDict.CreateInstance(APath);
  try
    Result.LoadDict(APath);
  except
    {$Message Hint 'show some error information abort Dict'}
    TDict(Result).ReleaseInstance; // do not leave a half-loaded dictionary registered
    Abort;
  end;
end;

// Always raises Exception (StrCreateError): instances are meant to be
// obtained through CreateInstance / ChangeDict, not constructed directly.
constructor TDict.Create(APath: string);
begin
// inherited Create;
// LoadDict(APath+words);
raise Exception.CreateFmt(StrCreateError,[ClassName]);
end;

// Returns the singleton via GetInstance(1), which creates it on first use
// and counts the caller. APath is not used here; loading happens separately.
class function TDict.CreateInstance(APath: string):TDict;
begin
  Result:=TDict.GetInstance(1);
end;

destructor TDict.Destroy;
begin
  // When the singleton itself is being destroyed, clear the global reference.
  if Self=GetInstance(0) then
    GetInstance(2);
  inherited Destroy;
end;

{ Single access point to the DictInstance singleton.
    chooce = 0: return the current instance (may be nil);
    chooce = 1: increment the reference count and create the instance if
                it does not exist yet;
    chooce = 2: clear the global reference (used by Destroy).
  Any other value raises Exception (strCreateParamError). }
class function TDict.GetInstance(chooce: Integer): TDict;
begin
  case chooce of
    0: ; // query only, used when releasing
    1: // create or share
      begin
        Inc(iDictRefCount,1);
        if not Assigned(DictInstance) then
        begin
          DictInstance:=inherited Create;
        end;
      end;
    2: DictInstance:=nil; // reset
  else
    raise Exception.CreateFmt(strCreateParamError,[ClassName]);
  end;

  Result:=DictInstance;
end;

{ Loads the dictionary file APath into the tree in forward order.
  Each line holds tab-separated fields, e.g.  22072 <TAB> "分段" <TAB> 5 :
  field 1 is the word between double quotes, field 2 (optional) its frequency.
  Behaviour:
    - a file that cannot be opened shows StrFileNotFound and raises EAbort;
    - any error while reading shows StrLoadDictError and raises EAbort;
    - lines with fewer than two fields, or whose word is shorter than two
      bytes, are skipped;
    - a missing or non-numeric frequency counts as 1.
  The file is closed only if it was actually opened. }
procedure TDict.LoadDict(APath: string);
var
  nd,tmpNode:TCharTreeNode;
  str:string;
  index:Word;
  i:Integer;
  F:TextFile;
  PosDel1,PosDel2:Integer;
  wfreq:string;
  lst:TStringList;

  // Splits Str at the first character of Splitor and appends the pieces to
  // lst. A trailing separator yields a trailing empty piece.
  procedure Split(Splitor,Str:string);
  var
    PSplitor,PStr,PPos:PAnsiChar;
    s:string;
  begin
    PSplitor:=PAnsiChar(Splitor);
    PStr:=PAnsiChar(Str);
    PPos:=PAnsiChar(Str);

    // The original wrote "while PPos<>#0", which tests the pointer rather
    // than the character; the loop really ends through the Break below.
    while True do
    begin
      while (PPos^<>PSplitor^) and (PPos^<>#0) do inc(PPos);

      SetString(s,PStr,PPos-PStr);
      lst.Add(s);

      if PPos^<>#0 then inc(PPos)
      else Break;

      PStr:=PPos;
    end;
  end;

begin
  SetDictType(dtOrder); // mark this dictionary as forward-ordered
  FDictPath:=APath;

  lst:=TStringList.Create;
  try
    AssignFile(F,APath);
    try
      Reset(F);
    except
      MessageDlg((Format(StrFileNotFound,[APath])),mtWarning,[mbOK],0);
      Abort;
    end;
    try
      try
        while (not Eof(F)) do // add every word to the tree
        begin
          Readln(F,str); // next dictionary line
          lst.Clear;
          Split(#9,str);
          if lst.Count<2 then Continue; // malformed or blank line

          str:=lst.Strings[1];
          // The word is enclosed in double quotes; strip them.
          PosDel1:=Pos('"',str);
          PosDel2:=LastDelimiter('"',str);
          str:=Copy(str,PosDel1+1 ,PosDel2-PosDel1-1);
          if lst.Count>=3 then
            wfreq:=lst.Strings[2]
          else
            wfreq:=''; // do not reuse the previous line's frequency

          if Length(str)<=1 then Continue; // not a two-byte character (should not happen)

          index:=word(Byte(str[1]) shl 8)+ word(Byte(str[2])); // index of the root node
          nd:=Tree.root[index];

          if Length(str)=2 then // single-character word
          begin
            nd.WordFreq:=nd.WordFreq+StrToIntDef(wfreq,1);
            nd.WordEnd:=True;
            Continue;
          end;

          for i:=1 to (Length(str) div 2)-1 do
          begin
            tmpNode:=FindCharacter(Copy(str,i*2+1,2),nd);
            if tmpNode=nil then
            begin
              nd:=nd.AddChild(Copy(str,i*2+1,2)); // no child with this value yet: append one
              Tree.Count:=Tree.Count+1; // one more tree node
            end
            else
              nd:=tmpNode;
          end;

          nd.WordFreq:=StrToIntDef(wfreq,1); // frequency
          nd.WordEnd:=True; // end-of-word mark
        end;
      except
        MessageDlg(StrLoadDictError,mtInformation,[mbOK],0);
        Abort;
      end;
    finally
      CloseFile(F);
    end;
  finally
    lst.Free;
  end;
end;

// Not implemented. Returns nil so that callers do not receive an undefined result.
class function TDict.PlusDict(APath: string): TInterfacedDict;
begin
  Result:=nil;
end;

{ Gives up one hold on the singleton. The instance is freed only when the
  reference count is no longer positive. }
procedure TDict.ReleaseInstance;
var
  current:TDict;
begin
  Dec(iDictRefCount,1);
  if iDictRefCount<=0 then
  begin
    current:=GetInstance(0);
    if current<>nil then
      current.Free;
  end;
end;

{ TInterfacedDict }

// Override that only calls the inherited implementation.
procedure TInterfacedDict.AfterConstruction;
begin
inherited;
//
end;

{ Placeholder that descendants must override. It cannot be declared
  virtual+abstract because instances of this class are created.
  Returns nil so that the result is defined. }
class function TInterfacedDict.ChangeDict(APath: string): TInterfacedDict;
begin
  Result:=nil;
end;

// Creates the dictionary together with its (initially empty) character tree.
constructor TInterfacedDict.Create;
begin
  inherited Create;
  Self.FTree:=TCharTree.Create;
end;

// Frees the character tree and clears the field before the inherited cleanup.
destructor TInterfacedDict.Destroy;
begin
  FreeAndNil(Self.FTree);
  inherited Destroy;
end;

{ Looks up the character S.
    nd = nil : returns the root node for S;
    otherwise: returns the child of nd whose Character equals S, or nil
               when there is none. }
function TInterfacedDict.FindCharacter(S: AnsiString;
  nd: TCharTreeNode): TCharTreeNode;
var
  item:PChainElem;
begin
  if nd=nil then // root lookup
    Result:=GetRoot(S)
  else
  begin
    Result:=nil;
    item:=nd.GetHead;
    while (item<>nil) do
    begin
      if (TCharTreeNode(item.value)).Character=S then // child with the same value found
      begin
        Result:=TCharTreeNode(item.value);
        Exit;
      end
      else
        item:=item.sucElem;
    end;
  end;
end;

// Not implemented. Returns 0 so that the result is defined.
function TInterfacedDict.GetFee(nd: TCharTreeNode): word;
begin
  Result:=0;
end;

// Returns the frequency stored in nd. nd must not be nil.
function TInterfacedDict.GetFreq(var nd: TCharTreeNode): word;
var
  freq:Word;
begin
  freq:=nd.WordFreq;
  Result:=freq;
end;

// Returns the root node with index i by delegating to the tree.
function TInterfacedDict.GetRoot(i:word):TCharTreeNode;
begin
  Result:=Self.Tree.GetRoot(i);
end;

// Returns the root node for the two-byte character c; the index is
// (first byte shl 8) + second byte.
function TInterfacedDict.GetRoot(c: AnsiString): TCharTreeNode;
begin
  Result:=Tree.GetRoot(Word(byte(c[1]) shl 8) + Word(c[2]));
end;

// Empty base implementation; TDict and TReverseDict override it.
procedure TInterfacedDict.LoadDict(APath: string);
begin
//
end;

// Getter of the Count property: reads the tree's node counter.
function TInterfacedDict.NodesCount: LongWord;
begin
  Result:=Self.Tree.NodeCount;
end;

// Placeholder for descendants. Returns nil so that the result is defined.
class function TInterfacedDict.PlusDict(APath: string): TInterfacedDict;
begin
  Result:=nil;
end;

procedure TInterfacedDict.SetDictPath(Apath: string);
begin
  // Backing field of the DictPath property.
  Self.FDictPath:=Apath;
end;

procedure TInterfacedDict.SetDictType(AType: TDictType);
begin
  // Backing field of the read-only DictType property.
  Self.FDictType:=AType;
end;

procedure TInterfacedDict.SetNodeCount(ACount: LongWord);
begin
  // Setter of the Count property: writes the tree's node counter.
  Self.Tree.NodeCount:=ACount;
end;

{ Walks from node1 up to its root and returns the word found on the way:
  characters are collected from the first node marked as a word end
  (node1 itself or an ancestor) up to the root. Returns '' when no node on
  the path is a word end.
  Raises Exception when node1 is nil. }
function TInterfacedDict.Trace(node1: TCharTreeNode): string;
var
  isStart:Boolean;
  nd:TCharTreeNode;
begin
  isStart:=False;
  Result:='';
  if node1=nil then raise Exception.Create('Trace错误!');
  repeat

    if node1.IsWordEnd then isStart:=True;
    if isStart then Result:=node1.Character+Result; // build the word back to front

    nd:=node1.GetParent;
    if nd=nil then Break; // reached the root

    node1:=nd;
  until (false); // keep climbing
end;

{ Walks from node1 up to its root and records every word that ends on the
  path. For each node marked as a word end, FItem[k].iFreq receives its
  frequency and FItem[k].iLen accumulates the length of that word.
  Returns the number of words recorded.
  Raises Exception (StrNotAssignedError) when node1 is nil and
  (StrOverBoundsError) when more than 10 words are found. }
function TInterfacedDict.TraceAll(node1: TCharTreeNode;var FItem:TArrayString):Integer;
var
  nd:TCharTreeNode;
  bItem:TArrayBool;
  i,j:Integer;
begin
  // The class name is taken from the class itself: node1 is nil here, so
  // node1.ClassName (as in the original) would fail before the raise.
  if node1=nil then raise Exception.CreateFmt(StrNotAssignedError,[TCharTreeNode.ClassName]);

  FillChar(FItem,SizeOf(TArrayString),0);
  i:=0;
  repeat
    if node1.IsWordEnd then
    begin
      bItem[i]:=True;
      FItem[i].iFreq:=node1.WordFreqs;
      Inc(i);
    end;

    if i>10 then raise Exception.Create(StrOverBoundsError);

    for j:=0 to i-1 do
      if bItem[j] then
        FItem[j].iLen:=FItem[j].iLen+Length(node1.Character); // length count

    nd:=node1.GetParent;

    if nd=nil then Break; // reached the root

    node1:=nd;
  until (false); // keep climbing
  Result:=i; // number of words found
end;

{ TReverseDict }

{ Returns the reverse dictionary for APath.
    - Same path already loaded: the existing singleton is returned and its
      reference count is incremented, so that the ReleaseInstance call of
      each user stays balanced. (The original returned it without counting.)
    - Different path: the current hold is released, then the singleton is
      obtained again and APath is loaded into it.
  Errors raised by LoadDict propagate to the caller. }
class function TReverseDict.ChangeDict(APath: string): TInterfacedDict;
var
  current:TReverseDict;
begin
  current:=TReverseDict.GetInstance(0);
  if Assigned(current) then
  begin
    if current.DictPath=Trim(APath) then // same dictionary
    begin
      Result:=TReverseDict.GetInstance(1); // share it and count the new user
      Exit;
    end
    else
      current.ReleaseInstance; // different dictionary: release the old one first
  end;
  Result:=TReverseDict.CreateInstance(APath);
  Result.LoadDict(APath);
end;

// Always raises Exception (StrCreateError): instances are meant to be
// obtained through CreateInstance / ChangeDict, not constructed directly.
constructor TReverseDict.Create(APath: string);
begin
  raise Exception.CreateFmt(StrCreateError,[Self.ClassName]);
end;

// Returns the singleton via GetInstance(1), which creates it on first use
// and counts the caller. APath is not used here; loading happens separately.
class function TReverseDict.CreateInstance(APath: string):TReverseDict;
begin
  Result:=TReverseDict.GetInstance(1);
end;

destructor TReverseDict.Destroy;
begin
  // When the singleton itself is being destroyed, clear the global reference.
  if Self=GetInstance(0) then
    GetInstance(2);
  inherited Destroy;
end;

{ Single access point to the RevDictInstance singleton.
    chooce = 0: return the current instance (may be nil);
    chooce = 1: increment the reference count and create the instance if
                it does not exist yet;
    chooce = 2: clear the global reference (used by Destroy).
  Any other value raises Exception (strCreateParamError). }
class function TReverseDict.GetInstance(chooce: Integer): TReverseDict;
begin
  case chooce of
    0: ; // query only, used when releasing
    1: // create or share
      begin
        Inc(iRevDictRefCount,1);
        if not Assigned(RevDictInstance) then
        begin
          RevDictInstance:=inherited Create;
        end;
      end;
    2: RevDictInstance:=nil;
  else
    raise Exception.CreateFmt(strCreateParamError,[ClassName]);
  end;

  Result:=RevDictInstance;
end;

{ Loads the dictionary file APath into the tree, storing every word reversed.

  Each line is split on TAB. Field 1 holds the word enclosed in double quotes;
  field 2, when present, holds its frequency (default 1). The word is reversed
  and inserted two bytes at a time: the first two bytes select the root slot,
  each following pair becomes a child node.

  Side effects: sets FDictPath and the dictionary type, adds nodes to Tree and
  updates Tree.Count.
  Errors: a failure while reading or inserting shows StrLoadDictError and then
  calls Abort; a failure computing the root index shows StrRowError and halts
  the program. A failure to open the file propagates to the caller. }
procedure TReverseDict.LoadDict(APath: string);
var
  nd, tmpNode: TCharTreeNode;
  str: string;
  index: Word;
  i: LongWord;
  F: TextFile;
  PosDel1, PosDel2: Integer;
  wfreq: string;
  lst: TStringList;
  sTip: string;

  { Splits Str on the first character of Splitor and appends the pieces to lst. }
  procedure Split(Splitor, Str: string);
  var
    PSplitor, PStr, PPos: PChar;
    s: string;
  begin
    PSplitor := PChar(Splitor);
    PStr := PChar(Str);
    PPos := PChar(Str);

    // NOTE(review): this compares the pointer, not PPos^, with #0; the loop
    // is really left through the Break below. Left as written - confirm.
    while PPos<>#0 do
    begin
      // Advance to the next separator or to the terminating #0.
      while (PPos^ <> PSplitor^) and (PPos^ <> #0) do
        Inc(PPos);

      SetString(s, PStr, PPos - PStr);
      lst.Add(s);

      if PPos^ <> #0 then
        Inc(PPos) // step over the separator
      else
        Break;

      PStr := PPos;
    end;
  end;

begin
  FDictPath := APath;
  SetDictType(dtReveser); // mark this dictionary as a reverse dictionary

  lst := TStringList.Create;
  try
    AssignFile(F, APath);
    Reset(F);
    // The file is open from here on, so CloseFile is guarded separately.
    try
      try
        while not Eof(F) do // add every word of the file to the tree
        begin
          Readln(F, str);
          sTip := str; // keep the raw line for the error message

          // Reset per line so a line without a frequency field does not
          // inherit the previous line's value.
          wfreq := '';

          Split(#9, str);
          str := lst.Strings[1];
          PosDel1 := Pos('"', str);
          PosDel2 := LastDelimiter('"', str);
          // Strip the surrounding double quotes to obtain the word.
          str := Copy(str, PosDel1 + 1, PosDel2 - PosDel1 - 1);
          if lst.Count >= 3 then
            wfreq := lst.Strings[2]; // word frequency

          lst.Clear;
          if Length(Trim(str)) <= 1 then
            Continue; // shorter than one two-byte character: skip

          str := AnsiReverseString(Trim(str)); // store the word reversed

          try
            // The first two bytes form the index into the root table.
            index := word(Byte(str[1]) shl 8) + word(Byte(str[2]));
          except
            MessageDlg(Format(StrRowError, [sTip]), mtWarning, [mbOK], 0);
            Halt;
          end;

          nd := Tree.root[index];

          if Length(str) = 2 then // single-character word
          begin
            nd.WordFreq := StrToIntDef(wfreq, 1);
            nd.WordEnd := True;
            Continue;
          end;

          for i := 1 to (Length(str) div 2) - 1 do
          begin
            tmpNode := FindCharacter(Copy(str, i * 2 + 1, 2), nd);
            if tmpNode = nil then
              // No child with this character yet: append one.
              nd := nd.AddChild(Copy(str, i * 2 + 1, 2))
            else
            begin
              nd := tmpNode;
              Continue;
            end;
            Tree.Count := Tree.Count + 1; // one more node in the tree
          end;

          nd.WordFreq := StrToIntDef(wfreq, 1); // word frequency
          nd.WordEnd := True;                   // end-of-word marker
        end;
      except
        MessageDlg(StrLoadDictError, mtInformation, [mbOK], 0);
        Abort;
      end;
    finally
      CloseFile(F);
    end;
  finally
    lst.Free;
  end;
end;

{ Not implemented. Returns nil so that callers receive a defined value
  instead of an uninitialised reference. }
class function TReverseDict.PlusDict(APath: string): TInterfacedDict;
begin
  Result := nil;
end;

{ Drops one reference to the shared instance. When the reference count
  reaches zero or below and a shared instance exists, that instance is freed. }
procedure TReverseDict.ReleaseInstance;
var
  Shared: TReverseDict;
begin
  Dec(iRevDictRefCount, 1);
  if iRevDictRefCount > 0 then
    Exit; // still referenced elsewhere

  Shared := GetInstance(0);
  if Shared <> nil then
    Shared.Free;
end;

initialization
  // Start with zero reference counts and no shared dictionary instances.
  iDictRefCount := 0;
  iRevDictRefCount := 0;
  DictInstance := nil;
  RevDictInstance := nil;
finalization
  //...
end.

linuxping · 2008-05-24

这样子用：
var
  cwInstance: TInterfacedCutWord;

// Create the word-segmentation object for the selected method.
case CutWordMethod of
  mcOrderMax:
    cwInstance := TOrderMaxCutWord.Create(DictDir);
  mcOrderMin:
    cwInstance := TOrderMinCutWord.Create(DictDir);
  mcReveserMax: // reverse maximum matching
    cwInstance := TConverseMaxCutWord.Create(DictDir);
  mcReveserMin: // reverse minimum matching
    cwInstance := TReverseMinCutWord.Create(DictDir);
  mcProbability: // probability-based segmentation
    cwInstance := TProbabilityCutWord.Create(DictDir);
  mcIntegrate:
    begin

    end;
end;

// Create the dictionary.
if ADictInfo.bAddDict then // append dictionaries
begin
  if not Assigned(CurrentDict) then //raise Exception.Create(StrNoDictInstance);
  begin
    // No dictionary yet: the first file creates it, the rest are appended.
    if ADictInfo.FMethod in [mcReveserMax, mcReveserMin] then
      CurrentDict := TReverseDict.ChangeDict(lst.Strings[0])
    else
      CurrentDict := TDict.ChangeDict(lst.Strings[0]);
    if lst.Count > 1 then
      for i := 1 to lst.Count - 1 do
      begin
        CurrentDict.LoadDict(lst.Strings[i]);
        CurrentDict.DictPath := CurrentDict.DictPath + '*' + lst.Strings[i];
      end;
  end
  else
    // A dictionary already exists: append every listed file to it.
    if lst.Count > 0 then
      for i := 0 to lst.Count - 1 do
      begin
        CurrentDict.LoadDict(lst.Strings[i]);
        CurrentDict.DictPath := CurrentDict.DictPath + '*' + lst.Strings[i];
      end;
  //ShowMessage('bAdd');
  FreeAndNil(lst);
end;

cwInstance.Dict := CurrentDict;

// Segment one file.
cwInstance.ReadToFile('C:/sdasda.txt');

分词结果保存在 cwInstance.GetResult;

故居 · 2008-05-25

谢谢 creation-zy ，linuxping 的代码。

linuxping 的代码太强了！！！

delphi 英文分词(50分)

故居

Unregistered / Unconfirmed

onyliu

Unregistered / Unconfirmed

故居

Unregistered / Unconfirmed

creation-zy

Unregistered / Unconfirmed

linuxping

Unregistered / Unconfirmed

linuxping

Unregistered / Unconfirmed

故居

Unregistered / Unconfirmed

Similar threads