研究心得------->CSV文件解析类 (100分)

  • 主题发起人 wr960204
  • 开始时间
W

wr960204

Unregistered / Unconfirmed
GUEST, unregistred user!
朋友们常常会遇到CSV文件解析的问题,我写了一个解析的DataSet类,把CSV文件看成是数据库的表的形式表现出来。效率远远高于EXCEL,65536x65536的表,EXCEl要用上好几秒,这个基本感觉不到延迟
{=================================================================}
{ }
{ }
{ CSV DataSet }
{ }
{ }
{ }
{ 把CSV文件当成是数据表的DataSet }
{ }
{ }
{ }
{ wr960204(王锐 2003/12/2) }
{ QQ:42088303 }
{ }
{=================================================================}

unit UnitCSVDataSet;

interface

uses
DB, Classes;

const
MaxStrLen = 240;

type
PRecInfo = ^TRecInfo;
TRecInfo = packed record
Bookmark: Integer;
BookmarkFlag: TBookmarkFlag;
end;

type
{ TCSVStringList}
{
原来的TStringList不行,因为遇到回车他就会把他当成是换行符号
但是CSV文件的单元格内可以存在回车,因此从TStringList上派生一个类
覆盖掉SetTextStr的方法
}
TCSVStringList = class(TStringList)
private

protected
procedure SetTextStr(const Value: string); override;
public

end;

{ TCustomCSVDataSet }

TCustomCSVDataSet = class(TDataSet)
private
FAutoSaveToFile: Boolean;
FData: TStrings;
FRecBufSize: Integer;
FRecInfoOfs: Integer;
FCurRec: Integer;
FFileName: string;
FLastBookmark: Integer;
FSaveChanges: Boolean;
FFirstLineAsSchema: Boolean;
FRecordSize: Integer;
FSchemaFile: string;
FSchemaLine: string;
FFileMustExist: Boolean;
procedure SetSchemaFile(Value: string);
procedure SetFileMustExist(Value: Boolean);
procedure SetFirstLineAsSchema(Value: Boolean);
//跳过没有内容的行
procedure RemoveWhiteLines(List: TStrings; IsFileRecord: Boolean);
procedure SetFileName(const Value: string);
protected
{ Overriden abstract methods (required) }
function AllocRecordBuffer: PChar; override;
procedure FreeRecordBuffer(var Buffer: PChar); override;
procedure GetBookmarkData(Buffer: PChar; Data: Pointer); override;
function GetBookmarkFlag(Buffer: PChar): TBookmarkFlag; override;
function GetRecord(Buffer: PChar; GetMode: TGetMode; DoCheck: Boolean):
TGetResult; override;
function GetRecordSize: Word; override;
procedure InternalAddRecord(Buffer: Pointer; Append: Boolean); override;
procedure InternalClose; override;
procedure InternalDelete; override;
procedure InternalFirst; override;
procedure InternalGotoBookmark(Bookmark: Pointer); override;
procedure InternalHandleException; override;
procedure InternalInitFieldDefs; override;
procedure InternalInitRecord(Buffer: PChar); override;
procedure InternalLast; override;
procedure InternalOpen; override;
procedure InternalPost; override;
procedure InternalSetToRecord(Buffer: PChar); override;
function IsCursorOpen: Boolean; override;
procedure SetBookmarkFlag(Buffer: PChar; Value: TBookmarkFlag); override;
procedure SetBookmarkData(Buffer: PChar; Data: Pointer); override;
procedure SetFieldData(Field: TField; Buffer: Pointer); override;
protected
{ Additional overrides (optional) }
function GetRecordCount: Integer; override;
function GetRecNo: Integer; override;
procedure SetRecNo(Value: Integer); override;

property SchemaFile: string read FSchemaFile write SetSchemaFile;
property FileMustExist: Boolean read FFileMustExist write SetFileMustExist;
property FirstLineAsSchema: Boolean read FFirstLineAsSchema write
SetFirstLineAsSchema;
public

function GetFieldData(Field: TField; Buffer: Pointer): Boolean; override;
constructor Create(AOwner: TComponent); override;
published
property AutoSaveToFile: Boolean read FAutoSaveToFile write FAutoSaveToFile;
property FileName: string read FFileName write SetFileName;
property Active;
end;

{ TCSVDataSet }

TCSVDataSet = class(TCustomCSVDataSet)
protected
{ Overriden abstract methods }
procedure InternalOpen; override;
function GetRecordSize: Word; override;
function GetRecord(Buffer: PChar; GetMode: TGetMode; DoCheck: Boolean)
: TGetResult; override;
function GetRecordCount: Integer; override;

public
constructor Create(Owner: TComponent); override;
procedure RemoveBlankRecords;
procedure RemoveExtraColumns;
procedure SaveFileAs(strFileName: string);
function GetFieldData(Field: TField; Buffer: Pointer): Boolean; override;
procedure CreateDataSet;

published

property BeforeOpen;
property AfterOpen;
property BeforeClose;
property AfterClose;
property BeforeInsert;
property AfterInsert;
property BeforeEdit;
property AfterEdit;
property BeforePost;
property AfterPost;
property BeforeCancel;
property AfterCancel;
property BeforeDelete;
property AfterDelete;
property BeforeScroll;
property AfterScroll;
property BeforeRefresh;
property AfterRefresh;
property OnCalcFields;
property OnDeleteError;
property OnEditError;
property OnFilterRecord;
property OnNewRecord;
property OnPostError;

property FieldDefs;
property FileMustExist;
property FirstLineAsSchema;
end;

implementation

uses Windows, SysUtils, Forms, Dialogs;

const
DELIMITERS_GAP = 4;

{ TCustomCSVDataSet }

procedure TCustomCSVDataSet.InternalOpen;
var
I : Integer;
begin
//FData := TStringList.Create;
FData := TCSVStringList.Create;
FData.LoadFromFile(FileName);
if FData.Count = 0 then
FData.Add('');
///====================
if FFirstLineAsSchema then
begin
FSchemaLine := FData[0];
FData.Delete(0);
end;
RemoveWhiteLines(FData, True);
///====================
for I := 1 to FData.Count do
FData.Objects[I - 1] := Pointer(I);
FLastBookmark := FData.Count;
FCurRec := -1;

FRecInfoOfs := MaxStrLen;

FRecBufSize := FRecInfoOfs + SizeOf(TRecInfo);

BookmarkSize := SizeOf(Integer);

InternalInitFieldDefs;

if DefaultFields then
CreateFields;

BindFields(True);
end;

procedure TCustomCSVDataSet.InternalClose;
begin
if (FSaveChanges) and (FAutoSaveToFile) then
begin
if FFirstLineAsSchema then
FData.Insert(0, FSchemaLine);
FData.SaveToFile(FileName);
end;
FData.Free;
FData := nil;

if DefaultFields then
DestroyFields;

FLastBookmark := 0;
FCurRec := -1;
end;

function TCustomCSVDataSet.IsCursorOpen: Boolean;
begin
Result := Assigned(FData);
end;

procedure TCustomCSVDataSet.InternalInitFieldDefs;

function _GetField(const AName: string): TFieldDef;
var
I : Integer;
begin
Result := nil;
for I := 0 to FieldDefs.Count - 1 do
begin
if FieldDefs.Name = AName then
begin
Result := FieldDefs;
Exit;
end;
end;
end;

function _GetFieldName(const AName: string): string;
var
I : Integer;
begin
Result := AName;
I := 0;
while _GetField(Result) <> nil do
begin
Result := AName;
Result := Format('Name_%s_%d', [AName, I]);
Inc(I);
end;
end;

var
i, len : Integer;
UseSchema : Boolean;
LstFields : TStrings;
tmpSchema : TStrings;
tmpLen : Integer;
tmpFieldName : string;
begin
FieldDefs.Clear;

FRecordSize := 0;

for i := 0 to FData.Count - 1 do
begin
len := Length(FData.Strings);
if len > FRecordSize then
FRecordSize := len;
end;

if not Assigned(FData) then
exit;

LstFields := TStringList.Create;
tmpSchema := TStringList.Create;

// Load Schema Structure
if (SchemaFile <> '') then
begin
tmpSchema.LoadFromFile(SchemaFile);
RemoveWhiteLines(tmpSchema, FALSE);
if (tmpSchema.Count > 0) then
if StrScan(PChar(tmpSchema.Strings[0]), ',') <> nil then
begin
tmpSchema.CommaText := tmpSchema.Strings[0];
RemoveWhiteLines(tmpSchema, FALSE);
end;
end
else
//if (FData.Count > 0) then
//===============================
if FFirstLineAsSchema then
begin
tmpSchema.CommaText := FSchemaLine;
end
else
begin
tmpSchema.CommaText := FData.Strings[0];
end;
//===============================
UseSchema := (tmpSchema.Count > 0);

if ((not UseSchema) and ((FirstLineAsSchema) or (SchemaFile <> ''))) then
begin
FFirstLineAsSchema := FALSE;
FSchemaFile := '';
end;

// Interpret Schema
i := 1;

tmpLen := FRecordSize;

repeat
// Standardize variables on schema

if not UseSchema then
tmpFieldName := Format('Field%d=%d', [i, tmpLen])
else
begin
tmpFieldName := tmpSchema.Names[i - 1];
if (tmpFieldName = '') then
tmpFieldName := Format('%s=%d', [tmpSchema.Strings[i - 1], tmpLen])
else
tmpFieldName := tmpSchema.Strings[i - 1];
end;

LstFields.Add(tmpFieldName);

Inc(i)

until i > tmpSchema.Count;

tmpSchema.Free;
FRecordSize := 0;

// Add fields
with LstFields do
for i := 0 to Count - 1 do
begin
tmpFieldName := Names;
len := StrToInt(Values[tmpFieldName]);
if Len < 16 then
Len := 16;
tmpFieldName := _GetFieldName(tmpFieldName);
FieldDefs.Add(tmpFieldName, ftString, len, False);
Inc(FRecordSize, len);
Inc(FRecordSize, DELIMITERS_GAP);
end;

LstFields.Free;

if FRecordSize = 0 then
FRecordSize := MAXSTRLEN;

FRecInfoOfs := FRecordSize;
FRecBufSize := FRecInfoOfs + SizeOf(TRecInfo);

end;

procedure TCustomCSVDataSet.InternalHandleException;
begin
Application.HandleException(Self);
end;

procedure TCustomCSVDataSet.InternalGotoBookmark(Bookmark: Pointer);
var
Index : Integer;
begin
Index := FData.IndexOfObject(TObject(PInteger(Bookmark)^));
if Index <> -1 then
FCurRec := Index
else
DatabaseError('Bookmark not found');
end;

procedure TCustomCSVDataSet.InternalSetToRecord(Buffer: PChar);
begin
InternalGotoBookmark(@PRecInfo(Buffer + FRecInfoOfs).Bookmark);
end;

function TCustomCSVDataSet.GetBookmarkFlag(Buffer: PChar): TBookmarkFlag;
begin
Result := PRecInfo(Buffer + FRecInfoOfs).BookmarkFlag;
end;

procedure TCustomCSVDataSet.SetBookmarkFlag(Buffer: PChar; Value:
TBookmarkFlag);
begin
PRecInfo(Buffer + FRecInfoOfs).BookmarkFlag := Value;
end;

procedure TCustomCSVDataSet.GetBookmarkData(Buffer: PChar; Data: Pointer);
begin
PInteger(Data)^ := PRecInfo(Buffer + FRecInfoOfs).Bookmark;
end;

procedure TCustomCSVDataSet.SetBookmarkData(Buffer: PChar; Data: Pointer);
begin
PRecInfo(Buffer + FRecInfoOfs).Bookmark := PInteger(Data)^;
end;

function TCustomCSVDataSet.GetRecordSize: Word;
begin
Result := MaxStrLen;
end;

function TCustomCSVDataSet.AllocRecordBuffer: PChar;
begin
GetMem(Result, FRecBufSize);
end;

procedure TCustomCSVDataSet.FreeRecordBuffer(var Buffer: PChar);
begin
FreeMem(Buffer, FRecBufSize);
end;

function TCustomCSVDataSet.GetRecord(Buffer: PChar; GetMode: TGetMode;
DoCheck: Boolean): TGetResult;
begin
if FData.Count < 1 then
Result := grEOF
else
begin
Result := grOK;
case GetMode of
gmNext:
if FCurRec >= RecordCount - 1 then
Result := grEOF
else
Inc(FCurRec);
gmPrior:
if FCurRec <= 0 then
Result := grBOF
else
Dec(FCurRec);
gmCurrent:
if (FCurRec < 0) or (FCurRec >= RecordCount) then
Result := grError;
end;
if Result = grOK then
begin
StrLCopy(Buffer, PChar(FData[FCurRec]), MaxStrLen);
with PRecInfo(Buffer + FRecInfoOfs)^ do
begin
BookmarkFlag := bfCurrent;
Bookmark := Integer(FData.Objects[FCurRec]);
end;
end
else
if (Result = grError) and DoCheck then
DatabaseError('No Records');
end;
end;

procedure TCustomCSVDataSet.InternalInitRecord(Buffer: PChar);
begin
FillChar(Buffer^, RecordSize, 0);
end;

function TCustomCSVDataSet.GetFieldData(Field: TField; Buffer: Pointer):
Boolean;
begin
StrLCopy(Buffer, ActiveBuffer, Field.Size);
Result := PChar(Buffer)^ <> #0;
end;

procedure TCustomCSVDataSet.SetFieldData(Field: TField; Buffer: Pointer);
var
Temp : TStrings;
i : Integer;
begin
Temp := TStringList.Create;
Temp.CommaText := ActiveBuffer;

for i := Temp.Count to Field.FieldNo - 1 do
Temp.Add('');

Temp.Strings[Field.FieldNo - 1] := Copy(PChar(Buffer), 1, Field.DataSize);

StrLCopy(ActiveBuffer, PChar(Temp.CommaText), FRecordSize);
DataEvent(deFieldChange, Longint(Field));

Temp.Free;
end;

procedure TCustomCSVDataSet.InternalFirst;
begin
FCurRec := -1;
end;

procedure TCustomCSVDataSet.InternalLast;
begin
FCurRec := FData.Count;
end;

procedure TCustomCSVDataSet.InternalPost;
begin
FSaveChanges := True;
if State = dsEdit then
FData[FCurRec] := ActiveBuffer
else
begin
Inc(FLastBookmark);
//FData.InsertObject(FCurRec, ActiveBuffer, Pointer(FLastBookmark));
FData.InsertObject(RecordCount, ActiveBuffer, Pointer(FLastBookmark));
end;
end;

procedure TCustomCSVDataSet.InternalAddRecord(Buffer: Pointer; Append:
Boolean);
begin
FSaveChanges := True;
Inc(FLastBookmark);
if Append then
InternalLast;
FData.InsertObject(FCurRec, PChar(Buffer), Pointer(FLastBookmark));
end;

procedure TCustomCSVDataSet.InternalDelete;
begin
FSaveChanges := True;
FData.Delete(FCurRec);
if FCurRec >= FData.Count then
Dec(FCurRec);
end;

function TCustomCSVDataSet.GetRecordCount: Longint;
begin
Result := FData.Count;
end;

function TCustomCSVDataSet.GetRecNo: Longint;
begin
UpdateCursorPos;
if (FCurRec = -1) and (RecordCount > 0) then
Result := 1
else
Result := FCurRec + 1;
end;

procedure TCustomCSVDataSet.SetRecNo(Value: Integer);
begin
if (Value >= 0) and (Value < FData.Count) then
begin
FCurRec := Value - 1;
Resync([]);
end;
end;

{ TcsvTextDataSet }

constructor TCSVDataSet.Create(Owner: TComponent);
begin
inherited Create(Owner);
FFileMustExist := True;
FFirstLineAsSchema := True;
end;

procedure TCustomCSVDataSet.SetFileMustExist(Value: Boolean);
begin
if ((Active) or (FFileMustExist = Value)) then
exit;

FFileMustExist := Value;
end;

procedure TCustomCSVDataSet.SetFirstLineAsSchema(Value: Boolean);
begin
if (FFirstLineAsSchema = Value) then
Exit;
if (Active) then
begin
if csDesigning in ComponentState then
begin
ShowMessage('请先把数据及关闭才能够设置FirstLineAsSchema属性!');
end;
Exit;
end;
FFirstLineAsSchema := Value;

if FFirstLineAsSchema then
FSchemaFile := '';
end;

procedure TCustomCSVDataSet.SetSchemaFile(Value: string);
begin
if ((Active) or (FSchemaFile = Value)) then
exit;

FSchemaFile := Value;

if (FSchemaFile <> '') then
FFirstLineAsSchema := FALSE;
end;

procedure TCustomCSVDataSet.RemoveWhiteLines(List: TStrings; IsFileRecord:
Boolean);
var
i : integer;
begin
for i := List.Count - 1 downto 0 do
if (Trim(List.Strings) = '') then
if IsFileRecord then
begin

FCurRec := i;
InternalDelete;
end
else
List.Delete(i);
end;

procedure TCSVDataSet.RemoveBlankRecords;
begin
RemoveWhiteLines(FData, TRUE);
end;

procedure TCSVDataSet.RemoveExtraColumns;
var
i : Integer;
Temp : TStrings;
begin
Temp := TStringList.Create;

for i := 1 to FData.Count do
begin
Temp.CommaText := FData.Strings[i - 1];
if Temp.Count > FieldDefs.Count then // Remove columns at the end
begin
while Temp.Count > FieldDefs.Count do
Temp.Delete(Temp.Count - 1);

FData.Strings[i - 1] := Temp.CommaText;
end;
end;

Temp.Free;

FData.SaveToFile(FileName);
end;

procedure TCSVDataSet.SaveFileAs(strFileName: string);
begin
if FFirstLineAsSchema then
FData.insert(0, FSchemaLine);
FData.SaveToFile(strFileName);
inherited FileName := strFileName;
end;

procedure TCSVDataSet.InternalOpen;
var
Stream : TStream;
begin
if (not FileMustExist) and (not FileExists(FileName)) then
begin
Stream := TFileStream.Create(FileName, fmCreate);
Stream.Free;
end;

inherited InternalOpen;
end;

function TCSVDataSet.GetRecordSize: Word;
begin
Result := FRecordSize;
end;

function TCSVDataSet.GetFieldData(Field: TField; Buffer: Pointer): Boolean;
var
Temp : TStrings;
begin
if Buffer = nil then
begin
Result := FALSE;
Exit;
end;
if FData.Count = 0 then
Result := FALSE
else
begin
Temp := TStringList.Create;
Temp.CommaText := ActiveBuffer;

if ((Field.FieldNo > 0) and (Field.FieldNo <= Temp.Count)) then
StrLCopy(PChar(Buffer), PChar(Temp[Field.FieldNo - 1]), Field.DataSize)
else
StrCopy(PChar(Buffer), #0);

Temp.Free;

Result := PChar(Buffer)^ <> #0;
end;
end;


function TCSVDataSet.GetRecord(Buffer: PChar; GetMode: TGetMode;
DoCheck: Boolean): TGetResult;
begin
Result := grOk;

{if (FirstLineAsSchema) then // Avoid showing titles when FirstLineAsSchema
case GetMode of
gmNext:
if FCurRec >= RecordCount - 1 then
Result := grEOF
else
if FCurRec < 1 then
FCurRec := 0;
gmPrior:
if FCurRec <= 1 then
Result := grBOF;
end;
}
if (Result = grOk) then
Result := inherited GetRecord(Buffer, GetMode, DoCheck);

end;

procedure TCustomCSVDataSet.SetFileName(const Value: string);
begin
if FFileName = Value then
Exit;
if Active then
begin
if csDesigning in Self.ComponentState then
begin
ShowMessage('必须先将数据集关闭才能设置FileName属性!');
end;
Exit;
end;
FFileName := Value;
end;

constructor TCustomCSVDataSet.Create(AOwner: TComponent);
begin
inherited Create(AOwner);
FAutoSaveToFile := False;
end;

{ TCSVStringList }

procedure TCSVStringList.SetTextStr(const Value: string);
var
DoubleInver : Boolean; //引号是否是双数
P, Start : PChar;
S : string;
begin
BeginUpdate;
try
Clear;
P := Pointer(Value);
if P <> nil then
while P^ <> #0 do
begin
Start := P;
DoubleInver := True;
while True do
begin
case P^ of
'"': //遇到引号做个但双数的标识
DoubleInver := not DoubleInver;
#0:
break;
#10: //回车,如果引号是双数就说明是转换到下一行,单数就是单元格内的回车
begin
if DoubleInver then
Break;
end;
end;
Inc(P);
end;
if (P - 1)^ = #13 then
SetString(S, Start, P - Start - 1)
else
SetString(S, Start, P - Start);
Add(S);
if P^ = #10 then
Inc(P);
end;
finally
EndUpdate;
end;
end;

procedure TCSVDataSet.CreateDataSet;
var
I : Integer;
DBFile : TStrings;
AFieldNams : string;
begin
if (Self.Active) then
begin
if csDesigning in Self.ComponentState then
ShowMessage('数据源不能是活动的!');
Exit;
end;
if (FFileName = '') then
begin
ShowMessage('必须指定文件名!');
Exit;
end;
if FieldDefs.Count = 0 then
begin
ShowMessage('FieldDefs是空的!');
Exit;
end;
DBFile := TCSVStringList.Create;
try
AFieldNams := '';
for I := 0 to FieldDefs.Count - 1 do
begin
if I = 0 then
AFieldNams := FieldDefs.Name
else
AFieldNams := AFieldNams + ',' + FieldDefs.Name;
end;
DBFile.Add(AFieldNams);
DBFile.SaveToFile(FFileName);
FieldDefs.Clear;
finally
DBFile.Free;
end;
Active := True;
end;

function TCSVDataSet.GetRecordCount: Integer;
begin
//if FFirstLineAsSchema then
// Result := FData.Count - 1
//else
Result := inherited GetRecordCount;
end;

end.

 
好,试试。
 
好,看看!
 

Similar threads

S
回复
0
查看
610
SUNSTONE的Delphi笔记
S
S
回复
0
查看
610
SUNSTONE的Delphi笔记
S
I
回复
0
查看
570
import
I
顶部