好长的一段。南京的?雷软?没听过……
function DrawBmp(SrcBmp, DetBmp: TBitmap; SrcRect: TRect; DetPoint: TPoint; DrawStyle: Integer; Alpha:Byte; CPUCodeStyle: Integer; NeedEMMS: Boolean):Integer;
var
// DetWidth, DetHeight, SrcWidth, SrcHeight: Integer;
BytePerRowDet, BytePerRowSrc: Integer;
// SrcRect, ReginDet: TRect;
DetRect: TRect;
ReginWidth, ReginHeight: Integer;
。。。。。。
{ Draw_MixAlpha_MMX

  Blends a ReginWidth x ReginHeight region of 4-byte pixels from SrcBmp
  (starting at SrcRect.Left / SrcRect.Top) onto DetBmp (starting at
  DetRect.Left / DetRect.Top) using MMX instructions. All inputs are read
  from the enclosing function's parameters and locals; nothing is returned,
  the destination pixels are modified in place.

  Per pixel, with every channel widened to 16 bits:
    S    := (Src * Alpha) shr 8          (all four channels, incl. alpha)
    A2   := S.alpha
    S.alpha := 255
    Det  := ((Det * (255 - A2)) shr 8) + ((S * A2) shr 8)   (saturating)

  Notes:
    - Does nothing when Alpha = 0 or when the region is empty.
    - Division is approximated by "shr 8" (i.e. /256 rather than /255);
      the original author flagged this as something to revisit.
    - This procedure does not execute EMMS itself.
      NOTE(review): presumably the caller issues EMMS according to the
      NeedEMMS parameter - confirm.
    - Rows are advanced by SUBTRACTING BytePerRowDet / BytePerRowSrc from the
      row pointers. NOTE(review): this assumes consecutive scanlines lie at
      decreasing addresses (bottom-up DIB) with positive strides - confirm
      against the code that computes BytePerRowDet / BytePerRowSrc.
    - 32-bit only: pointers are cast to Integer and 32-bit registers are used. }
procedure Draw_MixAlpha_MMX;
label
  NextLine, NextPixel;
var
  pDet, pSrc: PByteArray;
  iReginWidth, iReginHeight: Integer;
  iSubBytesDet, iSubBytesSrc: Integer;
  iAlpha: Byte;
begin
  // Fully transparent source: destination stays untouched.
  if Alpha=0 then Exit;
  // Both asm loops test their counter only AFTER the first iteration, so an
  // empty or negative region must be rejected before entering them.
  if (ReginWidth<=0) or (ReginHeight<=0) then Exit;

  // Address of the first destination / source pixel (4 bytes per pixel).
  pDet:=DetBmp.ScanLine[DetRect.Top];
  Integer(pDet):=Integer(pDet)+(DetRect.Left)*4;
  pSrc:=SrcBmp.ScanLine[SrcRect.Top];
  Integer(pSrc):=Integer(pSrc)+(SrcRect.Left)*4;

  // Private copies so the asm block only touches this procedure's own locals.
  iReginWidth:=ReginWidth;
  iReginHeight:=ReginHeight;   // also used as the row countdown below
  iSubBytesDet:=BytePerRowDet;
  iSubBytesSrc:=BytePerRowSrc;
  iAlpha:=Alpha;

  asm
    // mm7 = 0 (used to zero-extend bytes to words)
    pxor      mm7, mm7

    // mm6 = $00FF000000000000 : the word $00FF in the alpha lane only
    mov       eax, $FF000000
    movd      mm6, eax
    punpcklbw mm6, mm7

    // mm4 = (Alpha, Alpha, Alpha, Alpha) as four 16-bit words.
    // movzx clears the upper bits of EDX; loading only DL would leave
    // whatever was in DH inside the broadcast word and corrupt the multiply.
    movzx     edx, byte ptr iAlpha
    movd      mm4, edx
    psllq     mm4, 48
    punpckhwd mm4, mm4
    punpckhwd mm4, mm4

    // ecx -> current destination row, edx -> current source row
    mov       ecx, dword ptr [pDet]
    mov       edx, dword ptr [pSrc]

    // Row loop: runs iReginHeight times (counted down in memory so that
    // EBX does not have to be used).
  NextLine:
    // Pixel loop: eax = X, from 0 to iReginWidth-1
    xor       eax, eax
  NextPixel:
    // mm0 = destination pixel, widened to 4 words
    movd      mm0, dword ptr [ecx+eax*4]
    punpcklbw mm0, mm7
    // mm1 = source pixel, widened to 4 words
    movd      mm1, dword ptr [edx+eax*4]
    punpcklbw mm1, mm7

    // mm1 = Src * Alpha / 256 (global alpha applied to every channel)
    pmullw    mm1, mm4
    psrlw     mm1, 8

    // A check of the source/destination alpha for zero, meant to avoid the
    // 255/256 rounding error, was drafted here by the original author but
    // left disabled ("to be revisited").

    // mm2 = (A2, A2, A2, A2) where A2 is the scaled source alpha
    movq      mm2, mm1
    punpckhwd mm2, mm2
    punpckhdq mm2, mm2

    // mm3 = (255-A2, 255-A2, 255-A2, 255-A2); xor with $FF in the alpha lane
    movq      mm3, mm1
    pxor      mm3, mm6
    punpckhwd mm3, mm3
    punpckhdq mm3, mm3

    // mm1 = (255, R2, G2, B2): force the source alpha lane to 255
    por       mm1, mm6

    // Det * (255 - A2)
    pmullw    mm0, mm3
    // Src * A2
    pmullw    mm1, mm2
    // both terms / 256
    psrlw     mm0, 8
    psrlw     mm1, 8
    // sum of the two terms, saturating
    paddusw   mm0, mm1

    // 4 words -> 4 bytes (saturating) and store back to the destination
    packuswb  mm0, mm0
    movd      dword ptr [ecx+eax*4], mm0

    // next X
    inc       eax
    cmp       eax, iReginWidth
    jnz       NextPixel

    // step both row pointers to the next scanline
    sub       ecx, iSubBytesDet
    sub       edx, iSubBytesSrc

    // next Y
    dec       dword ptr iReginHeight
    jnz       NextLine
  end;
end;
……主程序分块调用它。