526 lines
8.9 KiB
C++
526 lines
8.9 KiB
C++
// asmcoder.h
|
|
// by simon@acid.co.nz
|
|
|
|
// v0.5
|
|
// point sets unused bits to 1
|
|
|
|
// v0.4
|
|
// direct pixel address now expected
|
|
// x,y args removed span removed
|
|
|
|
// v0.3
|
|
// span changed to bytes
|
|
// pixfield indirect dropped, passed directly to plot
|
|
// ret(n) added to remove args from fastcall stack
|
|
// CodePoint() method added
|
|
|
|
/*
|
|
|
|
asmcoder provides inline assembly generator for high speed runtime tasks
|
|
|
|
methods:
|
|
|
|
CodePlot(codebase,depth,amask,rmask,gmask,bmask)
|
|
CodePoint(codebase,depth,amask,rmask,gmask,bmask)
|
|
CodeSpan(codebase,depth,amask,rmask,gmask,bmask)
|
|
|
|
codebase=executable memory (64 bytes)
|
|
depth=pixel depth (8,16,24,32)
|
|
amask=32bit alpha pixel mask
|
|
rmask=32bit red pixel mask
|
|
gmask=32bit green pixel mask
|
|
bmask=32bit blue pixel mask
|
|
|
|
returns number of bytes of code generated (max 64 bytes)
|
|
|
|
call resulting code by typecasting codebase with
|
|
|
|
void (__fastcall *plot)(void *pix,int argb)
|
|
int (__fastcall *point)(void *pix)
|
|
void (__fastcall *span)(void *pix,int *argb,int count)
|
|
|
|
ToDo:
|
|
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
class IA32 {
|
|
public:
|
|
char* ptr;
|
|
int off;
|
|
|
|
enum Reg32 { eax, ecx, edx, ebx, esp, ebp, esi, edi };
|
|
|
|
void Reset(void* p)
|
|
{
|
|
ptr = (char*)p;
|
|
off = 0;
|
|
}
|
|
|
|
void Code(int c)
|
|
{
|
|
if (ptr)
|
|
ptr[off] = c;
|
|
off++;
|
|
}
|
|
void Code16(int c)
|
|
{
|
|
if (ptr)
|
|
*(short*)(ptr + off) = c;
|
|
off += 2;
|
|
}
|
|
void Code32(int c)
|
|
{
|
|
if (ptr)
|
|
*(int*)(ptr + off) = c;
|
|
off += 4;
|
|
}
|
|
|
|
void push(Reg32 reg)
|
|
{
|
|
Code(0x50 + reg);
|
|
}
|
|
void pop(Reg32 reg)
|
|
{
|
|
Code(0x58 + reg);
|
|
}
|
|
void ret(int n = 0)
|
|
{
|
|
if (n == 0)
|
|
Code(0xc3);
|
|
else {
|
|
Code(0xc2);
|
|
Code(n);
|
|
Code(0);
|
|
}
|
|
}
|
|
void mov(Reg32 dest, Reg32 src)
|
|
{
|
|
Code(0x8b);
|
|
Code((0xc0) | (dest << 3) | (src));
|
|
}
|
|
void or (Reg32 dest, Reg32 src)
|
|
{
|
|
Code(0x0b);
|
|
Code((0xc0) | (dest << 3) | (src));
|
|
}
|
|
void add(Reg32 dest, Reg32 src)
|
|
{
|
|
Code(0x03);
|
|
Code((0xc0) | (dest << 3) | (src));
|
|
}
|
|
void load32(Reg32 dest, Reg32 src, int disp = 0)
|
|
{
|
|
if (disp == 0 && src != ebp) {
|
|
Code(0x8b);
|
|
Code((dest << 3) | (src));
|
|
} else {
|
|
Code(0x8b);
|
|
Code((0x40) | (dest << 3) | (src));
|
|
Code(disp);
|
|
}
|
|
}
|
|
void load16(Reg32 dest, Reg32 src, int disp = 0)
|
|
{
|
|
Code(0x66);
|
|
load32(dest, src, disp);
|
|
}
|
|
void load8(Reg32 dest, Reg32 src, int disp = 0)
|
|
{
|
|
Code(0x8a);
|
|
if (disp == 0 && src != ebp) {
|
|
Code((dest << 3) | (src));
|
|
} else {
|
|
Code((0x40) | (dest << 3) | (src));
|
|
Code(disp);
|
|
}
|
|
}
|
|
void store32(Reg32 dest, int disp, Reg32 src)
|
|
{
|
|
if (disp == 0 && dest != ebp) {
|
|
Code(0x89);
|
|
Code((src << 3) | (dest));
|
|
Code(0);
|
|
} else {
|
|
Code(0x89);
|
|
Code((0x40) | (src << 3) | (dest));
|
|
Code(disp);
|
|
}
|
|
}
|
|
void store16(Reg32 dest, int disp, Reg32 src)
|
|
{
|
|
Code(0x66);
|
|
store32(dest, disp, src);
|
|
}
|
|
void store8(Reg32 dest, int disp, Reg32 src)
|
|
{
|
|
Code(0x88);
|
|
if (disp == 0 && dest != ebp) {
|
|
Code((src << 3) | (dest));
|
|
Code(0);
|
|
} else {
|
|
Code((0x40) | (src << 3) | (dest));
|
|
Code(disp);
|
|
}
|
|
}
|
|
void lea(Reg32 dest, int scale, Reg32 src1, Reg32 src2, int disp = 0)
|
|
{
|
|
int n, mod, sib;
|
|
if (disp == 0)
|
|
n = 0;
|
|
else {
|
|
if (disp >= -128 && disp < 128)
|
|
n = 1;
|
|
else
|
|
n = 2;
|
|
}
|
|
mod = (n << 6) | 4;
|
|
sib = (src1 << 3) | src2;
|
|
if (scale == 2)
|
|
sib |= 0x40;
|
|
if (scale == 4)
|
|
sib |= 0x80;
|
|
if (scale == 8)
|
|
sib |= 0xc0;
|
|
Code(0x8d);
|
|
Code(mod);
|
|
Code(sib);
|
|
if (n == 1)
|
|
Code(disp);
|
|
else if (n == 2)
|
|
Code32(disp);
|
|
}
|
|
void lea(Reg32 dest, int scale, Reg32 src, int disp = 0)
|
|
{
|
|
int mod, sib;
|
|
sib = (src << 3) | 5;
|
|
mod = (dest << 3) | 4;
|
|
if (scale == 2)
|
|
sib |= 0x40;
|
|
if (scale == 4)
|
|
sib |= 0x80;
|
|
if (scale == 8)
|
|
sib |= 0xc0;
|
|
Code(0x8d);
|
|
Code(mod);
|
|
Code(sib);
|
|
Code32(disp);
|
|
}
|
|
void imul(Reg32 dest, int imm, Reg32 src)
|
|
{
|
|
Code(0x69);
|
|
Code((0xc0) | (dest << 3) | (src));
|
|
Code32(imm);
|
|
}
|
|
void shift(Reg32 reg, int imm) //+imm=left -imm=right
|
|
{
|
|
int op;
|
|
if (imm == 0)
|
|
return;
|
|
if (imm > 0) {
|
|
op = 0xe0 + reg;
|
|
} else {
|
|
op = 0xe8 + reg;
|
|
imm = -imm;
|
|
}
|
|
if (imm == 1) {
|
|
Code(0xd1);
|
|
Code(op);
|
|
} else {
|
|
Code(0xc1);
|
|
Code(op);
|
|
Code(imm);
|
|
}
|
|
}
|
|
void and (Reg32 reg, int imm)
|
|
{
|
|
if (imm == 0xffffffff)
|
|
return;
|
|
if (imm >= -128 && imm < 128) {
|
|
Code(0x83);
|
|
Code(0xe0 + reg);
|
|
Code(imm);
|
|
} else {
|
|
if (reg == eax) {
|
|
Code(0x25);
|
|
} else {
|
|
Code(0x81);
|
|
Code(0xe0 + reg);
|
|
}
|
|
Code32(imm);
|
|
}
|
|
}
|
|
void or (Reg32 reg, int imm)
|
|
{
|
|
if (imm == 0)
|
|
return;
|
|
if (imm >= -128 && imm < 128) {
|
|
Code(0x83);
|
|
Code(0xc0 + reg);
|
|
Code(imm);
|
|
} else {
|
|
if (reg == eax) {
|
|
Code(0x0d);
|
|
} else {
|
|
Code(0x81);
|
|
Code(0xc0 + reg);
|
|
}
|
|
Code32(imm);
|
|
}
|
|
}
|
|
void jnz(int label)
|
|
{
|
|
Code(0x75);
|
|
Code(label - (off + 1));
|
|
}
|
|
void add(Reg32 reg, int imm)
|
|
{
|
|
Code(0x83);
|
|
Code(0xc0 + reg);
|
|
Code(imm);
|
|
}
|
|
void neg(Reg32 reg)
|
|
{
|
|
Code(0xf7);
|
|
Code(0xd8 + reg);
|
|
}
|
|
};
|
|
|
|
class AsmCoder : IA32 {
|
|
public:
|
|
int amsb, rmsb, gmsb, bmsb;
|
|
|
|
void CalcMSBs(int amask, int rmask, int gmask, int bmask)
|
|
{
|
|
unsigned int u;
|
|
amsb = 0;
|
|
if (u = amask) {
|
|
while (u != 1) {
|
|
u >>= 1;
|
|
amsb++;
|
|
}
|
|
}
|
|
rmsb = 0;
|
|
if (u = rmask) {
|
|
while (u != 1) {
|
|
u >>= 1;
|
|
rmsb++;
|
|
}
|
|
}
|
|
gmsb = 0;
|
|
if (u = gmask) {
|
|
while (u != 1) {
|
|
u >>= 1;
|
|
gmsb++;
|
|
}
|
|
}
|
|
bmsb = 0;
|
|
if (u = bmask) {
|
|
while (u != 1) {
|
|
u >>= 1;
|
|
bmsb++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ecx=void *t pixel
|
|
// edx=int argb
|
|
|
|
int CodePlot(void* code, int depth, int amask, int rmask, int gmask, int bmask)
|
|
{
|
|
Reset(code);
|
|
CalcMSBs(amask, rmask, gmask, bmask);
|
|
|
|
push(ebx);
|
|
push(ebp);
|
|
mov(eax, edx);
|
|
mov(ebp, ecx);
|
|
if (rmask == 0xff0000 && gmask == 0xff00 && bmask == 0xff) {
|
|
if (amask == 0 && depth > 24)
|
|
and(eax, 0xffffff);
|
|
} else {
|
|
mov(ebx, eax); //eax=b ebx=g ecx=r edx=a
|
|
shift(eax, bmsb - 7); //3-0);
|
|
mov(ecx, ebx);
|
|
shift(ebx, gmsb - 15);
|
|
if (amask)
|
|
mov(edx, ecx); //alph
|
|
if (amask)
|
|
shift(edx, amsb - 31); //alph
|
|
and(eax, bmask);
|
|
shift(ecx, rmsb - 23);
|
|
if (amask)
|
|
and(edx, amask); //alph
|
|
and(ebx, gmask);
|
|
if (amask)
|
|
or (eax, edx); //alph
|
|
and(ecx, rmask);
|
|
or (eax, ebx);
|
|
or (eax, ecx);
|
|
}
|
|
switch (depth) {
|
|
case 16:
|
|
store16(ebp, 0, eax);
|
|
break;
|
|
// case 24:store16(ebp,0,eax);shift(eax,-16);store8(ebp,2,eax);break;
|
|
case 24:
|
|
store8(ebp, 0, eax);
|
|
shift(eax, -8);
|
|
store16(ebp, 1, eax);
|
|
break;
|
|
case 32:
|
|
store32(ebp, 0, eax);
|
|
break;
|
|
default:
|
|
store8(ebp, 0, eax);
|
|
}
|
|
pop(ebp);
|
|
pop(ebx);
|
|
ret();
|
|
return off;
|
|
}
|
|
|
|
// ecx=void*pix
|
|
|
|
int CodePoint(void* code, int depth, int amask, int rmask, int gmask, int bmask)
|
|
{
|
|
Reset(code);
|
|
CalcMSBs(amask, rmask, gmask, bmask);
|
|
|
|
push(ebx);
|
|
switch (depth) {
|
|
case 16:
|
|
load16(eax, ecx);
|
|
break;
|
|
// case 24:load16(eax,ecx);shift(eax,-16);load8(eax,ecx,2);break;
|
|
case 24:
|
|
load16(eax, ecx, 1);
|
|
shift(eax, 8);
|
|
load8(eax, ecx);
|
|
break;
|
|
case 32:
|
|
load32(eax, ecx);
|
|
break;
|
|
default:
|
|
load8(eax, ecx);
|
|
}
|
|
if ((amask == 0 || amask == 0xff000000) && (rmask == 0xff0000 && gmask == 0xff00 && bmask == 0xff)) {
|
|
//
|
|
} else {
|
|
mov(ebx, eax); //eax=b ebx=g ecx=r edx=a
|
|
mov(ecx, eax);
|
|
if (amask)
|
|
mov(edx, eax);
|
|
and(eax, bmask);
|
|
shift(eax, 7 - bmsb);
|
|
and(ebx, gmask);
|
|
shift(ebx, 15 - gmsb);
|
|
and(ecx, rmask);
|
|
shift(ecx, 23 - rmsb);
|
|
if (amask)
|
|
and(edx, amask);
|
|
or (eax, ebx);
|
|
if (amask)
|
|
shift(edx, 31 - amsb);
|
|
or (eax, ecx);
|
|
if (amask)
|
|
or (eax, edx);
|
|
}
|
|
pop(ebx);
|
|
int oor = 0;
|
|
if (!amask)
|
|
oor |= 0xff000000;
|
|
if (!rmask)
|
|
oor |= 0x00ff0000;
|
|
if (!gmask)
|
|
oor |= 0x0000ff00;
|
|
if (!bmask)
|
|
oor |= 0x000000ff;
|
|
if (oor)
|
|
or (eax, oor);
|
|
ret();
|
|
return off;
|
|
}
|
|
|
|
// ecx=void *t pixel
|
|
// edx=int *argb
|
|
// 8(esp)=count
|
|
|
|
int CodeSpan(void* code, int depth, int amask, int rmask, int gmask, int bmask)
|
|
{
|
|
int loop;
|
|
|
|
Reset(code);
|
|
CalcMSBs(amask, rmask, gmask, bmask);
|
|
|
|
push(ebp);
|
|
mov(ebp, esp);
|
|
push(ebx);
|
|
push(esi);
|
|
push(edi);
|
|
|
|
load32(edi, ebp, 8); //edi=count
|
|
mov(esi, edx); //esi=[argb]
|
|
mov(ebp, ecx); //ebp=[pix]
|
|
neg(edi);
|
|
|
|
loop = off;
|
|
// loop
|
|
load32(eax, esi);
|
|
add(esi, 4);
|
|
|
|
if (rmask == 0xff0000 && gmask == 0xff00 && bmask == 0xff) {
|
|
if (amask == 0 && depth > 24)
|
|
and(eax, 0xffffff);
|
|
} else {
|
|
mov(ebx, eax); //eax=b ebx=g ecx=r edx=a
|
|
shift(eax, bmsb - 7); //3-0);
|
|
mov(ecx, ebx);
|
|
shift(ebx, gmsb - 15);
|
|
if (amask)
|
|
mov(edx, ecx); //alph
|
|
if (amask)
|
|
shift(edx, amsb - 31); //alph
|
|
and(eax, bmask);
|
|
shift(ecx, rmsb - 23);
|
|
if (amask)
|
|
and(edx, amask); //alph
|
|
and(ebx, gmask);
|
|
if (amask)
|
|
or (eax, edx); //alph
|
|
and(ecx, rmask);
|
|
or (eax, ebx);
|
|
or (eax, ecx);
|
|
}
|
|
switch (depth) {
|
|
case 16:
|
|
store16(ebp, 0, eax);
|
|
add(ebp, 2);
|
|
break;
|
|
case 24:
|
|
store8(ebp, 0, eax);
|
|
shift(eax, -8);
|
|
store16(ebp, 1, eax);
|
|
add(ebp, 3);
|
|
break;
|
|
case 32:
|
|
store32(ebp, 0, eax);
|
|
add(ebp, 4);
|
|
break;
|
|
default:
|
|
store8(ebp, 0, eax);
|
|
add(ebp, 1);
|
|
}
|
|
add(edi, 1);
|
|
jnz(loop);
|
|
|
|
pop(edi);
|
|
pop(esi);
|
|
pop(ebx);
|
|
pop(ebp);
|
|
ret(4);
|
|
return off;
|
|
}
|
|
};
|