嵌入式系统中的代码压缩(二)
/*
 * Decompress a flag-driven LZ stream from src into dst.
 *
 * Stream layout: a flag byte supplies eight control bits, consumed
 * least-significant bit first; a new flag byte is fetched from src as
 * soon as the previous eight bits have been used up.
 *   1    -> one raw byte follows; it is copied to the output
 *   0,1  -> long match, two bytes b0 b1:
 *           length = (b0 >> 4) + 3, offset = ((b0 << 8) | b1) & 0x0fff
 *   0,0  -> short match, one byte b0:
 *           length = (b0 >> 6) + 2, offset = b0 & 0x3f;
 *           b0 == 0xff terminates the stream
 * A match copies `length` bytes starting offset + 1 bytes behind the
 * current output position.
 *
 * dst: output buffer, src: compressed input.
 * Returns the number of bytes written to dst.
 *
 * Neither buffer is bounds-checked: the function has no size
 * parameters, so the caller must supply a well-formed stream and a
 * dst buffer large enough for the decompressed data.
 */
int _decompress(u8 *dst, u8 *src)
{
    int i, sp, dp;
    int flag, offset, len;
    u8 *pbuf;

    sp = 0;
    dp = 0;
    /* 0x0100 is the "control bits exhausted" marker; starting with it
     * set forces a flag byte to be loaded on the first iteration. */
    flag = 0x0100;
    while(1){
        if(flag&0x0100){
            flag = src[sp++];
            if(d_debug) printf("flag %02x at %04x\n", flag, sp-1);
            /* Sentinel bit: after eight right shifts it reaches 0x0100. */
            flag |= 0x00010000;
        }
        if(flag&1){
            /* raw byte */
            if(d_debug) printf("%04x raw: %02x\n", dp, src[sp]);
            dst[dp++] = src[sp++];
            flag >>= 1;
        }else{
            flag >>= 1;
            /* The second control bit may live in the next flag byte. */
            if(flag&0x0100){
                flag = src[sp++];
                if(d_debug) printf("flag %02x at %04x\n", flag, sp-1);
                flag |= 0x00010000;
            }
            offset = src[sp++];
            if(flag&1){
                /* 01: long format */
                len = offset>>4;
                len += 3;
                offset <<= 8;
                offset |= src[sp++];
                offset &= 0x0fff;
                if(d_debug) printf("%04x long: pos=%4d len=%2d\n", dp, offset+1, len);
            }else{
                /* 00: short format */
                if(offset==0xff)
                    break;
                len = (offset>>6);
                len += 2;
                offset &= 0x3f;
                if(d_debug) printf("%04x short: pos=%4d len=%2d\n", dp, offset+1, len);
            }
            flag >>= 1;
            pbuf = &dst[dp-offset-1];
            /* Copy byte by byte in ascending order: source and
             * destination overlap when offset + 1 < len, and the copy
             * must then re-read bytes it has just written. */
            for(i=0; i<len; i++)
                dst[dp++] = pbuf[i];
        }
    }
    return dp;
}
PowerPC汇编实现的解压代码:
[html]
/*
int tlz_decomp(u8 *dst, u8 *src);

PowerPC decompressor for a flag-driven LZ stream. Each flag byte
supplies eight control bits, consumed least-significant bit first:
  1    -> copy one raw byte from the input to the output
  0,1  -> long match, two token bytes: length = (first >> 4) + 3,
          offset = low nibble of first byte (bits 8..11) | second byte
  0,0  -> short match, one token byte: length = (byte >> 6) + 2,
          offset = byte & 0x3f; a token byte of 0xff ends the stream
A match copies `length` bytes starting offset + 1 bytes behind the
current output position.

Register usage:
r3: dst-1 on entry; afterwards the address of the last byte written
r4: src-1 on entry; afterwards the address of the last byte read
r5: flag (remaining control bits plus the reload marker)
r6: scratch: raw byte, first token byte, or byte being copied
r8: match offset, then the match source pointer
r7: match len
r0: scratch target for the andi. bit tests
CTR: copy loop counter

NOTE(review): no instruction converts r3 into a byte count before
returning, so r3 holds an address on return rather than the length
the C version returns - confirm what callers expect.
*/
tlz_decomp:
/* 0x0100 is the "control bits exhausted" marker, so the first pass loads a flag byte. */
li r5, 0x0100
_main_loop:
/* Fetch a new flag byte once the marker has been shifted down to 0x0100. */
andi. r0, r5, 0x0100
beq 1f
lbzu r5, 1(r4)
/* oris sets bit 0x00010000; eight right shifts later it lands on 0x0100. */
oris r5, r5, 0x0001
1:
/* Test the next control bit, then consume it. srawi without the record
   dot leaves CR0 untouched, so beq still acts on the andi. result. */
andi. r0, r5, 0x01
srawi r5, r5, 1
beq _match
_raw_byte:
/* Control bit 1: copy one byte; the update forms advance both pointers first. */
lbzu r6, 1(r4)
stbu r6, 1(r3)
b _main_loop
_match:
/* The second control bit may sit in the next flag byte: repeat the reload check. */
andi. r0, r5, 0x0100
beq 1f
lbzu r5, 1(r4)
oris r5, r5, 0x0001
1:
/* r6 = first token byte; the second control bit picks the format.
   That bit is only tested here; it is shifted out at _copy. */
lbzu r6, 1(r4)
andi. r0, r5, 0x01
beq _short_match
_long_match:
/* len = (first byte >> 4) + 3 */
srawi r7, r6, 4
addi r7, r7, 3
/* r8 = second token byte; rlwimi then inserts the low nibble of r6 as
   bits 8..11, giving the 12-bit offset. */
lbzu r8, 1(r4)
rlwimi r8, r6, 8, 20, 23
b _copy
_short_match:
/* A token byte of 0xff is the end-of-stream marker: return to the caller. */
cmpwi r6, 0xff
beqlr
/* len = (byte >> 6) + 2, offset = byte & 0x3f */
srawi r7, r6, 6
addi r7, r7, 2
andi. r8, r6, 0x3f
_copy:
/* Consume the second control bit. */
srawi r5, r5, 1
/* r8 = r3 - offset - 1: one byte before the match source, because lbzu
   increments the address before loading. */
sub r8, r3, r8
subi r8, r8, 1
/* Copy len bytes one at a time, in ascending address order. */
mtctr r7
1:
lbzu r6, 1(r8)
stbu r6, 1(r3)
bdnz 1b
b _main_loop
这里与C实现不一样的是，入口参数（dst和src）都要先减一。这是为了充分利用PPC带更新的加载/存储指令（lbzu、stbu）先修改地址再访问的特点，可以节省两条指令。这段代码占144字节空间（36条指令，每条4字节）。