My first Target will be the ZeroAccess rootkit as there is already a full Step-by-Step description of the reverese engineering process for this malware sample. At the end i will be able to compare my results with this article.
r2 max++.exe
- First of all some general informations about the binary:
> iS
[Sections]
idx=00 addr=0x00001000 off=0x00000400 sz=76800 vsz=76795 perm=-rw- name=.text
idx=01 addr=0x00014000 off=0x00013000 sz=512 vsz=70 perm=-r-- name=.rdata
idx=02 addr=0x00015000 off=0x00013200 sz=1024 vsz=992 perm=-r-- name=.rsrc
> ir
[Relocations]
addr=0x00014000 off=0x00013000 type=SET_32 COMCTL32.dll_Ordinal_17
> ii
[Imports]
ordinal=017 plt=0x00000000 bind=NONE type=FUNC name=COMCTL32.dll_Ordinal_17
> ie
[Entrypoints]
addr=0x00013bc8 off=0x00012fc8 baddr=0x00000000
- The command pd 11 disassembles the next 11 instructions from the current position (entry0).
> pd 11
0x00413bc8 mov edi, edi
0x00413bca push ebp
0x00413bcb mov ebp, esp
0x00413bcd xor ecx, ecx
0x00413bcf mov edx, ecx
0x00413bd1 inc edx
0x00413bd2 mov eax, edx
0x00413bd4 leave
0x00413bd5 int 0x2d
0x00413bd7 ret
0x00413bd8 call 0x413a38
Nothing special here except the little anti debugging technique at 0x00413bd5.
The int 0x2d instruction calls the exception handler for a debugging
breakpoint, if no debugger is attached to the process nothing happens. Another
special characteristic of this instruction is that the byte after it will be
ignored (ret). Its useful to keep track of the stack,
so call 0x413a38 jumps to 0x413a38 but also push`s 0x00413bdd onto the
stack.
- The next block sets the direction flag with std. After this instruction the string mnemonics will work in negative direction (esi/edi--). call 0x413bb4 pushs also 0x413a40 on the stack.
>pd 3 @ 0x413a38
0x00413a38 std
0x00413a39 mov edi, edi
0x00413a3b call 0x413bb4
- This snipped pushs 0x413bb9 onto the stack.
pop esisets esi to 0x413bb9 andsub esi, 0x9decrements it by 9. In the previous part we set the direction flag, therefore lodsd will decrement esi by 4 and reads 00 to eax so that the resulting pointer in esi will be 0x413bac. The instruction pop ebp will set ebp to 0x413a40, that will be the end of our crypted buffer. ret guides us to 0x413bdd cause this address is the first on stack.
>pd 9 @ 0x413bb4
0x00413bb4 call 0x413bb9
0x00413bb9 mov eax, 0x3
0x00413bbe int 0x2d
0x00413bc0 ret
0x00413bc1 pop esi
0x00413bc2 sub esi, 0x9
0x00413bc5 lodsd
0x00413bc6 pop ebp
0x00413bc7 ret
- Sets edx to 0 and moves 0x413bac to edi (esi and edi now contains this value). Jump to 0x413a2b and push 0x413be8 to the stack.
>pd 4 @ 0x413bdd
0x00413bdd mov edx, eax
0x00413bdf mov edi, edi
0x00413be1 mov edi, esi
0x00413be3 call 0x413a2b
- This part will decrypt some data in memory at (0x413a40 - 0x413bac). The loop reads 4 bytes from esi, xors this value with the xor result from a previous iteration (first was 0), subtracts this value from the read value and finaly writes the result at the same position. The ret instruction coninues the execution path at 0x413be8.
>pd 7 @ 0x00413a2b
0x00413a2b lodsd
0x00413a2c xor edx, eax
0x00413a2e sub eax, edx
0x00413a30 stosd
0x00413a31 cmp edi, ebp
0x00413a33 jge 0x413a2b
0x00413a35 ret
Note Dump the crypted block with the following radare call:
px (0x413bac - 0x413a40) @ 0x413a40
- Example of writing the easy decrypting function in vala.
using Radare;
public static void entry(RCore core) {
uint8 buffer[512];
uint64 ebp = (uint64)0x413a40;
uint64 esi = (uint64)0x413bac;
uint edx = 0;
uint eax = 0;
while (esi >= ebp) {
core.read_at(esi, buffer, 4);
eax = *(uint*)buffer; // lodsd reads dword into eax
edx = eax ^ edx;
eax = eax - edx;
core.write_at(esi, (uint8*)(&eax), 4); // stosd saves eax in (normally edi but here esi)
//stdout.printf("[0x%08x]: 0x%08x\n", (uint)esi, *(uint*)buffer);
esi -= 4;
}
}
Note calling a vala script with:
#!vala scriptname.
- Set eip to 0x401000 and push 0x00413bed to the stack.
>pd 1 @ 0x413be8
0x00413be8 call 0x401000
- Reset the direction flag (
cld) move to 0x413a18 and push 0x401008 to the stack.
>pd 3 @ 0x401000
0x00401000 cld
0x00401001 mov edi, edi
0x00401003 call 0x413a18
- Pop 0x413a1d into edx and subtracts 5 from it and brings 0x401008 to ecx. After the block we continue at 0x00413bed.
>pd 8 @ 0x413a18
0x00413a18 call 0x413a1d
0x00413a1d mov eax, 0x1
0x00413a22 int 0x2d
0x00413a24 ret
0x00413a25 pop edx
0x00413a26 sub edx, 0x5
0x00413a29 pop ecx
0x00413a2a ret
- Finally we call the extracted buffer at 0x413a40.
0x00413bed mov eax, 0x1
0x00413bf2 call ebp
- Creates a stackframe at 0x00413a43 with 0x30c space for local variables, writes 0x401008 to ebx/eax and 0x00413a18 (0x00413a1d - 5) to [ebp-0xc]
>pd 13 @ 0x00413a40
0x00413a40 int 0x2d
0x00413a42 ret
0x00413a43 push ebp
0x00413a44 mov ebp, esp
0x00413a46 sub esp, 0x30c
0x00413a4c push ebx
0x00413a4d push esi
0x00413a4e mov ebx, ecx
0x00413a50 push edi
0x00413a51 lea esi, [ebp-0x20c]
0x00413a57 mov eax, ebx
0x00413a59 mov [ebp-0xc], edx
0x00413a5c call 0x413b21 # push 0x413a61
- Four words are read, xor`ed with different values and rewritten in the created local stackspace.
0x00413b21 push ebp
0x00413b22 mov ebp, esp
0x00413b24 sub esp, 0x14
0x00413b27 mov ecx, [eax]
0x00413b29 mov edx, [eax+0x4]
0x00413b2c xor ecx, 0x72ca4247
0x00413b32 mov [ebp-0x14], ecx
0x00413b35 mov ecx, [eax+0x8]
0x00413b38 mov eax, [eax+0xc]
0x00413b3b xor ecx, 0xbb5eb388
0x00413b41 xor eax, 0x9d76300a
0x00413b46 xor edx, 0x42408e2e
0x00413b4c mov [ebp-0xc], ecx
0x00413b4f xor cl, cl
0x00413b51 mov [ebp-0x8], eax
0x00413b54 mov [ebp-0x10], edx
0x00413b57 mov [ebp-0x3], cl
0x00413b5a mov [ebp-0x2], cl
0x00413b5d mov [ebp-0x1], cl
using Radare;
ublic static void entry(RCore core) {
uint8 buffer[512];
uint32 target[5];
uint ecx = 0;
uint edx = 0;
uint64 eax = (uint64)0x401008;
core.read_at(eax, buffer, 4);
ecx = *(uint*)buffer;
core.read_at(eax + 0x4, buffer, 4);
edx = *(uint*)buffer;
ecx = ecx ^ 0x72ca4247;
// write ecx to buffer - 0x14
target[5] = ecx;
core.read_at(eax + 0x8, buffer, 4);
ecx = *(uint*)buffer;
core.read_at(eax + 0xc, buffer, 4);
eax = *(uint*)buffer;
ecx = ecx ^ 0xbb5eb388;
eax = eax ^ 0x9d76300a;
eax = edx ^ 0x42408e2e;
// write ecx to buffer - 0x0c
target[3] = ecx;
// write eax to buffer - 0x08
target[2] = eax;
// write edx to buffer - 0x10
target[4] = edx;
target[0] = 0;
target[1] = 0;
}
Note it`s possible to read exactly one word from memory with r2, for the next word simply add 4 to the address.
>pxw 1 @ 0x00401008
0x00401008 0x1625d4ca
Note The xor instruction's can also be done with radare:
?v 0x1625d4ca^0x72ca4247
- esi contains the local address from the previous block, [ebp-0x01] saves the
counter of the following loop. This code counts from 0 to 0xff and writes the
results register indirect to eax so that the resulting buffer located in eax
will be
0x00 0x01 0x02 0x02 0x03...0xff.
0x00413b60 mov eax, esi
0x00413b62 mov dl, [ebp-0x1]
0x00413b65 inc byte [ebp-0x1]
0x00413b68 mov [eax], dl
0x00413b6a inc eax
0x00413b6b cmp [ebp-0x1], cl
0x00413b6e jne 0x413b62
- First i will define some names for the description of this block.
- [ebp-0x1] = idx1
- [ebp-0x3] = idx2
- [ebp-0x2] = tmp
- esi (the created 00 01 02 03 04 block) = ascii (byte array)
- [ebp-0x14] = xor (this is a byte array) At first we read xor[idx2] into al and add ascii[idx1] to it. We save the value located at ascii[idx1] also to dl. The result from the addition between ascii[idx1] and xor[idx2] will be added to our tmp value which we will use to locate another position in the ascii array. We will do a change between these two values located at ascii[idx1] and ascii[tmp]. Now we will increment idx1 and idx2. As idx2 defines a position in the xor block we will also mask this index with 0xf. We loop this procedure until the byte idx1 will get 0 after 0xff loops.
0x00413b70 push ebx
0x00413b71 movzx eax, byte [ebp-0x1]
0x00413b75 lea ecx, [eax+esi]
0x00413b78 movzx eax, byte [ebp-0x3]
0x00413b7c mov al, [ebp+eax-0x14]
0x00413b80 add al, [ecx]
0x00413b82 mov dl, [ecx]
0x00413b84 add [ebp-0x2], al
0x00413b87 movzx eax, byte [ebp-0x2]
0x00413b8b add eax, esi
0x00413b8d mov bl, [eax]
0x00413b8f mov [ecx], bl
0x00413b91 mov [eax], dl
0x00413b93 mov al, [ebp-0x3]
0x00413b96 inc al
0x00413b98 and al, 0xf
0x00413b9a inc byte [ebp-0x1]
0x00413b9d mov [ebp-0x3], al
0x00413ba0 jne 0x413b71
0x00413ba2 pop ebx
0x00413ba3 leave
0x00413ba4 ret
- The last part is split into four separate descriptions. The whole block loops through 0x401018 - 0x413a18 in 0x100 steps. First of all we use the last created buffer to get offsets which are used to spread values from 0xff till 0x00 in a new buffer.
0x00413a61 add ebx, 0x10
0x00413a64 mov [ebp-0x8], ebx
0x00413a67 or al, 0xff
0x00413a69 lea esi, [ebp-0x10d]
0x00413a6f movzx ecx, byte [esi]
0x00413a72 mov [ebp+ecx-0x30c], al
0x00413a79 mov cl, al
0x00413a7b dec al
0x00413a7d dec esi
0x00413a7e test cl, cl
0x00413a80 jne 0x413a6f
- After that we read at the current position for 0x40 dwords (first was 0x401018) and write the result to the stack ([ebp-0x10c]).
0x00413a82 push 0x40
0x00413a84 lea edi, [ebp-0x10c]
0x00413a8a mov esi, ebx
0x00413a8c pop ecx
0x00413a8d rep movsd
- I will describe this block with a vala snipped:
for (i = 0; i < 256; i++) {
idx++;
dl += ascii[idx];
uint8 bl = ascii[idx];
//ascii[dl];
tmp = bl;
bl = ascii[dl];
ascii[idx] = bl;
bl = tmp;
ascii[dl] = bl;
uint ecx = bl;
ecx += ascii[idx];
ecx &= 0xff;
al = ascii[ecx];
after[i] ^= al; //[ebp+esi-0x10c] ^= al;
}
It reads values from the ascii buffer, does some calculation to get another position and finally xors the previously read stuff from 0x401018.
0x00413a8f xor dl, dl
0x00413a91 mov byte [ebp-0x1], 0x0
0x00413a95 xor esi, esi
0x00413a97 inc byte [ebp-0x1]
0x00413a9a movzx eax, byte [ebp-0x1]
0x00413a9e lea eax, [ebp+eax-0x20c]
0x00413aa5 add dl, [eax]
0x00413aa7 mov bl, [eax]
0x00413aa9 movzx ecx, dl
0x00413aac lea ecx, [ebp+ecx-0x20c]
0x00413ab3 mov [ebp-0x2], bl
0x00413ab6 mov bl, [ecx]
0x00413ab8 mov [eax], bl
0x00413aba mov bl, [ebp-0x2]
0x00413abd mov [ecx], bl
0x00413abf movzx eax, byte [eax]
0x00413ac2 movzx ecx, bl
0x00413ac5 add ecx, eax
0x00413ac7 and ecx, 0xff
0x00413acd mov al, [ebp+ecx-0x20c]
0x00413ad4 xor [ebp+esi-0x10c], al
0x00413adb inc esi
0x00413adc mov eax, 0x100 ; 0x00000100
0x00413ae1 cmp esi, eax
0x00413ae3 jl 0x413a97 ; jump if esi is lower then 0x100
- Now we use the last buffer to get offsets where we read at for example 0x401018.
0x00413ae5 mov ebx, [ebp-0x8]
0x00413ae8 xor ecx, ecx
0x00413aea mov esi, eax
0x00413aec movzx edx, byte [ebp+ecx-0x20d]
0x00413af4 mov dl, [edx+ebx]
0x00413af7 dec esi
0x00413af8 mov [ebp+ecx-0xd], dl
0x00413afc dec ecx
0x00413afd test esi, esi
0x00413b00 ja 0x413aec
- Finally we write the resulting buffer to the code section that starts here at 0x401018. We will do this in 0x100 blocks as long as we reach the 0x00413a18.
0x00413b01 mov edi, ebx
0x00413b03 push 0x40
0x00413b05 add ebx, eax
0x00413b07 lea esi, [ebp-0x10c]
0x00413b0d pop ecx
0x00413b0e rep movsd
0x00413b10 mov [ebp-0x8], ebx
0x00413b13 cmp ebx, [ebp-0xc]
0x00413b16 jb 0x413a67
0x00413b1c pop edi
0x00413b1d pop esi
0x00413b1e pop ebx
0x00413b1f leave
0x00413b20 ret
- Thats the final result to unpack the binary in vala:
using Radare;
uint8 buf[512];
uint32 target[5];
public void create_ascii(RCore core, uint8 *ascii) {
uint i;
for ( i = 0; i <= 255; i++) {
ascii[i] = (uint8)i;
}
}
public void create_xor(RCore core) {
uint v = 0;
uint v_4 = 0;
uint v_8 = 0;
uint v_C = 0;
uint64 eax = (uint64)0x401008;
core.read_at(eax, buf, 4);
v = *(uint*)buf;
v = v ^ 0x72ca4247;
target[0] = v;
core.read_at(eax + 0x4, buf, 4);
v_4 = *(uint*)buf;
v_4 = v_4 ^ 0x42408e2e;
target[1] = v_4;
core.read_at(eax + 0x8, buf, 4);
v_8 = *(uint*)buf;
v_8 = v_8 ^ (uint32)0xbb5eb388;
target[2] = v_8;
core.read_at(eax + 0xc, buf, 4);
v_C = *(uint*)buf;
v_C = v_C ^ (uint32)0x9d76300a;
target[3] = v_C;
}
public static void first_decrypt(RCore core) {
uint8 buffer[512];
uint64 ebp = (uint64)0x413a40;
uint64 esi = (uint64)0x413bac;
uint edx = 0;
uint eax = 0;
uint index = 0;
while (esi >= ebp) {
core.read_at(esi, buffer, 4);
eax = *(uint*)buffer;
edx = eax ^ edx;
eax = eax - edx;
core.write_at(esi, (uint8*)(&eax), 4);
esi -= 4;
}
}
public void entry(RCore core) {
uint position = 0x401018;
uint end = 0x413a18;
uint8 result[780];
uint8* ascii = &result[256];
first_decrypt(core);
create_ascii(core, ascii);
create_xor(core);
int i;
int idx1 = 0;
int idx2 = 0;
uint8 tmp = 0;
uint8 *array2 = ((uint8*)target);
do {
uint8 al = ((uint8*)target)[idx2];
al += ascii[idx1];
uint8 dl = ascii[idx1];
tmp += al;
uint8 bl = ascii[tmp];
ascii[idx1] = bl;
ascii[tmp] = dl;
idx2++;
idx2 &= 0xf;
idx1++;
} while (idx1 <= 255);
while (position < end) {
uint8 al = 0xff;
uint8 cl = 0;
for (i = 255; i >= 0; i--) {
uint8 ecx = ascii[i];
result[ecx] = (uint8)i;
}
uint* esi = (uint*)0x401018;
uint* edi = &result[512];
for (i = 0; i < 64; i++) {
core.read_at((uint64)esi, buf, 4);
edi[i] = *(uint*)buf;
esi++;
}
uint8* after = &result[512];
uint8 dl = 0;
uint8 idx = 0; // [ebp-1]
tmp = 0; // [ebp-2]
for (i = 0; i < 256; i++) {
idx++;
dl += ascii[idx];
uint8 bl = ascii[idx];
tmp = bl;
bl = ascii[dl];
ascii[idx] = bl;
bl = tmp;
ascii[dl] = bl;
uint ecx = bl;
ecx += ascii[idx];
ecx &= 0xff;
al = ascii[ecx];
after[i] ^= al; //[ebp+esi-0x10c] ^= al;
}
uint8* buffer = &result[512];
for (i = 255; i >= 0; i--) {
core.read_at((uint64)(result[i] + position), buf, 1);
dl = *(uint8*)buf;
buffer[i] = dl;
}
core.write_at((uint64)position, buffer, (64*4));
position += 0x100;
}
}
- Thats all for now, unpacking done i think. Finally real analysis what this file does will be followed in the next article.