Fork me on GitHub

First steps in malware reversing

My first target will be the ZeroAccess rootkit, as there is already a full step-by-step description of the reverse engineering process for this malware sample. At the end I will be able to compare my results with that article.


r2 max++.exe
  • First of all, some general information about the binary:
> iS
idx=00 addr=0x00001000 off=0x00000400 sz=76800 vsz=76795 perm=-rw- name=.text
idx=01 addr=0x00014000 off=0x00013000 sz=512 vsz=70 perm=-r-- name=.rdata
idx=02 addr=0x00015000 off=0x00013200 sz=1024 vsz=992 perm=-r-- name=.rsrc
> ir
addr=0x00014000 off=0x00013000 type=SET_32 COMCTL32.dll_Ordinal_17
> ii
ordinal=017 plt=0x00000000 bind=NONE type=FUNC name=COMCTL32.dll_Ordinal_17
> ie
addr=0x00013bc8 off=0x00012fc8 baddr=0x00000000
  • The command pd 11 disassembles the next 11 instructions from the current position (entry0).
> pd 11
0x00413bc8    mov edi, edi
0x00413bca    push ebp
0x00413bcb    mov ebp, esp
0x00413bcd    xor ecx, ecx
0x00413bcf    mov edx, ecx
0x00413bd1    inc edx
0x00413bd2    mov eax, edx
0x00413bd4    leave
0x00413bd5    int 0x2d
0x00413bd7    ret
0x00413bd8    call 0x413a38

Nothing special here except the little anti-debugging technique at 0x00413bd5. The int 0x2d instruction calls the exception handler for a debugging breakpoint; if no debugger is attached to the process, nothing happens. Another special characteristic of this instruction is that the byte after it will be ignored (here the ret). It is useful to keep track of the stack: call 0x413a38 jumps to 0x413a38 but also pushes the return address 0x00413bdd onto the stack.

  • The next block sets the direction flag with std. After this instruction the string instructions work in the negative direction (esi/edi are decremented). call 0x413bb4 also pushes 0x413a40 onto the stack.
>pd 3 @ 0x413a38
0x00413a38    std                       
0x00413a39    mov edi, edi      
0x00413a3b    call 0x413bb4 
  • This snippet pushes 0x413bb9 onto the stack. pop esi sets esi to 0x413bb9 and sub esi, 0x9 decrements it by 9 (to 0x413bb0). Because we set the direction flag in the previous part, lodsd reads 0 into eax and decrements esi by 4, so the resulting pointer in esi is 0x413bac. The instruction pop ebp sets ebp to 0x413a40, which will be the end of our crypted buffer. ret takes us to 0x413bdd because this address is now on top of the stack.
>pd 9 @ 0x413bb4
0x00413bb4    call 0x413bb9     
0x00413bb9    mov eax, 0x3      
0x00413bbe    int 0x2d          
0x00413bc0    ret                           
0x00413bc1    pop esi                   
0x00413bc2    sub esi, 0x9      
0x00413bc5    lodsd                 
0x00413bc6    pop ebp               
0x00413bc7    ret
  • Sets edx to 0 and moves 0x413bac to edi (esi and edi now contains this value). Jump to 0x413a2b and push 0x413be8 to the stack.
>pd 4 @ 0x413bdd
0x00413bdd    mov edx, eax
0x00413bdf    mov edi, edi
0x00413be1    mov edi, esi
0x00413be3    call 0x413a2b
  • This part decrypts some data in memory at (0x413a40 - 0x413bac). The loop reads 4 bytes from esi, xors this value with the xor result from the previous iteration (initially 0), subtracts that result from the value read, and finally writes the result back to the same position. The ret instruction continues the execution path at 0x413be8.
>pd 7 @ 0x00413a2b
0x00413a2b      lodsd
0x00413a2c    xor edx, eax
0x00413a2e    sub eax, edx
0x00413a30    stosd
0x00413a31    cmp edi, ebp
0x00413a33    jge 0x413a2b
0x00413a35    ret

Note Dump the crypted block with the following radare call: px (0x413bac - 0x413a40) @ 0x413a40

  • Example of writing the easy decrypting function in vala.
using Radare;

/**
 * r2 Vala script that replays the first decryption loop (0x413a2b).
 *
 * Walks the crypted buffer downwards, one dword at a time, from
 * 0x413bac to 0x413a40 (the direction flag is set in the original code)
 * and writes every decrypted dword back in place.
 */
public static void entry(RCore core) {
    uint8 buffer[512];
    uint64 ebp = (uint64)0x413a40; // lowest address of the crypted buffer
    uint64 esi = (uint64)0x413bac; // first (highest) dword to decrypt

    uint edx = 0; // running xor state, starts at 0
    uint eax = 0;

    while (esi >= ebp) {
        core.read_at(esi, buffer, 4);
        eax = *(uint*)buffer; // lodsd reads dword into eax
        edx = eax ^ edx;
        eax = eax - edx;
        core.write_at(esi, (uint8*)(&eax), 4); // stosd saves eax in (normally edi but here esi)
        //stdout.printf("[0x%08x]: 0x%08x\n", (uint)esi, *(uint*)buffer);
        esi -= 4; // std: string instructions move towards lower addresses
    }
}

Note calling a vala script with: #!vala scriptname.

  • Set eip to 0x401000 and push 0x00413bed to the stack.
>pd 1 @ 0x413be8
0x00413be8      call 0x401000
  • Reset the direction flag (cld) move to 0x413a18 and push 0x401008 to the stack.
>pd 3 @ 0x401000
0x00401000      cld
0x00401001      mov edi, edi
0x00401003      call 0x413a18
  • Pop 0x413a1d into edx and subtracts 5 from it and brings 0x401008 to ecx. After the block we continue at 0x00413bed.
>pd 8 @ 0x413a18
0x00413a18      call 0x413a1d
0x00413a1d      mov eax, 0x1
0x00413a22      int 0x2d
0x00413a24      ret
0x00413a25      pop edx
0x00413a26      sub edx, 0x5
0x00413a29      pop ecx
0x00413a2a    ret
  • Finally we call the extracted buffer at 0x413a40.
0x00413bed      mov eax, 0x1 
0x00413bf2      call ebp
  • Creates a stackframe at 0x00413a43 with 0x30c space for local variables, writes 0x401008 to ebx/eax and 0x00413a18 (0x00413a1d - 5) to [ebp-0xc]
>pd 13 @ 0x00413a40
0x00413a40      int 0x2d
0x00413a42      ret
0x00413a43      push ebp
0x00413a44      mov ebp, esp
0x00413a46      sub esp, 0x30c
0x00413a4c      push ebx
0x00413a4d      push esi
0x00413a4e      mov ebx, ecx
0x00413a50      push edi
0x00413a51      lea esi, [ebp-0x20c]
0x00413a57      mov eax, ebx
0x00413a59      mov [ebp-0xc], edx
0x00413a5c      call 0x413b21 # push 0x413a61
  • Four words are read, xor`ed with different values and rewritten in the created local stackspace.
0x00413b21      push ebp
0x00413b22      mov ebp, esp
0x00413b24      sub esp, 0x14
0x00413b27      mov ecx, [eax]
0x00413b29      mov edx, [eax+0x4]
0x00413b2c      xor ecx, 0x72ca4247
0x00413b32      mov [ebp-0x14], ecx
0x00413b35      mov ecx, [eax+0x8]
0x00413b38      mov eax, [eax+0xc]
0x00413b3b      xor ecx, 0xbb5eb388
0x00413b41      xor eax, 0x9d76300a
0x00413b46      xor edx, 0x42408e2e
0x00413b4c      mov [ebp-0xc], ecx
0x00413b4f      xor cl, cl
0x00413b51      mov [ebp-0x8], eax
0x00413b54      mov [ebp-0x10], edx
0x00413b57      mov [ebp-0x3], cl
0x00413b5a      mov [ebp-0x2], cl
0x00413b5d      mov [ebp-0x1], cl
using Radare;

/**
 * r2 Vala script that replays the key setup at 0x413b21.
 *
 * Reads the four dwords at 0x401008 and xors each one with its constant.
 * The results are stored in memory order, so that target[0] corresponds
 * to [ebp-0x14] and target[3] to [ebp-0x8]; viewed as bytes this is the
 * 16-byte xor key. target[4] stands in for the zeroed bytes at
 * [ebp-0x3]..[ebp-0x1].
 */
public static void entry(RCore core) {
    uint8 buffer[512];
    uint32 target[5];

    uint ecx = 0;
    uint edx = 0;
    uint64 eax = (uint64)0x401008;

    core.read_at(eax, buffer, 4);
    ecx = *(uint*)buffer;

    core.read_at(eax + 0x4, buffer, 4);
    edx = *(uint*)buffer;
    ecx = ecx ^ 0x72ca4247;

    // mov [ebp-0x14], ecx
    target[0] = ecx;
    core.read_at(eax + 0x8, buffer, 4);
    ecx = *(uint*)buffer;
    core.read_at(eax + 0xc, buffer, 4);
    eax = *(uint*)buffer;
    ecx = ecx ^ (uint32)0xbb5eb388;
    eax = eax ^ (uint32)0x9d76300a;
    edx = edx ^ 0x42408e2e; // xor edx, 0x42408e2e (the result stays in edx)
    // mov [ebp-0xc], ecx
    target[2] = ecx;
    // mov [ebp-0x8], eax
    target[3] = (uint32)eax;
    // mov [ebp-0x10], edx
    target[1] = edx;
    // the three byte stores of cl (= 0) to [ebp-0x3], [ebp-0x2], [ebp-0x1]
    target[4] = 0;
}

Note it`s possible to read exactly one word from memory with r2, for the next word simply add 4 to the address.

>pxw 1 @ 0x00401008
0x00401008      0x1625d4ca

Note The xor instructions can also be done with radare: ?v 0x1625d4ca^0x72ca4247

  • esi contains the local address from the previous block, and [ebp-0x01] holds the counter of the following loop. This code counts from 0 to 0xff and writes each value through the pointer in eax, so that the resulting buffer will be 0x00 0x01 0x02 0x03...0xff.
0x00413b60      mov eax, esi
0x00413b62      mov dl, [ebp-0x1]
0x00413b65      inc byte [ebp-0x1]
0x00413b68      mov [eax], dl
0x00413b6a      inc eax
0x00413b6b      cmp [ebp-0x1], cl
0x00413b6e      jne 0x413b62
  • First i will define some names for the description of this block.
  • [ebp-0x1] = idx1
  • [ebp-0x3] = idx2
  • [ebp-0x2] = tmp
  • esi (the created 00 01 02 03 04 block) = ascii (byte array)
  • [ebp-0x14] = xor (this is a byte array). First we read xor[idx2] into al and add ascii[idx1] to it. We also save the value located at ascii[idx1] in dl. The sum of ascii[idx1] and xor[idx2] is added to our tmp value, which we use to locate another position in the ascii array. We then swap the two values located at ascii[idx1] and ascii[tmp]. Now we increment idx1 and idx2. As idx2 defines a position in the 16-byte xor block, we also mask this index with 0xf. We loop this procedure until the byte idx1 wraps around to 0, i.e. after 0x100 iterations.
0x00413b70      push ebx                                        
0x00413b71      movzx eax, byte [ebp-0x1]       
0x00413b75      lea ecx, [eax+esi]                  
0x00413b78      movzx eax, byte [ebp-0x3]       
0x00413b7c      mov al, [ebp+eax-0x14]          
0x00413b80      add al, [ecx]                               
0x00413b82      mov dl, [ecx]                               
0x00413b84      add [ebp-0x2], al
0x00413b87      movzx eax, byte [ebp-0x2]
0x00413b8b      add eax, esi
0x00413b8d      mov bl, [eax]
0x00413b8f      mov [ecx], bl
0x00413b91      mov [eax], dl
0x00413b93      mov al, [ebp-0x3]
0x00413b96      inc al
0x00413b98      and al, 0xf
0x00413b9a      inc byte [ebp-0x1]
0x00413b9d      mov [ebp-0x3], al
0x00413ba0      jne 0x413b71
0x00413ba2      pop ebx 
0x00413ba3      leave
0x00413ba4      ret 
  • The last part is split into four separate descriptions. The whole block loops through 0x401018 - 0x413a18 in 0x100 steps. First of all we use the last created buffer to get offsets which are used to spread values from 0xff till 0x00 in a new buffer.
0x00413a61      add ebx, 0x10
0x00413a64      mov [ebp-0x8], ebx
0x00413a67      or al, 0xff
0x00413a69      lea esi, [ebp-0x10d]                
0x00413a6f      movzx ecx, byte [esi]           
0x00413a72      mov [ebp+ecx-0x30c], al 
0x00413a79      mov cl, al                      
0x00413a7b      dec al                          
0x00413a7d      dec esi
0x00413a7e      test cl, cl
0x00413a80      jne 0x413a6f
  • After that we read at the current position for 0x40 dwords (first was 0x401018) and write the result to the stack ([ebp-0x10c]).
0x00413a82      push 0x40                   
0x00413a84      lea edi, [ebp-0x10c]    
0x00413a8a      mov esi, ebx
0x00413a8c      pop ecx                         
0x00413a8d      rep movsd                   
  • I will describe this block with a Vala snippet:
    // Keystream loop (0x413a97 - 0x413ae3): one key byte per data byte.
    for (i = 0; i < 256; i++) {
            idx++; // inc byte [ebp-0x1] runs before the index is used
            dl += ascii[idx];
            uint8 bl = ascii[idx];
            tmp = bl;
            // swap ascii[idx] and ascii[dl]
            bl = ascii[dl];
            ascii[idx] = bl;
            bl = tmp;
            ascii[dl] = bl;
            // the sum of both swapped values (mod 256) selects the key byte
            uint ecx = bl;
            ecx += ascii[idx];
            ecx &= 0xff;
            al = ascii[ecx];
            after[i] ^= al; //[ebp+esi-0x10c] ^= al;
    }

It reads values from the ascii buffer, does some calculation to get another position and finally xors the previously read stuff from 0x401018.

0x00413a8f      xor dl, dl          
0x00413a91      mov byte [ebp-0x1], 0x0     
0x00413a95      xor esi, esi
0x00413a97      inc byte [ebp-0x1]          
0x00413a9a      movzx eax, byte [ebp-0x1]   
0x00413a9e      lea eax, [ebp+eax-0x20c]
0x00413aa5      add dl, [eax]
0x00413aa7      mov bl, [eax]
0x00413aa9      movzx ecx, dl
0x00413aac      lea ecx, [ebp+ecx-0x20c]
0x00413ab3      mov [ebp-0x2], bl
0x00413ab6      mov bl, [ecx]
0x00413ab8      mov [eax], bl
0x00413aba      mov bl, [ebp-0x2]
0x00413abd      mov [ecx], bl
0x00413abf      movzx eax, byte [eax]
0x00413ac2      movzx ecx, bl
0x00413ac5      add ecx, eax
0x00413ac7      and ecx, 0xff
0x00413acd      mov al, [ebp+ecx-0x20c]
0x00413ad4      xor [ebp+esi-0x10c], al
0x00413adb      inc esi
0x00413adc      mov eax, 0x100 ;  0x00000100 
0x00413ae1      cmp esi, eax
0x00413ae3      jl 0x413a97                                 ; jump if esi is lower then 0x100
  • Now we use the last buffer to get offsets where we read at for example 0x401018.
0x00413ae5      mov ebx, [ebp-0x8]
0x00413ae8      xor ecx, ecx
0x00413aea      mov esi, eax
0x00413aec      movzx edx, byte [ebp+ecx-0x20d]
0x00413af4      mov dl, [edx+ebx]
0x00413af7      dec esi
0x00413af8      mov [ebp+ecx-0xd], dl
0x00413afc      dec ecx
0x00413afd      test esi, esi
0x00413b00      ja 0x413aec
  • Finally we write the resulting buffer to the code section that starts here at 0x401018. We do this in 0x100-byte blocks until we reach 0x00413a18.
0x00413b01      mov edi, ebx
0x00413b03      push 0x40 
0x00413b05      add ebx, eax
0x00413b07      lea esi, [ebp-0x10c]
0x00413b0d      pop ecx
0x00413b0e      rep movsd
0x00413b10      mov [ebp-0x8], ebx
0x00413b13      cmp ebx, [ebp-0xc]                  
0x00413b16      jb 0x413a67
0x00413b1c      pop edi
0x00413b1d      pop esi
0x00413b1e      pop ebx
0x00413b1f      leave
0x00413b20      ret
  • That's the final result to unpack the binary in Vala:
using Radare;

// Scratch buffer that core.read_at() fills in create_xor() and entry().
uint8 buf[512];
// The xor-ed dwords written by create_xor() (slots 0..3); entry() reads them back as a byte array.
uint32 target[5];

/**
 * Fills the 256-byte table at `ascii` with the identity sequence
 * 0x00, 0x01, ..., 0xff (the loop at 0x413b60).
 *
 * `core` is not used; it is kept so the signature matches the other helpers.
 */
public void create_ascii(RCore core, uint8 *ascii) {
    uint i;
    for (i = 0; i <= 255; i++) {
        ascii[i] = (uint8)i;
    }
}

/**
 * Builds the 16-byte xor key in the global `target` (the code at 0x413b21).
 *
 * Reads the four dwords at 0x401008 into the global scratch buffer `buf`,
 * xors each with its constant and stores the results in memory order in
 * target[0..3].
 */
public void create_xor(RCore core) {
    uint v = 0;
    uint v_4 = 0;
    uint v_8 = 0;
    uint v_C = 0;
    uint64 eax = (uint64)0x401008;

    core.read_at(eax, buf, 4);
    v = *(uint*)buf;
    v = v ^ 0x72ca4247;
    target[0] = v;

    core.read_at(eax + 0x4, buf, 4);
    v_4 = *(uint*)buf;
    v_4 = v_4 ^ 0x42408e2e;
    target[1] = v_4;

    core.read_at(eax + 0x8, buf, 4);
    v_8 = *(uint*)buf;
    v_8 = v_8 ^ (uint32)0xbb5eb388;
    target[2] = v_8;

    core.read_at(eax + 0xc, buf, 4);
    v_C = *(uint*)buf;
    v_C = v_C ^ (uint32)0x9d76300a;
    target[3] = v_C;
}

/**
 * Replays the first decryption loop (0x413a2b) on the range
 * 0x413a40 - 0x413bac.
 *
 * Starting at the highest dword and moving downwards, every dword is
 * xor-ed into the running state `edx`, the state is subtracted from the
 * value read, and the result is written back in place.
 */
public static void first_decrypt(RCore core) {
    uint8 buffer[512];
    uint64 ebp = (uint64)0x413a40; // lowest address of the crypted buffer
    uint64 esi = (uint64)0x413bac; // first (highest) dword to decrypt

    uint edx = 0;
    uint eax = 0;

    while (esi >= ebp) {
        core.read_at(esi, buffer, 4);
        eax = *(uint*)buffer;
        edx = eax ^ edx;
        eax = eax - edx;
        core.write_at(esi, (uint8*)(&eax), 4);
        esi -= 4;
    }
}

/**
 * Script entry point: unpacks 0x401018 - 0x413a18 in 0x100-byte blocks,
 * following the routine at 0x413a43.
 *
 * `result` mirrors the 0x30c-byte stack frame of the unpacker:
 *   result[0..255]    inverse table          ([ebp-0x30c])
 *   result[256..511]  ascii (state) table    ([ebp-0x20c])
 *   result[512..767]  current 0x100 block    ([ebp-0x10c])
 */
public void entry(RCore core) {
    uint position = 0x401018;
    uint end = 0x413a18;
    uint8 result[780];
    uint8* ascii = &result[256];
    create_ascii(core, ascii);
    // The key must be derived before it is mixed into the ascii table.
    create_xor(core);

    int i;
    int idx1 = 0;
    int idx2 = 0;
    uint8 tmp = 0;

    // Key schedule (0x413b71 - 0x413ba0): 256 swaps driven by the 16-byte key.
    do {
        uint8 al = ((uint8*)target)[idx2];
        al += ascii[idx1];
        uint8 dl = ascii[idx1];
        tmp += al;
        uint8 bl = ascii[tmp];
        ascii[idx1] = bl;
        ascii[tmp] = dl;
        idx2 = (idx2 + 1) & 0xf; // inc al; and al, 0xf
        idx1++;                  // inc byte [ebp-0x1]
    } while (idx1 <= 255);

    while (position < end) {
        uint8 al = 0xff;

        // Inverse table (0x413a67 - 0x413a80): result[ascii[i]] = i.
        for (i = 255; i >= 0; i--) {
            uint8 ecx = ascii[i];
            result[ecx] = (uint8)i;
        }

        // rep movsd at 0x413a8d: copy 0x40 dwords of the current block.
        uint* edi = (uint*)(&result[512]);
        for (i = 0; i < 64; i++) {
            core.read_at((uint64)(position + (uint)(i * 4)), buf, 4);
            edi[i] = *(uint*)buf;
        }

        // Keystream loop (0x413a97 - 0x413ae3).
        uint8* after = &result[512];
        uint8 dl = 0;
        uint8 idx = 0; // [ebp-1]
        tmp = 0; // [ebp-2]
        for (i = 0; i < 256; i++) {
            idx++; // inc byte [ebp-0x1] runs before the index is used
            dl += ascii[idx];
            uint8 bl = ascii[idx];
            tmp = bl;
            bl = ascii[dl];
            ascii[idx] = bl;
            bl = tmp;
            ascii[dl] = bl;
            uint ecx = bl;
            ecx += ascii[idx];
            ecx &= 0xff;
            al = ascii[ecx];
            after[i] ^= al; //[ebp+esi-0x10c] ^= al;
        }

        // Byte gather through the inverse table (0x413aec - 0x413b00).
        // NOTE(review): this follows the listing literally (mov dl, [edx+ebx]
        // reads from the block in memory) and therefore overwrites the bytes
        // xor-ed above; confirm against a debugger trace.
        uint8* buffer = &result[512];
        for (i = 255; i >= 0; i--) {
            core.read_at((uint64)(result[i] + position), buf, 1);
            dl = *(uint8*)buf;
            buffer[i] = dl;
        }

        // rep movsd at 0x413b0e: write the block back and advance.
        core.write_at((uint64)position, buffer, (64*4));
        position += 0x100;
    }
}
  • That's all for now; unpacking is done, I think. The real analysis of what this file does will follow in the next article.

Part 2