Shellcode: Data Masking 2

Introduction

This is a quick follow-up to Data Masking, which discussed how one might use the Fisher-Yates shuffle and a DRBG to mask shellcode. There are many ways to mask data that don’t involve an XOR operation, but despite being relatively simple to implement, they are rarely used. You can use involutions, partial encryption, base encoding, simple arithmetic using addition and subtraction, or their modular variants. In the past, I’ve found components of block and stream ciphers to be a good source of techniques because their operations need to be invertible. In this post we’ll look at how easy it is to use byte substitution.

Substitution Box

Although substitution ciphers date back to ancient times, DES was the first to use fixed s-box arrays for encrypting data. Since then, such non-linear operations have become a standard component of many block ciphers. To implement byte substitution, we perform the following steps:

Initialize a 256-byte array, which we call the “s-box”, with the values 0 through 255.
Shuffle the s-box using a random seed.
Create the inverse s-box.

This S-Box can then be used for masking data and the inverse can be used for unmasking. The reason we use a random seed to shuffle the s-box is so that the masked data is always different. Here’s a snippet of C code to demonstrate…

//
// simple byte substitution using fisher-yates shuffle and DRBG
//
typedef struct _mask_ctx {
    uint8_t sbox[256];      // forward substitution table, indexed by the raw byte
    uint8_t sbox_inv[256];  // inverse table, indexed by the masked byte
} mask_ctx;

// Fill c->sbox with a shuffled permutation of 0..255 and c->sbox_inv
// with the matching inverse lookup table.
void
init_mask(mask_ctx *c) {
    uint8_t seed[ENCRYPT_KEY_LEN];
    int     n;

    // start from the identity mapping: sbox[n] = n
    n = 0;
    while (n < 256) {
        c->sbox[n] = (uint8_t)n;
        n++;
    }

    // initialise the seed/key, then shuffle the table with it
    random(seed, ENCRYPT_KEY_LEN);
    shuffle(seed, c->sbox, 256);

    // for every table entry, record which index produced it
    n = 0;
    while (n < 256) {
        c->sbox_inv[c->sbox[n]] = (uint8_t)n;
        n++;
    }
}

// Mask len bytes of buf in place: every byte is replaced by the
// entry it selects in the forward table c->sbox.
void
encode(mask_ctx *c, void *buf, size_t len) {
    uint8_t *p = (uint8_t*)buf;

    while (len--) {
        *p = c->sbox[*p];
        p++;
    }
}

// Unmask len bytes of buf in place: every byte is replaced by the
// entry it selects in the inverse table c->sbox_inv.
void
decode(mask_ctx *c, void *buf, size_t len) {
    uint8_t *p = (uint8_t*)buf;

    while (len--) {
        *p = c->sbox_inv[*p];
        p++;
    }
}

// Print str as a heading on its own line, then len bytes of buf as
// space-separated two-digit upper-case hex. No trailing newline is written.
void
dump(const char *str, void *buf, size_t len) {
    uint8_t *p = (uint8_t*)buf;

    printf("\n%s:\n", str);

    for (; len != 0; len--, p++) {
        printf(" %02X", *p);
    }
}

// Demo driver: fill a buffer, mask it, unmask it, and dump it at each stage.
int
main(int argc, char *argv[]) {
    uint8_t      data[32];
    mask_ctx     ctx;
    const size_t data_len = sizeof(data);

    // test input comes from random() rather than real shellcode
    random(data, data_len);

    init_mask(&ctx);
    dump("raw", data, data_len);

    // forward substitution
    encode(&ctx, data, data_len);
    dump("encoded", data, data_len);

    // inverse substitution
    decode(&ctx, data, data_len);
    dump("decoded", data, data_len);

    return 0;
}

And here’s the output of the program.

You can simplify the above example by just using the srand(), rand() functions and a modulo operator instead of a DRBG. See the full example here.

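The example above is a bit more complicated than it needs to be. Instead, we can repurpose the key initialization of RC4 to shuffle the s-box and derive an inverse lookup table from the result. Note that init_mask() only reads c->key, so the key must be filled in before calling it. Here’s a little more code to demonstrate.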

// Masking context for the RC4-based variant.
// Keep the members in this order: init_mask_x86 reaches key and sbox_inv
// by advancing a pointer past the preceding array (256 bytes, then 16).
typedef struct _mask_ctx {
    uint8_t sbox[256];      // forward table, shuffled by init_mask()
    uint8_t key[16];        // key bytes; init_mask() reads them but never writes them
    uint8_t sbox_inv[256];  // inverse table: sbox_inv[sbox[i]] = i
} mask_ctx;

// initialise using RC4
// Shuffles c->sbox with the RC4 key-initialisation loop driven by c->key,
// then fills c->sbox_inv with the inverse mapping. c->key is only read
// here, so it has to be set before this is called.
void
init_mask(mask_ctx *c) {
    size_t i, j;

    // identity permutation
    for (i = 0; i < 256; i++) {
        c->sbox[i] = (uint8_t)i;
    }

    // key-dependent shuffle: swap entry i with entry j
    for (i = 0, j = 0; i < 256; i++) {
        j = (j + c->sbox[i] + c->key[i % 16]) & 0xFF;

        uint8_t tmp = c->sbox[j];
        c->sbox[j] = c->sbox[i];
        c->sbox[i] = tmp;
    }

    // inverse lookup: which index produced each table entry
    for (i = 0; i < 256; i++) {
        c->sbox_inv[c->sbox[i]] = (uint8_t)i;
    }
}

// mask or unmask
// Substitutes len bytes of buf in place through the 256-entry table sbox.
// Passing the forward table masks the data; passing the inverse unmasks it.
void
mask(uint8_t *sbox, size_t len, void *buf) {
    uint8_t *p = (uint8_t*)buf;

    while (len--) {
        *p = sbox[*p];
        p++;
    }
}

In this case, mask() performs both encoding and decoding, depending on which s-box array is passed to it. And just for fun, here’s the 32-bit x86 assembly code…

;
; Simple obfuscation using byte substitution.
;
    
    bits 32
    
%ifndef BIN
    global _init_mask_x86
    global init_mask_x86
    
    global _mask_x86
    global mask_x86
%endif

    section .text
    
    ;
    ; void init_mask_x86(mask_ctx*c);
    ;
    ; Assembly counterpart of init_mask(): fills c->sbox with 0..255,
    ; shuffles it with the RC4-style loop driven by c->key, then writes
    ; the inverse table to c->sbox_inv.
    ; Relies on the mask_ctx layout sbox[256], key[16], sbox_inv[256].
    ; c->key is only read, so the caller has to set it beforehand.
    ; General-purpose registers are saved and restored (pushad/popad).
    ; NOTE(review): stosb assumes the direction flag is clear on entry.
    ;
_init_mask_x86:
init_mask_x86:
    pushad
    mov    edi, [esp+32+4]   ; edi = c (skip 32 bytes of pushad + return address)
    push   edi
    pop    esi               ; esi = c->sbox, stays fixed while edi advances
    xor    eax, eax          ; i=0
initialise_sbox:
    stosb                    ; c->sbox[i]=i, edi++
    inc    al                ; i++
    jnz    initialise_sbox   ; i<256 (al wraps to zero after 256 stores)
    ; here eax = 0 and edi = c->key (256 bytes past the start of sbox)
    cdq                      ; j=0 (edx = sign of eax, and eax is 0)
shuffle_sbox:
    ; j = (j + (c->sbox[i] + c->key[i % 16])) & 255;
    movzx  ecx, al           ; t = i % 16
    and    cl, 15            ;
    
    add    dl, [edi+ecx]     ; j += c->key[i % 16]
    mov    cl, [esi+eax]     ; t = c->sbox[i]
    add    dl, cl            ; j += c->sbox[i] (8-bit add provides the & 255)
    xchg   cl, [esi+edx]     ; swap(t, s[j])
    mov    [esi+eax], cl     ; c->sbox[i] = previous c->sbox[j]
    
    inc    al                ; i++
    jnz    shuffle_sbox      ; i<256
    add    edi, 16           ; edi = c->sbox_inv (step over the 16-byte key)
create_inverse:
    mov    dl, [esi+eax]     ; sbox_inv[sbox[i]] = i
    mov    [edi+edx], al     ; 
    inc    al                ; i++
    jnz    create_inverse    ; i<256
    
    popad
    ret
    
    ;
    ; void mask_x86(void *sbox, size_t inlen, void *inbuf);
    ;
    ; Substitutes inlen bytes of inbuf in place through the 256-byte
    ; table at sbox. Pass the forward table to mask and the inverse
    ; table to unmask.
    ; A zero inlen returns immediately: without the guard, "loop" would
    ; decrement ecx from 0 to 0xFFFFFFFF and keep running far past the buffer.
    ; General-purpose registers are saved and restored (pushad/popad).
    ; NOTE(review): lodsd/lodsb/stosb assume the direction flag is clear on entry.
    ;
mask_x86:
_mask_x86:
    pushad
    lea    esi, [esp+32+4]   ; esi -> first argument (past pushad + return address)
    lodsd
    xchg   ebx, eax          ; ebx = sbox (table base for xlatb)
    lodsd
    xchg   ecx, eax          ; ecx = inlen
    lodsd
    xchg   esi, eax          ; esi = inbuf
    jecxz  mask_exit         ; inlen == 0: nothing to do
    push   esi
    pop    edi               ; edi = inbuf, output overwrites input
mask_loop:
    lodsb                    ; al = in[i]
    xlatb                    ; al = sbox[al]
    stosb                    ; out[i] = al
    loop   mask_loop         ; while (--ecx != 0)
mask_exit:
    popad
    ret

Summary

There are lots of ways to obfuscate data. Most people will use an XOR, but its presence is a reliable indicator of obfuscation and therefore a way to detect it. Byte substitution only requires a byte-to-byte mapping and is probably harder to detect.