Emulation of AESENC and AESENCLAST instructions in x86 assembly

Introduction

aesenc and aesenclast are AES-NI instructions implemented on the x86 architecture.

Recently, the well-known cryptographer J.P. Aumasson published C code that emulates these instructions, which is very useful for emulators and for virtual machines in general.

The combination of ShiftRows and SubBytes in one line inspired me to write an implementation in x86 assembly.

The following code is not optimized for speed, nor does it provide any protection against electromagnetic (side-channel) attacks.

Galois Multiplication

Multiplication by 2 in GF(2^8), applied to four packed bytes at once. It is used by both the MixColumns and SubBytes layers. Based on an algorithm by Andreas Hoheisel.

// Multiply four packed GF(2^8) elements by 2 (one element per byte of w).
// Each byte is shifted left by one bit; any byte whose top bit was set is
// then reduced by XOR-ing in 0x1B, the low byte of the AES polynomial.
uint32_t gf_mul2 (uint32_t w) {
    // top bit of every byte: these bytes overflow when doubled
    const uint32_t carry   = w & 0x80808080u;
    // remaining seven bits of every byte, doubled without crossing byte lanes
    const uint32_t doubled = (w & 0x7F7F7F7Fu) << 1;
    // 0x1B in every lane that overflowed, 0x00 elsewhere
    const uint32_t reduce  = (carry >> 7) * 0x0000001Bu;

    return doubled ^ reduce;
}

The assembly …

Sub Bytes

Derived from code here

// AES S-box computed without a lookup table: invert x in GF(2^8)
// (with 0 mapping to 0), then apply the affine transformation.
uint8_t sub_byte (uint8_t x)
{
    uint8_t inv = x;   // multiplicative inverse of x; stays 0 when x is 0
    uint8_t n, cnt, out;

    if (x != 0) {
      // discrete logarithm of x to base 3: first n >= 1 with 3^n == x
      inv = 1;
      n   = 1;
      do {
        inv ^= gf_mul2(inv);          // inv *= 3  (inv ^ 2*inv)
        if (inv == x) break;
        n++;
      } while (n != 0);

      // anti-logarithm: 3^(255 - n) is the inverse of 3^n, and ~n == 255 - n
      inv = 1;
      for (cnt = (uint8_t)~n; cnt != 0; cnt--) {
        inv ^= gf_mul2(inv);
      }
    }

    // affine transformation: XOR inv with its four left rotations, add 0x63
    out = inv;
    for (cnt = 4; cnt != 0; cnt--) {
      inv  = ROTL8(inv, 1);
      out ^= inv;
    }
    return out ^ 0x63;
}

Here is the assembly code. Bear in mind that it has no countermeasures against side-channel attacks.

; ***************************************************************
; uint8_t sub_byte (uint8_t x)
;
; AES S-box computed without a table: invert x in GF(2^8) via
; logarithm / anti-logarithm to base 3, then apply the affine
; transformation.
;
; In:    al = x
; Out:   al = S-box value of x
; Saved: every general register except al (pushad/popad; the
;        result is written into the saved-eax slot of the frame)
; Flags: clobbered
; Note:  assumes gf_mul2 takes its operand in eax, returns the
;        result in eax and leaves ebx/edx intact - confirm against
;        the gf_mul2 implementation.
;        Not constant time: the loop counts depend on x.
; ***************************************************************
sub_byte:
    pushad
    test   al, al            ; if (x)
    xchg   eax, edx          ; y = x  (xchg leaves the flags from test intact)
    jz     sb_l2             ; x == 0: skip the inversion, dl is already 0
    ; calculate logarithm gen 3
    mov    bh, 1             ; i = 1
    mov    bl, 1             ; y = 1
sb_l0:
    mov    al, bl            ; y ^= gf_mul2(y)
    call   gf_mul2
    xor    bl, al
    cmp    bl, dl            ; if (y == x) break;
    jz     sb_lx
    inc    bh                ; i++
    jnz    sb_l0             ; i != 0
sb_lx:
    ; calculate anti-logarithm gen 3
    xor    bl, bl            ; i = 0
    mov    dl, 1             ; y = 1
    xor    bh, -1            ; x = ~i (xor is used because NOT would not set ZF)
    jz     sb_l2             ; x == 0: no iterations, y stays 1
sb_l1:
    mov    al, dl            ; al = y
    call   gf_mul2
    xor    dl, al            ; y ^= gf_mul2(y)
    inc    bl                ; i++
    cmp    bl, bh            ; i < x
    jnz    sb_l1             ;
sb_l2:
    mov    al, dl            ; sb = y (zero if we came straight from the x == 0 test)
    ; LOOP counts with the whole of ecx, so load the full register.
    ; Setting only cl would leave the caller's upper 24 bits in the count.
    push   4
    pop    ecx               ; loop 4 times
sb_l3:
    rol    dl, 1             ; y = ROTL8(y, 1);
    xor    al, dl            ; sb ^= y;
    loop   sb_l3
    xor    al, 0x63          ; sb ^= 0x63
    mov    [esp+1ch], al     ; store result in the eax slot of the pushad frame
    popad
    ret

AESENC / AESENCLAST

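These two instructions are combined into a single function by means of a parameter, last.
Set last to zero for AESENC, or to one for AESENCLAST (which skips the mix columns step).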

void aesenc (void *state, void *key, int last) {
    w128_t  *s, *k, v;
    uint32_t i, w;
    
    s=(w128_t*)state;
    k=(w128_t*)key;

    // sub bytes and shift rows
    for (i=0; i<16; i++) {    
      v.b[((((i >> 2) + 4 - (i & 3) ) & 3) * 4) + (i & 3)] = sub_byte(s->b[i]);
    }
    
    // if not last round
    if (!last) {
      // mix columns
      for (i=0; i<4; i++) {
        w = v.w[i];
        v.w[i] = ROTR32(w,  8) ^ 
                 ROTR32(w, 16) ^ 
                 ROTL32(w,  8) ^ 
                 XT(ROTR32(w, 8) ^ w);
      }
    }
    // add round key
    for (i=0; i<4; i++) {
      s->w[i] = v.w[i] ^ k->w[i];
    }
}

The x86 assembly code..

; ***************************************************************
; void aesenc (void *s, void *rk, int last)
;
; One AES encryption round on the 16-byte state at s using the
; 16-byte round key at rk; the result is written back to s.
; last == 0 : full round (AESENC)
; last != 0 : mix columns is skipped (AESENCLAST)
;
; In:    three dword arguments on the stack at [esp+4], [esp+8]
;        and [esp+12] on entry
; Out:   nothing; all general registers are restored by popad
; Stack: 32 bytes of scratch (one pushad frame), 16 of them used
; Note:  assumes the direction flag is clear so that lodsd, stosd
;        and cmpsb step forwards, and that gf_mul2 takes/returns
;        its value in eax without disturbing ebx, ecx, esi or edi
;        - confirm against the gf_mul2 implementation.
; ***************************************************************
_aesencx:
    pushad
    xor    ecx, ecx          ; i = 0
    lea    esi, [esp+32+4]   ; esi -> first argument (skip pushad frame + return address)
    lodsd                    ; eax = s
    push   eax               ; save state
    lodsd
    xchg   eax, ebx          ; ebx = round key
    lodsd                    ; eax = last
    pop    esi               ; esi = state
    pushad                   ; v = alloc(32)
    mov    edi, esp          ; edi = v
    test   eax, eax          ; ZF = (last == 0), same test as the C code's if (!last)
    pushfd                   ; keep the result: the loop below destroys the flags
subbytes_shiftrows:
    mov    al, [esi+ecx]     ; al = sub_byte(s[i])
    call   sub_byte
    mov    edx, ecx          ; edx = i
    mov    ebp, ecx          ; ebp = i
    shr    ebp, 2            ; ebp >>= 2
    and    edx, 3            ; edx &= 3
    sub    ebp, edx          ; ebp -= edx
    and    ebp, 3            ; ebp &= 3
    lea    edx, [edx+ebp*4]  ; edx = (edx + ebp * 4)
    mov    [edi+edx], al     ; v.b[edx] = al
    inc    ecx               ; i++
    cmp    cl, 16            ; for (i=0; i<16; i++)
    jnz    subbytes_shiftrows
    popfd                    ; ZF = (last == 0)
    jnz    add_round_key     ; last round: skip mix columns
    pushad                   ; mix columns reuses ebx, ecx, esi and edi
    mov    cl, 4             ; ecx = 4 (ecx was 16, so its upper bits are zero)
mix_columns:
    mov    ebx, [edi]        ; w0 = v.w[i]
    mov    eax, ebx          ; w1 = ROTR32(w0, 8)
    ror    eax, 8
    mov    esi, eax          ; w2 = ROTR32(w0, 8)
    xor    eax, ebx          ; w1 ^= w0
    call   gf_mul2
    xor    esi, eax          ; w2 ^= gf_mul2(w1)
    ror    ebx, 16           ; w0 = ROTR32(w0, 16)
    xor    esi, ebx          ; w2 ^= w0
    ror    ebx, 8            ; w0 = ROTR32(w0, 8), i.e. ROTL32 of the original by 8
    xor    ebx, esi          ; w0 ^= w2
    xchg   ebx, eax          ; eax = w0
    stosd                    ; v.w[i] = eax
    loop   mix_columns
    popad                    ; back to ebx = rk, ecx = 16, esi = s, edi = v
add_round_key:               ; for (i=0; i<16; i++) {   (ecx = 16 on both paths)
    mov    al, [edi]         ;   al = v.b[i]
    xor    al, [ebx]         ;   al ^= rk[i]
    inc    ebx               ;
    mov    [esi], al         ;   s[i] = al
    cmpsb                    ;   used only to advance esi and edi by one
    loop   add_round_key     ; }
    popad                    ; release memory
    popad                    ; restore registers
    ret

The assembly code is about 195 bytes in size, compared with approximately 300 bytes for the assembly generated from the C version.

See original code by Aumasson here

Advertisements
This entry was posted in assembly, cryptography, encryption, security and tagged , , , , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s