xorl %eax, %eax

GRKERNSEC_PROC_MEMMAP Remove Addresses From /proc/<pid>/[smaps|maps|stat]

leave a comment »

This is another nice feature but it depends on some PaX protections (you’ll see later in this post) that I haven’t discussed yet. Nevertheless, here is its description…

# Boolean option; it can only be selected when PAX_NOEXEC or PAX_ASLR is
# enabled, and in that case it defaults to y.
config GRKERNSEC_PROC_MEMMAP
	bool "Remove addresses from /proc/<pid>/[smaps|maps|stat]"
	default y if (PAX_NOEXEC || PAX_ASLR)
	depends on PAX_NOEXEC || PAX_ASLR
	help
	  If you say Y here, the /proc/<pid>/maps and /proc/<pid>/stat files will
	  give no information about the addresses of its mappings if
	  PaX features that rely on random addresses are enabled on the task.
	  If you use PaX it is greatly recommended that you say Y here as it
	  closes up a hole that makes the full ASLR useless for suid
	  binaries.

The patching begins with kernel/futex.c and more specifically with the following system call…

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:        pid of the process [zero for current task]
 * @head_ptr:   pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:    pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
                struct robust_list_head __user * __user *, head_ptr,
                size_t __user *, len_ptr)
{
        struct robust_list_head __user *head;
        unsigned long ret;
#ifndef CONFIG_GRKERNSEC_PROC_MEMMAP
 	const struct cred *cred = current_cred(), *pcred;
#endif

        if (!futex_cmpxchg_enabled)
                return -ENOSYS;
    ...
                ret = -EPERM;
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
		if (!ptrace_may_access(p, PTRACE_MODE_READ))
			goto err_unlock;
#else
                pcred = __task_cred(p);
                if (cred->euid != pcred->euid &&
                    cred->euid != pcred->uid &&
                    !capable(CAP_SYS_PTRACE))
                        goto err_unlock;
#endif
                head = p->robust_list;
                rcu_read_unlock();
        }
    ...
err_unlock:
        rcu_read_unlock();

        return ret;
}

If ‘PROC_MEMMAP’ is enabled, the permission check is delegated to ptrace_may_access() with ‘PTRACE_MODE_READ’, meaning that the caller must be allowed to read the target task in the ptrace(2) sense. If that check fails, the code jumps to the ‘err_unlock’ label and returns ‘-EPERM’. If grsecurity’s protection is disabled, the stock Linux kernel credential comparison (matching euid/uid or having ‘CAP_SYS_PTRACE’) is compiled in instead. The next one resides at kernel/futex_compat.c and it’s quite similar to the above one…

asmlinkage long
compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
                           compat_size_t __user *len_ptr)
{
        struct compat_robust_list_head __user *head;
        unsigned long ret;
#ifndef CONFIG_GRKERNSEC_PROC_MEMMAP
	const struct cred *cred = current_cred();
	const struct cred *pcred;
#endif

        if (!futex_cmpxchg_enabled)
                return -ENOSYS;
    ...
                p = find_task_by_vpid(pid);
                if (!p)
                        goto err_unlock;
                ret = -EPERM;
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
		if (!ptrace_may_access(p, PTRACE_MODE_READ))
			goto err_unlock;
#else
                pcred = __task_cred(p);
                if (cred->euid != pcred->euid &&
                    cred->euid != pcred->uid &&
                    !capable(CAP_SYS_PTRACE))
                        goto err_unlock;
#endif
                head = p->compat_robust_list;
    ...
err_unlock:
        rcu_read_unlock();

        return ret;
}

Another system call is patched at mm/mempolicy.c file.

SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
                const unsigned long __user *, old_nodes,
                const unsigned long __user *, new_nodes)
{
        const struct cred *cred = current_cred(), *tcred;
        struct mm_struct *mm = NULL;
        struct task_struct *task;
        nodemask_t task_nodes;
        int err;
        nodemask_t *old;
        nodemask_t *new;
        NODEMASK_SCRATCH(scratch);
    ...
        mm = get_task_mm(task);
        read_unlock(&tasklist_lock);

        err = -EINVAL;
        if (!mm)
                goto out;

#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
	if (mm != current->mm &&
	    (mm->pax_flags & MF_PAX_RANDMMAP || mm->pax_flags & MF_PAX_SEGMEXEC)) {
		err = -EPERM;
		goto out;
	}
#endif

        /*
         * Check if this process has the right to modify the specified
         * process. The right exists if the process has administrative
         * capabilities, superuser privileges or the same
         * userid as the target process.
         */
        rcu_read_lock();
        tcred = __task_cred(task);
        if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
	    cred->uid  != tcred->suid && !capable(CAP_SYS_NICE)) {
                rcu_read_unlock();
                err = -EPERM;
                goto out;
        }
    ...
out:
        if (mm)
                mmput(mm);
        NODEMASK_SCRATCH_FREE(scratch);

        return err;
}

This system call is used to move all pages in a process to another set of nodes as we can read in its man page. The above patch checks whether the target task’s memory management structure is different from the current process’ one and whether that structure has either the ‘MF_PAX_RANDMMAP’ or the ‘MF_PAX_SEGMEXEC’ flag set. If both conditions hold, it sets the return value to ‘-EPERM’ (Permission Denied) and jumps to the ‘out’ label; a process operating on its own address space is never rejected by this check. The two flags are part of PaX protection for randomizing mmap(2)’s address space and providing non-executable pages on IA-32 architecture using segmentation logic.
Similar patching is performed to move_pages(2) system call that can be found at mm/migrate.c…

/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */
SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
                const void __user * __user *, pages,
                const int __user *, nodes,
                int __user *, status, int, flags)
{
        const struct cred *cred = current_cred(), *tcred;
        struct task_struct *task;
        struct mm_struct *mm;
        int err;
    ...
        mm = get_task_mm(task);
        read_unlock(&tasklist_lock);

        if (!mm)
                return -EINVAL;

#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
	if (mm != current->mm &&
	    (mm->pax_flags & MF_PAX_RANDMMAP || mm->pax_flags & MF_PAX_SEGMEXEC)) {
		err = -EPERM;
		goto out;
	}
#endif

        /*
         * Check if this process has the right to modify the specified
         * process. The right exists if the process has administrative
         * capabilities, superuser privileges or the same
         * userid as the target process.
         */
        rcu_read_lock();
        tcred = __task_cred(task);
        if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
            cred->uid  != tcred->suid && !capable(CAP_SYS_NICE)) {
                rcu_read_unlock();
                err = -EPERM;
                goto out;
        }
    ...
out:
        mmput(mm);
        return err;
}

The aim of this patch in both of the above system calls is to ensure that users cannot move pages that are either randomized or non-executable using PaX’s implementations. The next addition is in the keyboard driver located at drivers/char/keyboard.c.

/*
 * Comma-separated list of the function-key handler names.  The list is
 * expanded twice below: once to declare every handler and once to fill the
 * fn_handler[] table, so declaration order and table order cannot diverge.
 */
#define FN_HANDLERS\
        fn_null,        fn_enter,       fn_show_ptregs, fn_show_mem,\
        fn_show_state,  fn_send_intr,   fn_lastcons,    fn_caps_toggle,\
        fn_num,         fn_hold,        fn_scroll_forw, fn_scroll_back,\
        fn_boot_it,     fn_caps_on,     fn_compose,     fn_SAK,\
        fn_dec_console, fn_inc_console, fn_spawn_con,   fn_bare_num

/* Function type shared by every handler named in FN_HANDLERS. */
typedef void (fn_handler_fn)(struct vc_data *vc);
/* Declares all handlers in a single declaration (one declarator per name). */
static fn_handler_fn FN_HANDLERS;
/* Table of handler pointers; k_spec() indexes it with the key value. */
static fn_handler_fn *fn_handler[] = { FN_HANDLERS };
    ...
/*
 * k_spec() - dispatch a special key to its entry in fn_handler[]
 * @vc:      virtual console, passed through unchanged to the handler
 * @value:   index into fn_handler[]; out-of-range values are ignored
 * @up_flag: non-zero for a key release, which is ignored
 *
 * In raw and medium-raw keyboard modes only the SAK key is dispatched.
 * When CONFIG_GRKERNSEC_PROC or CONFIG_GRKERNSEC_PROC_MEMMAP is set, the
 * fn_show_state, fn_show_ptregs and fn_show_mem handlers are never invoked.
 */
static void k_spec(struct vc_data *vc, unsigned char value, char up_flag)
{
	if (up_flag)
		return;
	if (value >= ARRAY_SIZE(fn_handler))
		return;
	if ((kbd->kbdmode == VC_RAW ||
	     kbd->kbdmode == VC_MEDIUMRAW) &&
	     value != KVAL(K_SAK))
		return;		/* SAK is allowed even in raw mode */

#if defined(CONFIG_GRKERNSEC_PROC) || defined(CONFIG_GRKERNSEC_PROC_MEMMAP)
	{
		/*
		 * Keep the handler in a properly typed function pointer:
		 * converting it to void * and comparing that against
		 * function designators is not valid ISO C.
		 */
		fn_handler_fn *func = fn_handler[value];

		if (func == fn_show_state || func == fn_show_ptregs ||
		    func == fn_show_mem)
			return;
	}
#endif

	fn_handler[value](vc);
}

This routine is a special keyboard handler and as you can read if either ‘PROC_MEMMAP’ or ‘PROC’ of grsecurity are enabled it will add a check on the FN handler. If the function requested is either fn_show_state() (show task’s state information), fn_show_ptregs() (show task’s registers) or fn_show_mem() (show memory information) it will immediately return without executing the requested handler. This is done because all of them leak important kernel information.
Our next stop is the fs/proc/array.c file.

#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
/*
 * Non-zero when @_mm is non-NULL, is not the current task's mm, and has
 * MF_PAX_RANDMMAP or MF_PAX_SEGMEXEC set in its pax_flags.
 *
 * The argument is parenthesized so any pointer expression can be passed
 * safely; note that it is still evaluated more than once.
 */
#define PAX_RAND_FLAGS(_mm) ((_mm) != NULL && (_mm) != current->mm && \
			     (((_mm)->pax_flags & MF_PAX_RANDMMAP) || \
			      ((_mm)->pax_flags & MF_PAX_SEGMEXEC)))
#endif

This is a definition of a C macro that performs the exact check as the one we saw earlier on migrate_pages(2) and move_pages(2) system calls. A few lines later this is used like this:

static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                        struct pid *pid, struct task_struct *task, int whole)
{
        unsigned long vsize, eip, esp, wchan = ~0UL;
        long priority, nice;
        int tty_pgrp = -1, tty_nr = 0;
    ...
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
	if (PAX_RAND_FLAGS(mm)) {
		eip = 0;
		esp = 0;
		wchan = 0;
	}
#endif
    ...
        seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
                pid_nr_ns(pid, ns),
                tcomm,
                state,
                ppid,
                pgid,
                sid,
                tty_nr,
                tty_pgrp,
                task->flags,
                min_flt,
                cmin_flt,
                maj_flt,
                cmaj_flt,
                cputime_to_clock_t(utime),
                cputime_to_clock_t(stime),
                cputime_to_clock_t(cutime),
                cputime_to_clock_t(cstime),
                priority,
                nice,
                num_threads,
                start_time,
                vsize,
                mm ? get_mm_rss(mm) : 0,
                rsslim,
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
		PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->start_code : 0),
		PAX_RAND_FLAGS(mm) ? 1 : (mm ? mm->end_code : 0),
		PAX_RAND_FLAGS(mm) ? 0 : ((permitted && mm) ? mm->start_stack : 0),
#else
                mm ? mm->start_code : 0,
                mm ? mm->end_code : 0,
                (permitted && mm) ? mm->start_stack : 0,
#endif
                esp,
                eip,
                /* The signal information here is obsolete.
                 * It must be decimal for Linux 2.0 compatibility.
                 * Use /proc/#/status for real-time signals.
                 */
                task->pending.signal.sig[0] & 0x7fffffffUL,
                task->blocked.sig[0] & 0x7fffffffUL,
                sigign      .sig[0] & 0x7fffffffUL,
                sigcatch    .sig[0] & 0x7fffffffUL,
                wchan,
                0UL,
                0UL,
                task->exit_signal,
                task_cpu(task),
                task->rt_priority,
                task->policy,
                (unsigned long long)delayacct_blkio_ticks(task),
                cputime_to_clock_t(gtime),
                cputime_to_clock_t(cgtime));
        if (mm)
                mmput(mm);
        return 0;
}

To avoid EIP, ESP and WCHAN leaks when PaX randomization is enabled on the inspected task, it sets these three variables to zero before printing them using seq_printf(). Also, if randomization is enabled it does not print the real start and end addresses of the code segment or the start of the stack segment; the placeholder values 1, 1 and 0 are printed in their place.
Moving next to fs/proc/base.c file we have the definition of an identical C macro…

#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
/*
 * Non-zero when @_mm is non-NULL, is not the current task's mm, and has
 * MF_PAX_RANDMMAP or MF_PAX_SEGMEXEC set in its pax_flags.
 *
 * The argument is parenthesized so any pointer expression can be passed
 * safely; note that it is still evaluated more than once.
 */
#define PAX_RAND_FLAGS(_mm) ((_mm) != NULL && (_mm) != current->mm && \
			     (((_mm)->pax_flags & MF_PAX_RANDMMAP) || \
			      ((_mm)->pax_flags & MF_PAX_SEGMEXEC)))
#endif

as well as this patched version of proc_pid_auxv() routine.

/*
 * proc_pid_auxv() - copy a task's saved auxiliary vector into @buffer
 * @task:   task whose mm->saved_auxv is read
 * @buffer: destination; at most PAGE_SIZE bytes are written
 *
 * Returns the number of bytes copied, or 0 when the task has no mm or
 * when CONFIG_GRKERNSEC_PROC_MEMMAP refuses the read.
 */
static int proc_pid_auxv(struct task_struct *task, char *buffer)
{
	struct mm_struct *mm = get_task_mm(task);
	unsigned int nwords;
	int res;

	if (!mm)
		return 0;

#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
	/* allow if we're currently ptracing this task */
	if (PAX_RAND_FLAGS(mm) &&
	    !((task->ptrace & PT_PTRACED) && task->parent == current)) {
		mmput(mm);
		return 0;
	}
#endif

	/* Entries are (type, value) pairs; count up to and including AT_NULL. */
	nwords = 2;
	while (mm->saved_auxv[nwords - 2] != 0)
		nwords += 2;

	res = nwords * sizeof(mm->saved_auxv[0]);
	if (res > PAGE_SIZE)
		res = PAGE_SIZE;
	memcpy(buffer, mm->saved_auxv, res);

	mmput(mm);
	return res;
}

This operation is denied when the target’s address space is PaX-randomized (and is not the caller’s own) unless the caller is currently ptracing the task, that is, unless the task has ‘PT_PTRACED’ set and its parent is the current process. When it is denied, nothing is written to the ‘auxv’ /proc file and zero is returned. As the comment says, reading stays allowed while we are ptrace(2)’ing the task, and it is always allowed for tasks that are not randomized.
Also, a few lines below we have:

#if defined(CONFIG_HAVE_ARCH_TRACEHOOK) && !defined(CONFIG_GRKERNSEC_PROC_MEMMAP)
/*
 * proc_pid_syscall() - format a task's syscall state into @buffer
 *
 * The visible part of this excerpt prints the syscall number, six argument
 * words, and the stack and program counters, and returns sprintf()'s result.
 * The whole function is compiled out when CONFIG_GRKERNSEC_PROC_MEMMAP is set.
 */
static int proc_pid_syscall(struct task_struct *task, char *buffer)
{
    ...
        return sprintf(buffer,
                       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
                       nr,
                       args[0], args[1], args[2], args[3], args[4], args[5],
                       sp, pc);
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK && !CONFIG_GRKERNSEC_PROC_MEMMAP */

Which will be disabled by ‘PROC_MEMMAP’. This is a tracing function that prints the system call’s number along with its arguments and both stack and program pointers. Obviously, this is also removed from the thread groups’ file…

/*
 * Thread groups
 */
static const struct file_operations proc_task_operations;
static const struct inode_operations proc_task_inode_operations;

static const struct pid_entry tgid_base_stuff[] = {
    ...
#if defined(CONFIG_HAVE_ARCH_TRACEHOOK) && !defined(CONFIG_GRKERNSEC_PROC_MEMMAP)
        INF("syscall",    S_IRUSR, proc_pid_syscall),
#endif
    ...
};

using the same pre-processor condition. The entry is removed from the tasks’ entries in the same way:

/*
 * Tasks
 */
static const struct pid_entry tid_base_stuff[] = {
    ...
#if defined(CONFIG_HAVE_ARCH_TRACEHOOK) && !defined(CONFIG_GRKERNSEC_PROC_MEMMAP)
        INF("syscall",   S_IRUSR, proc_pid_syscall),
#endif
};

Next, there is a VMA printing routine being patched at fs/proc/task_mmu.c like this:

/*
 * show_map_vma() - print one mapping line for @vma into @m
 * @m:   seq_file receiving the output
 * @vma: mapping to describe
 *
 * Prints the address range, permission characters, file offset, device
 * numbers and inode number.  With CONFIG_GRKERNSEC_PROC_MEMMAP, the start
 * address, end address and offset are printed as zero whenever
 * PAX_RAND_FLAGS(mm) is true.
 */
static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	int flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	dev_t dev = 0;
	int len;

	if (file) {
		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	/*
	 * Exactly one start and one end value feed the two leading %08lx
	 * conversions; an extra leading argument would shift every later
	 * argument against the format string.  %n stores the number of
	 * characters written so far into len.
	 */
	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
			PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_start,
			PAX_RAND_FLAGS(mm) ? 0UL : vma->vm_end,
#else
			vma->vm_start,
			vma->vm_end,
#endif
			flags & VM_READ ? 'r' : '-',
			flags & VM_WRITE ? 'w' : '-',
			flags & VM_EXEC ? 'x' : '-',
			flags & VM_MAYSHARE ? 's' : 'p',
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
			PAX_RAND_FLAGS(mm) ? 0UL : pgoff,
#else
			pgoff,
#endif
			MAJOR(dev), MINOR(dev), ino, &len);
    ...
	seq_putc(m, '\n');
}

First of all, Linux’s stack guard page check was removed, and then you can see some checks added in the VMA printing routine above. If the address space is randomized (and does not belong to the reading process), the start address, end address and file offset of each mapping are printed as zero to avoid information leaks that could help a potential attacker.
Similar patch is applied to show_smap() routine which you can see here:

/*
 * show_smap() - seq_file show callback printing one smaps record
 * @m: seq_file; m->private holds the proc_maps_private for this read
 * @v: the vm_area_struct to describe
 *
 * Gathers per-VMA statistics with walk_page_range(), prints the mapping line
 * via show_map_vma(), then prints the size and statistics in kB.  With
 * CONFIG_GRKERNSEC_PROC_MEMMAP the walk is skipped and Size is printed as 0
 * whenever PAX_RAND_FLAGS(vma->vm_mm) is true, so the zeroed counters from
 * the memset() are what gets printed.  Always returns 0.
 */
static int show_smap(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct task_struct *task = priv->task;
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss;
	struct mm_walk smaps_walk = {
		.pmd_entry = smaps_pte_range,
		.mm = vma->vm_mm,
		.private = &mss,
	};

	memset(&mss, 0, sizeof mss);
	mss.vma = vma;	/* set once; no second assignment is needed below */

#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
	if (!PAX_RAND_FLAGS(vma->vm_mm)) {
#endif
		/* mmap_sem is held in m_start */
		if (vma->vm_mm && !is_vm_hugetlb_page(vma))
			walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
	}
#endif
	show_map_vma(m, vma);

	seq_printf(m,
		   "Size:           %8lu kB\n"
		   "Rss:            %8lu kB\n"
		   "Pss:            %8lu kB\n"
		   "Shared_Clean:   %8lu kB\n"
		   "Shared_Dirty:   %8lu kB\n"
		   "Private_Clean:  %8lu kB\n"
		   "Private_Dirty:  %8lu kB\n"
		   "Referenced:     %8lu kB\n"
		   "Swap:           %8lu kB\n"
		   "KernelPageSize: %8lu kB\n"
		   "MMUPageSize:    %8lu kB\n",
#ifdef CONFIG_GRKERNSEC_PROC_MEMMAP
		   PAX_RAND_FLAGS(vma->vm_mm) ? 0UL : (vma->vm_end - vma->vm_start) >> 10,
#else
		   (vma->vm_end - vma->vm_start) >> 10,
#endif
		   mss.resident >> 10,
		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
		   mss.shared_clean  >> 10,
		   mss.shared_dirty  >> 10,
		   mss.private_clean >> 10,
		   mss.private_dirty >> 10,
		   mss.referenced >> 10,
		   mss.swap >> 10,
		   vma_kernel_pagesize(vma) >> 10,
		   vma_mmu_pagesize(vma) >> 10);

	if (m->count < m->size)  /* vma is copied successfully */
		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
	return 0;
}

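If PaX randomization is in effect for the VMA’s address space, the page-table walk that gathers the statistics is skipped, so every counter keeps the zero it was given by memset(). Otherwise it performs the stock Linux kernel behavior of walking the VMA’s page range with walk_page_range(); the comment there merely notes that the mmap semaphore has already been taken in m_start(). In addition to this, it prints a size of zero instead of the real size of the VMA if it’s randomized.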

Written by xorl

November 20, 2010 at 22:09

Posted in grsecurity, linux, security

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s