BUG: cond vars badly broken

Ignacio García Pérez iggarpe at terra.es
Fri Oct 29 10:46:01 CEST 2004


Hi,

I've found what I think is a very nasty bug that results in internal
corruption of RTAI structures. I attach a program that demonstrates the
problem and a short explanation.

I am not knowledgeable enough in the RTAI internals to fix this myself,
but I think this is a very serious bug that should be addressed as soon as
possible. We are not talking about some obscure and rarely used feature,
but about cond vars.

To summarize, suppose you have two tasks H and M, where task H has higher
priority than task M. You also need a mutex and a cond var:

Task M:

1- Lock the mutex
2- Start task H
3- Call rt_cond_wait
4- Unlock the mutex

Task H:

1- Lock the mutex
2- Unlock the mutex

And that's it. When task M starts task H, task H is immediately scheduled
because it has higher priority. As soon as it starts, it blocks on the
mutex (which is owned by task M). Task M is resumed by the scheduler and
calls rt_cond_wait. This call should unlock the mutex and thus allow task H
to continue; however, this does not happen. Both tasks end up blocked, and
if, from a third task, you have a look at the wait queue of the cond var,
you find a nice mess.
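
In RTAI API terms, the whole scenario boils down to the calls below (these
are lifted from the demonstration module attached at the end of this mail):

	/* Task M (priority 2) */
	rt_mutex_lock(&_mutex);         /* 1: take the mutex */
	rt_task_resume(&_task_H);       /* 2: H preempts and blocks on the mutex */
	rt_cond_wait(&_cond, &_mutex);  /* 3: should release the mutex for H */
	rt_mutex_unlock(&_mutex);       /* 4: never reached */

	/* Task H (priority 1, i.e. higher) */
	rt_mutex_lock(&_mutex);         /* blocks until M releases the mutex... */
	rt_mutex_unlock(&_mutex);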

Have a look at this:

>>> task H: c885c820
>>> task M: c885cc00
>>> task L: c885cfe0
>>> mutex: c885d3c0
>>> cond:  c885d3e0

01 - 864619226540 task M: begin (c885cc00)
02 - 864619266709 task M: locking mutex
03 - 864619309872 SEM c885d3e0: type BIN_SEM(-1) count 0 qtype 0 owner 00000000 current c885cc00
04 - 864619361260     queue
05 - 864619396623 SEM c885d3c0: type RES_SEM(1) count 1 qtype 0 owner 00000000 current c885cc00
06 - 864619444021     queue
07 - 864619465656 task M: mutex locked, starting task H
08 - 864619511422 SEM c885d3e0: type BIN_SEM(-1) count 0 qtype 0 owner 00000000 current c885cc00
09 - 864619559301     queue
10 - 864619597248 SEM c885d3c0: type RES_SEM(1) count 0 qtype 0 owner c885cc00 current c885cc00
11 - 864619644922     queue
12 - 864619668830 task H: begin (c885c820)
13 - 864619704170 task H: locking mutex
14 - 864619744536 SEM c885d3e0: type BIN_SEM(-1) count 0 qtype 0 owner 00000000 current c885c820
15 - 864619794781     queue
16 - 864619831832 SEM c885d3c0: type RES_SEM(1) count 0 qtype 0 owner c885cc00 current c885c820
17 - 864619879520     queue
18 - 864619906756 task M: task H started, it should be now blocked on the mutex
19 - 864619966612 SEM c885d3e0: type BIN_SEM(-1) count 0 qtype 0 owner 00000000 current c885cc00
20 - 864620016646     queue
21 - 864620053944 SEM c885d3c0: type RES_SEM(1) count -1 qtype 0 owner c885cc00 current c885cc00
22 - 864620109783     queue c885c874:c885c820
23 - 864620136070 task M: calling rt_cond_wait
24 - 864620186184 task L: begin (c885cfe0)
25 - 865620252018 task L: now task M should have called rt_cond_wait, say hello to mr. mess
26 - 865620351565 SEM c885d3e0: type BIN_SEM(-1) count 0 qtype 0 owner 00000000 current c885cfe0
27 - 865620447032     queue c885cc54:c885cc00 c885d3c0:00000000 c885cc54:c885cc00 c885d3c0:00000000 c885cc54:c885cc00 c885d3c0:00000000 c885cc54:c885cc00 ...
28 - 865620530899 SEM c885d3c0: type RES_SEM(1) count -1 qtype 0 owner 00000000 current c885cfe0
29 - 865620582363     queue c885cc54:c885cc00

In lines 1-11 task M locks the mutex and starts task H. As task H has higher
priority than task M, it is immediately scheduled, and we see it running in
lines 12-17: it tries to lock the mutex and becomes blocked, which causes
task M to resume (lines 18-23). Task M then calls rt_cond_wait. Since
rt_cond_wait, among other things, unlocks the mutex, we should see task H
resuming, but we don't. Lines 24-29 are printed by an auxiliary task (task
L) that just prints the cond var and mutex status and wait queues. And here
is where we see the mess:

The first element in the cond var queue (line 27) is 0xc885cc00 = task M.
The second element seems to be NULL. However, closer inspection reveals that
the second queue node is in fact the mutex itself (!!!), and what we see
next is the mutex wait queue, repeated again and again and again, since the
end of the cond var wait queue is never found. A normal wait queue looks
like this:

semaphore -> node1:task1 -> node2:task2 -> node3:task3 -> semaphore

That is, the queue end is detected because it points back to the semaphore
object itself. In the mess I described, what we have is:

semaphore(cond) -> node:task M -> semaphore(mutex) -> node:task M ->
semaphore(mutex) -> ...

(the node, c885cc54, is task M's queue node in both places). The last three
elements are the normal mutex wait queue; the cond var queue head still
points at a node that has since been linked into the mutex wait queue.
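
To make the traversal explicit, this is roughly the layout that
_sem_print() below walks (a simplified sketch; next and task are the
rt_queue fields the program actually uses, the rest of the RTAI structure
is abridged):

	struct rt_queue {
		struct rt_queue *prev;
		struct rt_queue *next;
		RT_TASK *task;	/* NULL in the queue head embedded in a SEM */
	};

	/* The queue is circular: traversal normally stops when the cursor
	   points back at the semaphore's own queue head. */
	for (q = sem->queue.next; q != &sem->queue; q = q->next)
		/* visit q->task */;

With the corruption above, walking the cond var's queue reaches the mutex's
queue head instead and then cycles mutex -> node -> mutex forever, so the
termination test never fires for the cond var. That is why _sem_print()
caps its dump at 8 entries.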

How does the mutex end up in the cond var wait queue?
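
For comparison, this is what rt_cond_wait is supposed to guarantee (the
usual pthread_cond_wait contract; a sketch only, not the actual RTAI
source, and the two helper names are made up):

	void rt_cond_wait_expected (CND *cnd, SEM *mtx)
	{
		/* Enqueuing the caller on the cond var and releasing the
		   mutex must look atomic to all other tasks: */
		enqueue_current_task(cnd);	/* hypothetical helper */
		rt_mutex_unlock(mtx);		/* here task H should wake up */
		suspend_current_task();		/* hypothetical helper: sleep
						   until signal/broadcast */
		rt_mutex_lock(mtx);		/* re-acquire before returning */
	}

Whatever the real implementation does internally, after this call both wait
queues must stay consistent; the trace above shows they do not.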



Here is the program. The first two routines are there just to print the
internal RTAI structures and display the mess. Three tasks are used: task
H, task M and task L. The last one is there just to display the mess, since
when it happens the other two are blocked as described earlier.

#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/io.h>

#include <rtai.h>
#include <rtai_schedcore.h>
#include <rtai_sched.h>
#include <rtai_sem.h>
#include <rtai_fifos.h>

MODULE_LICENSE("GPL");
EXPORT_NO_SYMBOLS;

//======================================================================
//
//	Auxiliary functions to print debug information
//

// Like rt_printk, but prefixes each message with a nanosecond timestamp
static void _printk (const char *format, ...) {
	char buf [256], *ptr = buf;
	va_list v;
	va_start(v, format);
	ptr += sprintf(ptr, "%llu ", count2nano(rt_get_time()));
	ptr += vsprintf(ptr, format, v);
	va_end(v);
	rt_printk("%s", buf);
}

// Dump a SEM's type, count, owner and wait queue (works for the mutex and
// the cond var too, since both are SEM structures underneath)
static void _sem_print (SEM *sem) {
	char buf [256], *ptr = buf;
	const char *type = "CNT_SEM";
	struct rt_queue *q; int i;
	RT_TASK *task = RT_CURRENT;

	if (sem->type > 0) type = "RES_SEM";
	else if (sem->type < 0) type = "BIN_SEM";

	ptr += sprintf(ptr, "SEM %p", sem);

	ptr += sprintf(ptr, ": type %s(%i) count %i qtype %i owner %p", type,
sem->type, sem->count, sem->qtype, sem->owndby);
	if (sem->owndby) {
		if (sem->owndby->magic != RT_TASK_MAGIC) ptr += sprintf(ptr, "(BAD MAGIC)");
	}

	if (task != NULL) {
		ptr += sprintf(ptr, " current %p", task);
	}

	ptr += sprintf(ptr, "\n");
	_printk("%s", ptr = buf);

	ptr += sprintf(ptr, "    queue");
	// Cap the dump at 8 entries so a corrupted (endlessly circular)
	// wait queue cannot hang this task
	for (i = 0, q = sem->queue.next; q != &sem->queue; q = q->next) {
		if (++i >= 8) { ptr += sprintf(ptr, " ..."); break; }
		ptr += sprintf(ptr, " %p:%p", q, q->task);
		if (q->task) {
			if (q->task->magic != RT_TASK_MAGIC) ptr += sprintf(ptr, "(MAGIC)");
		}
	}

	ptr += sprintf(ptr, "\n");
	_printk("%s", ptr = buf);
}

static RT_TASK _task_H;
static RT_TASK _task_M;
static RT_TASK _task_L;

static pmutex_t _mutex;
static CND _cond;

//======================================================================
//
//	This is the high priority task.
//

static void _task_H_proc(int arg) {

	_printk("task H: begin (%p)\n", &_task_H);

	_printk("task H: locking mutex\n");
	_sem_print(&_cond);
	_sem_print(&_mutex);

	rt_mutex_lock(&_mutex);

	_printk("task H: mutex locked\n");
	_sem_print(&_cond);
	_sem_print(&_mutex);

	// It reaches here, so no more debug messages
	rt_mutex_unlock(&_mutex);
}

//======================================================================
//
//	This is the medium priority task. Waits for the condition
//	to be signaled, and then does some processing.
//

static void _task_M_proc (int arg) {

	_printk("task M: begin (%p)\n", &_task_M);

	_printk("task M: locking mutex\n");
	_sem_print(&_cond);
	_sem_print(&_mutex);

	rt_mutex_lock(&_mutex);

	_printk("task M: mutex locked, starting task H\n");
	_sem_print(&_cond);
	_sem_print(&_mutex);

	rt_task_resume(&_task_H);

	_printk("task M: task H started, it should be now blocked on the mutex\n");
	_sem_print(&_cond);
	_sem_print(&_mutex);

	_printk("task M: calling rt_cond_wait\n");
	rt_cond_wait(&_cond, &_mutex);

	// It never reaches here, so no more debug messages...
	rt_mutex_unlock(&_mutex);
}

//======================================================================
//
//	This is the low priority task. It just waits for the mess to happen
//	and then shows it.
//

static void _task_L_proc (int arg) {

	_printk("task L: begin (%p)\n", &_task_L);

	// Give some time for the mess to happen
	rt_sleep(nano2count(1000000000LL));

	// Show the mess
	_printk("task L: now task M should have called rt_cond_wait, say hello to
mr. mess\n");
	_sem_print(&_cond);
	_sem_print(&_mutex);
}

//======================================================================
//======================================================================
//======================================================================
//======================================================================
//
//	Module init/cleanup.
//

int init_module(void) {

	// One shot mode !!!
	rt_set_oneshot_mode();
	rt_preempt_always(1);
	start_rt_timer(0);

    printk("task H: %p\n", &_task_H);
    printk("task M: %p\n", &_task_M);
    printk("task L: %p\n", &_task_L);

    printk("mutex: %p\n", &_mutex);
    printk("cond:  %p\n", &_cond);

	rt_mutex_init(&_mutex);
	rt_cond_init(&_cond);

	// RTAI: a lower number means a higher priority, so H (1) preempts M (2) and L (3)
	rt_task_init(&_task_H, &_task_H_proc, 0, 0x4000, 1, 0, NULL);
	rt_task_init(&_task_M, &_task_M_proc, 0, 0x4000, 2, 0, NULL);
	rt_task_init(&_task_L, &_task_L_proc, 0, 0x4000, 3, 0, NULL);

	rt_task_resume(&_task_M);
	rt_task_resume(&_task_L);

	return 0;
}

void cleanup_module(void) {

	stop_rt_timer();
	rt_task_delete(&_task_L);
	rt_task_delete(&_task_M);
	rt_task_delete(&_task_H);

	rt_cond_delete(&_cond);
	rt_mutex_delete(&_mutex);
}

//======================================================================




