* [PATCH] LTT for 2.5.41: Core infrastructure 3/3
@ 2002-10-10  6:05 Karim Yaghmour
From: Karim Yaghmour @ 2002-10-10  6:05 UTC
  To: linux-kernel, LTT-Dev


This is the second piece of kernel/trace.c, just cat this to the
earlier piece. Sorry, I'd avoid this if I could.

+/**
+ *	init_buffer_control: - Init buffer control struct for new tracing run.
+ *	@buf_ctrl: buffer control struct to be initialized
+ *	@use_lockless: which tracing scheme to use, TRUE for lockless
+ *	@buffer_number_bits: number of bits in index word for buffer number
+ *	@offset_bits: number of bits in index word to use for buffer offset
+ *
+ *	Sanity of param values should be checked by the caller, i.e.
+ *	buffer_number_bits and offset_bits must reflect sane buffer
+ *	sizes/numbers.
+ */
+static void init_buffer_control(struct buffer_control * buf_ctrl,
+				int use_lockless,
+				u8 buffer_number_bits,
+				u8 offset_bits)
+{
+	unsigned i, j;
+	int n_buffers = TRACE_MAX_BUFFER_NUMBER(buffer_number_bits);
+	
+	using_lockless = use_lockless;
+	buffer_switches_pending = 0;
+	
+	for(i = 0; i < num_cpus; i++) {
+		_buffer_id(buf_ctrl, i) = 0;
+		_events_lost(buf_ctrl, i) = 0;
+
+		/* Set things up to trigger per-cpu initialization */ 
+		atomic_set(&_waiting_for_cpu(buf_ctrl, i), 
+			   LTT_INITIALIZE_TRACE);
+		_trace_buffer(buf_ctrl, i) = trace_buf + (i * cpu_buf_size);
+		if(using_lockless == 0) {
+			atomic_set(&_signal_sent(buf_ctrl, i), 0);
+			write_buf(i) = trace_buffer(i);
+			read_buf(i) = trace_buffer(i) + buf_size;
+			write_buf_end(i) = write_buf(i) + buf_size;
+			read_buf_end(i) = read_buf(i) + buf_size;
+			current_write_pos(i) = write_buf(i);
+			read_limit(i) = read_buf(i);
+			write_limit(i) = write_buf_end(i)
+				- TRACER_LAST_EVENT_SIZE;
+		} else {
+			_index(buf_ctrl, i) = start_reserve;
+			_bufno_bits(buf_ctrl, i) = buffer_number_bits;
+			_n_buffers(buf_ctrl, i) = 
+				TRACE_MAX_BUFFER_NUMBER(buffer_number_bits);
+			_offset_bits(buf_ctrl, i) = offset_bits;
+			_offset_mask(buf_ctrl, i) =  
+				TRACE_BUFFER_OFFSET_MASK(offset_bits);
+			_index_mask(buf_ctrl, i) =  
+				(1UL << (buffer_number_bits + offset_bits)) - 1;
+			_buffers_produced(buf_ctrl, i) = 0;
+			_buffers_consumed(buf_ctrl, i) = 0;
+			_buffers_full(buf_ctrl, i) = 0;
+
+			/* When a new buffer is switched to, TRACE_BUFFER_SIZE
+			   is subtracted from its fill_count in order to 
+			   initialize it to the empty state.  The reason it's 
+			   done this way is because an intervening event may 
+			   have already been written to the buffer while we 
+			   were in the process of switching and thus blindly 
+			   initializing to 0 would erase that event.  The first
+			   buffer is initialized to 0 and the others are 
+			   initialized to TRACE_BUFFER_SIZE because the very 
+			   first buffer we ever see won't be initialized in 
+			   that way by the switching code and since there's 
+			   never been an event, we know it should be 0 and that
+			   it must be explicitly initialized that way before 
+			   logging begins.  start_reserve is factored into
+			   the end-of-buffer processing, so isn't added to the
+			   fill counts here, except for the first. */
+			atomic_set(&_fill_count(buf_ctrl, i, 0), 
+				   (int)start_reserve);
+			for(j = 1; j < n_buffers; j++)
+				atomic_set(&_fill_count(buf_ctrl, i, j),
+				   (int)TRACE_BUFFER_SIZE(offset_bits));
+			
+		}
+	}
+}
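+
+/*
+ * Illustration only, not part of the tracing logic proper: how a
+ * lockless index word decomposes, assuming TRACE_BUFFER_OFFSET_MASK()
+ * and TRACE_MAX_BUFFER_NUMBER() behave as their uses above suggest.
+ * With offset_bits = 13 (8KB buffers) and buffer_number_bits = 3
+ * (8 buffers per CPU):
+ *
+ *	index  = 0x2a30				(hypothetical index word)
+ *	offset = index & ((1UL << 13) - 1)		-> 2608
+ *	bufno  = (index >> 13) & ((1UL << 3) - 1)	-> 1
+ *
+ * Bits 0..offset_bits-1 hold the write offset inside the current
+ * buffer, the bits above them select the buffer, and _index_mask
+ * discards everything beyond those two fields.
+ */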
+
+/**
+ *	trace: - Tracing function per se.
+ *	@event_id: ID of event as defined in linux/trace.h
+ *	@event_struct: struct describing the event
+ *	@cpu_id: the CPU associated with the event
+ *
+ *	Returns: 
+ *	0, if everything went OK (event got registered)
+ *	-ENODEV, no tracing daemon opened the driver.
+ *	-ENOMEM, no more memory to store events.
+ *	-EBUSY, tracer not started yet.
+ */
+int trace(u8 event_id,
+	  void *event_struct,
+	  u8 cpu_id)
+{
+	int var_data_len = 0;		/* Length of variable length data to be copied, if any */
+	void *var_data_beg = NULL;	/* Beginning of variable length data to be copied */
+	int send_signal = FALSE;	/* Should the daemon be summoned */
+	uint16_t data_size;		/* Size of tracing data */
+	struct siginfo daemon_sig_info;	/* Signal information */
+	struct timeval time_stamp;	/* Event time */
+	unsigned long int flags;	/* CPU flags for lock */
+	trace_time_delta time_delta;	/* The time elapsed between now and the last event */
+	struct task_struct *incoming_process = NULL;	/* Pointer to incoming process */
+
+	/* Is there a tracing daemon */
+	if (daemon_task_struct == NULL)
+		return -ENODEV;
+
+	/* Execute any tasks waiting for this CPU */
+	if(atomic_read(&waiting_for_cpu(cpu_id)) != 0)
+		do_waiting_tasks(cpu_id);
+
+	/* Is this the exit of a process? */
+	if ((event_id == TRACE_EV_PROCESS) &&
+	    (event_struct != NULL) &&
+	    ((((trace_process *) event_struct)->event_sub_id) == TRACE_EV_PROCESS_EXIT))
+		trace_destroy_owners_events(current->pid);
+
+	/* Do we trace the event */
+	if ((tracer_started == TRUE) || (event_id == TRACE_EV_START) || (event_id == TRACE_EV_BUFFER_START))
+		goto TraceEvent;
+
+	return -EBUSY;
+
+TraceEvent:
+	/* Are we monitoring this event */
+	if (!ltt_test_bit(event_id, &traced_events))
+		return 0;
+
+	/* Always let the start event pass, whatever the IDs */
+	if ((event_id != TRACE_EV_START) && (event_id != TRACE_EV_BUFFER_START)) {
+		/* Is this a scheduling change */
+		if (event_id == TRACE_EV_SCHEDCHANGE) {
+			/* Get pointer to incoming process */
+			incoming_process = (struct task_struct *) (((trace_schedchange *) event_struct)->in);
+
+			/* Set PID information in schedchange event */
+			(((trace_schedchange *) event_struct)->in) = incoming_process->pid;
+		}
+		/* Are we monitoring a particular process */
+		if ((tracing_pid == TRUE) && (current->pid != traced_pid)) {
+			/* Record this event if it is the scheduling change bringing in the traced PID */
+			if (incoming_process == NULL)
+				return 0;
+			else if (incoming_process->pid != traced_pid)
+				return 0;
+		}
+		/* Are we monitoring a particular process group */
+		if ((tracing_pgrp == TRUE) && (current->pgrp != traced_pgrp)) {
+			/* Record this event if it is the scheduling change bringing in a process of the traced PGRP */
+			if (incoming_process == NULL)
+				return 0;
+			else if (incoming_process->pgrp != traced_pgrp)
+				return 0;
+		}
+		/* Are we monitoring the processes of a given group of users */
+		if ((tracing_gid == TRUE) && (current->egid != traced_gid)) {
+			/* Record this event if it is the scheduling change bringing in a process of the traced GID */
+			if (incoming_process == NULL)
+				return 0;
+			else if (incoming_process->egid != traced_gid)
+				return 0;
+		}
+		/* Are we monitoring the processes of a given user */
+		if ((tracing_uid == TRUE) && (current->euid != traced_uid)) {
+			/* Record this event if it is the scheduling change bringing in a process of the traced UID */
+			if (incoming_process == NULL)
+				return 0;
+			else if (incoming_process->euid != traced_uid)
+				return 0;
+		}
+	}
+
+	/* Compute size of tracing data */
+	data_size = sizeof(event_id) + sizeof(time_delta) + sizeof(data_size);
+
+	/* Do we log the event details */
+	if (ltt_test_bit(event_id, &log_event_details_mask)) {
+		/* Update the size of the data entry */
+		data_size += event_struct_size[event_id];
+
+		/* Some events have variable length */
+		switch (event_id) {
+		/* Is there a file name in this */
+		case TRACE_EV_FILE_SYSTEM:
+			if ((((trace_file_system *) event_struct)->event_sub_id == TRACE_EV_FILE_SYSTEM_EXEC)
+			    || (((trace_file_system *) event_struct)->event_sub_id == TRACE_EV_FILE_SYSTEM_OPEN)) {
+				/* Remember the string's beginning and update size variables */
+				var_data_beg = ((trace_file_system *) event_struct)->file_name;
+				var_data_len = ((trace_file_system *) event_struct)->event_data2 + 1;
+				data_size += (uint16_t) var_data_len;
+			}
+			break;
+
+		/* Logging of a custom event */
+		case TRACE_EV_CUSTOM:
+			var_data_beg = ((trace_custom *) event_struct)->data;
+			var_data_len = ((trace_custom *) event_struct)->data_size;
+			data_size += (uint16_t) var_data_len;
+			break;
+		}
+	}
+
+	/* Do we record the CPUID */
+	if ((log_cpuid == TRUE) && (event_id != TRACE_EV_START) && (event_id != TRACE_EV_BUFFER_START)) {
+		/* Update the size of the data entry */
+		data_size += sizeof(cpu_id);
+	}
+
+	/* If we're using the lockless scheme, we preempt the default path 
+	   here - nothing after this point in this function will be executed. 
+	   Note that even if we do have cmpxchg, we still want to have a 
+	   choice between the lock-free and locking schemes at run-time, thus 
+	   the using_lockless check.  This used to be implemented as a kernel 
+	   hook, and will be again when/if kernel hooks are accepted into the 
+	   kernel. */
+	if(using_lockless && have_cmpxchg())
+		return lockless_write_event(event_id, 
+					    event_struct,	
+					    data_size,
+					    cpu_id,
+					    var_data_beg,
+					    var_data_len);
+
+	/* Take the tracer lock */
+	spin_lock_irqsave(&trace_spin_lock, flags);
+
+	/* The following time calculations have to be done within the spinlock because
+	   otherwise the event order could be inverted. */
+
+	/* Get the time of the event */
+	time_delta = get_time_delta(&time_stamp, cpu_id);
+
+	/* Is there enough space left in the write buffer */
+	if (current_write_pos(cpu_id) + data_size > write_limit(cpu_id)) {
+		/* Have we already switched buffers and informed the daemon of it */
+		if (atomic_read(&signal_sent(cpu_id)) == TRUE) {
+			/* We've lost another event */
+			(events_lost(cpu_id))++;
+
+			/* Bye, bye, now */
+			spin_unlock_irqrestore(&trace_spin_lock, flags);
+			return -ENOMEM;
+		}
+		/* We need to inform the daemon */
+		send_signal = TRUE;
+
+		/* Get the time and TSC of the start/end buffer event */
+		get_timestamp(&time_stamp, &time_delta);
+
+		/* Switch buffers, pass time_delta in case it's really a TSC */
+		tracer_switch_buffers(time_stamp, time_delta, cpu_id);
+
+		/* Recompute the time delta since buffer_start_time has changed because of the buffer change */
+		recalc_time_delta(&time_stamp, &time_delta, cpu_id);
+	}
+
+	/* Write the CPUID to the tracing buffer, if required */
+	if ((log_cpuid == TRUE) && (event_id != TRACE_EV_START) && (event_id != TRACE_EV_BUFFER_START))
+		tracer_write_to_buffer(current_write_pos(cpu_id),
+				       &cpu_id,
+				       sizeof(cpu_id));
+
+	/* Write event type to tracing buffer */
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &event_id,
+			       sizeof(event_id));
+
+	/* Write event time delta to tracing buffer */
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &time_delta,
+			       sizeof(time_delta));
+
+	/* Do we log event details */
+	if (ltt_test_bit(event_id, &log_event_details_mask)) {
+		/* Write event structure */
+		tracer_write_to_buffer(current_write_pos(cpu_id),
+				       event_struct,
+				       event_struct_size[event_id]);
+
+		/* Write string if any */
+		if (var_data_len)
+			tracer_write_to_buffer(current_write_pos(cpu_id),
+					       var_data_beg,
+					       var_data_len);
+	}
+	/* Write the length of the event description */
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &data_size,
+			       sizeof(data_size));
+
+	/* Should the tracing daemon be notified  */
+	if (send_signal == TRUE) {
+		/* Remember that a signal has been sent */
+		atomic_set(&signal_sent(cpu_id), TRUE);
+		buffer_switches_pending |= (1UL << cpu_id);
+
+		/* Release the tracer lock */
+		spin_unlock_irqrestore(&trace_spin_lock, flags);
+
+		/* Setup signal information */
+		daemon_sig_info.si_signo = SIGIO;
+		daemon_sig_info.si_errno = 0;
+		daemon_sig_info.si_code = SI_KERNEL;
+
+		/* Signal the tracing daemon */
+		send_sig_info(SIGIO, &daemon_sig_info, daemon_task_struct);
+	} else
+		/* Release the tracer lock */
+		spin_unlock_irqrestore(&trace_spin_lock, flags);
+
+	return 0;
+}
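+
+/*
+ * Illustration only (a sketch, never called): the data_size bookkeeping
+ * of trace() restated as a stand-alone helper, using the same
+ * event_struct_size[] table.
+ */
+static inline uint16_t record_size(u8 event_id, int log_details,
+				   int log_cpu, int var_data_len)
+{
+	/* Fixed part: event ID + time delta + trailing size field */
+	uint16_t size = sizeof(u8) + sizeof(trace_time_delta)
+		+ sizeof(uint16_t);
+
+	if (log_details)	/* event struct plus variable-length data */
+		size += event_struct_size[event_id] + var_data_len;
+	if (log_cpu)		/* leading cpu_id byte */
+		size += sizeof(u8);
+
+	return size;
+}
+
+/*
+ * The record itself is laid out in exactly the order of the
+ * tracer_write_to_buffer() calls: optional cpu_id, event_id,
+ * time_delta, optional event struct and variable data, and finally
+ * data_size as a trailer.
+ */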
+
+/**
+ *	tracer_switch_buffers: - Switches between read and write buffers.
+ *	@current_time: current time.
+ *	@current_tsc: the TSC associated with current_time, if applicable
+ *	@cpu_id: the CPU associated with the event
+ *
+ *	Make the current write buffer available for reading and turn the
+ *	old read buffer into the new write buffer. Set the tracer
+ *	variables accordingly.
+ *
+ *	No return values.
+ *
+ *	This should be called from within a spin_lock.
+ */
+void tracer_switch_buffers(struct timeval current_time,
+ 			   trace_time_delta current_tsc,
+			   u8 cpu_id)
+{
+	char *temp_buf;			/* Temporary buffer pointer */
+	char *temp_buf_end;		/* Temporary buffer end pointer */
+	char *init_write_pos;		/* Initial write position */
+	u8 event_id;			/* Event ID of last event */
+	uint16_t data_size;		/* Size of tracing data */
+	u32 size_lost;			/* Size delta between last event and end of buffer */
+	trace_time_delta time_delta;	/* The time elapsed between now and the last event */
+	trace_buffer_start start_buffer_event;	/* Start of the new buffer event */
+	trace_buffer_end end_buffer_event; /* End of buffer event */
+
+	/* Remember initial write position */
+	init_write_pos = current_write_pos(cpu_id);
+
+	/* Write the end event at the end of the buffer */
+	end_buffer_event.time = current_time;
+	end_buffer_event.tsc = current_tsc;
+
+	/* Write the CPUID to the tracing buffer, if required */
+	if (log_cpuid == TRUE) {
+		tracer_write_to_buffer(current_write_pos(cpu_id),
+				       &cpu_id,
+				       sizeof(cpu_id));
+	}
+	/* Write event type to tracing buffer */
+	event_id = TRACE_EV_BUFFER_END;
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &event_id,
+			       sizeof(event_id));
+
+	/* Write event time delta/TSC to tracing buffer */
+	time_delta = switch_time_delta(current_tsc);
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &time_delta,
+			       sizeof(time_delta));
+
+	/* Write event structure */
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &end_buffer_event,
+			       sizeof(end_buffer_event));
+
+	/* Compute the data size */
+	data_size = sizeof(event_id)
+		+ sizeof(time_delta)
+		+ sizeof(end_buffer_event)
+		+ sizeof(data_size);
+
+	/* Write the length of the event description */
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &data_size,
+			       sizeof(data_size));
+
+	/* Get size lost */
+	size_lost = write_buf_end(cpu_id) - init_write_pos;
+
+	/* Write size lost at the end of the buffer */
+	*((u32 *) (write_buf_end(cpu_id) - sizeof(size_lost))) = size_lost;
+
+	/* Switch buffers */
+	temp_buf = read_buf(cpu_id);
+	read_buf(cpu_id) = write_buf(cpu_id);
+	write_buf(cpu_id) = temp_buf;
+
+	/* Set buffer ends */
+	temp_buf_end = read_buf_end(cpu_id);
+	read_buf_end(cpu_id) = write_buf_end(cpu_id);
+	write_buf_end(cpu_id) = temp_buf_end;
+
+	/* Set read limit */
+	read_limit(cpu_id) = read_buf_end(cpu_id);
+
+	/* Set write limit */
+	write_limit(cpu_id) = write_buf_end(cpu_id) - TRACER_LAST_EVENT_SIZE;
+
+	/* Set write position */
+	current_write_pos(cpu_id) = write_buf(cpu_id);
+
+	/* Increment buffer ID */
+	(buffer_id(cpu_id))++;
+
+	/* Set the time/TSC of beginning of this buffer */
+	buffer_start_time(cpu_id) = current_time;
+	buffer_start_tsc(cpu_id) = current_tsc;
+
+	/* Write the start of buffer event */
+	start_buffer_event.id = buffer_id(cpu_id);
+	start_buffer_event.time = current_time;
+	start_buffer_event.tsc = current_tsc;
+
+	/* Write event type to tracing buffer */
+	event_id = TRACE_EV_BUFFER_START;
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &event_id,
+			       sizeof(event_id));
+
+	/* Write event time delta to tracing buffer */
+	time_delta = switch_time_delta(current_tsc);
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &time_delta,
+			       sizeof(time_delta));
+
+	/* Write event structure */
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &start_buffer_event,
+			       sizeof(start_buffer_event));
+
+	/* Compute the data size */
+	data_size = sizeof(event_id)
+	    + sizeof(time_delta)
+	    + sizeof(start_buffer_event)
+	    + sizeof(data_size);
+
+	/* Write the length of the event description */
+	tracer_write_to_buffer(current_write_pos(cpu_id),
+			       &data_size,
+			       sizeof(data_size));
+}
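+
+/*
+ * The size_lost word written above lets the consumer skip the unused
+ * tail of a completed buffer.  Daemon-side sketch (illustration only,
+ * the daemon itself is not part of this patch):
+ *
+ *	u32 usable_bytes(const char *buf, u32 buf_size)
+ *	{
+ *		u32 size_lost;
+ *
+ *		memcpy(&size_lost, buf + buf_size - sizeof(size_lost),
+ *		       sizeof(size_lost));
+ *		return buf_size - size_lost;	(offset of the end event)
+ *	}
+ */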
+
+/**
+ *	update_shared_buffer_control: - prepare for GET_BUFFER_CONTROL ioctl
+ *	@cpu_id: the CPU associated with the ioctl
+ *
+ *	Copies buffer control data into a common format that can be shared
+ *	between the tracer and the daemon, allowing alignment to be ignored.
+ */
+static inline void update_shared_buffer_control(u8 cpu_id)
+{
+	int i, n_buffers;
+	
+	shared_buffer_control.cpu_id = cpu_id;
+
+	/* Let the caller know if there are more buffer switches to process 
+	   AFTER this one */
+	shared_buffer_control.buffer_switches_pending =
+		buffer_switches_pending & ~(1UL << cpu_id);
+	shared_buffer_control.buffer_control_valid = TRUE;
+	if(using_lockless) {
+		shared_buffer_control.bufno_bits = bufno_bits(cpu_id);
+		shared_buffer_control.offset_bits = offset_bits(cpu_id);
+		shared_buffer_control.buffers_produced = 
+			buffers_produced(cpu_id);
+		shared_buffer_control.buffers_consumed = 
+			buffers_consumed(cpu_id);
+		n_buffers = TRACE_MAX_BUFFER_NUMBER(buf_no_bits);
+		for(i = 0; i < n_buffers; i++) {
+			shared_buffer_control.fill_count[i] = 
+				atomic_read(&fill_count(cpu_id, i));
+		}
+	}
+}
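+
+/*
+ * Daemon-side sketch (illustration only): the number of buffers ready
+ * to be read on a CPU falls out of the counter pair copied above, and
+ * since both counters only grow, the unsigned difference stays correct
+ * across wraparound:
+ *
+ *	u32 ready = sbc.buffers_produced - sbc.buffers_consumed;
+ */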
+
+/**
+ *	tracer_ioctl: - "ioctl" file op
+ *
+ *	@tracer_inode: the inode associated with the device
+ *	@task_file: file structure given to the acting process
+ *	@tracer_command: command given by the caller
+ *	@ioctl_arg: arguments to the command
+ *
+ *	Returns:
+ *	>0, the number of events lost, when the caller requested it.
+ *	0, Everything went OK
+ *	-ENOSYS, no such command
+ *	-EINVAL, tracer not properly configured
+ *	-EBUSY, tracer can't be reconfigured while in operation
+ *	-ENOMEM, no more memory
+ *	-EFAULT, unable to access user space memory
+ *
+ *	Note:
+ *	In the future, this function should check to make sure that it's the
+ *	server that makes this ioctl.
+ */
+int tracer_ioctl(struct inode *tracer_inode,
+		 struct file *task_file,
+		 unsigned int tracer_command,
+		 unsigned long ioctl_arg)
+{
+	int return_val;			/* Function return value */
+	int dev_minor_no;		/* Device minor number */
+	int new_user_event_id;		/* ID of newly created user event */
+	unsigned long int flags;	/* CPU flags for lock */
+	u8 cpu_id;			/* Current CPU */
+	u8 i;				/* Counter */
+	u32 buffers_consumed;		/* # buffers consumed */
+	trace_custom user_event;	/* The user event to be logged */
+	trace_change_mask trace_mask;	/* Event mask */
+	trace_new_event new_user_event;	/* The event to be created for the user */
+	struct timeval current_time;   	/* The time elapsed between now and the last event */
+	trace_time_delta current_tsc;  	/* The time elapsed between now and the last event */
+	struct buffers_committed buffers_committed;  /* For COMMITTED case */
+
+	/* Get device's minor number */
+	dev_minor_no = minor(tracer_inode->i_rdev) & 0x0f;
+
+	/* If the tracer is started, the daemon can't modify the configuration */
+	if ((dev_minor_no == 0)
+	    && (tracer_started == TRUE)
+	    && (tracer_command != TRACER_STOP)
+	    && (tracer_command != TRACER_DATA_COMITTED)
+	    && (tracer_command != TRACER_GET_BUFFER_CONTROL))
+		return -EBUSY;
+
+	/* Only some operations are permitted to user processes trying to log events */
+	if ((dev_minor_no == 1)
+	    && (tracer_command != TRACER_CREATE_USER_EVENT)
+	    && (tracer_command != TRACER_DESTROY_USER_EVENT)
+	    && (tracer_command != TRACER_TRACE_USER_EVENT)
+	    && (tracer_command != TRACER_SET_EVENT_MASK)
+	    && (tracer_command != TRACER_GET_EVENT_MASK))
+		return -ENOSYS;
+
+	/* Depending on the command executed */
+	switch (tracer_command) {
+	/* Start the tracer */
+	case TRACER_START:
+		init_heartbeat_timer();
+		
+		/* Initialize buffer control regardless of scheme in use */
+		init_buffer_control(buffer_control,
+				    !use_locking,    /* using_lockless */
+				    buf_no_bits,     /* bufno_bits, 2**n */
+				    buf_offset_bits); /* offset_bits, 2**n */
+
+		/* Check if the device has been properly set up */
+		if (((use_syscall_eip_bounds == TRUE)
+		     && (syscall_eip_depth_set == TRUE))
+		    || ((use_syscall_eip_bounds == TRUE)
+			&& ((lower_eip_bound_set != TRUE)
+			    || (upper_eip_bound_set != TRUE)))
+		    || ((tracing_pid == TRUE)
+			&& (tracing_pgrp == TRUE)))
+			return -EINVAL;
+
+		/* Set the kernel-side trace configuration */
+		if (trace_set_config(syscall_eip_depth_set,
+				     use_syscall_eip_bounds,
+				     syscall_eip_depth,
+				     lower_eip_bound,
+				     upper_eip_bound) < 0)
+			return -EINVAL;
+
+		/* Always log the start event and the buffer start event */
+		ltt_set_bit(TRACE_EV_BUFFER_START, &traced_events);
+		ltt_set_bit(TRACE_EV_BUFFER_START, &log_event_details_mask);
+		ltt_set_bit(TRACE_EV_START, &traced_events);
+		ltt_set_bit(TRACE_EV_START, &log_event_details_mask);
+		ltt_set_bit(TRACE_EV_CHANGE_MASK, &traced_events);
+		ltt_set_bit(TRACE_EV_CHANGE_MASK, &log_event_details_mask);
+
+		/* If we're not using TSC, then we can initialize all now */
+		if(using_tsc == FALSE)
+			for(i = 0; i < num_cpus; i++)
+				initialize_trace(i);
+ 
+		/* Start tapping into Linux's syscall flow */
+		syscall_entry_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_ENTRY, &traced_events);
+		syscall_exit_trace_active  = ltt_test_bit(TRACE_EV_SYSCALL_EXIT, &traced_events);
+
+		/* We can start tracing */
+		tracer_stopping = FALSE;
+		tracer_started = TRUE;
+
+		/* Reregister custom trace events created earlier */
+		trace_reregister_custom_events();
+		break;
+
+	/* Stop the tracer */
+	case TRACER_STOP:
+		if(using_tsc == TRUE)
+			del_timer(&heartbeat_timer);
+
+		/* Stop tracing */
+ 		/* We don't log new events, but old lockless ones can finish */
+		tracer_stopping = TRUE;
+		tracer_started = FALSE;
+
+		/* Stop interrupting the normal flow of system calls */
+		syscall_entry_trace_active = 0;
+		syscall_exit_trace_active  = 0;
+
+ 		/* Make sure the last buffer touched is finalized */
+		if(using_lockless) {
+			/* If we're not using TSC, we can finalize all now */
+			/* Write end buffer event as last event in old buf. */
+			if(using_tsc == FALSE) {
+				for(i = 0; i < num_cpus; i++)
+					finalize_lockless_trace(i);
+				tracer_stopping = FALSE;
+			} else
+				for(i = 0; i < num_cpus; i++)
+					set_waiting_for_cpu(i, LTT_FINALIZE_TRACE);
+			break;
+ 		} /* Else locking scheme */
+
+		/* Acquire the lock to avoid the SMP case where another CPU is
+		   writing a trace while the buffer is being switched */
+		spin_lock_irqsave(&trace_spin_lock, flags);
+
+		if(using_tsc == FALSE) {
+			/* Get the time of the event */
+			get_timestamp(&current_time, &current_tsc);
+
+			/* If we're not using TSC, we can finalize all now */
+			for(i = 0; i < num_cpus; i++) {
+				buffer_switches_pending |= (1UL << i);
+				/* Switch the buffers to ensure that the end 
+				   of the buffer mark is set */
+				tracer_switch_buffers(current_time, 
+						      current_tsc, i);
+			}
+			tracer_stopping = FALSE;
+		} else {
+			for(i = 0; i < num_cpus; i++)
+				set_waiting_for_cpu(i, LTT_FINALIZE_TRACE);
+		}
+
+		/* Release lock */
+		spin_unlock_irqrestore(&trace_spin_lock, flags);
+		break;
+
+	/* Set the tracer to the default configuration */
+	case TRACER_CONFIG_DEFAULT:
+		tracer_set_default_config();
+		break;
+
+	/* Set the memory buffers the daemon wants us to use */
+	case TRACER_CONFIG_MEMORY_BUFFERS:
+		/* Is the given size "reasonable" */
+		if (use_locking == TRUE) {
+			if (ioctl_arg < TRACER_MIN_BUF_SIZE)
+				return -EINVAL;
+		} else {
+			if ((ioctl_arg < TRACER_LOCKLESS_MIN_BUF_SIZE) || 
+			    (ioctl_arg > TRACER_LOCKLESS_MAX_BUF_SIZE))
+				return -EINVAL;
+		}
+
+		/* Set the buffer's size */
+		return tracer_set_buffer_size(ioctl_arg);
+		break;
+
+	/* Set the number of memory buffers the daemon wants us to use */
+	case TRACER_CONFIG_N_MEMORY_BUFFERS:
+		/* Is the given size "reasonable" */
+		if ((use_locking == TRUE) || (ioctl_arg < TRACER_MIN_BUFFERS) || 
+		    (ioctl_arg > TRACER_MAX_BUFFERS))
+			return -EINVAL;
+
+		/* Set the number of buffers */
+		return tracer_set_n_buffers(ioctl_arg);
+		break;
+
+	/* Set locking scheme the daemon wants us to use */
+	case TRACER_CONFIG_USE_LOCKING:
+		/* Set the locking scheme in a global for later */
+		use_locking = ioctl_arg;
+		if((use_locking == FALSE) && (have_cmpxchg() == FALSE))
+                        /* Lock-free scheme not supported on this platform */
+			return -EINVAL; 
+		break;
+
+	/* Trace the given events */
+	case TRACER_CONFIG_EVENTS:
+		if (copy_from_user(&traced_events, (void *) ioctl_arg, sizeof(traced_events)))
+			return -EFAULT;
+		break;
+
+	/* Set the timestamping method */
+	case TRACER_CONFIG_TIMESTAMP:
+		using_tsc = ioctl_arg;
+		if((using_tsc == TRUE) && (have_tsc() == FALSE)) {
+			using_tsc = FALSE;
+			return -EINVAL;
+		}
+		break;
+
+	/* Record the details of the event, or not */
+	case TRACER_CONFIG_DETAILS:
+		if (copy_from_user(&log_event_details_mask, (void *) ioctl_arg, sizeof(log_event_details_mask)))
+			return -EFAULT;
+		break;
+
+	/* Record the CPUID associated with the event */
+	case TRACER_CONFIG_CPUID:
+		log_cpuid = TRUE;
+		break;
+
+	/* Trace only one process */
+	case TRACER_CONFIG_PID:
+		tracing_pid = TRUE;
+		traced_pid = ioctl_arg;
+		break;
+
+	/* Trace only the given process group */
+	case TRACER_CONFIG_PGRP:
+		tracing_pgrp = TRUE;
+		traced_pgrp = ioctl_arg;
+		break;
+
+	/* Trace the processes of a given group of users */
+	case TRACER_CONFIG_GID:
+		tracing_gid = TRUE;
+		traced_gid = ioctl_arg;
+		break;
+
+	/* Trace the processes of a given user */
+	case TRACER_CONFIG_UID:
+		tracing_uid = TRUE;
+		traced_uid = ioctl_arg;
+		break;
+
+	/* Set the call depth at which the EIP should be fetched on syscall */
+	case TRACER_CONFIG_SYSCALL_EIP_DEPTH:
+		syscall_eip_depth_set = TRUE;
+		syscall_eip_depth = ioctl_arg;
+		break;
+
+	/* Set the lower bound address from which EIP is recorded on syscall */
+	case TRACER_CONFIG_SYSCALL_EIP_LOWER:
+		/* We are using bounds for fetching the EIP where syscall was made */
+		use_syscall_eip_bounds = TRUE;
+
+		/* Set the lower bound */
+		lower_eip_bound = (void *) ioctl_arg;
+
+		/* The lower bound has been set */
+		lower_eip_bound_set = TRUE;
+		break;
+
+	/* Set the upper bound address from which EIP is recorded on syscall */
+	case TRACER_CONFIG_SYSCALL_EIP_UPPER:
+		/* We are using bounds for fetching the EIP where syscall was made */
+		use_syscall_eip_bounds = TRUE;
+
+		/* Set the upper bound */
+		upper_eip_bound = (void *) ioctl_arg;
+
+		/* The upper bound has been set */
+		upper_eip_bound_set = TRUE;
+		break;
+
+	/* The daemon has committed the last trace */
+	case TRACER_DATA_COMITTED:
+		/* Copy the information from user space */
+		if (copy_from_user(&buffers_committed, (void *)ioctl_arg, 
+				   sizeof(buffers_committed)))
+			return -EFAULT;
+
+		cpu_id = buffers_committed.cpu_id;
+		buffers_consumed = buffers_committed.buffers_consumed;
+
+		/* Turn off the bit indicating that the cpu's buffer switch
+		   needs servicing */ 
+		buffer_switches_pending &= ~(1 << cpu_id);
+
+		/* The lockless version doesn't use signal_sent.
+		   buffers_consumed is the number of buffers the daemon has
+		   told us it just consumed.  Add that to the per-CPU count. */
+		if(using_lockless) {
+			local_irq_save(flags);
+
+			/* We consumed some buffers, note it. */
+			buffers_consumed(cpu_id) += buffers_consumed;
+
+			/* If we were full, we no longer are */
+			if(buffers_full(cpu_id) && (buffers_consumed > 0)) {
+				set_waiting_for_cpu(cpu_id, LTT_CONTINUE_TRACE);
+			}
+
+			local_irq_restore(flags);
+			break;
+		} /* Else locking version below */
+
+		/* Safely set the signal sent flag to FALSE */
+		local_irq_save(flags);
+		atomic_set(&signal_sent(cpu_id), FALSE);
+		local_irq_restore(flags);
+		break;
+
+	/* Get the number of events lost */
+	case TRACER_GET_EVENTS_LOST:
+		return events_lost(ioctl_arg);
+		break;
+
+	/* Create a user event */
+	case TRACER_CREATE_USER_EVENT:
+		/* Copy the information from user space */
+		if (copy_from_user(&new_user_event, (void *) ioctl_arg, sizeof(new_user_event)))
+			return -EFAULT;
+
+		/* Create the event */
+		new_user_event_id = trace_create_owned_event(new_user_event.type,
+							     new_user_event.desc,
+							     new_user_event.format_type,
+							     new_user_event.form,
+							     current->pid);
+
+		/* Has the operation succeeded */
+		if (new_user_event_id >= 0) {
+			/* Set the event ID */
+			new_user_event.id = new_user_event_id;
+
+			/* Copy the event information back to user space */
+			if (copy_to_user((void *) ioctl_arg, &new_user_event, sizeof(new_user_event))) {
+				/* Since we were unable to tell the user about the event, destroy it */
+				trace_destroy_event(new_user_event_id);
+				return -EFAULT;
+			}
+		} else
+			/* Forward trace_create_owned_event()'s error code */
+			return new_user_event_id;
+		break;
+
+	/* Destroy a user event */
+	case TRACER_DESTROY_USER_EVENT:
+		/* Pass on the user's request */
+		trace_destroy_event((int) ioctl_arg);
+		break;
+
+	/* Trace a user event */
+	case TRACER_TRACE_USER_EVENT:
+		/* Copy the information from user space */
+		if (copy_from_user(&user_event, (void *) ioctl_arg, sizeof(user_event)))
+			return -EFAULT;
+
+		/* Copy the user event data, bounding it to the size of the
+		   buffer allocated in tracer_init() */
+		if (user_event.data_size > CUSTOM_EVENT_MAX_SIZE)
+			return -EINVAL;
+		if (copy_from_user(user_event_data, user_event.data, user_event.data_size))
+			return -EFAULT;
+
+		/* Log the raw event */
+		return_val = trace_raw_event(user_event.id,
+					     user_event.data_size,
+					     user_event_data);
+
+		/* Has the operation failed */
+		if (return_val < 0)
+			/* Forward trace_raw_event()'s error code */
+			return return_val;
+		break;
+
+	/* Set event mask */
+	case TRACER_SET_EVENT_MASK:
+		/* Copy the information from user space */
+		if (copy_from_user(&(trace_mask.mask), (void *) ioctl_arg, sizeof(trace_mask.mask)))
+			return -EFAULT;
+
+		/* Trace the event */
+
+		/* Note that we log this only for whatever CPU happens to be 
+		   current - the visualizer tools need to pick this up and 
+		   correlate it with the other CPUs' events. */
+		return_val = trace(TRACE_EV_CHANGE_MASK, &trace_mask, 
+				  smp_processor_id());
+
+		/* Change the event mask. (This has to be done second or else
+		   we may lose the information if the user decides to stop
+		   logging "change mask" events) */
+		memcpy(&traced_events, &(trace_mask.mask), sizeof(trace_mask.mask));
+		syscall_entry_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_ENTRY, &traced_events);
+		syscall_exit_trace_active  = ltt_test_bit(TRACE_EV_SYSCALL_EXIT, &traced_events);
+
+		/* Always trace the buffer start, the trace start and the change mask */
+		ltt_set_bit(TRACE_EV_BUFFER_START, &traced_events);
+		ltt_set_bit(TRACE_EV_START, &traced_events);
+		ltt_set_bit(TRACE_EV_CHANGE_MASK, &traced_events);
+
+		/* Forward trace()'s error code */
+		return return_val;
+		break;
+
+	/* Get event mask */
+	case TRACER_GET_EVENT_MASK:
+		/* Copy the information to user space */
+		if (copy_to_user((void *) ioctl_arg, &traced_events, sizeof(traced_events)))
+			return -EFAULT;
+		break;
+
+	/* Get information about the CPU configuration */
+	case TRACER_GET_ARCH_INFO:
+		ltt_arch_info.n_cpus = num_cpus;
+		ltt_arch_info.page_shift = PAGE_SHIFT;
+		if(copy_to_user((void *) ioctl_arg, 
+				&ltt_arch_info, 
+				sizeof(ltt_arch_info)))
+			return -EFAULT;
+		break;
+
+	/* Get buffer control data */
+	case TRACER_GET_BUFFER_CONTROL:
+		for(i = 0; i < num_cpus; i++) {
+			/* Return the first buffer control with a buffer switch
+			   still needing to be serviced - the daemon will ask
+			   for the others later. */
+			if(buffer_switches_pending & (1UL << i)) {
+				update_shared_buffer_control(i);
+				/* Copy the buffer control information to user
+				   space.  We can't copy_to_user() with a lock
+				   held (accessing user memory may cause a page
+				   fault),  so buffers_produced may actually be
+				   larger than what the daemon sees when this
+				   snapshot is taken.  This isn't a problem 
+				   because the daemon will get a chance to 
+				   read the new buffer the next time it's 
+				   signaled. */
+				if(copy_to_user((void *) ioctl_arg, 
+						&shared_buffer_control, 
+						sizeof(shared_buffer_control)))
+					return -EFAULT;
+				return 0;
+			}
+		}
+
+		/* If we're here, there were no cpus ready - let the daemon
+		   know that.  Use cpu 0 marked as invalid for this purpose. */
+		shared_buffer_control.cpu_id = 0;
+		shared_buffer_control.buffer_control_valid = FALSE;
+		if(copy_to_user((void *) ioctl_arg, 
+				&shared_buffer_control, 
+				sizeof(shared_buffer_control)))
+			return -EFAULT;
+		break;
+
+	/* Unknown command */
+	default:
+		return -ENOSYS;
+	}
+
+	return 0;
+}
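+
+/*
+ * For reference, the daemon's configuration sequence looks roughly
+ * like the sketch below.  Illustration only: the device node name is
+ * whatever the daemon creates for the char major registered in
+ * tracer_init(), and error handling is elided.
+ *
+ *	int fd = open("/dev/tracer", O_RDWR);		(minor 0)
+ *
+ *	ioctl(fd, TRACER_CONFIG_DEFAULT, 0);
+ *	ioctl(fd, TRACER_CONFIG_USE_LOCKING, 0);	(lockless)
+ *	ioctl(fd, TRACER_CONFIG_N_MEMORY_BUFFERS, 8);
+ *	ioctl(fd, TRACER_CONFIG_MEMORY_BUFFERS, 8192);	(power of 2)
+ *	ioctl(fd, TRACER_START, 0);
+ *
+ * The buffer count is set before the buffer size because
+ * tracer_set_buffer_size() below needs buf_no_bits, and once
+ * TRACER_START succeeds only TRACER_STOP, TRACER_DATA_COMITTED and
+ * TRACER_GET_BUFFER_CONTROL are accepted on the daemon channel.
+ */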
+
+/**
+ *	tracer_mmap: - "Mmap" file op
+ *	@task_file: file structure given to the acting process
+ *	@tracer_vm_area: Virtual memory area description structure
+ *
+ *	Returns:
+ *	0 if ok
+ *	-EAGAIN, when remap failed
+ *	-EACCES, permission denied
+ */
+int tracer_mmap(struct file *task_file,
+		struct vm_area_struct *tracer_vm_area)
+{
+	int return_val;		/* Function's return value */
+
+	/* Only the trace daemon is allowed access to mmap */
+	if (current != daemon_task_struct)
+		return -EACCES;
+
+	/* Remap trace buffer into the process's memory space */
+	return_val = tracer_mmap_region(tracer_vm_area,
+					(char *) tracer_vm_area->vm_start,
+					trace_buf,
+					tracer_vm_area->vm_end - tracer_vm_area->vm_start);
+
+	return return_val;
+}
+
+/**
+ *	tracer_open: - "Open" file op
+ *	@tracer_inode: the inode associated with the device
+ *	@task_file: file structure given to the acting process
+ *
+ *	Returns:
+ *	0, everything went OK
+ *	-ENODEV, no such device.
+ *	-EBUSY, daemon channel (minor number 0) already in use.
+ */
+int tracer_open(struct inode *tracer_inode,
+		struct file *task_file)
+{
+	int dev_minor_no = minor(tracer_inode->i_rdev) & 0x0f;	/* Device minor number */
+
+	tracer_started = FALSE;
+	tracer_stopping = FALSE;
+
+	/* Only minor numbers 0 and 1 are used */
+	if ((dev_minor_no > 0) && (dev_minor_no != 1))
+		return -ENODEV;
+
+	/* If the device has already been opened */
+	if (open_count) {
+		/* Is there another process trying to open the daemon's channel (minor number 0) */
+		if (dev_minor_no == 0)
+			return -EBUSY;
+		else
+			/* Only increment use, this is just another user process trying to log user events */
+			goto IncrementUse;
+	}
+
+	/* Fetch the task structure of the process that opened the device */
+	daemon_task_struct = current;
+
+	/* Reset the default configuration since this is the daemon and it will complete the setup */
+	tracer_set_default_config();
+
+IncrementUse:
+	/* Lock the device */
+	open_count++;
+
+#ifdef MODULE
+	/* Increment module usage */
+	MOD_INC_USE_COUNT;
+#endif
+
+	return 0;
+}
+
+/**
+ *	tracer_release: - "Release" file op
+ *	@tracer_inode: the inode associated with the device
+ *	@task_file: file structure given to the acting process
+ *
+ *	Returns: 
+ *	0, everything went OK
+ *	-EBUSY, there are still event writes in progress so the buffer can't
+ *	be released.
+ *
+ *	Note:
+ *	It is assumed that if the tracing daemon dies, exits or simply stops
+ *	existing, the kernel or "someone" will call tracer_release. Otherwise,
+ *      we're in trouble ...
+ */
+int tracer_release(struct inode *tracer_inode,
+		   struct file *task_file)
+{
+	int event_writes_pending, i;
+	int dev_minor_no = minor(tracer_inode->i_rdev) & 0x0f;	/* Device minor number */
+
+	/* Is this a simple user process exiting? */
+	if (dev_minor_no != 0)
+		goto DecrementUse;
+
+	/* Did we lose any events */
+	for(i = 0; i < num_cpus; i++)
+		if (events_lost(i) > 0)
+			printk(KERN_ALERT "Tracer: Lost %d events on cpu %d\n",
+			       events_lost(i), i);
+
+	/* Reset the daemon PID */
+	daemon_task_struct = NULL;
+
+	/* Free the current buffers, if any, but only if they're not still
+	   in use */
+	if (trace_buf != NULL) {
+		event_writes_pending = trace_get_pending_write_count();
+		if(event_writes_pending == 0)
+			rvfree(trace_buf, alloc_size);
+		else {
+			printk(KERN_ERR "Tracer: Couldn't release tracer - %d event writes pending \n",
+			       event_writes_pending);
+			return -EBUSY;
+		}
+	}
+
+	/* Reset the read and write buffers */
+	trace_buf = NULL;
+	for(i = 0; i < num_cpus; i++) {
+		write_buf(i) = NULL;
+		read_buf(i) = NULL;
+		write_buf_end(i) = NULL;
+		read_buf_end(i) = NULL;
+		current_write_pos(i) = NULL;
+		read_limit(i) = NULL;
+		write_limit(i) = NULL;
+		events_lost(i) = 0;
+		atomic_set(&signal_sent(i), FALSE);
+	}
+
+	use_locking = TRUE;
+
+	/* Reset the tracer's configuration */
+	tracer_set_default_config();
+	tracer_started = FALSE;
+	tracer_stopping = FALSE;
+
+	/* Reset number of bytes recorded and number of events lost */
+	buf_read_complete = 0;
+	size_read_incomplete = 0;
+
+DecrementUse:
+	/* Unlock the device */
+	open_count--;
+
+#ifdef MODULE
+	/* Decrement module usage */
+	MOD_DEC_USE_COUNT;
+#endif
+
+	return 0;
+}
+
+/**
+ *	tracer_fsync: - "Fsync" file op
+ *	@task_file: file structure given to the acting process
+ *	@tracer_dentry: dentry associated with file
+ *	@data_sync: non-zero for fdatasync() semantics (unused here)
+ *
+ *	Returns:
+ *	0, everything went OK
+ *	-EACCES, permission denied
+ *
+ *	Note:
+ *	We need to lock the modifications of these values because they are
+ *	read and written by trace().
+ */
+int tracer_fsync(struct file *task_file,
+		 struct dentry *tracer_dentry,
+		 int data_sync)
+{
+	unsigned long int flags, i;
+
+	/* Only the trace daemon is allowed access to fsync */
+	if (current != daemon_task_struct)
+		return -EACCES;
+
+	/* Take the tracer lock */
+	spin_lock_irqsave(&trace_spin_lock, flags);
+
+	for(i = 0; i < num_cpus; i++) {
+		/* Reset the write positions */
+		current_write_pos(i) = write_buf(i);
+
+		/* Reset read limit */
+		read_limit(i) = read_buf(i);
+		events_lost(i) = 0;
+		atomic_set(&signal_sent(i), FALSE);
+	}
+
+	/* Reset bytes recorded */
+	buf_read_complete = 0;
+	size_read_incomplete = 0;
+
+	/* Release the tracer lock */
+	spin_unlock_irqrestore(&trace_spin_lock, flags);
+
+	return 0;
+}
+
+/**
+ *	tracer_set_buffer_size: - Sets the size of the buffers.
+ *	@buffers_size: Size of buffers
+ *
+ *	Returns:
+ *	0, Size setting went OK
+ *	-ENOMEM, unable to get a hold of memory for tracer
+ *
+ *	buf_no_bits must have already been set before this function is called.
+ */
+int tracer_set_buffer_size(int buffers_size)
+{
+	int size_alloc;
+	int no_buffers = TRACE_MAX_BUFFER_NUMBER(buf_no_bits);
+
+	/* We want to make sure the number of buffers allocated matches
+	   the number of CPUs we use for the rest of the trace */
+	num_cpus = num_online_cpus();
+
+	if(use_locking == TRUE) {
+		/* Set size to allocate (= buffers_size * 2) per CPU and fix
+		   its size to be on a page boundary */
+		cpu_buf_size = FIX_SIZE(buffers_size << 1);
+
+		/* Set size allocated for all CPUs */
+		size_alloc = cpu_buf_size * num_cpus;
+	} else {
+		/* Calculate power-of-2 buffer size */
+		if(hweight32(buffers_size) != 1)
+			/* Invalid if # set bits != 1 */
+			return -EINVAL;
+			
+		/* Find position of one and only set bit */
+		buf_offset_bits = ffs(buffers_size) - 1;
+
+		/* Set size to allocate (= buffers_size * n buffers) per CPU
+		   and fix its size to be on a page boundary */
+		cpu_buf_size = FIX_SIZE(buffers_size * no_buffers);
+
+		/* Calculate total size of buffers for all CPUs*/
+		size_alloc = cpu_buf_size * num_cpus;
+
+		/* Sanity check */ 
+		if(size_alloc > TRACER_LOCKLESS_MAX_TOTAL_BUF_SIZE) 
+			return -EINVAL;
+	}
+
+	/* Free the current buffers, if any, but only if they're not still in use */
+	if (trace_buf != NULL) {
+		if(trace_get_pending_write_count() == 0)
+			rvfree(trace_buf, alloc_size);
+		else
+			return -EBUSY;
+	}
+
+	/* Allocate space for the tracing buffers */
+	if ((trace_buf = (char *) rvmalloc(size_alloc)) == NULL)
+		return -ENOMEM;
+
+	/* Remember the size set */
+	buf_size = buffers_size;
+	alloc_size = size_alloc;
+
+	return 0;
+}
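+
+/*
+ * Worked example for the lockless path above: buffers_size = 8192 has
+ * exactly one set bit, so hweight32() returns 1 and the size is
+ * accepted; ffs(8192) returns 14, so buf_offset_bits becomes 13 and a
+ * buffer offset fits in 13 bits.  A request of, say, 12000 bytes has
+ * several bits set and is rejected with -EINVAL.
+ */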
+
+/**
+ *	tracer_set_default_config: - Sets the tracer in its default config
+ *
+ *	Returns:
+ *	0, everything went OK
+ *	-ENOMEM, unable to get a hold of memory for tracer
+ */
+int tracer_set_default_config(void)
+{
+	int i;
+	int return_val = 0;
+
+	/* Initialize the event mask */
+	traced_events = 0;
+
+	/* Initialize the event mask with all existing events with their details */
+	for (i = 0; i <= TRACE_EV_MAX; i++) {
+		ltt_set_bit(i, &traced_events);
+		ltt_set_bit(i, &log_event_details_mask);
+	}
+
+	/* Do not interfere with Linux's syscall flow until we actually start tracing */
+	syscall_entry_trace_active = 0;
+	syscall_exit_trace_active  = 0;
+
+	/* Forget about the CPUID */
+	log_cpuid = FALSE;
+
+	/* We aren't tracing any PID or GID in particular */
+	tracing_pid = FALSE;
+	tracing_pgrp = FALSE;
+	tracing_gid = FALSE;
+	tracing_uid = FALSE;
+
+	/* We aren't looking for a particular call depth */
+	syscall_eip_depth_set = FALSE;
+
+	/* We aren't going to place bounds on syscall EIP fetching */
+	use_syscall_eip_bounds = FALSE;
+	lower_eip_bound_set = FALSE;
+	upper_eip_bound_set = FALSE;
+
+	/* By default, use TSC timestamping */
+	using_tsc = TRUE;
+	
+	/* Set the kernel trace configuration to its basics */
+	trace_set_config(syscall_eip_depth_set,
+			 use_syscall_eip_bounds,
+			 0,
+			 0,
+			 0);
+
+	return return_val;
+}
+
+/**
+ *	tracer_init: - Tracer initialization function.
+ *
+ *	Returns:
+ *	0, everything went OK
+ *	-ENOMEM, incapable of allocating necessary memory
+ *	Forwarded error code otherwise
+ */
+int __init tracer_init(void)
+{
+	int return_val = 0;
+
+	/* Initialize configuration */
+	if ((return_val = tracer_set_default_config()) < 0)
+		return return_val;
+
+	/* Initialize open count */
+	open_count = 0;
+
+	/* Initialize tracer lock */
+	trace_lock = 0;
+
+	/* Initialize bytes read and events lost */
+	buf_read_complete = 0;
+	size_read_incomplete = 0;
+
+	/* Initialize tracing daemon task structure */
+	daemon_task_struct = NULL;
+
+	/* Allocate memory for large data components; vmalloc() returns
+	   NULL on failure, not an error code */
+	if ((user_event_data = vmalloc(CUSTOM_EVENT_MAX_SIZE)) == NULL)
+		return -ENOMEM;
+
+	/* Initialize spin lock */
+	trace_spin_lock = SPIN_LOCK_UNLOCKED;
+
+	/* By default, use locking scheme */
+	use_locking = TRUE;
+
+	/* Register the tracer as a char device and bail out on failure */
+	major_number = register_chrdev(0, TRACER_NAME, &tracer_file_ops);
+	if (major_number < 0)
+		return major_number;
+
+	/* Initialize next event ID to be used */
+	next_event_id = TRACE_EV_MAX + 1;
+
+	/* Initialize custom events list */
+	custom_events = &custom_events_head;
+	custom_events->next = custom_events;
+	custom_events->prev = custom_events;
+
+	return return_val;
+}
+
+/**
+ *	trace_set_config: - Set the tracing configuration
+ *	@pm_fetch_syscall_use_depth: Use depth to fetch eip
+ *	@pm_fetch_syscall_use_bounds: Use bounds to fetch eip
+ *	@pm_syscall_eip_depth: Depth to fetch eip
+ *	@pm_syscall_lower_bound: Lower bound eip address
+ *	@pm_syscall_upper_bound: Upper bound eip address
+ *
+ *	Returns: 
+ *	0, all is OK 
+ *	-EINVAL, invalid configuration
+ */
+int trace_set_config(int pm_fetch_syscall_use_depth,
+		     int pm_fetch_syscall_use_bounds,
+		     int pm_syscall_eip_depth,
+		     void *pm_syscall_lower_bound,
+		     void *pm_syscall_upper_bound)
+{
+	/* Is this a valid configuration */
+	if ((pm_fetch_syscall_use_depth && pm_fetch_syscall_use_bounds)
+	    || (pm_syscall_lower_bound > pm_syscall_upper_bound)
+	    || (pm_syscall_eip_depth < 0))
+		return -EINVAL;
+
+	/* Set the configuration */
+	fetch_syscall_eip_use_depth = pm_fetch_syscall_use_depth;
+	fetch_syscall_eip_use_bounds = pm_fetch_syscall_use_bounds;
+	syscall_eip_depth = pm_syscall_eip_depth;
+	syscall_lower_eip_bound = pm_syscall_lower_bound;
+	syscall_upper_eip_bound = pm_syscall_upper_bound;
+
+	return 0;
+}
+
+/**
+ *	trace_get_config: - Get the tracing configuration
+ *	@pm_fetch_syscall_use_depth: Use depth to fetch eip
+ *	@pm_fetch_syscall_use_bounds: Use bounds to fetch eip
+ *	@pm_syscall_eip_depth: Depth to fetch eip
+ *	@pm_syscall_lower_bound: Lower bound eip address
+ *	@pm_syscall_upper_bound: Upper bound eip address
+ *
+ *	Returns:
+ *	0, all is OK 
+ */
+int trace_get_config(int *pm_fetch_syscall_use_depth,
+		     int *pm_fetch_syscall_use_bounds,
+		     int *pm_syscall_eip_depth,
+		     void **pm_syscall_lower_bound,
+		     void **pm_syscall_upper_bound)
+{
+	/* Get the configuration */
+	*pm_fetch_syscall_use_depth = fetch_syscall_eip_use_depth;
+	*pm_fetch_syscall_use_bounds = fetch_syscall_eip_use_bounds;
+	*pm_syscall_eip_depth = syscall_eip_depth;
+	*pm_syscall_lower_bound = syscall_lower_eip_bound;
+	*pm_syscall_upper_bound = syscall_upper_eip_bound;
+
+	return 0;
+}
+
+/**
+ *	_trace_create_event: - Create a new traceable event type
+ *	@pm_event_type: string describing event type
+ *	@pm_event_desc: string used for standard formatting
+ *	@pm_format_type: type of formatting used to log event data
+ *	@pm_format_data: data specific to format
+ *	@pm_owner_pid: PID of event's owner (0 if none)
+ *
+ *	Returns:
+ *	New Event ID if all is OK
+ *	-ENOMEM, Unable to allocate new event
+ */
+int _trace_create_event(char *pm_event_type,
+			char *pm_event_desc,
+			int pm_format_type,
+			char *pm_format_data,
+			pid_t pm_owner_pid)
+{
+	trace_new_event *p_event;
+	struct custom_event_desc *p_new_event;
+
+	/* Create event */
+	if ((p_new_event = (struct custom_event_desc *) kmalloc(sizeof(struct custom_event_desc), GFP_ATOMIC)) == NULL)
+		 return -ENOMEM;
+	p_event = &(p_new_event->event);
+
+	/* Initialize event properties */
+	p_event->type[0] = '\0';
+	p_event->desc[0] = '\0';
+	p_event->form[0] = '\0';
+
+	/* Set basic event properties */
+	if (pm_event_type != NULL)
+		strncpy(p_event->type, pm_event_type, CUSTOM_EVENT_TYPE_STR_LEN);
+	if (pm_event_desc != NULL)
+		strncpy(p_event->desc, pm_event_desc, CUSTOM_EVENT_DESC_STR_LEN);
+	if (pm_format_data != NULL)
+		strncpy(p_event->form, pm_format_data, CUSTOM_EVENT_FORM_STR_LEN);
+
+	/* Ensure that strings are NUL-terminated */
+	p_event->type[CUSTOM_EVENT_TYPE_STR_LEN - 1] = '\0';
+	p_event->desc[CUSTOM_EVENT_DESC_STR_LEN - 1] = '\0';
+	p_event->form[CUSTOM_EVENT_FORM_STR_LEN - 1] = '\0';
+
+	/* Set format type */
+	p_event->format_type = pm_format_type;
+
+	/* Give the new event a unique event ID */
+	p_event->id = next_event_id;
+	next_event_id++;
+
+	/* Set event's owner */
+	p_new_event->owner_pid = pm_owner_pid;
+
+	/* Insert new event in event list */
+	write_lock(&custom_list_lock);
+	p_new_event->next = custom_events;
+	p_new_event->prev = custom_events->prev;
+	custom_events->prev->next = p_new_event;
+	custom_events->prev = p_new_event;
+	write_unlock(&custom_list_lock);
+
+	/* Log the event creation event */
+	trace_event(TRACE_EV_NEW_EVENT, &(p_new_event->event));
+
+	return p_event->id;
+}
+int trace_create_event(char *pm_event_type,
+		       char *pm_event_desc,
+		       int pm_format_type,
+		       char *pm_format_data)
+{
+	return _trace_create_event(pm_event_type, pm_event_desc, pm_format_type, pm_format_data, 0);
+}
+int trace_create_owned_event(char *pm_event_type,
+			     char *pm_event_desc,
+			     int pm_format_type,
+			     char *pm_format_data,
+			     pid_t pm_owner_pid)
+{
+	return _trace_create_event(pm_event_type, pm_event_desc, pm_format_type, pm_format_data, pm_owner_pid);
+}
+
+/**
+ *	trace_destroy_event: - Destroy a created event type
+ *	@pm_event_id: the ID returned by trace_create_event()
+ *
+ *	No return values.
+ */
+void trace_destroy_event(int pm_event_id)
+{
+	struct custom_event_desc *p_event_desc;
+
+	write_lock(&custom_list_lock);
+
+	/* Find the event to destroy in the event description list */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		if (p_event_desc->event.id == pm_event_id)
+			break;
+
+	/* If we found something */
+	if (p_event_desc != custom_events) {
+		/* Remove the event from the list */
+		p_event_desc->next->prev = p_event_desc->prev;
+		p_event_desc->prev->next = p_event_desc->next;
+
+		/* Free the memory used by this event */
+		kfree(p_event_desc);
+	}
+	write_unlock(&custom_list_lock);
+}
+
+/**
+ *	trace_destroy_owners_events: Destroy an owner's events
+ *	@pm_owner_pid: the PID of the owner whose events are to be deleted.
+ *
+ *	No return values.
+ */
+void trace_destroy_owners_events(pid_t pm_owner_pid)
+{
+	struct custom_event_desc *p_temp_event;
+	struct custom_event_desc *p_event_desc;
+
+	write_lock(&custom_list_lock);
+
+	/* Start at the first event in the list */
+	p_event_desc = custom_events->next;
+
+	/* Find all events belonging to the PID */
+	while (p_event_desc != custom_events) {
+		p_temp_event = p_event_desc->next;
+
+		/* Does this event belong to the same owner */
+		if (p_event_desc->owner_pid == pm_owner_pid) {
+			/* Remove the event from the list */
+			p_event_desc->next->prev = p_event_desc->prev;
+			p_event_desc->prev->next = p_event_desc->next;
+
+			/* Free the memory used by this event */
+			kfree(p_event_desc);
+		}
+		p_event_desc = p_temp_event;
+	}
+
+	write_unlock(&custom_list_lock);
+}
+
+/**
+ *	trace_reregister_custom_events: - Relogs event creations.
+ *
+ *	Relog the declarations of custom events. This is necessary to make
+ *	sure that all custom events are part of every trace, even if the
+ *	event creation didn't take place during the current trace.
+ *	Hence, if a custom event occurs during a new trace, we can be sure
+ *	that its definition will also be part of the trace.
+ *
+ *	No return values.
+ */
+void trace_reregister_custom_events(void)
+{
+	struct custom_event_desc *p_event_desc;
+
+	read_lock(&custom_list_lock);
+
+	/* Log an event creation for every description in the list */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		trace_event(TRACE_EV_NEW_EVENT, &(p_event_desc->event));
+
+	read_unlock(&custom_list_lock);
+}
+
+/**
+ *	trace_std_formatted_event: - Trace a formatted event
+ *	@pm_event_id: the event Id provided upon creation
+ *	@...: printf-like data that will be used to fill the event string.
+ *
+ *	Returns:
+ *	Trace fct return code if OK.
+ *	-ENOMEDIUM, there is no registered tracer or event doesn't exist.
+ */
+int trace_std_formatted_event(int pm_event_id,...)
+{
+	int l_string_size;	/* Size of the string output by vsnprintf() */
+	char l_string[CUSTOM_EVENT_FINAL_STR_LEN];	/* Final formatted string */
+	va_list l_var_arg_list;	/* Variable argument list */
+	trace_custom l_custom;
+	struct custom_event_desc *p_event_desc;
+
+	read_lock(&custom_list_lock);
+
+	/* Find the event description matching this event */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		if (p_event_desc->event.id == pm_event_id)
+			break;
+
+	/* If we haven't found anything */
+	if (p_event_desc == custom_events) {
+		read_unlock(&custom_list_lock);
+
+		return -ENOMEDIUM;
+	}
+	/* Set custom event Id */
+	l_custom.id = pm_event_id;
+
+	/* Initialize variable argument list access */
+	va_start(l_var_arg_list, pm_event_id);
+
+	/* Print the description out to the temporary buffer, bounding the
+	   output so the fixed-size string can't overflow */
+	l_string_size = vsnprintf(l_string, CUSTOM_EVENT_FINAL_STR_LEN,
+				  p_event_desc->event.desc, l_var_arg_list);
+	if (l_string_size >= CUSTOM_EVENT_FINAL_STR_LEN)
+		l_string_size = CUSTOM_EVENT_FINAL_STR_LEN - 1;
+
+	read_unlock(&custom_list_lock);
+
+	/* Facilitate return to caller */
+	va_end(l_var_arg_list);
+
+	/* Set the size of the event */
+	l_custom.data_size = (u32) (l_string_size + 1);
+
+	/* Set the pointer to the event data */
+	l_custom.data = l_string;
+
+	/* Log the custom event */
+	return trace_event(TRACE_EV_CUSTOM, &l_custom);
+}
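+
+/*
+ * Hypothetical in-kernel user of the two calls above (sketch only; the
+ * event name, format string and format-type constant are made up here):
+ *
+ *	my_event_id = trace_create_event("my_driver",
+ *					 "irq %d handled in %d us",
+ *					 CUSTOM_EVENT_FORMAT_TYPE_STR,
+ *					 NULL);
+ *	...
+ *	trace_std_formatted_event(my_event_id, irq, elapsed_us);
+ *
+ * The formatted string becomes the payload of a TRACE_EV_CUSTOM event,
+ * as assembled above.
+ */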
+
+/**
+ *	trace_raw_event: - Trace a raw event
+ *	@pm_event_id: the event ID provided upon creation
+ *	@pm_event_size: the size of the data provided
+ *	@pm_event_data: data buffer describing the event
+ *
+ *	Returns:
+ *	Trace fct return code if OK.
+ *	-ENOMEDIUM, there is no registered tracer or event doesn't exist.
+ */
+int trace_raw_event(int pm_event_id, int pm_event_size, void *pm_event_data)
+{
+	trace_custom l_custom;
+	struct custom_event_desc *p_event_desc;
+
+	read_lock(&custom_list_lock);
+
+	/* Find the event description matching this event */
+	for (p_event_desc = custom_events->next;
+	     p_event_desc != custom_events;
+	     p_event_desc = p_event_desc->next)
+		if (p_event_desc->event.id == pm_event_id)
+			break;
+
+	read_unlock(&custom_list_lock);
+
+	/* If we haven't found anything */
+	if (p_event_desc == custom_events)
+		return -ENOMEDIUM;
+
+	/* Set custom event Id */
+	l_custom.id = pm_event_id;
+
+	/* Set the data size */
+	if (pm_event_size <= CUSTOM_EVENT_MAX_SIZE)
+		l_custom.data_size = (u32) pm_event_size;
+	else
+		l_custom.data_size = (u32) CUSTOM_EVENT_MAX_SIZE;
+
+	/* Set the pointer to the event data */
+	l_custom.data = pm_event_data;
+
+	/* Log the custom event */
+	return trace_event(TRACE_EV_CUSTOM, &l_custom);
+}
+
+/**
+ *	trace_event: - Trace an event
+ *	@pm_event_id: the event's ID (check out trace.h)
+ *	@pm_event_struct: the structure describing the event
+ *
+ *	Returns:
+ *	Trace fct return code if OK.
+ *	-ENOMEDIUM, there is no registered tracer
+ *	-ENOMEM, couldn't access ltt_info
+ */
+int trace_event(u8 pm_event_id,
+		void *pm_event_struct)
+{
+	int l_ret_value;
+
+	atomic_inc(&pending_write_count);
+
+	/* Call the tracer */
+	l_ret_value = trace(pm_event_id, 
+			    pm_event_struct, 
+			    smp_processor_id());
+	
+	atomic_dec(&pending_write_count);
+
+	return l_ret_value;
+}
+
+/**
+ *	trace_get_pending_write_count: - Get nbr pending writes.
+ *
+ *	Returns the number of trace event writes in progress.
+ */
+int trace_get_pending_write_count(void)
+{
+	return atomic_read(&pending_write_count);
+}
+
+module_init(tracer_init);
+
+/* Export symbols so they can be used from outside this file */
+EXPORT_SYMBOL(trace_set_config);
+EXPORT_SYMBOL(trace_get_config);
+EXPORT_SYMBOL(trace_create_event);
+EXPORT_SYMBOL(trace_create_owned_event);
+EXPORT_SYMBOL(trace_destroy_event);
+EXPORT_SYMBOL(trace_destroy_owners_events);
+EXPORT_SYMBOL(trace_std_formatted_event);
+EXPORT_SYMBOL(trace_raw_event);
+EXPORT_SYMBOL(trace_event);
+
+EXPORT_SYMBOL(syscall_entry_trace_active);
+EXPORT_SYMBOL(syscall_exit_trace_active);
