Merge commit 'v2.6.30-rc3' into tracing/hw-branch-tracing

author Ingo Molnar <mingo@elte.hu>

Fri, 24 Apr 2009 08:11:18 +0000 (10:11 +0200)

committer Ingo Molnar <mingo@elte.hu>

Fri, 24 Apr 2009 08:11:23 +0000 (10:11 +0200)
author Ingo Molnar <mingo@elte.hu>
Fri, 24 Apr 2009 08:11:18 +0000 (10:11 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 24 Apr 2009 08:11:23 +0000 (10:11 +0200)
diff --combined arch/x86/include/asm/processor.h

index 7c39de7e709a995cfc03465c845191bdc6acdbf0,c2cceae709c8655338894c15b84c361e65caf727..0b2fab0051e06649a6a526b5e4d2e5b5dd2ca559
--- 1/arch/x86/include/asm/processor.h
--- 2/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@@ -138,7 -138,7 +138,7 @@@ extern struct tss_struct   doublefault_ts
   extern __u32                  cleared_cpu_caps[NCAPINTS];
   
   #ifdef CONFIG_SMP
- DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
   #define cpu_data(cpu)         per_cpu(cpu_info, cpu)
   #define current_cpu_data      __get_cpu_var(cpu_info)
   #else
@@@ -270,7 -270,7 +270,7 @@@ struct tss_struct 
   
   } ____cacheline_aligned;
   
- DECLARE_PER_CPU(struct tss_struct, init_tss);
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
   
   /*
    * Save the original ist values for checking stack pointers during debugging
@@@ -352,6 -352,11 +352,11 @@@ struct i387_soft_struct 
         u32                     entry_eip;
   };
   
+ struct ymmh_struct {
+       /* 16 * 16 bytes for each YMMH-reg = 256 bytes */
+       u32 ymmh_space[64];
+ };
+ 
   struct xsave_hdr_struct {
         u64 xstate_bv;
         u64 reserved1[2];
@@@ -361,6 -366,7 +366,7 @@@
   struct xsave_struct {
         struct i387_fxsave_struct i387;
         struct xsave_hdr_struct xsave_hdr;
+       struct ymmh_struct ymmh;
         /* new processor state extensions will go here */
   } __attribute__ ((packed, aligned (64)));
   
@@@ -387,7 -393,7 +393,7 @@@ union irq_stack_union 
         };
   };
   
- DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
+ DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union);
   DECLARE_INIT_PER_CPU(irq_stack_union);
   
   DECLARE_PER_CPU(char *, irq_stack_ptr);
@@@ -454,8 -460,14 +460,8 @@@ struct thread_struct 
         unsigned                io_bitmap_max;
   /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
         unsigned long   debugctlmsr;
- -#ifdef CONFIG_X86_DS
- -/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+ +      /* Debug Store context; see asm/ds.h */
         struct ds_context       *ds_ctx;
- -#endif /* CONFIG_X86_DS */
- -#ifdef CONFIG_X86_PTRACE_BTS
- -/* the signal to send on a bts buffer overflow */
- -      unsigned int    bts_ovfl_signal;
- -#endif /* CONFIG_X86_PTRACE_BTS */
   };
   
   static inline unsigned long native_get_debugreg(int regno)
@@@ -783,21 -795,6 +789,21 @@@ static inline unsigned long get_debugct
       return debugctlmsr;
   }
   
+ +static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+ +{
+ +      u64 debugctlmsr = 0;
+ +      u32 val1, val2;
+ +
+ +#ifndef CONFIG_X86_DEBUGCTLMSR
+ +      if (boot_cpu_data.x86 < 6)
+ +              return 0;
+ +#endif
+ +      rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+ +      debugctlmsr = val1 | ((u64)val2 << 32);
+ +
+ +      return debugctlmsr;
+ +}
+ +
   static inline void update_debugctlmsr(unsigned long debugctlmsr)
   {
   #ifndef CONFIG_X86_DEBUGCTLMSR
@@@ -807,18 -804,6 +813,18 @@@
         wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
   }
   
+ +static inline void update_debugctlmsr_on_cpu(int cpu,
+ +                                           unsigned long debugctlmsr)
+ +{
+ +#ifndef CONFIG_X86_DEBUGCTLMSR
+ +      if (boot_cpu_data.x86 < 6)
+ +              return;
+ +#endif
+ +      wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+ +                   (u32)((u64)debugctlmsr),
+ +                   (u32)((u64)debugctlmsr >> 32));
+ +}
+ +
   /*
    * from system description table in BIOS. Mostly for MCA use, but
    * others may find it useful:
diff --combined arch/x86/kernel/ptrace.c

index b32a8ee533816eb60d28f32728dabcd9a11ade74,23b7c8f017e2afa74194c81c7720724a2604751b..d5252ae6c5205cb41face38652df00af422db52d
--- 1/arch/x86/kernel/ptrace.c
--- 2/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@@ -21,8 -21,6 +21,7 @@@
   #include <linux/audit.h>
   #include <linux/seccomp.h>
   #include <linux/signal.h>
- #include <linux/ftrace.h>
+ +#include <linux/workqueue.h>
   
   #include <asm/uaccess.h>
   #include <asm/pgtable.h>
@@@ -36,6 -34,8 +35,8 @@@
   #include <asm/proto.h>
   #include <asm/ds.h>
   
+ #include <trace/syscall.h>
+ 
   #include "tls.h"
   
   enum x86_regset {
@@@ -578,119 -578,17 +579,119 @@@ static int ioperm_get(struct task_struc
   }
   
   #ifdef CONFIG_X86_PTRACE_BTS
+ +/*
+ + * A branch trace store context.
+ + *
+ + * Contexts may only be installed by ptrace_bts_config() and only for
+ + * ptraced tasks.
+ + *
+ + * Contexts are destroyed when the tracee is detached from the tracer.
+ + * The actual destruction work requires interrupts enabled, so the
+ + * work is deferred and will be scheduled during __ptrace_unlink().
+ + *
+ + * Contexts hold an additional task_struct reference on the traced
+ + * task, as well as a reference on the tracer's mm.
+ + *
+ + * Ptrace already holds a task_struct reference for the duration of
+ + * ptrace operations, but since destruction is deferred, it may be
+ + * executed after both tracer and tracee have exited.
+ + */
+ +struct bts_context {
+ +      /* The branch trace handle. */
+ +      struct bts_tracer       *tracer;
+ +
+ +      /* The buffer used to store the branch trace and its size. */
+ +      void                    *buffer;
+ +      unsigned int            size;
+ +
+ +      /* The mm that paid for the above buffer. */
+ +      struct mm_struct        *mm;
+ +
+ +      /* The task this context belongs to. */
+ +      struct task_struct      *task;
+ +
+ +      /* The signal to send on a bts buffer overflow. */
+ +      unsigned int            bts_ovfl_signal;
+ +
+ +      /* The work struct to destroy a context. */
+ +      struct work_struct      work;
+ +};
+ +
+ +static inline void alloc_bts_buffer(struct bts_context *context,
+ +                                  unsigned int size)
+ +{
+ +      void *buffer;
+ +
+ +      buffer = alloc_locked_buffer(size);
+ +      if (buffer) {
+ +              context->buffer = buffer;
+ +              context->size = size;
+ +              context->mm = get_task_mm(current);
+ +      }
+ +}
+ +
+ +static inline void free_bts_buffer(struct bts_context *context)
+ +{
+ +      if (!context->buffer)
+ +              return;
+ +
+ +      kfree(context->buffer);
+ +      context->buffer = NULL;
+ +
+ +      refund_locked_buffer_memory(context->mm, context->size);
+ +      context->size = 0;
+ +
+ +      mmput(context->mm);
+ +      context->mm = NULL;
+ +}
+ +
+ +static void free_bts_context_work(struct work_struct *w)
+ +{
+ +      struct bts_context *context;
+ +
+ +      context = container_of(w, struct bts_context, work);
+ +
+ +      ds_release_bts(context->tracer);
+ +      put_task_struct(context->task);
+ +      free_bts_buffer(context);
+ +      kfree(context);
+ +}
+ +
+ +static inline void free_bts_context(struct bts_context *context)
+ +{
+ +      INIT_WORK(&context->work, free_bts_context_work);
+ +      schedule_work(&context->work);
+ +}
+ +
+ +static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+ +{
+ +      struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+ +      if (context) {
+ +              context->task = task;
+ +              task->bts = context;
+ +
+ +              get_task_struct(task);
+ +      }
+ +
+ +      return context;
+ +}
+ +
   static int ptrace_bts_read_record(struct task_struct *child, size_t index,
                                   struct bts_struct __user *out)
   {
+ +      struct bts_context *context;
         const struct bts_trace *trace;
         struct bts_struct bts;
         const unsigned char *at;
         int error;
   
- -      trace = ds_read_bts(child->bts);
+ +      context = child->bts;
+ +      if (!context)
+ +              return -ESRCH;
+ +
+ +      trace = ds_read_bts(context->tracer);
         if (!trace)
- -              return -EPERM;
+ +              return -ESRCH;
   
         at = trace->ds.top - ((index + 1) * trace->ds.size);
         if ((void *)at < trace->ds.begin)
@@@ -699,7 -597,7 +700,7 @@@
         if (!trace->read)
                 return -EOPNOTSUPP;
   
- -      error = trace->read(child->bts, at, &bts);
+ +      error = trace->read(context->tracer, at, &bts);
         if (error < 0)
                 return error;
   
@@@ -713,18 -611,13 +714,18 @@@ static int ptrace_bts_drain(struct task
                             long size,
                             struct bts_struct __user *out)
   {
+ +      struct bts_context *context;
         const struct bts_trace *trace;
         const unsigned char *at;
         int error, drained = 0;
   
- -      trace = ds_read_bts(child->bts);
+ +      context = child->bts;
+ +      if (!context)
+ +              return -ESRCH;
+ +
+ +      trace = ds_read_bts(context->tracer);
         if (!trace)
- -              return -EPERM;
+ +              return -ESRCH;
   
         if (!trace->read)
                 return -EOPNOTSUPP;
@@@ -735,8 -628,9 +736,8 @@@
         for (at = trace->ds.begin; (void *)at < trace->ds.top;
              out++, drained++, at += trace->ds.size) {
                 struct bts_struct bts;
- -              int error;
   
- -              error = trace->read(child->bts, at, &bts);
+ +              error = trace->read(context->tracer, at, &bts);
                 if (error < 0)
                         return error;
   
@@@ -746,18 -640,35 +747,18 @@@
   
         memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
   
- -      error = ds_reset_bts(child->bts);
+ +      error = ds_reset_bts(context->tracer);
         if (error < 0)
                 return error;
   
         return drained;
   }
   
- -static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
- -{
- -      child->bts_buffer = alloc_locked_buffer(size);
- -      if (!child->bts_buffer)
- -              return -ENOMEM;
- -
- -      child->bts_size = size;
- -
- -      return 0;
- -}
- -
- -static void ptrace_bts_free_buffer(struct task_struct *child)
- -{
- -      free_locked_buffer(child->bts_buffer, child->bts_size);
- -      child->bts_buffer = NULL;
- -      child->bts_size = 0;
- -}
- -
   static int ptrace_bts_config(struct task_struct *child,
                              long cfg_size,
                              const struct ptrace_bts_config __user *ucfg)
   {
+ +      struct bts_context *context;
         struct ptrace_bts_config cfg;
         unsigned int flags = 0;
   
@@@ -767,31 -678,28 +768,31 @@@
         if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
                 return -EFAULT;
   
- -      if (child->bts) {
- -              ds_release_bts(child->bts);
- -              child->bts = NULL;
- -      }
+ +      context = child->bts;
+ +      if (!context)
+ +              context = alloc_bts_context(child);
+ +      if (!context)
+ +              return -ENOMEM;
   
         if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
                 if (!cfg.signal)
                         return -EINVAL;
   
- -              child->thread.bts_ovfl_signal = cfg.signal;
                 return -EOPNOTSUPP;
+ +              context->bts_ovfl_signal = cfg.signal;
         }
   
- -      if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
- -          (cfg.size != child->bts_size)) {
- -              int error;
+ +      ds_release_bts(context->tracer);
+ +      context->tracer = NULL;
   
- -              ptrace_bts_free_buffer(child);
+ +      if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+ +              free_bts_buffer(context);
+ +              if (!cfg.size)
+ +                      return 0;
   
- -              error = ptrace_bts_allocate_buffer(child, cfg.size);
- -              if (error < 0)
- -                      return error;
+ +              alloc_bts_buffer(context, cfg.size);
+ +              if (!context->buffer)
+ +                      return -ENOMEM;
         }
   
         if (cfg.flags & PTRACE_BTS_O_TRACE)
@@@ -800,14 -708,15 +801,14 @@@
         if (cfg.flags & PTRACE_BTS_O_SCHED)
                 flags |= BTS_TIMESTAMPS;
   
- -      child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
- -                                  /* ovfl = */ NULL, /* th = */ (size_t)-1,
- -                                  flags);
- -      if (IS_ERR(child->bts)) {
- -              int error = PTR_ERR(child->bts);
- -
- -              ptrace_bts_free_buffer(child);
- -              child->bts = NULL;
+ +      context->tracer =
+ +              ds_request_bts_task(child, context->buffer, context->size,
+ +                                  NULL, (size_t)-1, flags);
+ +      if (unlikely(IS_ERR(context->tracer))) {
+ +              int error = PTR_ERR(context->tracer);
   
+ +              free_bts_buffer(context);
+ +              context->tracer = NULL;
                 return error;
         }
   
@@@ -818,25 -727,20 +819,25 @@@ static int ptrace_bts_status(struct tas
                              long cfg_size,
                              struct ptrace_bts_config __user *ucfg)
   {
+ +      struct bts_context *context;
         const struct bts_trace *trace;
         struct ptrace_bts_config cfg;
   
+ +      context = child->bts;
+ +      if (!context)
+ +              return -ESRCH;
+ +
         if (cfg_size < sizeof(cfg))
                 return -EIO;
   
- -      trace = ds_read_bts(child->bts);
+ +      trace = ds_read_bts(context->tracer);
         if (!trace)
- -              return -EPERM;
+ +              return -ESRCH;
   
         memset(&cfg, 0, sizeof(cfg));
- -      cfg.size = trace->ds.end - trace->ds.begin;
- -      cfg.signal = child->thread.bts_ovfl_signal;
- -      cfg.bts_size = sizeof(struct bts_struct);
+ +      cfg.size        = trace->ds.end - trace->ds.begin;
+ +      cfg.signal      = context->bts_ovfl_signal;
+ +      cfg.bts_size    = sizeof(struct bts_struct);
   
         if (cfg.signal)
                 cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@@ -855,51 -759,80 +856,51 @@@
   
   static int ptrace_bts_clear(struct task_struct *child)
   {
+ +      struct bts_context *context;
         const struct bts_trace *trace;
   
- -      trace = ds_read_bts(child->bts);
+ +      context = child->bts;
+ +      if (!context)
+ +              return -ESRCH;
+ +
+ +      trace = ds_read_bts(context->tracer);
         if (!trace)
- -              return -EPERM;
+ +              return -ESRCH;
   
         memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
   
- -      return ds_reset_bts(child->bts);
+ +      return ds_reset_bts(context->tracer);
   }
   
   static int ptrace_bts_size(struct task_struct *child)
   {
+ +      struct bts_context *context;
         const struct bts_trace *trace;
   
- -      trace = ds_read_bts(child->bts);
+ +      context = child->bts;
+ +      if (!context)
+ +              return -ESRCH;
+ +
+ +      trace = ds_read_bts(context->tracer);
         if (!trace)
- -              return -EPERM;
+ +              return -ESRCH;
   
         return (trace->ds.top - trace->ds.begin) / trace->ds.size;
   }
   
- -static void ptrace_bts_fork(struct task_struct *tsk)
- -{
- -      tsk->bts = NULL;
- -      tsk->bts_buffer = NULL;
- -      tsk->bts_size = 0;
- -      tsk->thread.bts_ovfl_signal = 0;
- -}
- -
- -static void ptrace_bts_untrace(struct task_struct *child)
+ +/*
+ + * Called from __ptrace_unlink() after the child has been moved back
+ + * to its original parent.
+ + */
+ +void ptrace_bts_untrace(struct task_struct *child)
   {
         if (unlikely(child->bts)) {
- -              ds_release_bts(child->bts);
+ +              free_bts_context(child->bts);
                 child->bts = NULL;
- -
- -              /* We cannot update total_vm and locked_vm since
- -                 child's mm is already gone. But we can reclaim the
- -                 memory. */
- -              kfree(child->bts_buffer);
- -              child->bts_buffer = NULL;
- -              child->bts_size = 0;
         }
   }
- -
- -static void ptrace_bts_detach(struct task_struct *child)
- -{
- -      /*
- -       * Ptrace_detach() races with ptrace_untrace() in case
- -       * the child dies and is reaped by another thread.
- -       *
- -       * We only do the memory accounting at this point and
- -       * leave the buffer deallocation and the bts tracer
- -       * release to ptrace_bts_untrace() which will be called
- -       * later on with tasklist_lock held.
- -       */
- -      release_locked_buffer(child->bts_buffer, child->bts_size);
- -}
- -#else
- -static inline void ptrace_bts_fork(struct task_struct *tsk) {}
- -static inline void ptrace_bts_detach(struct task_struct *child) {}
- -static inline void ptrace_bts_untrace(struct task_struct *child) {}
   #endif /* CONFIG_X86_PTRACE_BTS */
   
- -void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
- -{
- -      ptrace_bts_fork(child);
- -}
- -
- -void x86_ptrace_untrace(struct task_struct *child)
- -{
- -      ptrace_bts_untrace(child);
- -}
- -
   /*
    * Called by kernel/ptrace.c when detaching..
    *
@@@ -911,6 -844,7 +912,6 @@@ void ptrace_disable(struct task_struct 
   #ifdef TIF_SYSCALL_EMU
         clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
   #endif
- -      ptrace_bts_detach(child);
   }
   
   #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
diff --combined include/linux/sched.h

index 451186a22ef559525e0ea5032a5eb918239eeb9c,b4c38bc8049cbbea17e0ca4f929f35df9cddbe1f..f4e3985c85930edb7f461ed8016596cabdc50dac
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -96,8 -96,8 +96,8 @@@ struct exec_domain
   struct futex_pi_state;
   struct robust_list_head;
   struct bio;
- -struct bts_tracer;
   struct fs_struct;
+ +struct bts_context;
   
   /*
    * List of flags we want to share for kernel threads,
@@@ -205,7 -205,8 +205,8 @@@ extern unsigned long long time_sync_thr
   #define task_is_stopped_or_traced(task)       \
                         ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
   #define task_contributes_to_load(task)        \
-                               ((task->state & TASK_UNINTERRUPTIBLE) != 0)
+                               ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
+                                (task->flags & PF_FROZEN) == 0)
   
   #define __set_task_state(tsk, state_value)            \
         do { (tsk)->state = (state_value); } while (0)
@@@ -300,17 -301,11 +301,11 @@@ extern int proc_dosoftlockup_thresh(str
                                     struct file *filp, void __user *buffer,
                                     size_t *lenp, loff_t *ppos);
   extern unsigned int  softlockup_panic;
- extern unsigned long sysctl_hung_task_check_count;
- extern unsigned long sysctl_hung_task_timeout_secs;
- extern unsigned long sysctl_hung_task_warnings;
   extern int softlockup_thresh;
   #else
   static inline void softlockup_tick(void)
   {
   }
- static inline void spawn_softlockup_task(void)
- {
- }
   static inline void touch_softlockup_watchdog(void)
   {
   }
@@@ -319,6 -314,15 +314,15 @@@ static inline void touch_all_softlockup
   }
   #endif
   
+ #ifdef CONFIG_DETECT_HUNG_TASK
+ extern unsigned int  sysctl_hung_task_panic;
+ extern unsigned long sysctl_hung_task_check_count;
+ extern unsigned long sysctl_hung_task_timeout_secs;
+ extern unsigned long sysctl_hung_task_warnings;
+ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+                                        struct file *filp, void __user *buffer,
+                                        size_t *lenp, loff_t *ppos);
+ #endif
   
   /* Attach to any functions which should be ignored in wchan output. */
   #define __sched               __attribute__((__section__(".sched.text")))
@@@ -1205,11 -1209,18 +1209,11 @@@ struct task_struct 
         struct list_head ptraced;
         struct list_head ptrace_entry;
   
- -#ifdef CONFIG_X86_PTRACE_BTS
         /*
          * This is the tracer handle for the ptrace BTS extension.
          * This field actually belongs to the ptracer task.
          */
- -      struct bts_tracer *bts;
- -      /*
- -       * The buffer to hold the BTS data.
- -       */
- -      void *bts_buffer;
- -      size_t bts_size;
- -#endif /* CONFIG_X86_PTRACE_BTS */
+ +      struct bts_context *bts;
   
         /* PID/PID hash table linkage. */
         struct pid_link pids[PIDTYPE_MAX];
@@@ -1248,9 -1259,8 +1252,8 @@@
   /* ipc stuff */
         struct sysv_sem sysvsem;
   #endif
- #ifdef CONFIG_DETECT_SOFTLOCKUP
+ #ifdef CONFIG_DETECT_HUNG_TASK
   /* hung task detection */
-       unsigned long last_switch_timestamp;
         unsigned long last_switch_count;
   #endif
   /* CPU-specific state of this task */
@@@ -1287,6 -1297,11 +1290,11 @@@
   /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
         spinlock_t alloc_lock;
   
+ #ifdef CONFIG_GENERIC_HARDIRQS
+       /* IRQ handler threads */
+       struct irqaction *irqaction;
+ #endif
+ 
         /* Protection of the PI data structures: */
         spinlock_t pi_lock;
   
@@@ -1986,10 -2001,8 +1994,10 @@@ extern void set_task_comm(struct task_s
   extern char *get_task_comm(char *to, struct task_struct *tsk);
   
   #ifdef CONFIG_SMP
+ +extern void wait_task_context_switch(struct task_struct *p);
   extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
   #else
+ +static inline void wait_task_context_switch(struct task_struct *p) {}
   static inline unsigned long wait_task_inactive(struct task_struct *p,
                                                long match_state)
   {
diff --combined kernel/Makefile

index c8e1be5f0b0018fdc9f8cd80f62af9d56ab9507d,42423665660a3d6e0a1fc6a37dd0da997643b47f..a35eee3436de081a7a35d9b8184a9349facee17d
--- 1/kernel/Makefile
--- 2/kernel/Makefile
+++ b/kernel/Makefile
@@@ -74,6 -74,7 +74,7 @@@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.
   obj-$(CONFIG_KPROBES) += kprobes.o
   obj-$(CONFIG_KGDB) += kgdb.o
   obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
   obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
   obj-$(CONFIG_SECCOMP) += seccomp.o
   obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
@@@ -92,7 -93,6 +93,7 @@@ obj-$(CONFIG_LATENCYTOP) += latencytop.
   obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
   obj-$(CONFIG_FUNCTION_TRACER) += trace/
   obj-$(CONFIG_TRACING) += trace/
+ +obj-$(CONFIG_X86_DS) += trace/
   obj-$(CONFIG_SMP) += sched_cpupri.o
   obj-$(CONFIG_SLOW_WORK) += slow-work.o
   
diff --combined kernel/fork.c

index 69bde7a22e9bfb971ecbbf79a526521edc7663dd,b9e2edd00726538467877d458d7aeb7771c03015..7d10c46cbb4e51475636c703184649b6f6e353f8
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -645,6 -645,9 +645,9 @@@ static int copy_mm(unsigned long clone_
   
         tsk->min_flt = tsk->maj_flt = 0;
         tsk->nvcsw = tsk->nivcsw = 0;
+ #ifdef CONFIG_DETECT_HUNG_TASK
+       tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+ #endif
   
         tsk->mm = NULL;
         tsk->active_mm = NULL;
@@@ -797,6 -800,12 +800,12 @@@ static void posix_cpu_timers_init_group
         sig->cputime_expires.virt_exp = cputime_zero;
         sig->cputime_expires.sched_exp = 0;
   
+       if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+               sig->cputime_expires.prof_exp =
+                       secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+               sig->cputimer.running = 1;
+       }
+ 
         /* The timer lists. */
         INIT_LIST_HEAD(&sig->cpu_timers[0]);
         INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@@ -812,11 -821,8 +821,8 @@@ static int copy_signal(unsigned long cl
                 atomic_inc(&current->signal->live);
                 return 0;
         }
-       sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
- 
-       if (sig)
-               posix_cpu_timers_init_group(sig);
   
+       sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
         tsk->signal = sig;
         if (!sig)
                 return -ENOMEM;
@@@ -856,6 -862,8 +862,8 @@@
         memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
         task_unlock(current->group_leader);
   
+       posix_cpu_timers_init_group(sig);
+ 
         acct_init_pacct(&sig->pacct);
   
         tty_audit_fork(sig);
@@@ -1032,11 -1040,6 +1040,6 @@@ static struct task_struct *copy_process
   
         p->default_timer_slack_ns = current->timer_slack_ns;
   
- #ifdef CONFIG_DETECT_SOFTLOCKUP
-       p->last_switch_count = 0;
-       p->last_switch_timestamp = 0;
- #endif
- 
         task_io_accounting_init(&p->ioac);
         acct_clear_integrals(p);
   
@@@ -1086,8 -1089,8 +1089,8 @@@
   #ifdef CONFIG_DEBUG_MUTEXES
         p->blocked_on = NULL; /* not blocked yet */
   #endif
- -      if (unlikely(current->ptrace))
- -              ptrace_fork(p, clone_flags);
+ +
+ +      p->bts = NULL;
   
         /* Perform scheduler related setup. Assign this task to a CPU. */
         sched_fork(p, clone_flags);
diff --combined kernel/ptrace.c

index 321127d965c29ca9e829742a9a79f0c944f4300b,dfcd83ceee3b246326cbec2a6eadb27abdba7823..4559e84f4b8a1b8c598004d95b9c62d73b0c7d7d
--- 1/kernel/ptrace.c
--- 2/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@@ -21,11 -21,19 +21,9 @@@
   #include <linux/audit.h>
   #include <linux/pid_namespace.h>
   #include <linux/syscalls.h>
- 
- #include <asm/pgtable.h>
- #include <asm/uaccess.h>
+ #include <linux/uaccess.h>
   
   
- -/*
- - * Initialize a new task whose father had been ptraced.
- - *
- - * Called from copy_process().
- - */
- -void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
- -{
- -      arch_ptrace_fork(child, clone_flags);
- -}
- -
   /*
    * ptrace a task: make the debugger its new parent and
    * move it to the ptrace list.
@@@ -38,7 -46,7 +36,7 @@@ void __ptrace_link(struct task_struct *
         list_add(&child->ptrace_entry, &new_parent->ptraced);
         child->parent = new_parent;
   }
-  
+ 
   /*
    * Turn a tracing stop into a normal stop now, since with no tracer there
    * would be no way to wake it up with SIGCONT or SIGKILL.  If there was a
@@@ -163,7 -171,7 +161,7 @@@ bool ptrace_may_access(struct task_stru
         task_lock(task);
         err = __ptrace_may_access(task, mode);
         task_unlock(task);
-       return (!err ? true : false);
+       return !err;
   }
   
   int ptrace_attach(struct task_struct *task)
@@@ -348,7 -356,7 +346,7 @@@ int ptrace_readdata(struct task_struct 
                 copied += retval;
                 src += retval;
                 dst += retval;
-               len -= retval;                  
+               len -= retval;
         }
         return copied;
   }
@@@ -373,7 -381,7 +371,7 @@@ int ptrace_writedata(struct task_struc
                 copied += retval;
                 src += retval;
                 dst += retval;
-               len -= retval;                  
+               len -= retval;
         }
         return copied;
   }
@@@ -486,9 -494,9 +484,9 @@@ static int ptrace_resume(struct task_st
                 if (unlikely(!arch_has_single_step()))
                         return -EIO;
                 user_enable_single_step(child);
-       }
-       else
+       } else {
                 user_disable_single_step(child);
+       }
   
         child->exit_code = data;
         wake_up_process(child);
@@@ -596,10 -604,11 +594,11 @@@ repeat
                 ret = security_ptrace_traceme(current->parent);
   
                 /*
-                * Set the ptrace bit in the process ptrace flags.
-                * Then link us on our parent's ptraced list.
+                * Check PF_EXITING to ensure ->real_parent has not passed
+                * exit_ptrace(). Otherwise we don't report the error but
+                * pretend ->real_parent untraces us right after return.
                  */
-               if (!ret) {
+               if (!ret && !(current->real_parent->flags & PF_EXITING)) {
                         current->ptrace |= PT_PTRACED;
                         __ptrace_link(current, current->real_parent);
                 }
diff --combined kernel/sched.c

index f91bc8141dc346c018a1ab6670ce8a6f1e9ab10f,b902e587a3a03e32a3e26f676e419f692ba79145..36322e8682c70f56e23a4612993057b19f69cadd
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -1418,10 -1418,22 +1418,22 @@@ iter_move_one_task(struct rq *this_rq, 
                    struct rq_iterator *iterator);
   #endif
   
+ /* Time spent by the tasks of the cpu accounting group executing in ... */
+ enum cpuacct_stat_index {
+       CPUACCT_STAT_USER,      /* ... user mode */
+       CPUACCT_STAT_SYSTEM,    /* ... kernel mode */
+ 
+       CPUACCT_STAT_NSTATS,
+ };
+ 
   #ifdef CONFIG_CGROUP_CPUACCT
   static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+ static void cpuacct_update_stats(struct task_struct *tsk,
+               enum cpuacct_stat_index idx, cputime_t val);
   #else
   static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+ static inline void cpuacct_update_stats(struct task_struct *tsk,
+               enum cpuacct_stat_index idx, cputime_t val) {}
   #endif
   
   static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@@ -2002,49 -2014,6 +2014,49 @@@ migrate_task(struct task_struct *p, in
         return 1;
   }
   
+ +/*
+ + * wait_task_context_switch - wait for a thread to complete at least one
+ + *                            context switch.
+ + *
+ + * @p must not be current.
+ + */
+ +void wait_task_context_switch(struct task_struct *p)
+ +{
+ +      unsigned long nvcsw, nivcsw, flags;
+ +      int running;
+ +      struct rq *rq;
+ +
+ +      nvcsw   = p->nvcsw;
+ +      nivcsw  = p->nivcsw;
+ +      for (;;) {
+ +              /*
+ +               * The runqueue is assigned before the actual context
+ +               * switch. We need to take the runqueue lock.
+ +               *
+ +               * We could check initially without the lock but it is
+ +               * very likely that we need to take the lock in every
+ +               * iteration.
+ +               */
+ +              rq = task_rq_lock(p, &flags);
+ +              running = task_running(rq, p);
+ +              task_rq_unlock(rq, &flags);
+ +
+ +              if (likely(!running))
+ +                      break;
+ +              /*
+ +               * The switch count is incremented before the actual
+ +               * context switch. We thus wait for two switches to be
+ +               * sure at least one completed.
+ +               */
+ +              if ((p->nvcsw - nvcsw) > 1)
+ +                      break;
+ +              if ((p->nivcsw - nivcsw) > 1)
+ +                      break;
+ +
+ +              cpu_relax();
+ +      }
+ +}
+ +
   /*
    * wait_task_inactive - wait for a thread to unschedule.
    *
@@@ -4554,9 -4523,25 +4566,25 @@@ DEFINE_PER_CPU(struct kernel_stat, ksta
   EXPORT_PER_CPU_SYMBOL(kstat);
   
   /*
-  * Return any ns on the sched_clock that have not yet been banked in
+  * Return any ns on the sched_clock that have not yet been accounted in
    * @p in case that task is currently running.
+  *
+  * Called with task_rq_lock() held on @rq.
    */
+ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+ {
+       u64 ns = 0;
+ 
+       if (task_current(rq, p)) {
+               update_rq_clock(rq);
+               ns = rq->clock - p->se.exec_start;
+               if ((s64)ns < 0)
+                       ns = 0;
+       }
+ 
+       return ns;
+ }
+ 
   unsigned long long task_delta_exec(struct task_struct *p)
   {
         unsigned long flags;
@@@ -4564,16 -4549,49 +4592,49 @@@
         u64 ns = 0;
   
         rq = task_rq_lock(p, &flags);
+       ns = do_task_delta_exec(p, rq);
+       task_rq_unlock(rq, &flags);
   
-       if (task_current(rq, p)) {
-               u64 delta_exec;
+       return ns;
+ }
   
-               update_rq_clock(rq);
-               delta_exec = rq->clock - p->se.exec_start;
-               if ((s64)delta_exec > 0)
-                       ns = delta_exec;
-       }
+ /*
+  * Return accounted runtime for the task.
+  * In case the task is currently running, return the runtime plus current's
+  * pending runtime that has not been accounted yet.
+  */
+ unsigned long long task_sched_runtime(struct task_struct *p)
+ {
+       unsigned long flags;
+       struct rq *rq;
+       u64 ns = 0;     /* overwritten below while the rq lock is held */
+ 
+       rq = task_rq_lock(p, &flags);   /* do_task_delta_exec() is documented as called with this lock held */
+       ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);        /* accounted runtime + pending delta */
+       task_rq_unlock(rq, &flags);
+ 
+       return ns;
+ }
+ 
+ /*
+  * Return sum_exec_runtime for the thread group.
+  * In case the task is currently running, return the sum plus current's
+  * pending runtime that has not been accounted yet.
+  *
+  * Note that the thread group might have other running tasks as well,
+  * so the return value does not include the pending runtime that other
+  * running tasks might have.
+  */
+ unsigned long long thread_group_sched_runtime(struct task_struct *p)
+ {
+       struct task_cputime totals;
+       unsigned long flags;
+       struct rq *rq;
+       u64 ns;
   
+       rq = task_rq_lock(p, &flags);
+       thread_group_cputime(p, &totals);
+       ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
         task_rq_unlock(rq, &flags);
   
         return ns;
@@@ -4602,6 -4620,8 +4663,8 @@@ void account_user_time(struct task_stru
                 cpustat->nice = cputime64_add(cpustat->nice, tmp);
         else
                 cpustat->user = cputime64_add(cpustat->user, tmp);
+ 
+       cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
         /* Account for user time used */
         acct_update_integrals(p);
   }
@@@ -4663,6 -4683,8 +4726,8 @@@ void account_system_time(struct task_st
         else
                 cpustat->system = cputime64_add(cpustat->system, tmp);
   
+       cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+ 
         /* Account for system time used */
         acct_update_integrals(p);
   }
@@@ -4824,7 -4846,7 +4889,7 @@@ void scheduler_tick(void
   #endif
   }
   
- unsigned long get_parent_ip(unsigned long addr)
+ notrace unsigned long get_parent_ip(unsigned long addr)
   {
         if (in_lock_functions(addr)) {
                 addr = CALLER_ADDR2;
@@@ -7345,7 -7367,12 +7410,12 @@@ static int sched_domain_debug_one(struc
                 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
   
                 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
+ 
                 printk(KERN_CONT " %s", str);
+               if (group->__cpu_power != SCHED_LOAD_SCALE) {
+                       printk(KERN_CONT " (__cpu_power = %d)",
+                               group->__cpu_power);
+               }
   
                 group = group->next;
         } while (group != sd->groups);
@@@ -9968,6 -9995,7 +10038,7 @@@ struct cpuacct 
         struct cgroup_subsys_state css;
         /* cpuusage holds pointer to a u64-type object on every cpu */
         u64 *cpuusage;
+       struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
         struct cpuacct *parent;
   };
   
@@@ -9992,20 -10020,32 +10063,32 @@@ static struct cgroup_subsys_state *cpua
         struct cgroup_subsys *ss, struct cgroup *cgrp)
   {
         struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+       int i;
   
         if (!ca)
-               return ERR_PTR(-ENOMEM);
+               goto out;
   
         ca->cpuusage = alloc_percpu(u64);
-       if (!ca->cpuusage) {
-               kfree(ca);
-               return ERR_PTR(-ENOMEM);
-       }
+       if (!ca->cpuusage)
+               goto out_free_ca;
+ 
+       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+               if (percpu_counter_init(&ca->cpustat[i], 0))
+                       goto out_free_counters;
   
         if (cgrp->parent)
                 ca->parent = cgroup_ca(cgrp->parent);
   
         return &ca->css;
+ 
+ out_free_counters:
+       while (--i >= 0)
+               percpu_counter_destroy(&ca->cpustat[i]);
+       free_percpu(ca->cpuusage);
+ out_free_ca:
+       kfree(ca);
+ out:
+       return ERR_PTR(-ENOMEM);
   }
   
   /* destroy an existing cpu accounting group */
@@@ -10013,7 -10053,10 +10096,10 @@@ static voi
   cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
   {
         struct cpuacct *ca = cgroup_ca(cgrp);
+       int i;
   
+       for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+               percpu_counter_destroy(&ca->cpustat[i]);
         free_percpu(ca->cpuusage);
         kfree(ca);
   }
@@@ -10100,6 -10143,25 +10186,25 @@@ static int cpuacct_percpu_seq_read(stru
         return 0;
   }
   
+ static const char *cpuacct_stat_desc[] = {    /* key strings handed to cb->fill(), indexed by CPUACCT_STAT_* */
+       [CPUACCT_STAT_USER] = "user",
+       [CPUACCT_STAT_SYSTEM] = "system",
+ };
+ 
+ static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+               struct cgroup_map_cb *cb)
+ {
+       struct cpuacct *ca = cgroup_ca(cgrp);
+       int i;
+ 
+       for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {     /* emit one name/value pair per statistic */
+               s64 val = percpu_counter_read(&ca->cpustat[i]);
+               val = cputime64_to_clock_t(val);        /* presumably converts cputime64 to clock_t units; confirm */
+               cb->fill(cb, cpuacct_stat_desc[i], val);
+       }
+       return 0;       /* no failure path in this function */
+ }
+ 
   static struct cftype files[] = {
         {
                 .name = "usage",
@@@ -10110,7 -10172,10 +10215,10 @@@
                 .name = "usage_percpu",
                 .read_seq_string = cpuacct_percpu_seq_read,
         },
- 
+       {
+               .name = "stat",
+               .read_map = cpuacct_stats_show,
+       },
   };
   
   static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@@ -10132,12 -10197,38 +10240,38 @@@ static void cpuacct_charge(struct task_
                 return;
   
         cpu = task_cpu(tsk);
+ 
+       rcu_read_lock();
+ 
         ca = task_ca(tsk);
   
         for (; ca; ca = ca->parent) {
                 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
         }
+ 
+       rcu_read_unlock();
+ }
+ 
+ /*
+  * Charge the system/user time to the task's accounting group.
+  */
+ static void cpuacct_update_stats(struct task_struct *tsk,
+               enum cpuacct_stat_index idx, cputime_t val)
+ {
+       struct cpuacct *ca;
+ 
+       if (unlikely(!cpuacct_subsys.active))   /* skip all accounting while the subsystem is not active */
+               return;
+ 
+       rcu_read_lock();
+       ca = task_ca(tsk);      /* NOTE(review): dereferenced below before any NULL test; assumes task_ca() never returns NULL */
+ 
+       do {    /* add @val to this group's counter and to each ancestor reached via ->parent */
+               percpu_counter_add(&ca->cpustat[idx], val);
+               ca = ca->parent;
+       } while (ca);
+       rcu_read_unlock();
   }
   
   struct cgroup_subsys cpuacct_subsys = {
diff --combined kernel/trace/trace.h

index 9e15802cca9f5c81fb4723ac1e4b24e9a63e2a31,e685ac2b2ba10f1dcf24fef92180d94b75f76367..7c3f49aad6e2778ce3947a489ced53eaa9cd1af3
--- 1/kernel/trace/trace.h
--- 2/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@@ -599,12 -599,10 +599,12 @@@ extern int trace_selftest_startup_syspr
                                                struct trace_array *tr);
   extern int trace_selftest_startup_branch(struct tracer *trace,
                                          struct trace_array *tr);
+ +extern int trace_selftest_startup_hw_branches(struct tracer *trace,
+ +                                            struct trace_array *tr);
   #endif /* CONFIG_FTRACE_STARTUP_TEST */
   
   extern void *head_page(struct trace_array_cpu *data);
- extern long ns2usecs(cycle_t nsec);
+ extern unsigned long long ns2usecs(cycle_t nsec);
   extern int
   trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
   extern int
author	Ingo Molnar <mingo@elte.hu>
	Fri, 24 Apr 2009 08:11:18 +0000 (10:11 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 24 Apr 2009 08:11:23 +0000 (10:11 +0200)
		1	2
arch/x86/include/asm/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/ptrace.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/ptrace.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.h	patch \|	diff1 \|	diff2 \|	blob \| history