Skip to content

Commit

Permalink
cputime: Safely read cputime of full dynticks CPUs
Browse files Browse the repository at this point in the history
While remotely reading the cputime of a task running on a
full dynticks CPU, the values stored in the utime/stime fields
of struct task_struct may be stale. They may be those
of the last kernel <-> user transition time snapshot, and
we need to add the tickless time spent since this snapshot.

To fix this, flush the cputime of the dynticks CPUs on
kernel <-> user transition and record the time / context
where we did this. Then, using this snapshot and the current
time, perform the fixup on the reader side from the task_times()
accessors.

Signed-off-by: Frederic Weisbecker <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Li Zhong <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Paul Gortmaker <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Thomas Gleixner <[email protected]>
[fixed kvm module related build errors]
Signed-off-by: Sedat Dilek <[email protected]>
  • Loading branch information
fweisbec committed Jan 27, 2013
1 parent c11f11f commit 6a61671
Show file tree
Hide file tree
Showing 11 changed files with 290 additions and 52 deletions.
6 changes: 3 additions & 3 deletions arch/s390/kernel/vtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
void vtime_account(struct task_struct *tsk)
void vtime_account_irq_enter(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
u64 timer, system;
Expand All @@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk)

virt_timer_forward(system);
}
EXPORT_SYMBOL_GPL(vtime_account);
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);

void vtime_account_system(struct task_struct *tsk)
__attribute__((alias("vtime_account")));
__attribute__((alias("vtime_account_irq_enter")));
EXPORT_SYMBOL_GPL(vtime_account_system);

void __kprobes vtime_stop_cpu(void)
Expand Down
4 changes: 2 additions & 2 deletions include/linux/hardirq.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
*/
#define __irq_enter() \
do { \
vtime_account_irq_enter(current); \
account_irq_enter_time(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
Expand All @@ -169,7 +169,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
vtime_account_irq_exit(current); \
account_irq_exit_time(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
} while (0)

Expand Down
11 changes: 11 additions & 0 deletions include/linux/init_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/securebits.h>
#include <linux/seqlock.h>
#include <net/net_namespace.h>

#ifdef CONFIG_SMP
Expand Down Expand Up @@ -141,6 +142,15 @@ extern struct task_group root_task_group;
# define INIT_PERF_EVENTS(tsk)
#endif

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/*
 * INIT_VTIME(tsk) - designated-initializer fragment for the vtime fields
 * of a statically initialized task.
 *
 * Sets vtime_seqlock to the unlocked state, vtime_snap to 0 and
 * vtime_snap_whence to VTIME_SYS. The expansion ends with a trailing
 * comma so it can be dropped into an initializer list as-is.
 *
 * NOTE(review): VTIME_SYS is presumably chosen because the statically
 * initialized task starts out in kernel context - confirm.
 *
 * Expands to nothing when CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set.
 */
# define INIT_VTIME(tsk) \
.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
.vtime_snap = 0, \
.vtime_snap_whence = VTIME_SYS,
#else
# define INIT_VTIME(tsk)
#endif

#define INIT_TASK_COMM "swapper"

/*
Expand Down Expand Up @@ -210,6 +220,7 @@ extern struct task_group root_task_group;
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_CPUSET_SEQ \
INIT_VTIME(tsk) \
}


Expand Down
20 changes: 18 additions & 2 deletions include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ static inline int kvm_deassign_device(struct kvm *kvm,
}
#endif /* CONFIG_IOMMU_API */

static inline void guest_enter(void)
static inline void __guest_enter(void)
{
/*
* This is running in ioctl context so we can avoid
Expand All @@ -751,7 +751,7 @@ static inline void guest_enter(void)
current->flags |= PF_VCPU;
}

static inline void guest_exit(void)
static inline void __guest_exit(void)
{
/*
* This is running in ioctl context so we can avoid
Expand All @@ -761,6 +761,22 @@ static inline void guest_exit(void)
current->flags &= ~PF_VCPU;
}

#ifdef CONFIG_CONTEXT_TRACKING
/* With context tracking, guest_enter()/guest_exit() are defined out of line. */
extern void guest_enter(void);
extern void guest_exit(void);

#else /* !CONFIG_CONTEXT_TRACKING */
/* Without context tracking, simply forward to the __guest_*() helpers. */
static inline void guest_enter(void) { __guest_enter(); }
static inline void guest_exit(void) { __guest_exit(); }
#endif /* !CONFIG_CONTEXT_TRACKING */

static inline void kvm_guest_enter(void)
{
unsigned long flags;
Expand Down
27 changes: 22 additions & 5 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,15 @@ struct task_struct {
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
struct cputime prev_cputime;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_t vtime_seqlock;
unsigned long long vtime_snap;
enum {
VTIME_SLEEPING = 0,
VTIME_USER,
VTIME_SYS,
} vtime_snap_whence;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
Expand Down Expand Up @@ -1792,11 +1801,13 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}

static inline cputime_t task_gtime(struct task_struct *t)
{
return t->gtime;
}

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime);
extern void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled, cputime_t *stimescaled);
extern cputime_t task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime)
{
Expand All @@ -1815,6 +1826,12 @@ static inline void task_cputime_scaled(struct task_struct *t,
if (stimescaled)
*stimescaled = t->stimescaled;
}

static inline cputime_t task_gtime(struct task_struct *t)
{
return t->gtime;
}
#endif
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);

Expand Down
47 changes: 23 additions & 24 deletions include/linux/vtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,44 @@ extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_account_system(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
extern void vtime_account_user(struct task_struct *tsk);
extern void vtime_account(struct task_struct *tsk);
extern void vtime_account_irq_enter(struct task_struct *tsk);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern bool vtime_accounting_enabled(void);
#else
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline bool vtime_accounting_enabled(void) { return true; }
#endif

#else /* !CONFIG_VIRT_CPU_ACCOUNTING */

static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
static inline void vtime_account_user(struct task_struct *tsk) { }
static inline void vtime_account(struct task_struct *tsk) { }
static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
static inline bool vtime_accounting_enabled(void) { return false; }
#endif

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
static inline void vtime_user_enter(struct task_struct *tsk)
{
vtime_account_system(tsk);
}
extern void arch_vtime_task_switch(struct task_struct *tsk);
extern void vtime_account_irq_exit(struct task_struct *tsk);
extern bool vtime_accounting_enabled(void);
extern void vtime_user_enter(struct task_struct *tsk);
/*
 * vtime_user_exit - account the time @tsk has accumulated as user time.
 * @tsk: task to account for
 *
 * Thin wrapper around vtime_account_user().
 * NOTE(review): by its name this is meant for the user -> kernel
 * transition - confirm against the callers.
 */
static inline void vtime_user_exit(struct task_struct *tsk)
{
vtime_account_user(tsk);
}
extern void vtime_guest_enter(struct task_struct *tsk);
extern void vtime_guest_exit(struct task_struct *tsk);
extern void vtime_init_idle(struct task_struct *tsk);
#else
/*
 * vtime_account_irq_exit - account time on hard|softirq exit.
 * @tsk: task to account for
 *
 * Inline version that forwards straight to vtime_account_system().
 */
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
vtime_account_system(tsk);
}
static inline void vtime_user_enter(struct task_struct *tsk) { }
static inline void vtime_user_exit(struct task_struct *tsk) { }
static inline void vtime_guest_enter(struct task_struct *tsk) { }
static inline void vtime_guest_exit(struct task_struct *tsk) { }
static inline void vtime_init_idle(struct task_struct *tsk) { }
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
Expand All @@ -45,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
static inline void irqtime_account_irq(struct task_struct *tsk) { }
#endif

static inline void vtime_account_irq_enter(struct task_struct *tsk)
static inline void account_irq_enter_time(struct task_struct *tsk)
{
/*
* Hardirq can interrupt idle task anytime. So we need vtime_account()
* that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
* Softirq can also interrupt idle task directly if it calls
* local_bh_enable(). Such case probably don't exist but we never know.
* Ksoftirqd is not concerned because idle time is flushed on context
* switch. Softirqs in the end of hardirqs are also not a problem because
* the idle time is flushed on hardirq time already.
*/
vtime_account(tsk);
vtime_account_irq_enter(tsk);
irqtime_account_irq(tsk);
}

static inline void vtime_account_irq_exit(struct task_struct *tsk)
static inline void account_irq_exit_time(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
vtime_account_system(tsk);
vtime_account_irq_exit(tsk);
irqtime_account_irq(tsk);
}

Expand Down
21 changes: 20 additions & 1 deletion kernel/context_tracking.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include <linux/context_tracking.h>
#include <linux/kvm_host.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>

#include <linux/export.h>

DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
Expand Down Expand Up @@ -61,6 +62,24 @@ void user_exit(void)
local_irq_restore(flags);
}

/*
 * guest_enter - mark the current task as entering guest context.
 *
 * When vtime accounting is enabled the transition is handed to
 * vtime_guest_enter(); otherwise fall back to __guest_enter().
 */
void guest_enter(void)
{
	if (!vtime_accounting_enabled()) {
		__guest_enter();
		return;
	}

	vtime_guest_enter(current);
}
EXPORT_SYMBOL_GPL(guest_enter);

/*
 * guest_exit - mark the current task as leaving guest context.
 *
 * When vtime accounting is enabled the transition is handed to
 * vtime_guest_exit(); otherwise fall back to __guest_exit().
 */
void guest_exit(void)
{
	if (!vtime_accounting_enabled()) {
		__guest_exit();
		return;
	}

	vtime_guest_exit(current);
}
EXPORT_SYMBOL_GPL(guest_exit);

void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next)
{
Expand Down
6 changes: 6 additions & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_init(&p->vtime_seqlock);
p->vtime_snap = 0;
p->vtime_snap_whence = VTIME_SLEEPING;
#endif

#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
Expand Down
1 change: 1 addition & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -4666,6 +4666,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
*/
idle->sched_class = &idle_sched_class;
ftrace_graph_init_idle_task(idle, cpu);
vtime_init_idle(idle);
#if defined(CONFIG_SMP)
sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
#endif
Expand Down
Loading

0 comments on commit 6a61671

Please sign in to comment.