diff --git a/include/linux/sched.h b/include/linux/sched.h index 416f0d2..6c8ede4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1114,6 +1114,7 @@ struct sched_sporadic_data { u64 init_budget; u64 repl_period; int repl_max; + ktime_t repl_max_change; /* current values */ ktime_t act_time; @@ -1122,6 +1123,7 @@ struct sched_sporadic_data { /* timer and buffer of pending recharges. */ struct hrtimer repl_timer; + struct hrtimer exhaust_timer; struct ss_repl repl[SS_REPL_MAX]; int first_repl, last_repl; @@ -1167,6 +1169,7 @@ struct task_struct { #endif int prio, static_prio, normal_prio; + int curr_prio; unsigned int rt_priority; const struct sched_class *sched_class; struct sched_entity se; diff --git a/include/linux/tick.h b/include/linux/tick.h index b6ec818..424ffcf 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -128,4 +128,6 @@ static inline ktime_t tick_nohz_get_sleep_length(void) static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ +extern void tick_reprogram(u64 delay); + #endif diff --git a/include/trace/irq.h b/include/trace/irq.h index 0b25757..6e5eab6 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -11,6 +11,43 @@ DECLARE_TRACE(irq_entry, DECLARE_TRACE(irq_exit, TPPROTO(irqreturn_t retval), TPARGS(retval)); + +DECLARE_TRACE(irq_custom, + TPPROTO(u64 delta_exec), + TPARGS(delta_exec)); + +DECLARE_TRACE(irq_prio_change, + TPPROTO(int old_prio, int new_prio), + TPARGS(old_prio, new_prio)); + +DECLARE_TRACE(irq_budget, + TPPROTO(s64 budget), + TPARGS(budget)); + +DECLARE_TRACE(irq_sched_repl, + TPPROTO(s64 sched_repl), + TPARGS(sched_repl)); + +DECLARE_TRACE(irq_repl, + TPPROTO(s64 repl), + TPARGS(repl)); + +DECLARE_TRACE(irq_sched_exhaust, + TPPROTO(s64 exhaust_time), + TPARGS(exhaust_time)); + +DECLARE_TRACE(irq_error, + TPPROTO(s64 error), + TPARGS(error)); + +DECLARE_TRACE(irq_code_pt, + TPPROTO(s64 code_pt), + TPARGS(code_pt)); + +DECLARE_TRACE(irq_set_act_time, + TPPROTO(s64 act_time), + TPARGS(act_time)); + DECLARE_TRACE(irq_softirq_entry, TPPROTO(struct softirq_action *h, struct softirq_action *softirq_vec), TPARGS(h, softirq_vec)); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index eafd821..1089d81 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -25,6 +25,15 @@ */ DEFINE_TRACE(irq_entry); DEFINE_TRACE(irq_exit); +DEFINE_TRACE(irq_custom); +DEFINE_TRACE(irq_prio_change); +DEFINE_TRACE(irq_budget); +DEFINE_TRACE(irq_repl); +DEFINE_TRACE(irq_sched_repl); +DEFINE_TRACE(irq_sched_exhaust); +DEFINE_TRACE(irq_error); +DEFINE_TRACE(irq_code_pt); +DEFINE_TRACE(irq_set_act_time); /** * handle_bad_irq - handle spurious and unhandled irqs diff --git a/kernel/sched.c b/kernel/sched.c index 469aef7..a57e016 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -77,6 +77,8 @@ #include #include +#include + #include "sched_cpupri.h" /* @@ -1091,7 +1093,9 @@ static void __hrtick_start(void *arg) struct rq *rq = arg; spin_lock(&rq->lock); - hrtimer_restart(&rq->hrtick_timer); + if (hrtimer_restart(&rq->hrtick_timer)) { + trace_irq_error(1); + } rq->hrtick_csd_pending = 0; spin_unlock(&rq->lock); } @@ -1101,6 +1105,11 @@ static void __hrtick_start(void *arg) * * called with rq->lock held and irqs disabled */ +/* + * Returns: + * 0 on success + * 1 when the timer was active + */ static void hrtick_start(struct rq *rq, u64 delay) { struct hrtimer *timer = &rq->hrtick_timer; @@ -1109,7 +1118,9 @@ static void hrtick_start(struct rq *rq, u64 delay) hrtimer_set_expires(timer, time); if 
(rq == this_rq()) { - hrtimer_restart(timer); + if (hrtimer_restart(timer)) { + trace_irq_error(2); + } } else if (!rq->hrtick_csd_pending) { __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); rq->hrtick_csd_pending = 1; @@ -1145,9 +1156,9 @@ static __init void init_hrtick(void) * * called with rq->lock held and irqs disabled */ -static void hrtick_start(struct rq *rq, u64 delay) +static int hrtick_start(struct rq *rq, u64 delay) { - hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); + return hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); } static inline void init_hrtick(void) @@ -2622,6 +2633,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ if (task_is_sched_sporadic(prev)) { rt_ss_destroy_repl_timer(prev->rt.ss); + rt_ss_destroy_exhaust_timer(prev->rt.ss); kfree(prev->rt.ss); } #endif @@ -4526,6 +4538,8 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); + switch_tasks(prev, next); + rq->nr_switches++; rq->curr = next; ++*switch_count; @@ -5187,12 +5201,16 @@ void __init_sched_sporadic_param(struct sched_sporadic_data *ss, ss->init_budget = timespec_to_ns(¶m->sched_ss_init_budget); ss->repl_period = timespec_to_ns(¶m->sched_ss_repl_period); ss->repl_max = param->sched_ss_max_repl; + ss->repl_max_change = ns_to_ktime(0); ss->act_time = ktime_set(0, 0); ss->curr_budget = ss->init_budget; ss->depl_budget = 0; rt_ss_init_repl_timer(ss); + rt_ss_init_exhaust_timer(ss); + + ss_proc_init(); ss->first_repl = ss->last_repl = 0; ss->nr_repl = 0; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index a47932f..e9d3345 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1,4 +1,666 @@ /* + * + * *(task_rq(p)) [address] + * sched_sporadic_data(ss) task_struct(p) ----------------------------> rq(rq) + * ^ | ^| + * | | *(rt_se_of_ss(ss)) *(rt_task_of(rt_se))|| (p->rt)[structure] + * *(rt_se->ss) | | [address] [address] || + * [address] | --------------------------------> |v + * ----------------------------------- sched_rt_entity(rt_se) + * + * + */ +s64 min_exec_budget = 800 + 600; +int allow_neg_budget = 1; +int force_coalescing = 0; +long cs_time_ns = 6500; /* about 5 - 6 microsecs (5.727) */ +s64 exhaust_wakeup_latency = 2200; /* make sure that this relates with min_exec_budget */ +long min_repl_amount = 0; +int use_budget_with_no_replenishments = 0; +long min_separation = 0; +int posix_budget_amplification_fix = 1; + +/* procfs components */ +static struct proc_dir_entry *ss_proc_allow_neg_budget = NULL; +static struct proc_dir_entry *ss_proc_force_coalescing = NULL; +static struct proc_dir_entry *ss_proc_min_budget_allow_execution = NULL; +static struct proc_dir_entry *ss_proc_context_switch_ns = NULL; +static struct proc_dir_entry *ss_proc_apply_settings = NULL; +static struct proc_dir_entry *ss_proc_exhaust_wakeup_latency = NULL; +static struct proc_dir_entry *ss_proc_min_repl_amount = NULL; +static struct proc_dir_entry *ss_proc_use_budget_with_no_replenishments = NULL; +static struct proc_dir_entry *ss_proc_min_separation = NULL; +static struct proc_dir_entry *ss_proc_posix_budget_amplification_fix = NULL; + +/* procfs stats variables */ +atomic_t ss_allow_neg_budget = ATOMIC_INIT(0); +atomic_t ss_force_coalescing = ATOMIC_INIT(0); +atomic_t ss_min_budget_allow_execution = ATOMIC_INIT(0); +atomic_t ss_context_switch_ns = ATOMIC_INIT(0); +atomic_t ss_apply_settings = ATOMIC_INIT(0); +atomic_t ss_exhaust_wakeup_latency = ATOMIC_INIT(0); +atomic_t ss_min_repl_amount = 
ATOMIC_INIT(0); +atomic_t ss_use_budget_with_no_replenishments = ATOMIC_INIT(0); +atomic_t ss_min_separation = ATOMIC_INIT(0); +atomic_t ss_posix_budget_amplification_fix = ATOMIC_INIT(1); + +/** + * DESCRIPTION: + * Called when writing /proc/ss_apply_settings. + */ +int ss_proc_write_apply_settings(struct file *file, const char *buffer, unsigned long count, void *data) +{ + int rtn_val = count; + + atomic_set(&ss_apply_settings, 1); + //atomic_set(&ss_apply_settings, 0); + + /* TODO: variables on left should be written atomically */ + /* disable interrupts or something */ + min_exec_budget = atomic_read(&ss_min_budget_allow_execution); + cs_time_ns = atomic_read(&ss_context_switch_ns); + force_coalescing = atomic_read(&ss_force_coalescing); + allow_neg_budget = atomic_read(&ss_allow_neg_budget); + exhaust_wakeup_latency = atomic_read(&ss_exhaust_wakeup_latency); + min_repl_amount = atomic_read(&ss_min_repl_amount); + use_budget_with_no_replenishments = atomic_read(&ss_use_budget_with_no_replenishments); + min_separation = atomic_read(&ss_min_separation); + posix_budget_amplification_fix = atomic_read(&ss_posix_budget_amplification_fix); + + return rtn_val; +} + +/** + * DESCRIPTION: + * Called when reading /proc/ss_apply_settings. + */ +int ss_proc_read_apply_settings(char *page, char **start, + off_t offset, int count, int *eof, void *data) +{ + return snprintf(page, count, "%d", atomic_read(&ss_apply_settings)); +} + +/** + * DESCRIPTION: + * Called when writing /proc/ss_posix_budget_amplification_fix. + * This is a boolean value (i.e. 0 or 1) + */ +int ss_proc_write_posix_budget_amplification_fix(struct file *file, const char *buffer, unsigned long count, void *data) +{ + int rtn_val = count; + int enabled; + char buf[2]; + char *p; + + /* count should be 2 (the number and the newline). */ + if (count < 1) { + /* TODO: is this the correct error to return*/ + return -EFAULT; + } + + if(copy_from_user(buf, buffer, 1)) { + return -EFAULT; + } + buf[1] = '\0'; + + enabled = simple_strtoul(buf, &p, 10); + if (*p) { + /* TODO: is this the correct error to return*/ + return -EFAULT; + } + + if (enabled) { + atomic_set(&ss_posix_budget_amplification_fix, 1); + } else { + atomic_set(&ss_posix_budget_amplification_fix, 0); + } + + return rtn_val; +} + +/** + * DESCRIPTION: + * Called when reading /proc/ss_posix_budget_amplification_fix. + */ +int ss_proc_read_posix_budget_amplification_fix(char *page, char **start, + off_t offset, int count, int *eof, void *data) +{ + return snprintf(page, count, "%d", atomic_read(&ss_posix_budget_amplification_fix)); +} + + + +/** + * DESCRIPTION: + * Called when writing /proc/ss_min_separation. + * This is a value of maximum int size in characters. + */ +int ss_proc_write_min_separation(struct file *file, const char *buffer, unsigned long count, void *data) +{ + int new_val; + const int max_size = 64; /* large number to be safe */ + char buf[max_size+1]; /* +1 for terminating null */ + char *p; + int copy; + long max_len; + int i; + + /* return value excludes terminating null */ + max_len = snprintf(buf, max_size, "%d", INT_MAX); + copy = min((long)count, max_len); + + /* limits on the size of count */ + /* may have newline at the end (e.g. 
echo 1 > /proc/foo) */ + if (count < 1 || count > max_len+1) + return -EINVAL; + + if (copy_from_user(buf, buffer, copy)) + return -EFAULT; + + buf[copy] = '\0'; + + for (i=0; i max_size+1) return -EFAULT; + + not_copied = copy_from_user(buf, buffer, copy); + if (not_copied >= copy) { + /* nothing copied */ + return -EFAULT; + } + copied = copy - not_copied; + buf[copied] = '\0'; + + for (i=0; i max_size+1) return -EFAULT; + + not_copied = copy_from_user(buf, buffer, copy); + if (not_copied >= copy) { + /* nothing copied */ + return -EFAULT; + } + copied = copy - not_copied; + buf[copied] = '\0'; + + for (i=0; i max_size+1) return -EFAULT; + + not_copied = copy_from_user(buf, buffer, copy); + if (not_copied >= copy) { + /* nothing copied */ + return -EFAULT; + } + copied = copy - not_copied; + buf[copied] = '\0'; + + for (i=0; i max_size+1) return -EFAULT; + + not_copied = copy_from_user(buf, buffer, copy); + if (not_copied >= copy) { + /* nothing copied */ + return -EFAULT; + } + copied = copy - not_copied; + buf[copied] = '\0'; + + for (i=0; iowner = THIS_MODULE; + ss_proc_allow_neg_budget->data = NULL; + ss_proc_allow_neg_budget->read_proc = ss_proc_read_allow_neg_budget; + ss_proc_allow_neg_budget->write_proc = ss_proc_write_allow_neg_budget; + } + + if (!ss_proc_force_coalescing) { + ss_proc_force_coalescing = create_proc_entry("ss_force_coalescing", 0, NULL); + ss_proc_force_coalescing->owner = THIS_MODULE; + ss_proc_force_coalescing->data = NULL; + ss_proc_force_coalescing->read_proc = ss_proc_read_force_coalescing; + ss_proc_force_coalescing->write_proc = ss_proc_write_force_coalescing; + } + + if (!ss_proc_min_budget_allow_execution) { + ss_proc_min_budget_allow_execution = create_proc_entry("ss_min_budget_allow_execution", 0, NULL); + ss_proc_min_budget_allow_execution->owner = THIS_MODULE; + ss_proc_min_budget_allow_execution->data = NULL; + ss_proc_min_budget_allow_execution->read_proc = ss_proc_read_min_budget_allow_execution; + ss_proc_min_budget_allow_execution->write_proc = ss_proc_write_min_budget_allow_execution; + } + + if (!ss_proc_context_switch_ns) { + ss_proc_context_switch_ns = create_proc_entry("ss_context_switch_ns", 0, NULL); + ss_proc_context_switch_ns->owner = THIS_MODULE; + ss_proc_context_switch_ns->data = NULL; + ss_proc_context_switch_ns->read_proc = ss_proc_read_context_switch_ns; + ss_proc_context_switch_ns->write_proc = ss_proc_write_context_switch_ns; + } + + if (!ss_proc_apply_settings) { + ss_proc_apply_settings = create_proc_entry("ss_apply_settings", 0, NULL); + ss_proc_apply_settings->owner = THIS_MODULE; + ss_proc_apply_settings->data = NULL; + ss_proc_apply_settings->read_proc = ss_proc_read_apply_settings; + ss_proc_apply_settings->write_proc = ss_proc_write_apply_settings; + } + + if (!ss_proc_exhaust_wakeup_latency) { + ss_proc_exhaust_wakeup_latency = create_proc_entry("ss_exhaust_wakeup_latency", 0, NULL); + ss_proc_exhaust_wakeup_latency->owner = THIS_MODULE; + ss_proc_exhaust_wakeup_latency->data = NULL; + ss_proc_exhaust_wakeup_latency->read_proc = ss_proc_read_exhaust_wakeup_latency; + ss_proc_exhaust_wakeup_latency->write_proc = ss_proc_write_exhaust_wakeup_latency; + } + + if (!ss_proc_min_repl_amount) { + ss_proc_min_repl_amount = create_proc_entry("ss_min_repl_amount", 0, NULL); + ss_proc_min_repl_amount->owner = THIS_MODULE; + ss_proc_min_repl_amount->data = NULL; + ss_proc_min_repl_amount->read_proc = ss_proc_read_min_repl_amount; + ss_proc_min_repl_amount->write_proc = ss_proc_write_min_repl_amount; + } + + if 
(!ss_proc_use_budget_with_no_replenishments) { + ss_proc_use_budget_with_no_replenishments = create_proc_entry("ss_use_budget_with_no_replenishments", 0, NULL); + ss_proc_use_budget_with_no_replenishments->owner = THIS_MODULE; + ss_proc_use_budget_with_no_replenishments->data = NULL; + ss_proc_use_budget_with_no_replenishments->read_proc = ss_proc_read_use_budget_with_no_replenishments; + ss_proc_use_budget_with_no_replenishments->write_proc = ss_proc_write_use_budget_with_no_replenishments; + } + + if (!ss_proc_min_separation) { + ss_proc_min_separation = create_proc_entry("ss_min_separation", 0, NULL); + ss_proc_min_separation->owner = THIS_MODULE; + ss_proc_min_separation->data = NULL; + ss_proc_min_separation->read_proc = ss_proc_read_min_separation; + ss_proc_min_separation->write_proc = ss_proc_write_min_separation; + } + + if (!ss_proc_posix_budget_amplification_fix) { + ss_proc_posix_budget_amplification_fix = create_proc_entry("ss_posix_budget_amplification_fix", 0, NULL); + ss_proc_posix_budget_amplification_fix->owner = THIS_MODULE; + ss_proc_posix_budget_amplification_fix->data = NULL; + ss_proc_posix_budget_amplification_fix->read_proc = ss_proc_read_posix_budget_amplification_fix; + ss_proc_posix_budget_amplification_fix->write_proc = ss_proc_write_posix_budget_amplification_fix; + } +} + +void ss_proc_cleanup(void) +{ + remove_proc_entry("ss_allow_neg_budget", NULL); + remove_proc_entry("ss_force_coalescing", NULL); + remove_proc_entry("ss_min_budget_allow_execution", NULL); + remove_proc_entry("ss_context_switch_ns", NULL); + remove_proc_entry("ss_apply_settings", NULL); + remove_proc_entry("ss_exhaust_wakeup_latency", NULL); + remove_proc_entry("ss_min_repl_amount", NULL); + remove_proc_entry("ss_use_budget_with_no_replenishments", NULL); + remove_proc_entry("ss_min_separation", NULL); + remove_proc_entry("ss_posix_budget_amplification_fix", NULL); +} + +/* * Real-Time Scheduling Class (mapped to the SCHED_FIFO, * SCHED_SPORADIC and SCHED_RR policies) */ @@ -51,6 +713,7 @@ static void update_rt_migration(struct rq *rq) } #endif /* CONFIG_SMP */ +/* TODO: can we really do this? */ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) { return container_of(rt_se, struct task_struct, rt); @@ -510,6 +1173,19 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) static void prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio, int running); +/** + * DESCRIPTION: + * Determine whether the ss has enough budget to execute at high prioirity. + * + * @return(boolean): + * 1 - enough budget to compete at high priority + * 0 - not enough budget to compete at high priority + */ +static inline int __ss_has_exec_budget(struct sched_sporadic_data *ss) +{ + return ss->curr_budget > min_exec_budget; +} + static inline int __ss_is_sched_prio(struct sched_sporadic_data *ss) { /* @@ -517,17 +1193,61 @@ static inline int __ss_is_sched_prio(struct sched_sporadic_data *ss) * level if its budget is positive and if it has room for at least * one replenishment. 
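 * (In this implementation the budget test compares against the
 * min_exec_budget threshold rather than zero, and the free-replenishment-slot
 * requirement can be waived via the use_budget_with_no_replenishments tunable.)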
*/ - return ss != NULL && - ss->curr_budget > 0 && - ss->nr_repl < ss->repl_max; + int rtn_val; + + rtn_val = ss != NULL && + __ss_has_exec_budget(ss); + + if (!use_budget_with_no_replenishments) { + rtn_val = rtn_val && ss->nr_repl < ss->repl_max; + } + + return rtn_val; } +static int rt_ss_post_recharge(struct sched_sporadic_data *ss); static void __ss_switch_prio(struct task_struct *p, int new_prio) { struct rt_rq *rt_rq = rt_rq_of_se(&p->rt); struct rq *rq = rq_of_rt_rq(rt_rq); int old_prio = rt_se_prio(&p->rt); + //struct sched_rt_entity *rt_se = &p->rt; + + /* we are lowering our priority */ + /* 4 is higher prio than 94 */ + if (old_prio < new_prio) { + struct sched_rt_entity *rt_se = &p->rt; + struct sched_sporadic_data *ss = rt_se->ss; + if (force_coalescing) { + /* + * We have used all our budget, should we decrement + * max_num_repl? That is, start transition towards polling + * server + * + * NOTE: this does not change the current number of replenishments + */ + if (ktime_to_ns(ss->repl_max_change) < ktime_to_ns(ktime_get())) { + if (ss->nr_repl > 0) { + s64 prev_amt; + + trace_irq_code_pt(170); + if (ss->last_repl - 1 < 0) { + ss->last_repl = SS_REPL_MAX - 1; + } else { + ss->last_repl -= 1; + } + + prev_amt = ss->repl[ss->last_repl].amount; + ss->depl_budget += -(prev_amt); + ss->nr_repl--; + rt_ss_post_recharge(ss); + ss->repl_max_change = ktime_add_ns(ktime_get(), ss->repl_period); + } + } + } + } + trace_irq_prio_change(old_prio, new_prio); /* * Change the priority of a SCHED_SPORADIC task taking * priority inheritance into account. @@ -549,10 +1269,31 @@ void __ss_update_budget(struct sched_sporadic_data *ss, ss->depl_budget += depl_time; ss->curr_budget -= exec_time; - if (ss->curr_budget < 0) - ss->curr_budget = 0; - else if (ss->curr_budget > ss->init_budget) - ss->curr_budget = ss->init_budget; + if (!allow_neg_budget) { + if (ss->curr_budget < 0) { + /* + * To disallow negative budgets: + * Set curr_budget to zero and make sure we + * don't allow a repl. for larger than the budget + * NOTE: fuzz should also be zero if we disallow neg. budgets + */ + if (posix_budget_amplification_fix) { + ss->depl_budget -= -ss->curr_budget; + } + ss->curr_budget = 0; + } else if (ss->curr_budget > ss->init_budget) { + /* + * Replenishment operation should not allow avail. budget to exceed + * the init budget + */ + ss->curr_budget = ss->init_budget; + } + } else { + if (ss->curr_budget > ss->init_budget) { + /* with neg. budget, the curr capactiy should not exceed inti. budget */ + trace_irq_error(100); + } + } } static inline @@ -567,12 +1308,22 @@ struct sched_rt_entity *rt_se_of_ss(struct sched_sporadic_data *ss) return ss->rt_se; } +/* TODO: uses of this are wrong? */ +/* + * set_curr_task_rt + * dequeue_task_rt + * enqueue_rt_entity + */ static inline int rt_se_is_ss_sched_prio(struct sched_rt_entity *rt_se) { return __rt_se_is_ss(rt_se) && __ss_is_sched_prio(rt_se->ss); } +/* TODO: uses of this are wrong? */ +/* + * enqueue_rt_entity + */ static inline int rt_se_is_ss_low_prio(struct sched_rt_entity *rt_se) { @@ -591,11 +1342,15 @@ static inline int __rt_rq_is_ss(struct rt_rq *rt_rq) tg->ss.repl_max != -1; } +/* TODO: uses of this are wrong? */ +/* only in group scheduling now */ static inline int rt_rq_is_ss_sched_prio(struct rt_rq *rt_rq) { return __rt_rq_is_ss(rt_rq) && __ss_is_sched_prio(&rt_rq->tg->ss); } +/* TODO: uses of this are wrong? 
*/ +/* only in group scheduling now */ static inline int rt_rq_is_ss_low_prio(struct rt_rq *rt_rq) { return __rt_rq_is_ss(rt_rq) && !__ss_is_sched_prio(&rt_rq->tg->ss); @@ -608,68 +1363,187 @@ static inline void rt_ss_set_act_time(struct sched_sporadic_data *ss) if (!ktime_to_ns(ss->act_time) == 0) return; + trace_irq_set_act_time(1); /* * Since it is possible this function being called more than * what it should be required, we record the activation time of * the SCHED_SPORADIC task or task group only if act_time has * been previously set to zero. */ + /* TODO: want the most accurate time, will this do it? */ + update_rq_clock(this_rq()); ss->act_time = hrtimer_cb_get_time(&ss->repl_timer); + if (ss->depl_budget != 0) { + if (ss->depl_budget > 0) { + /* we lost time */ + trace_irq_error(-2000); + } else if (ss->depl_budget < 0) { + trace_irq_error(-2001); + } + trace_irq_error(ss->depl_budget); + } ss->depl_budget = 0; } static int rt_ss_decr_budget(struct sched_sporadic_data *ss, u64 exec_time) { + s64 max_negative_budget = -1000; + + trace_irq_custom(exec_time); __ss_update_budget(ss, (s64)exec_time, (s64)exec_time); + trace_irq_budget(ss->curr_budget); + + if (ss->curr_budget < max_negative_budget) { + trace_irq_error(3); + } return ss->curr_budget; } static int rt_ss_repl_budget(struct sched_sporadic_data *ss, s64 repl_amount) { + trace_irq_repl(repl_amount); __ss_update_budget(ss, 0, repl_amount); + trace_irq_budget(ss->curr_budget); return ss->curr_budget; } +static inline int ss_last_repl(struct sched_sporadic_data *ss) +{ + int latest_repl = 0; + + if (ss->nr_repl > 0) { + if (ss->last_repl - 1 < 0) { + latest_repl = SS_REPL_MAX - 1; + } else { + latest_repl = ss->last_repl - 1; + } + } else { + /* could be major error */ + trace_irq_error(-9300); + } + + return latest_repl; +} + +/** + * Schedule a replenishment. + */ static int rt_ss_post_recharge(struct sched_sporadic_data *ss) { ktime_t now = hrtimer_cb_get_time(&ss->repl_timer); ktime_t repl_time; s64 repl_amount; + /* TODO: return value not used? This doesn't matter */ + s64 fuzz = min_exec_budget; + trace_irq_code_pt(150); repl_time = ktime_add(ss->act_time, ns_to_ktime(ss->repl_period)); - repl_amount = (s64)-ss->depl_budget; - ss->act_time = ns_to_ktime(0); + repl_amount = (s64)(-ss->depl_budget); + + /* check if we should have some min replenishment separation */ + /* TODO: how does this interact with the recursive call below, when force a coalesce + * because there is no more replenishment slots? Does it still work correctly? + */ + if (min_separation && ss->nr_repl > 0) { + ktime_t min_repl_time; + ktime_t max_coalesce; + ktime_t last_repl_amt; + int last_repl = ss_last_repl(ss); + + trace_irq_code_pt(151); + /* min repl time in the future, considering last repl that exists */ + min_repl_time = ktime_add(ss->repl[last_repl].instant, ns_to_ktime(min_separation)); + + /* can we combine it with another repl? */ + last_repl_amt = ns_to_ktime(-(ss->repl[last_repl].amount)); + max_coalesce = ktime_add(ss->repl[last_repl].instant, last_repl_amt); + if (ktime_to_ns(max_coalesce) >= ktime_to_ns(repl_time)) { + + trace_irq_code_pt(152); + ss->repl[last_repl].amount = ss->repl[last_repl].amount + repl_amount; + + goto post_recharge_end; + } + + if (ktime_to_ns(repl_time) < ktime_to_ns(min_repl_time)) { + trace_irq_code_pt(153); + repl_time = min_repl_time; + } + } if (ktime_us_delta(repl_time, now) <= 0) { /* replenishment time in the past, so replenish and exit. 
*/ + /* TODO: should rarely happen and is likely an error */ + /* or it may be that we were just preempted for a long time */ + trace_irq_error(-9000); rt_ss_repl_budget(ss, repl_amount); - rt_ss_set_act_time(ss); + //rt_ss_set_act_time(ss); // TODO: should be ok, but doesn't work? goto post_recharge_end; } - if (ss->nr_repl < ss->repl_max) { - int next_repl = ss->last_repl; - ss->repl[next_repl].instant = repl_time; - ss->repl[next_repl].amount = repl_amount; - ss->last_repl = (next_repl + 1) % SS_REPL_MAX; - ss->nr_repl++; + if (repl_amount <= 0) { + if (ss->nr_repl < ss->repl_max) { + int next_repl = ss->last_repl; - if (ss->nr_repl != 1) - goto post_recharge_end; + ss->repl[next_repl].instant = repl_time; + ss->repl[next_repl].amount = repl_amount; + ss->last_repl = (next_repl + 1) % SS_REPL_MAX; + ss->nr_repl++; - /* - * We have just added a replenishment event and it is the - * only one, so we need to start the replenishment timer. - */ - BUG_ON(hrtimer_active(&ss->repl_timer)); - hrtimer_start(&ss->repl_timer, repl_time, HRTIMER_MODE_ABS); + trace_irq_sched_repl(repl_amount); + + /* + * We have other replenishments that would have been scheduled earlier. So + * just exit. + */ + if (ss->nr_repl != 1) + goto post_recharge_end; + + /* + * We have just added a replenishment event and it is the + * only one, so */ + hrtimer_start(&ss->repl_timer, repl_time, HRTIMER_MODE_ABS); + } else { + /* + * we just executed for some amount of time, but couldn't schedule + * a replenishment. Therefore, the budget will be lost. + * + * Tack it onto the previous repl maybe? + * We need the activation time (ss->act_time) + */ + trace_irq_error(-9100); + + if (ss->nr_repl > 0) { + s64 prev_amt; + + trace_irq_code_pt(9170); + if (ss->last_repl - 1 < 0) { + ss->last_repl = SS_REPL_MAX - 1; + } else { + ss->last_repl -= 1; + } + + prev_amt = ss->repl[ss->last_repl].amount; + ss->depl_budget += -(prev_amt); + ss->nr_repl--; + rt_ss_post_recharge(ss); + } else { + // need to be able to put this time somewhere, + // right now we lost time + trace_irq_error(-9200); + } + } } post_recharge_end: - return ss->curr_budget <= 0; + // put at end because of recursive call + // TODO: reorder statements to make it cleaner + ss->act_time = ns_to_ktime(0); + ss->depl_budget = 0; + return ss->curr_budget <= fuzz; } static void enqueue_rt_entity(struct sched_rt_entity *rt_se); @@ -677,41 +1551,97 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se); static void __enqueue_rt_entity(struct sched_rt_entity *rt_se); static void __dequeue_rt_entity(struct sched_rt_entity *rt_se); +/* + * P just ran and we want to update the stats. + * + * If p is was running at sched_prio then update state. 
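+ * A task counts as exhausted once curr_budget drops to the
+ * min_exec_budget threshold ("fuzz"), not strictly to zero.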
+ * + * @return: + * 1 - budget depleted and we put SS at low priority + * 0 - some budget still remaining, therefore keep at high priority + */ static inline -int rt_se_ss_budget_exhausted(struct sched_rt_entity *rt_se, u64 delta_exec) +int rt_se_ss_budget_exhausted(struct task_struct *p, u64 delta_exec) { + struct sched_rt_entity *rt_se = &p->rt; struct sched_sporadic_data *ss = rt_se->ss; + /* time that is too small and we cannot reasonably expect to get any useful work done */ + s64 fuzz; + fuzz = min_exec_budget; + + trace_irq_code_pt(100); + if (!__rt_se_is_ss(rt_se)) { + return 0; + } - if (likely(!rt_se_is_ss_sched_prio(rt_se))) + trace_irq_code_pt(101); + if (p->curr_prio == ss->low_priority) { + /* we were running at low priority */ + /* TODO: but we could have just been non-preemptable */ + /* so really it is what type of time are we using high priority or low */ + /* curr_prio fixes this? */ return 0; + } + + trace_irq_code_pt(102); + /* + if (!rt_se_is_ss_sched_prio(rt_se) && ss->depl_budget >=0) { + return 0; + } + */ + + /* The amount of time we exceeded our budget */ + /* s64 amt_budget_exceeded = ss->curr_budget - delta_exec; */ + /* s64 prev_remain_budget = ss->curr_budget; */ /* * If the we are running out of budget * switch the task's priority and requeue. */ rt_ss_decr_budget(ss, delta_exec); - if (ss->curr_budget <= 0 && rt_ss_post_recharge(ss)) { - dequeue_rt_stack(rt_se); - - for_each_sched_rt_entity(rt_se) { - if (!group_rt_rq(rt_se)) - __ss_switch_prio(rt_task_of(rt_se), - ss->low_priority); - __enqueue_rt_entity(rt_se); + /* + * rt_ss_post_recharge() will schedule a replenishment for + * our executed time as well as the time we exceeded the budget. + * This is alright since we are charging it for that time. That + * is, the budget is allowed to go negative. + * + * If we only want the allowed amount, use + * ss->depl_budget -= prev_remain_budget - delta_exec; + */ + /* TODO: fix this if, if, else, if... */ + if (ss->curr_budget <= fuzz) { + //rt_ss_post_recharge(ss); // schedule a replenishment + // we could also get a replenishment if one was due + if (ss->curr_budget <= fuzz) { + dequeue_rt_stack(rt_se); + + for_each_sched_rt_entity(rt_se) { + if (!group_rt_rq(rt_se)) + __ss_switch_prio(rt_task_of(rt_se), + ss->low_priority); + __enqueue_rt_entity(rt_se); + } + } else { + return 0; } - return 1; } return 0; } +/** + * Applies(consumes) a replenishment that has arrived. + */ static inline void __rt_se_ss_repl_timer(struct sched_rt_entity *rt_se) { struct sched_sporadic_data *ss = rt_se->ss; struct rt_rq *rt_rq = rt_rq_of_se(rt_se); struct rq *rq = rq_of_rt_rq(rt_rq); ktime_t now = hrtimer_cb_get_time(&ss->repl_timer); + s64 fuzz; + + fuzz = min_exec_budget; spin_lock(&rq->lock); while (ss->nr_repl > 0 && @@ -727,12 +1657,21 @@ static inline void __rt_se_ss_repl_timer(struct sched_rt_entity *rt_se) ss->first_repl = (ss->first_repl + 1) % SS_REPL_MAX; ss->nr_repl--; - if (likely(!rt_se_is_ss_low_prio(rt_se))) { - __ss_switch_prio(rt_task_of(rt_se), - rt_se->ss->priority); - if (on_rt_rq(rt_se)) - /* do not enqueue a task if it was not! 
*/ - enqueue_rt_entity(rt_se); + /* TODO: function enough budget */ + /* can this ever be false */ + if (ss->curr_budget >= fuzz) { + struct task_struct *p = rt_task_of(rt_se); + + if (likely(p->normal_prio == ss->low_priority)) { + /* + * TODO: we increased the budget, now all associated tasks + * can run at high priority + */ + __ss_switch_prio(p, rt_se->ss->priority); + if (on_rt_rq(rt_se)) + /* do not enqueue a task if it was not! */ + enqueue_rt_entity(rt_se); + } } } spin_unlock(&rq->lock); @@ -760,6 +1699,10 @@ int rt_rq_ss_budget_exhausted(struct rt_rq *rt_rq, u64 delta_exec) return 0; } + +/* + * Only for group scheduling. + */ static inline void __rt_rq_ss_repl_timer(struct rt_rq *rt_rq) { struct task_group *tg = rt_rq->tg; @@ -790,16 +1733,154 @@ static inline void __rt_rq_ss_repl_timer(struct rt_rq *rt_rq) #endif /* CONFIG_RT_GROUP_SCHED */ +/* TODO: get rid of rq parameter */ +static void set_exhaust_timer(struct rq *rq, struct sched_sporadic_data *ss, int in_handler) +{ + /* + * TODO: choose this number dynamically by taking an average + * of the neg. budgets encountered + */ + /* + * Have to make sure this coincides with min. budget to allow execution. + * If not, we will wake up and still have enough budget to execute. + */ + long fuzz = exhaust_wakeup_latency; + /* TODO: are we losing interrupts/invocations of this function call */ + /* This is due to a minimum nsec on the expire source */ + long min_delay = 600; + s64 delay = ss->curr_budget - fuzz; + + if (delay < min_delay) { + trace_irq_error(12); + delay = min_delay; + } + + if (!in_handler) { + /* set timer for budget exhaustion */ + if (hrtimer_start(&ss->exhaust_timer, ns_to_ktime(delay), HRTIMER_MODE_REL)) { + //trace_irq_error(11); + } + } else { + ktime_t now = hrtimer_cb_get_time(&ss->exhaust_timer); + ktime_t tim = ktime_add_safe(now, ns_to_ktime(delay)); + hrtimer_set_expires(&ss->exhaust_timer, tim); + } + trace_irq_sched_exhaust(delay); +} + +static void update_ss(struct task_struct *p); +static void update_curr_rt(struct rq *rq); + +static enum hrtimer_restart rt_ss_exhaust_timer(struct hrtimer *timer) +{ + struct sched_sporadic_data *ss = + container_of(timer, struct sched_sporadic_data, exhaust_timer); + + struct sched_rt_entity *rt_se = rt_se_of_ss(ss); + struct task_struct *p = rt_task_of(rt_se); + struct rq *rq = this_rq(); + enum hrtimer_restart ret = HRTIMER_NORESTART; + + trace_irq_code_pt(110); + +#if 0 + if (rt_se) { + trace_irq_code_pt(111); + /* TODO: when exhausting ss, must check all rq's (cpu's) */ + update_ss(this_rq()->curr); + } else { + trace_irq_error(-1000); + } +#endif + + spin_lock(&rq->lock); + update_rq_clock(rq); + update_curr_rt(rq); + spin_unlock(&rq->lock); + + if (p->normal_prio == ss->priority) { + /* TODO: we still had time left and are still running at high priority, + * so we need to reset the timer, but only if we are about to run. 
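+ * Pushing the expiry forward here (set_exhaust_timer(..., 1)) and
+ * returning HRTIMER_RESTART re-arms this same timer from within the handler.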
+ */ + struct task_struct *curr = rq->curr; + + if (curr == p) { + /* we may not be running */ + set_exhaust_timer(rq, ss, 1); + ret = HRTIMER_RESTART; + } + } + + return ret; +} + static enum hrtimer_restart rt_ss_repl_timer(struct hrtimer *timer) { struct sched_sporadic_data *ss; struct sched_rt_entity *rt_se; enum hrtimer_restart ret = HRTIMER_NORESTART; + struct rq *rq = this_rq(); /* HACK */ + trace_irq_code_pt(140); ss = container_of(timer, struct sched_sporadic_data, repl_timer); rt_se = rt_se_of_ss(ss); - if (rt_se) + if (rt_se) { + s64 in_budget; + s64 after_repl_budget; + struct task_struct *curr = rq->curr; + + trace_irq_code_pt(141); + + in_budget = ss->curr_budget; + update_ss(rq->curr); __rt_se_ss_repl_timer(rt_se); + after_repl_budget = ss->curr_budget; + + /* + * Want to know if coalescing of replenishments occured. + * To do this, check if the replenishment occured for the + * currently executing process. + */ + if (curr->policy == SCHED_SPORADIC) { + struct sched_rt_entity *curr_rt_se = &curr->rt; + struct sched_sporadic_data *curr_ss = curr_rt_se->ss; + + if (curr_ss == ss) { + trace_irq_error(-8000); + } + } + + if (after_repl_budget <= 0) { + /* repl did not cause execution, too neg. */ + trace_irq_error(-5000); + } + + /* + * this can happen that we get a replenishment without calling pick next task + * switch task etc. + * Ex: expire timer, decrement priority, immediately get repl., incr. priority => + * set_exhaust_timer() never gets called. + */ + set_exhaust_timer(rq, ss, 0); + + /* TODO: if we are running and have more budget, readjust + * reset exhaust timer + */ + #if 0 + /* + * HACK: This should not be needed, but will get us out + * of bad situations if we get there + * That is take advantage of the fact that we can update + * the runtime and check for over run of budget. + */ + { + struct task_struct *p = rt_task_of(rt_se); + struct rq *rq = task_rq(p); + + update_curr_rt(rq); + } + #endif + } #ifdef CONFIG_RT_GROUP_SCHED else { cpumask_t span = cpu_online_map; @@ -815,14 +1896,22 @@ static enum hrtimer_restart rt_ss_repl_timer(struct hrtimer *timer) } } #endif + // TODO: need better check if we should rearm the repl_timer if (ss->nr_repl > 0) { - timer->_expires = ss->repl[ss->first_repl].instant; + hrtimer_set_expires(timer, ss->repl[ss->first_repl].instant); ret = HRTIMER_RESTART; } return ret; } +static inline void rt_ss_init_exhaust_timer(struct sched_sporadic_data *ss) +{ + hrtimer_init(&ss->exhaust_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + ss->exhaust_timer.function = rt_ss_exhaust_timer; + ss->exhaust_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; +} + static inline void rt_ss_init_repl_timer(struct sched_sporadic_data *ss) { hrtimer_init(&ss->repl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -830,11 +1919,52 @@ static inline void rt_ss_init_repl_timer(struct sched_sporadic_data *ss) ss->repl_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } +static inline void rt_ss_destroy_exhaust_timer(struct sched_sporadic_data *ss) +{ + hrtimer_cancel(&ss->exhaust_timer); +} + static inline void rt_ss_destroy_repl_timer(struct sched_sporadic_data *ss) { hrtimer_cancel(&ss->repl_timer); } +/** + * DESCRIPTION: + * A more lightweight version of update_curr_rt(). 
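+ * Takes rq->lock, refreshes the rq clock, and charges the time elapsed
+ * since p->se.exec_start against the sporadic budget, but only while
+ * p is running at its high (server) priority.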
+ */ +static void update_ss(struct task_struct *p) +{ + if (p->policy == SCHED_SPORADIC) { + + struct sched_rt_entity *rt_se = &p->rt; + struct sched_sporadic_data *ss = rt_se->ss; + struct rt_rq *rt_rq = rt_rq_of_se(rt_se); + struct rq *rq = rq_of_rt_rq(rt_rq); + u64 delta_exec; + + spin_lock(&rq->lock); + update_rq_clock(rq); + + delta_exec = rq->clock - p->se.exec_start; + p->se.exec_start = rq->clock; + + if (unlikely((s64)delta_exec < 0)) + delta_exec = 0; + + trace_irq_custom(delta_exec); + + /* + * CHECK: only decrement budget if p was + * running at high priority + */ + if (p->normal_prio == ss->priority) { + rt_ss_decr_budget(ss, delta_exec); + } + spin_unlock(&rq->lock); + } +} + #endif /* CONFIG_POSIX_SCHED_SPORADIC */ /* @@ -850,9 +1980,17 @@ static void update_curr_rt(struct rq *rq) if (!task_has_rt_policy(curr)) return; + trace_irq_code_pt(130); + delta_exec = rq->clock - curr->se.exec_start; - if (unlikely((s64)delta_exec < 0)) + if (unlikely((s64)delta_exec < 0)) { + trace_irq_code_pt(131); delta_exec = 0; + } + + if (curr->policy == SCHED_SPORADIC) { + trace_irq_custom(delta_exec); + } schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); @@ -862,13 +2000,16 @@ static void update_curr_rt(struct rq *rq) curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); +/* if (!rt_bandwidth_enabled()) return; +*/ #ifdef CONFIG_POSIX_SCHED_SPORADIC /* Check for the SCHED_SPORADIC rules for the task. */ - if (rt_se_ss_budget_exhausted(rt_se, delta_exec)) + if (rt_se_ss_budget_exhausted(curr, delta_exec)) { resched_task(curr); + } #endif for_each_sched_rt_entity(rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); @@ -894,6 +2035,10 @@ static void update_curr_rt(struct rq *rq) } } +/** + * Increment the number of tasks that are runnable in the rq of the + * schedulable entity. + */ static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { @@ -1036,6 +2181,9 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se) } } +/** + * Task was just made runnable (woken up). 
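+ * For a SCHED_SPORADIC entity this either records the activation time
+ * (enough budget, so it is queued at its high priority) or switches the
+ * task to its low priority before it is queued.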
+ */ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) { dequeue_rt_stack(rt_se); @@ -1045,11 +2193,15 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) struct rt_rq *rt_rq = group_rt_rq(rt_se); if (!rt_rq) { - if (rt_se_is_ss_sched_prio(rt_se)) + // this is where we check for max number of replenishments, so + // if we execute here we will not be able to schedule a replenishment + if (rt_se_is_ss_sched_prio(rt_se)) { rt_ss_set_act_time(rt_se->ss); - else if (rt_se_is_ss_low_prio(rt_se)) - __ss_switch_prio(rt_task_of(rt_se), - rt_se->ss->low_priority); + // here is where we want a switch to only check the budget and therefore + // switch to low priority + } else if (rt_se_is_ss_low_prio(rt_se)) { + __ss_switch_prio(rt_task_of(rt_se), rt_se->ss->low_priority); + } #ifdef CONFIG_RT_GROUP_SCHED } else { if (rt_rq_is_ss_sched_prio(rt_rq)) @@ -1103,8 +2255,11 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) update_curr_rt(rq); #ifdef CONFIG_POSIX_SCHED_SPORADIC +/* + trace_irq_code_pt(180); if (rt_se_is_ss_sched_prio(rt_se)) rt_ss_post_recharge(rt_se->ss); +*/ #endif dequeue_rt_entity(rt_se); @@ -1253,6 +2408,7 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, return next; } +extern void tick_reprogram(u64 delay); static struct task_struct *pick_next_task_rt(struct rq *rq) { struct sched_rt_entity *rt_se; @@ -1278,15 +2434,72 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) if (p->policy == SCHED_SPORADIC) { struct sched_sporadic_data *ss = rt_se->ss; - - /* set timer for budget exhaustion */ - //ss = &rt_rq->tg->ss; - hrtick_start(rq, ss->curr_budget); + /* Because of the amount of time it takes to get to this point we + * can actually overrun the budget if we want to charge this time + * to the SS task. + * + * Assumes we are going to charge this time to the ss task + */ + trace_irq_code_pt(160); + rt_ss_set_act_time(ss); } return p; } +static void switch_tasks(struct task_struct *prev, struct task_struct *next) +{ + if (prev->policy == SCHED_SPORADIC) { + /* + * TODO: We are going to switch tasks. + * May want to perform accounting. + */ + struct sched_rt_entity *prev_rt_se = &prev->rt; + struct sched_sporadic_data *prev_ss = prev_rt_se->ss; + + /* won't be needing the exhaust timer if we are not running */ + hrtimer_cancel(&prev_ss->exhaust_timer); + + /* TODO: it may happen that the recharge should happen immediately + * there we should increase the priority and not switch tasks! + * This only happens if the recharge time is in the past. + */ + /* should only schedule a replenishment if we are not being preempted. */ + /* TODO: should we use prio vs. normal_prio */ + if (next->normal_prio > prev->curr_prio) { + /* incoming task(next) has a lower priority than the exiting task(prev) => not preempted */ + /* (e.g. 4 is higher prio than 94) */ + if (prev->curr_prio != prev_ss->low_priority) { + /* must have been operating at high priority also in order to recharge */ + trace_irq_code_pt(190); + rt_ss_post_recharge(prev_ss); + } + } else if (!__ss_has_exec_budget(prev_ss)) { + if (prev->curr_prio != prev_ss->low_priority) { + trace_irq_code_pt(191); + rt_ss_post_recharge(prev_ss); + } + } + } + + if (next->policy == SCHED_SPORADIC) { + struct sched_rt_entity *next_rt_se = &next->rt; + struct sched_sporadic_data *next_ss = next_rt_se->ss; + struct rq *rq = task_rq(next); + + /* decrease the budget by some constant amount */ + // TODO: change activation time? 
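+ // cs_time_ns is the estimated context-switch cost; charging it to the
+ // incoming task keeps the later replenishment amount (depl_budget) in line
+ // with the CPU time actually consumed on its behalf.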
+ //next_ss->act_time = ktime_sub(next_ss->act_time, ns_to_ktime(cs_time_ns)); + next_ss->depl_budget += cs_time_ns; + next_ss->curr_budget -= cs_time_ns; + + set_exhaust_timer(rq, next_ss, 0); + } + + prev->curr_prio = prev->normal_prio; + next->curr_prio = next->normal_prio; +} + static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); @@ -1855,6 +3068,7 @@ static void watchdog(struct rq *rq, struct task_struct *p) static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { + trace_irq_code_pt(120); update_curr_rt(rq); watchdog(rq, p); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 342fc9c..4bcc56b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -768,3 +768,18 @@ int tick_check_oneshot_change(int allow_nohz) tick_nohz_switch_to_nohz(); return 0; } + +/** + * Allow the periodic tick timer to be rescheduled. + * + * @delay: number of nanoseconds from now the tick should expire. + */ +void tick_reprogram(u64 delay) +{ + int cpu = smp_processor_id(); + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + + hrtimer_start(&ts->sched_timer, ns_to_ktime(delay), + HRTIMER_MODE_REL); +} +EXPORT_SYMBOL_GPL(tick_reprogram); diff --git a/ltt/probes/kernel-trace.c b/ltt/probes/kernel-trace.c index 160e2b4..90e3566 100644 --- a/ltt/probes/kernel-trace.c +++ b/ltt/probes/kernel-trace.c @@ -79,6 +79,173 @@ notrace void probe_irq_exit(irqreturn_t retval) &data, sizeof(data), sizeof(data)); } +/* kernel_irq_prio_change specialized tracepoint probe */ + +void probe_irq_prio_change(int old_prio, int new_prio); + +DEFINE_MARKER_TP(kernel, irq_prio_change, irq_prio_change, probe_irq_prio_change, + "old_prio %d new_prio %d junk #2d%ld"); + +notrace void probe_irq_prio_change(int old_prio, int new_prio) +{ + struct marker *marker; + struct serialize_int_int_short data; + + data.f1 = old_prio; + data.f2 = new_prio; + data.f3 = 0; + + marker = &GET_MARKER(kernel, irq_prio_change); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, serialize_sizeof(data), sizeof(int)); +} + +/* kernel_irq_sched_exhaust specialized tracepoint probe */ + +void probe_irq_sched_exhaust(s64 exhaust_time); + +DEFINE_MARKER_TP(kernel, irq_sched_exhaust, irq_sched_exhaust, probe_irq_sched_exhaust, + "exhaust_time #8d%lld"); + +notrace void probe_irq_sched_exhaust(s64 exhaust_time) +{ + struct marker *marker; + s64 data; + + data = exhaust_time; + marker = &GET_MARKER(kernel, irq_sched_exhaust); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_irq_error specialized tracepoint probe */ + +void probe_irq_error(s64 error); + +DEFINE_MARKER_TP(kernel, irq_error, irq_error, probe_irq_error, + "error #8d%lld"); + +notrace void probe_irq_error(s64 error) +{ + struct marker *marker; + s64 data; + + data = error; + marker = &GET_MARKER(kernel, irq_error); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_irq_sched_repl specialized tracepoint probe */ + +void probe_irq_sched_repl(s64 sched_repl); + +DEFINE_MARKER_TP(kernel, irq_sched_repl, irq_sched_repl, probe_irq_sched_repl, + "sched_repl #8d%lld"); + +notrace void probe_irq_sched_repl(s64 sched_repl) +{ + struct marker *marker; + s64 data; + + data = sched_repl; + marker = &GET_MARKER(kernel, irq_sched_repl); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_irq_repl specialized 
tracepoint probe */ + +void probe_irq_repl(s64 repl); + +DEFINE_MARKER_TP(kernel, irq_repl, irq_repl, probe_irq_repl, + "repl #8d%lld"); + +notrace void probe_irq_repl(s64 repl) +{ + struct marker *marker; + s64 data; + + data = repl; + marker = &GET_MARKER(kernel, irq_repl); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_irq_budget specialized tracepoint probe */ + +void probe_irq_budget(s64 budget); + +DEFINE_MARKER_TP(kernel, irq_budget, irq_budget, probe_irq_budget, + "budget #8d%lld"); + +notrace void probe_irq_budget(s64 budget) +{ + struct marker *marker; + s64 data; + + data = budget; + marker = &GET_MARKER(kernel, irq_budget); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_irq_set_act_time specialized tracepoint probe */ + +void probe_irq_set_act_time(s64 act_time); + +DEFINE_MARKER_TP(kernel, irq_set_act_time, irq_set_act_time, probe_irq_set_act_time, + "act_time #8d%lld"); + +notrace void probe_irq_set_act_time(s64 act_time) +{ + struct marker *marker; + s64 data; + + data = act_time; + marker = &GET_MARKER(kernel, irq_set_act_time); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_irq_code_pt specialized tracepoint probe */ + +void probe_irq_code_pt(s64 code_pt); + +DEFINE_MARKER_TP(kernel, irq_code_pt, irq_code_pt, probe_irq_code_pt, + "code_pt #8d%lld"); + +notrace void probe_irq_code_pt(s64 code_pt) +{ + struct marker *marker; + s64 data; + + data = code_pt; + marker = &GET_MARKER(kernel, irq_code_pt); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + +/* kernel_irq_custom specialized tracepoint probe */ + +void probe_irq_custom(u64 delta_exec); + +DEFINE_MARKER_TP(kernel, irq_custom, irq_custom, probe_irq_custom, + "delta_exec #8u%llu"); + +notrace void probe_irq_custom(u64 delta_exec) +{ + struct marker *marker; + u64 data; + + data = delta_exec; + + marker = &GET_MARKER(kernel, irq_custom); + ltt_specialized_trace(marker, marker->single.probe_private, + &data, sizeof(data), sizeof(data)); +} + + /* kernel_softirq_entry specialized tracepoint probe */ void probe_irq_softirq_entry(struct softirq_action *h,