aboutsummaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Kconfig5
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/alarmtimer.c96
-rw-r--r--kernel/time/clockevents.c42
-rw-r--r--kernel/time/clocksource.c40
-rw-r--r--kernel/time/hrtimer.c234
-rw-r--r--kernel/time/itimer.c22
-rw-r--r--kernel/time/ntp.c840
-rw-r--r--kernel/time/posix-cpu-timers.c72
-rw-r--r--kernel/time/posix-timers.c267
-rw-r--r--kernel/time/posix-timers.h8
-rw-r--r--kernel/time/sched_clock.c34
-rw-r--r--kernel/time/sleep_timeout.c377
-rw-r--r--kernel/time/tick-internal.h3
-rw-r--r--kernel/time/tick-sched.c27
-rw-r--r--kernel/time/time.c20
-rw-r--r--kernel/time/timekeeping.c649
-rw-r--r--kernel/time/timekeeping_debug.c13
-rw-r--r--kernel/time/timekeeping_internal.h25
-rw-r--r--kernel/time/timer.c197
-rw-r--r--kernel/time/vsyscall.c7
21 files changed, 1523 insertions, 1457 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8ebb6d5a106b..b0b97a60aaa6 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -17,11 +17,6 @@ config ARCH_CLOCKSOURCE_DATA
config ARCH_CLOCKSOURCE_INIT
bool
-# Clocksources require validation of the clocksource against the last
-# cycle update - x86/TSC misfeature
-config CLOCKSOURCE_VALIDATE_LAST_CYCLE
- bool
-
# Timekeeping vsyscall support
config GENERIC_TIME_VSYSCALL
bool
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 4af2a264a160..fe0ae82124fe 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += time.o timer.o hrtimer.o
+obj-y += time.o timer.o hrtimer.o sleep_timeout.o
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
obj-y += timeconv.o timecounter.o alarmtimer.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 8bf888641694..0ddccdff119a 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -197,28 +197,15 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
{
struct alarm *alarm = container_of(timer, struct alarm, timer);
struct alarm_base *base = &alarm_bases[alarm->type];
- unsigned long flags;
- int ret = HRTIMER_NORESTART;
- int restart = ALARMTIMER_NORESTART;
- spin_lock_irqsave(&base->lock, flags);
- alarmtimer_dequeue(base, alarm);
- spin_unlock_irqrestore(&base->lock, flags);
+ scoped_guard (spinlock_irqsave, &base->lock)
+ alarmtimer_dequeue(base, alarm);
if (alarm->function)
- restart = alarm->function(alarm, base->get_ktime());
-
- spin_lock_irqsave(&base->lock, flags);
- if (restart != ALARMTIMER_NORESTART) {
- hrtimer_set_expires(&alarm->timer, alarm->node.expires);
- alarmtimer_enqueue(base, alarm);
- ret = HRTIMER_RESTART;
- }
- spin_unlock_irqrestore(&base->lock, flags);
+ alarm->function(alarm, base->get_ktime());
trace_alarmtimer_fired(alarm, base->get_ktime());
- return ret;
-
+ return HRTIMER_NORESTART;
}
ktime_t alarm_expires_remaining(const struct alarm *alarm)
@@ -334,10 +321,9 @@ static int alarmtimer_resume(struct device *dev)
static void
__alarm_init(struct alarm *alarm, enum alarmtimer_type type,
- enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
+ void (*function)(struct alarm *, ktime_t))
{
timerqueue_init(&alarm->node);
- alarm->timer.function = alarmtimer_fired;
alarm->function = function;
alarm->type = type;
alarm->state = ALARMTIMER_STATE_INACTIVE;
@@ -350,10 +336,10 @@ __alarm_init(struct alarm *alarm, enum alarmtimer_type type,
* @function: callback that is run when the alarm fires
*/
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
- enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
+ void (*function)(struct alarm *, ktime_t))
{
- hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
- HRTIMER_MODE_ABS);
+ hrtimer_setup(&alarm->timer, alarmtimer_fired, alarm_bases[type].base_clockid,
+ HRTIMER_MODE_ABS);
__alarm_init(alarm, type, function);
}
EXPORT_SYMBOL_GPL(alarm_init);
@@ -480,35 +466,11 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
}
EXPORT_SYMBOL_GPL(alarm_forward);
-static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throttle)
+u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- ktime_t now = base->get_ktime();
-
- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && throttle) {
- /*
- * Same issue as with posix_timer_fn(). Timers which are
- * periodic but the signal is ignored can starve the system
- * with a very small interval. The real fix which was
- * promised in the context of posix_timer_fn() never
- * materialized, but someone should really work on it.
- *
- * To prevent DOS fake @now to be 1 jiffy out which keeps
- * the overrun accounting correct but creates an
- * inconsistency vs. timer_gettime(2).
- */
- ktime_t kj = NSEC_PER_SEC / HZ;
- if (interval < kj)
- now = ktime_add(now, kj);
- }
-
- return alarm_forward(alarm, now, interval);
-}
-
-u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
-{
- return __alarm_forward_now(alarm, interval, false);
+ return alarm_forward(alarm, base->get_ktime(), interval);
}
EXPORT_SYMBOL_GPL(alarm_forward_now);
@@ -567,30 +529,12 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid)
*
* Return: whether the timer is to be restarted
*/
-static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
- ktime_t now)
+static void alarm_handle_timer(struct alarm *alarm, ktime_t now)
{
- struct k_itimer *ptr = container_of(alarm, struct k_itimer,
- it.alarm.alarmtimer);
- enum alarmtimer_restart result = ALARMTIMER_NORESTART;
- unsigned long flags;
+ struct k_itimer *ptr = container_of(alarm, struct k_itimer, it.alarm.alarmtimer);
- spin_lock_irqsave(&ptr->it_lock, flags);
-
- if (posix_timer_queue_signal(ptr) && ptr->it_interval) {
- /*
- * Handle ignored signals and rearm the timer. This will go
- * away once we handle ignored signals proper. Ensure that
- * small intervals cannot starve the system.
- */
- ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true);
- ++ptr->it_requeue_pending;
- ptr->it_active = 1;
- result = ALARMTIMER_RESTART;
- }
- spin_unlock_irqrestore(&ptr->it_lock, flags);
-
- return result;
+ guard(spinlock_irqsave)(&ptr->it_lock);
+ posix_timer_queue_signal(ptr);
}
/**
@@ -751,18 +695,14 @@ static int alarm_timer_create(struct k_itimer *new_timer)
* @now: time at the timer expiration
*
* Wakes up the task that set the alarmtimer
- *
- * Return: ALARMTIMER_NORESTART
*/
-static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm,
- ktime_t now)
+static void alarmtimer_nsleep_wakeup(struct alarm *alarm, ktime_t now)
{
struct task_struct *task = alarm->data;
alarm->data = NULL;
if (task)
wake_up_process(task);
- return ALARMTIMER_NORESTART;
}
/**
@@ -814,10 +754,10 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
static void
alarm_init_on_stack(struct alarm *alarm, enum alarmtimer_type type,
- enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
+ void (*function)(struct alarm *, ktime_t))
{
- hrtimer_init_on_stack(&alarm->timer, alarm_bases[type].base_clockid,
- HRTIMER_MODE_ABS);
+ hrtimer_setup_on_stack(&alarm->timer, alarmtimer_fired, alarm_bases[type].base_clockid,
+ HRTIMER_MODE_ABS);
__alarm_init(alarm, type, function);
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 78c7bd64d0dd..f3e831f62906 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -337,13 +337,21 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
}
/*
- * Called after a notify add to make devices available which were
- * released from the notifier call.
+ * Called after a clockevent has been added which might
+ * have replaced a current regular or broadcast device. A
+ * released normal device might be a suitable replacement
+ * for the current broadcast device. Similarly a released
+ * broadcast device might be a suitable replacement for a
+ * normal device.
*/
static void clockevents_notify_released(void)
{
struct clock_event_device *dev;
+ /*
+ * Keep iterating as long as tick_check_new_device()
+ * replaces a device.
+ */
while (!list_empty(&clockevents_released)) {
dev = list_entry(clockevents_released.next,
struct clock_event_device, list);
@@ -610,39 +618,30 @@ void clockevents_resume(void)
#ifdef CONFIG_HOTPLUG_CPU
-# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/**
- * tick_offline_cpu - Take CPU out of the broadcast mechanism
+ * tick_offline_cpu - Shutdown all clock events related
+ * to this CPU and take it out of the
+ * broadcast mechanism.
* @cpu: The outgoing CPU
*
- * Called on the outgoing CPU after it took itself offline.
+ * Called by the dying CPU during teardown.
*/
void tick_offline_cpu(unsigned int cpu)
{
- raw_spin_lock(&clockevents_lock);
- tick_broadcast_offline(cpu);
- raw_spin_unlock(&clockevents_lock);
-}
-# endif
-
-/**
- * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
- * @cpu: The dead CPU
- */
-void tick_cleanup_dead_cpu(int cpu)
-{
struct clock_event_device *dev, *tmp;
- unsigned long flags;
- raw_spin_lock_irqsave(&clockevents_lock, flags);
+ raw_spin_lock(&clockevents_lock);
+ tick_broadcast_offline(cpu);
tick_shutdown(cpu);
+
/*
* Unregister the clock event devices which were
- * released from the users in the notify chain.
+ * released above.
*/
list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
list_del(&dev->list);
+
/*
* Now check whether the CPU has left unused per cpu devices
*/
@@ -654,7 +653,8 @@ void tick_cleanup_dead_cpu(int cpu)
list_del(&dev->list);
}
}
- raw_spin_unlock_irqrestore(&clockevents_lock, flags);
+
+ raw_spin_unlock(&clockevents_lock);
}
#endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 23336eecb4f4..aab6472853fa 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -20,6 +20,8 @@
#include "tick-internal.h"
#include "timekeeping_internal.h"
+static void clocksource_enqueue(struct clocksource *cs);
+
static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
{
u64 delta = clocksource_delta(end, start, cs->mask);
@@ -171,7 +173,6 @@ static inline void clocksource_watchdog_unlock(unsigned long *flags)
}
static int clocksource_watchdog_kthread(void *data);
-static void __clocksource_change_rating(struct clocksource *cs, int rating);
static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -191,6 +192,13 @@ static void clocksource_watchdog_work(struct work_struct *work)
kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}
+static void clocksource_change_rating(struct clocksource *cs, int rating)
+{
+ list_del(&cs->list);
+ cs->rating = rating;
+ clocksource_enqueue(cs);
+}
+
static void __clocksource_unstable(struct clocksource *cs)
{
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
@@ -697,7 +705,7 @@ static int __clocksource_watchdog_kthread(void)
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
list_del_init(&cs->wd_list);
- __clocksource_change_rating(cs, 0);
+ clocksource_change_rating(cs, 0);
select = 1;
}
if (cs->flags & CLOCK_SOURCE_RESELECT) {
@@ -1255,34 +1263,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
-static void __clocksource_change_rating(struct clocksource *cs, int rating)
-{
- list_del(&cs->list);
- cs->rating = rating;
- clocksource_enqueue(cs);
-}
-
-/**
- * clocksource_change_rating - Change the rating of a registered clocksource
- * @cs: clocksource to be changed
- * @rating: new rating
- */
-void clocksource_change_rating(struct clocksource *cs, int rating)
-{
- unsigned long flags;
-
- mutex_lock(&clocksource_mutex);
- clocksource_watchdog_lock(&flags);
- __clocksource_change_rating(cs, rating);
- clocksource_watchdog_unlock(&flags);
-
- clocksource_select();
- clocksource_select_watchdog(false);
- clocksource_suspend_select(false);
- mutex_unlock(&clocksource_mutex);
-}
-EXPORT_SYMBOL(clocksource_change_rating);
-
/*
* Unbind clocksource @cs. Called with clocksource_mutex held
*/
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index cddcd08ea827..80fe3749d2db 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -417,6 +417,11 @@ static inline void debug_hrtimer_init(struct hrtimer *timer)
debug_object_init(timer, &hrtimer_debug_descr);
}
+static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer)
+{
+ debug_object_init_on_stack(timer, &hrtimer_debug_descr);
+}
+
static inline void debug_hrtimer_activate(struct hrtimer *timer,
enum hrtimer_mode mode)
{
@@ -428,28 +433,6 @@ static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
debug_object_deactivate(timer, &hrtimer_debug_descr);
}
-static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
- enum hrtimer_mode mode);
-
-void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
- enum hrtimer_mode mode)
-{
- debug_object_init_on_stack(timer, &hrtimer_debug_descr);
- __hrtimer_init(timer, clock_id, mode);
-}
-EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
-
-static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
- clockid_t clock_id, enum hrtimer_mode mode);
-
-void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
- clockid_t clock_id, enum hrtimer_mode mode)
-{
- debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
- __hrtimer_init_sleeper(sl, clock_id, mode);
-}
-EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
-
void destroy_hrtimer_on_stack(struct hrtimer *timer)
{
debug_object_free(timer, &hrtimer_debug_descr);
@@ -459,6 +442,7 @@ EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
#else
static inline void debug_hrtimer_init(struct hrtimer *timer) { }
+static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { }
static inline void debug_hrtimer_activate(struct hrtimer *timer,
enum hrtimer_mode mode) { }
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
@@ -472,6 +456,13 @@ debug_init(struct hrtimer *timer, clockid_t clockid,
trace_hrtimer_init(timer, clockid, mode);
}
+static inline void debug_init_on_stack(struct hrtimer *timer, clockid_t clockid,
+ enum hrtimer_mode mode)
+{
+ debug_hrtimer_init_on_stack(timer);
+ trace_hrtimer_init(timer, clockid, mode);
+}
+
static inline void debug_activate(struct hrtimer *timer,
enum hrtimer_mode mode)
{
@@ -1544,6 +1535,11 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
return HRTIMER_BASE_MONOTONIC;
}
+static enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused)
+{
+ return HRTIMER_NORESTART;
+}
+
static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
@@ -1580,6 +1576,18 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
timerqueue_init(&timer->node);
}
+static void __hrtimer_setup(struct hrtimer *timer,
+ enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t clock_id, enum hrtimer_mode mode)
+{
+ __hrtimer_init(timer, clock_id, mode);
+
+ if (WARN_ON_ONCE(!function))
+ timer->function = hrtimer_dummy_timeout;
+ else
+ timer->function = function;
+}
+
/**
* hrtimer_init - initialize a timer to the given clock
* @timer: the timer to be initialized
@@ -1600,6 +1608,46 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
}
EXPORT_SYMBOL_GPL(hrtimer_init);
+/**
+ * hrtimer_setup - initialize a timer to the given clock
+ * @timer: the timer to be initialized
+ * @function: the callback function
+ * @clock_id: the clock to be used
+ * @mode: The modes which are relevant for initialization:
+ * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
+ * HRTIMER_MODE_REL_SOFT
+ *
+ * The PINNED variants of the above can be handed in,
+ * but the PINNED bit is ignored as pinning happens
+ * when the hrtimer is started
+ */
+void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t clock_id, enum hrtimer_mode mode)
+{
+ debug_init(timer, clock_id, mode);
+ __hrtimer_setup(timer, function, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_setup);
+
+/**
+ * hrtimer_setup_on_stack - initialize a timer on stack memory
+ * @timer: The timer to be initialized
+ * @function: the callback function
+ * @clock_id: The clock to be used
+ * @mode: The timer mode
+ *
+ * Similar to hrtimer_setup(), except that this one must be used if struct hrtimer is in stack
+ * memory.
+ */
+void hrtimer_setup_on_stack(struct hrtimer *timer,
+ enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t clock_id, enum hrtimer_mode mode)
+{
+ debug_init_on_stack(timer, clock_id, mode);
+ __hrtimer_setup(timer, function, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack);
+
/*
* A timer is active, when it is enqueued into the rbtree or the
* callback function is running or it's in the state of being migrated
@@ -1811,7 +1859,7 @@ retry:
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_timer_softirq(HRTIMER_SOFTIRQ);
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1906,7 +1954,7 @@ void hrtimer_run_queues(void)
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_timer_softirq(HRTIMER_SOFTIRQ);
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1944,7 +1992,7 @@ void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
* Make the enqueue delivery mode check work on RT. If the sleeper
* was initialized for hard interrupt delivery, force the mode bit.
* This is a special case for hrtimer_sleepers because
- * hrtimer_init_sleeper() determines the delivery mode on RT so the
+ * __hrtimer_init_sleeper() determines the delivery mode on RT so the
* fiddling with this decision is avoided at the call sites.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
@@ -1987,19 +2035,18 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
}
/**
- * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * hrtimer_setup_sleeper_on_stack - initialize a sleeper in stack memory
* @sl: sleeper to be initialized
* @clock_id: the clock to be used
* @mode: timer mode abs/rel
*/
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
- enum hrtimer_mode mode)
+void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl,
+ clockid_t clock_id, enum hrtimer_mode mode)
{
- debug_init(&sl->timer, clock_id, mode);
+ debug_init_on_stack(&sl->timer, clock_id, mode);
__hrtimer_init_sleeper(sl, clock_id, mode);
-
}
-EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
+EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack);
int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
{
@@ -2060,8 +2107,7 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
struct hrtimer_sleeper t;
int ret;
- hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
- HRTIMER_MODE_ABS);
+ hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS);
hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
destroy_hrtimer_on_stack(&t.timer);
@@ -2075,7 +2121,7 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
struct hrtimer_sleeper t;
int ret = 0;
- hrtimer_init_sleeper_on_stack(&t, clockid, mode);
+ hrtimer_setup_sleeper_on_stack(&t, clockid, mode);
hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns);
ret = do_nanosleep(&t, mode);
if (ret != -ERESTART_RESTARTBLOCK)
@@ -2242,123 +2288,3 @@ void __init hrtimers_init(void)
hrtimers_prepare_cpu(smp_processor_id());
open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
}
-
-/**
- * schedule_hrtimeout_range_clock - sleep until timeout
- * @expires: timeout value (ktime_t)
- * @delta: slack in expires timeout (ktime_t)
- * @mode: timer mode
- * @clock_id: timer clock to be used
- */
-int __sched
-schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
- const enum hrtimer_mode mode, clockid_t clock_id)
-{
- struct hrtimer_sleeper t;
-
- /*
- * Optimize when a zero timeout value is given. It does not
- * matter whether this is an absolute or a relative time.
- */
- if (expires && *expires == 0) {
- __set_current_state(TASK_RUNNING);
- return 0;
- }
-
- /*
- * A NULL parameter means "infinite"
- */
- if (!expires) {
- schedule();
- return -EINTR;
- }
-
- hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
- hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
- hrtimer_sleeper_start_expires(&t, mode);
-
- if (likely(t.task))
- schedule();
-
- hrtimer_cancel(&t.timer);
- destroy_hrtimer_on_stack(&t.timer);
-
- __set_current_state(TASK_RUNNING);
-
- return !t.task ? 0 : -EINTR;
-}
-EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
-
-/**
- * schedule_hrtimeout_range - sleep until timeout
- * @expires: timeout value (ktime_t)
- * @delta: slack in expires timeout (ktime_t)
- * @mode: timer mode
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * The @delta argument gives the kernel the freedom to schedule the
- * actual wakeup to a time that is both power and performance friendly
- * for regular (non RT/DL) tasks.
- * The kernel give the normal best effort behavior for "@expires+@delta",
- * but may decide to fire the timer earlier, but no earlier than @expires.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns unless the current task is explicitly
- * woken up, (e.g. by wake_up_process()).
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task or the current task is explicitly woken
- * up.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired. If the task was woken before the
- * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
- * by an explicit wakeup, it returns -EINTR.
- */
-int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
- const enum hrtimer_mode mode)
-{
- return schedule_hrtimeout_range_clock(expires, delta, mode,
- CLOCK_MONOTONIC);
-}
-EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
-
-/**
- * schedule_hrtimeout - sleep until timeout
- * @expires: timeout value (ktime_t)
- * @mode: timer mode
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns unless the current task is explicitly
- * woken up, (e.g. by wake_up_process()).
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task or the current task is explicitly woken
- * up.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired. If the task was woken before the
- * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
- * by an explicit wakeup, it returns -EINTR.
- */
-int __sched schedule_hrtimeout(ktime_t *expires,
- const enum hrtimer_mode mode)
-{
- return schedule_hrtimeout_range(expires, 0, mode);
-}
-EXPORT_SYMBOL_GPL(schedule_hrtimeout);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 00629e658ca1..876d389b2e21 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -151,7 +151,27 @@ COMPAT_SYSCALL_DEFINE2(getitimer, int, which,
#endif
/*
- * The timer is automagically restarted, when interval != 0
+ * Invoked from dequeue_signal() when SIG_ALRM is delivered.
+ *
+ * Restart the ITIMER_REAL timer if it is armed as periodic timer. Doing
+ * this in the signal delivery path instead of self rearming prevents a DoS
+ * with small increments in the high reolution timer case and reduces timer
+ * noise in general.
+ */
+void posixtimer_rearm_itimer(struct task_struct *tsk)
+{
+ struct hrtimer *tmr = &tsk->signal->real_timer;
+
+ if (!hrtimer_is_queued(tmr) && tsk->signal->it_real_incr != 0) {
+ hrtimer_forward(tmr, tmr->base->get_time(),
+ tsk->signal->it_real_incr);
+ hrtimer_restart(tmr);
+ }
+}
+
+/*
+ * Interval timers are restarted in the signal delivery path. See
+ * posixtimer_rearm_itimer().
*/
enum hrtimer_restart it_real_fn(struct hrtimer *timer)
{
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 802b336f4b8c..b550ebe0f03b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -22,22 +22,79 @@
#include "ntp_internal.h"
#include "timekeeping_internal.h"
-
-/*
- * NTP timekeeping variables:
+/**
+ * struct ntp_data - Structure holding all NTP related state
+ * @tick_usec: USER_HZ period in microseconds
+ * @tick_length: Adjusted tick length
+ * @tick_length_base: Base value for @tick_length
+ * @time_state: State of the clock synchronization
+ * @time_status: Clock status bits
+ * @time_offset: Time adjustment in nanoseconds
+ * @time_constant: PLL time constant
+ * @time_maxerror: Maximum error in microseconds holding the NTP sync distance
+ * (NTP dispersion + delay / 2)
+ * @time_esterror: Estimated error in microseconds holding NTP dispersion
+ * @time_freq: Frequency offset scaled nsecs/secs
+ * @time_reftime: Time at last adjustment in seconds
+ * @time_adjust: Adjustment value
+ * @ntp_tick_adj: Constant boot-param configurable NTP tick adjustment (upscaled)
+ * @ntp_next_leap_sec: Second value of the next pending leapsecond, or TIME64_MAX if no leap
*
- * Note: All of the NTP state is protected by the timekeeping locks.
+ * @pps_valid: PPS signal watchdog counter
+ * @pps_tf: PPS phase median filter
+ * @pps_jitter: PPS current jitter in nanoseconds
+ * @pps_fbase: PPS beginning of the last freq interval
+ * @pps_shift: PPS current interval duration in seconds (shift value)
+ * @pps_intcnt: PPS interval counter
+ * @pps_freq: PPS frequency offset in scaled ns/s
+ * @pps_stabil: PPS current stability in scaled ns/s
+ * @pps_calcnt: PPS monitor: calibration intervals
+ * @pps_jitcnt: PPS monitor: jitter limit exceeded
+ * @pps_stbcnt: PPS monitor: stability limit exceeded
+ * @pps_errcnt: PPS monitor: calibration errors
+ *
+ * Protected by the timekeeping locks.
*/
+struct ntp_data {
+ unsigned long tick_usec;
+ u64 tick_length;
+ u64 tick_length_base;
+ int time_state;
+ int time_status;
+ s64 time_offset;
+ long time_constant;
+ long time_maxerror;
+ long time_esterror;
+ s64 time_freq;
+ time64_t time_reftime;
+ long time_adjust;
+ s64 ntp_tick_adj;
+ time64_t ntp_next_leap_sec;
+#ifdef CONFIG_NTP_PPS
+ int pps_valid;
+ long pps_tf[3];
+ long pps_jitter;
+ struct timespec64 pps_fbase;
+ int pps_shift;
+ int pps_intcnt;
+ s64 pps_freq;
+ long pps_stabil;
+ long pps_calcnt;
+ long pps_jitcnt;
+ long pps_stbcnt;
+ long pps_errcnt;
+#endif
+};
-
-/* USER_HZ period (usecs): */
-unsigned long tick_usec = USER_TICK_USEC;
-
-/* SHIFTED_HZ period (nsecs): */
-unsigned long tick_nsec;
-
-static u64 tick_length;
-static u64 tick_length_base;
+static struct ntp_data tk_ntp_data = {
+ .tick_usec = USER_TICK_USEC,
+ .time_state = TIME_OK,
+ .time_status = STA_UNSYNC,
+ .time_constant = 2,
+ .time_maxerror = NTP_PHASE_LIMIT,
+ .time_esterror = NTP_PHASE_LIMIT,
+ .ntp_next_leap_sec = TIME64_MAX,
+};
#define SECS_PER_DAY 86400
#define MAX_TICKADJ 500LL /* usecs */
@@ -45,46 +102,6 @@ static u64 tick_length_base;
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
#define MAX_TAI_OFFSET 100000
-/*
- * phase-lock loop variables
- */
-
-/*
- * clock synchronization status
- *
- * (TIME_ERROR prevents overwriting the CMOS clock)
- */
-static int time_state = TIME_OK;
-
-/* clock status bits: */
-static int time_status = STA_UNSYNC;
-
-/* time adjustment (nsecs): */
-static s64 time_offset;
-
-/* pll time constant: */
-static long time_constant = 2;
-
-/* maximum error (usecs): */
-static long time_maxerror = NTP_PHASE_LIMIT;
-
-/* estimated error (usecs): */
-static long time_esterror = NTP_PHASE_LIMIT;
-
-/* frequency offset (scaled nsecs/secs): */
-static s64 time_freq;
-
-/* time at last adjustment (secs): */
-static time64_t time_reftime;
-
-static long time_adjust;
-
-/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
-static s64 ntp_tick_adj;
-
-/* second value of the next pending leapsecond, or TIME64_MAX if no leap */
-static time64_t ntp_next_leap_sec = TIME64_MAX;
-
#ifdef CONFIG_NTP_PPS
/*
@@ -101,128 +118,115 @@ static time64_t ntp_next_leap_sec = TIME64_MAX;
intervals to decrease it */
#define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */
-static int pps_valid; /* signal watchdog counter */
-static long pps_tf[3]; /* phase median filter */
-static long pps_jitter; /* current jitter (ns) */
-static struct timespec64 pps_fbase; /* beginning of the last freq interval */
-static int pps_shift; /* current interval duration (s) (shift) */
-static int pps_intcnt; /* interval counter */
-static s64 pps_freq; /* frequency offset (scaled ns/s) */
-static long pps_stabil; /* current stability (scaled ns/s) */
-
/*
- * PPS signal quality monitors
- */
-static long pps_calcnt; /* calibration intervals */
-static long pps_jitcnt; /* jitter limit exceeded */
-static long pps_stbcnt; /* stability limit exceeded */
-static long pps_errcnt; /* calibration errors */
-
-
-/* PPS kernel consumer compensates the whole phase error immediately.
+ * PPS kernel consumer compensates the whole phase error immediately.
* Otherwise, reduce the offset by a fixed factor times the time constant.
*/
-static inline s64 ntp_offset_chunk(s64 offset)
+static inline s64 ntp_offset_chunk(struct ntp_data *ntpdata, s64 offset)
{
- if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
+ if (ntpdata->time_status & STA_PPSTIME && ntpdata->time_status & STA_PPSSIGNAL)
return offset;
else
- return shift_right(offset, SHIFT_PLL + time_constant);
+ return shift_right(offset, SHIFT_PLL + ntpdata->time_constant);
}
-static inline void pps_reset_freq_interval(void)
+static inline void pps_reset_freq_interval(struct ntp_data *ntpdata)
{
- /* the PPS calibration interval may end
- surprisingly early */
- pps_shift = PPS_INTMIN;
- pps_intcnt = 0;
+ /* The PPS calibration interval may end surprisingly early */
+ ntpdata->pps_shift = PPS_INTMIN;
+ ntpdata->pps_intcnt = 0;
}
/**
* pps_clear - Clears the PPS state variables
+ * @ntpdata: Pointer to ntp data
*/
-static inline void pps_clear(void)
+static inline void pps_clear(struct ntp_data *ntpdata)
{
- pps_reset_freq_interval();
- pps_tf[0] = 0;
- pps_tf[1] = 0;
- pps_tf[2] = 0;
- pps_fbase.tv_sec = pps_fbase.tv_nsec = 0;
- pps_freq = 0;
+ pps_reset_freq_interval(ntpdata);
+ ntpdata->pps_tf[0] = 0;
+ ntpdata->pps_tf[1] = 0;
+ ntpdata->pps_tf[2] = 0;
+ ntpdata->pps_fbase.tv_sec = ntpdata->pps_fbase.tv_nsec = 0;
+ ntpdata->pps_freq = 0;
}
-/* Decrease pps_valid to indicate that another second has passed since
- * the last PPS signal. When it reaches 0, indicate that PPS signal is
- * missing.
+/*
+ * Decrease pps_valid to indicate that another second has passed since the
+ * last PPS signal. When it reaches 0, indicate that PPS signal is missing.
*/
-static inline void pps_dec_valid(void)
+static inline void pps_dec_valid(struct ntp_data *ntpdata)
{
- if (pps_valid > 0)
- pps_valid--;
- else {
- time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
- STA_PPSWANDER | STA_PPSERROR);
- pps_clear();
+ if (ntpdata->pps_valid > 0) {
+ ntpdata->pps_valid--;
+ } else {
+ ntpdata->time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+ STA_PPSWANDER | STA_PPSERROR);
+ pps_clear(ntpdata);
}
}
-static inline void pps_set_freq(s64 freq)
+static inline void pps_set_freq(struct ntp_data *ntpdata)
{
- pps_freq = freq;
+ ntpdata->pps_freq = ntpdata->time_freq;
}
-static inline int is_error_status(int status)
+static inline bool is_error_status(int status)
{
return (status & (STA_UNSYNC|STA_CLOCKERR))
- /* PPS signal lost when either PPS time or
- * PPS frequency synchronization requested
+ /*
+ * PPS signal lost when either PPS time or PPS frequency
+ * synchronization requested
*/
|| ((status & (STA_PPSFREQ|STA_PPSTIME))
&& !(status & STA_PPSSIGNAL))
- /* PPS jitter exceeded when
- * PPS time synchronization requested */
+ /*
+ * PPS jitter exceeded when PPS time synchronization
+ * requested
+ */
|| ((status & (STA_PPSTIME|STA_PPSJITTER))
== (STA_PPSTIME|STA_PPSJITTER))
- /* PPS wander exceeded or calibration error when
- * PPS frequency synchronization requested
+ /*
+ * PPS wander exceeded or calibration error when PPS
+ * frequency synchronization requested
*/
|| ((status & STA_PPSFREQ)
&& (status & (STA_PPSWANDER|STA_PPSERROR)));
}
-static inline void pps_fill_timex(struct __kernel_timex *txc)
+static inline void pps_fill_timex(struct ntp_data *ntpdata, struct __kernel_timex *txc)
{
- txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
+ txc->ppsfreq = shift_right((ntpdata->pps_freq >> PPM_SCALE_INV_SHIFT) *
PPM_SCALE_INV, NTP_SCALE_SHIFT);
- txc->jitter = pps_jitter;
- if (!(time_status & STA_NANO))
- txc->jitter = pps_jitter / NSEC_PER_USEC;
- txc->shift = pps_shift;
- txc->stabil = pps_stabil;
- txc->jitcnt = pps_jitcnt;
- txc->calcnt = pps_calcnt;
- txc->errcnt = pps_errcnt;
- txc->stbcnt = pps_stbcnt;
+ txc->jitter = ntpdata->pps_jitter;
+ if (!(ntpdata->time_status & STA_NANO))
+ txc->jitter = ntpdata->pps_jitter / NSEC_PER_USEC;
+ txc->shift = ntpdata->pps_shift;
+ txc->stabil = ntpdata->pps_stabil;
+ txc->jitcnt = ntpdata->pps_jitcnt;
+ txc->calcnt = ntpdata->pps_calcnt;
+ txc->errcnt = ntpdata->pps_errcnt;
+ txc->stbcnt = ntpdata->pps_stbcnt;
}
#else /* !CONFIG_NTP_PPS */
-static inline s64 ntp_offset_chunk(s64 offset)
+static inline s64 ntp_offset_chunk(struct ntp_data *ntpdata, s64 offset)
{
- return shift_right(offset, SHIFT_PLL + time_constant);
+ return shift_right(offset, SHIFT_PLL + ntpdata->time_constant);
}
-static inline void pps_reset_freq_interval(void) {}
-static inline void pps_clear(void) {}
-static inline void pps_dec_valid(void) {}
-static inline void pps_set_freq(s64 freq) {}
+static inline void pps_reset_freq_interval(struct ntp_data *ntpdata) {}
+static inline void pps_clear(struct ntp_data *ntpdata) {}
+static inline void pps_dec_valid(struct ntp_data *ntpdata) {}
+static inline void pps_set_freq(struct ntp_data *ntpdata) {}
-static inline int is_error_status(int status)
+static inline bool is_error_status(int status)
{
return status & (STA_UNSYNC|STA_CLOCKERR);
}
-static inline void pps_fill_timex(struct __kernel_timex *txc)
+static inline void pps_fill_timex(struct ntp_data *ntpdata, struct __kernel_timex *txc)
{
/* PPS is not implemented, so these are zero */
txc->ppsfreq = 0;
@@ -237,138 +241,123 @@ static inline void pps_fill_timex(struct __kernel_timex *txc)
#endif /* CONFIG_NTP_PPS */
-
-/**
- * ntp_synced - Returns 1 if the NTP status is not UNSYNC
- *
- */
-static inline int ntp_synced(void)
-{
- return !(time_status & STA_UNSYNC);
-}
-
-
/*
- * NTP methods:
+ * Update tick_length and tick_length_base, based on tick_usec, ntp_tick_adj and
+ * time_freq:
*/
-
-/*
- * Update (tick_length, tick_length_base, tick_nsec), based
- * on (tick_usec, ntp_tick_adj, time_freq):
- */
-static void ntp_update_frequency(void)
+static void ntp_update_frequency(struct ntp_data *ntpdata)
{
- u64 second_length;
- u64 new_base;
+ u64 second_length, new_base, tick_usec = (u64)ntpdata->tick_usec;
- second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
- << NTP_SCALE_SHIFT;
+ second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << NTP_SCALE_SHIFT;
- second_length += ntp_tick_adj;
- second_length += time_freq;
+ second_length += ntpdata->ntp_tick_adj;
+ second_length += ntpdata->time_freq;
- tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
new_base = div_u64(second_length, NTP_INTERVAL_FREQ);
/*
- * Don't wait for the next second_overflow, apply
- * the change to the tick length immediately:
+ * Don't wait for the next second_overflow, apply the change to the
+ * tick length immediately:
*/
- tick_length += new_base - tick_length_base;
- tick_length_base = new_base;
+ ntpdata->tick_length += new_base - ntpdata->tick_length_base;
+ ntpdata->tick_length_base = new_base;
}
-static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
+static inline s64 ntp_update_offset_fll(struct ntp_data *ntpdata, s64 offset64, long secs)
{
- time_status &= ~STA_MODE;
+ ntpdata->time_status &= ~STA_MODE;
if (secs < MINSEC)
return 0;
- if (!(time_status & STA_FLL) && (secs <= MAXSEC))
+ if (!(ntpdata->time_status & STA_FLL) && (secs <= MAXSEC))
return 0;
- time_status |= STA_MODE;
+ ntpdata->time_status |= STA_MODE;
return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
}
-static void ntp_update_offset(long offset)
+static void ntp_update_offset(struct ntp_data *ntpdata, long offset)
{
- s64 freq_adj;
- s64 offset64;
- long secs;
+ s64 freq_adj, offset64;
+ long secs, real_secs;
- if (!(time_status & STA_PLL))
+ if (!(ntpdata->time_status & STA_PLL))
return;
- if (!(time_status & STA_NANO)) {
+ if (!(ntpdata->time_status & STA_NANO)) {
/* Make sure the multiplication below won't overflow */
offset = clamp(offset, -USEC_PER_SEC, USEC_PER_SEC);
offset *= NSEC_PER_USEC;
}
- /*
- * Scale the phase adjustment and
- * clamp to the operating range.
- */
+ /* Scale the phase adjustment and clamp to the operating range. */
offset = clamp(offset, -MAXPHASE, MAXPHASE);
/*
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
- secs = (long)(__ktime_get_real_seconds() - time_reftime);
- if (unlikely(time_status & STA_FREQHOLD))
+ real_secs = __ktime_get_real_seconds();
+ secs = (long)(real_secs - ntpdata->time_reftime);
+ if (unlikely(ntpdata->time_status & STA_FREQHOLD))
secs = 0;
- time_reftime = __ktime_get_real_seconds();
+ ntpdata->time_reftime = real_secs;
offset64 = offset;
- freq_adj = ntp_update_offset_fll(offset64, secs);
+ freq_adj = ntp_update_offset_fll(ntpdata, offset64, secs);
/*
* Clamp update interval to reduce PLL gain with low
* sampling rate (e.g. intermittent network connection)
* to avoid instability.
*/
- if (unlikely(secs > 1 << (SHIFT_PLL + 1 + time_constant)))
- secs = 1 << (SHIFT_PLL + 1 + time_constant);
+ if (unlikely(secs > 1 << (SHIFT_PLL + 1 + ntpdata->time_constant)))
+ secs = 1 << (SHIFT_PLL + 1 + ntpdata->time_constant);
freq_adj += (offset64 * secs) <<
- (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant));
+ (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + ntpdata->time_constant));
- freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED);
+ freq_adj = min(freq_adj + ntpdata->time_freq, MAXFREQ_SCALED);
- time_freq = max(freq_adj, -MAXFREQ_SCALED);
+ ntpdata->time_freq = max(freq_adj, -MAXFREQ_SCALED);
- time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
+ ntpdata->time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
}
-/**
- * ntp_clear - Clears the NTP state variables
- */
-void ntp_clear(void)
+static void __ntp_clear(struct ntp_data *ntpdata)
{
- time_adjust = 0; /* stop active adjtime() */
- time_status |= STA_UNSYNC;
- time_maxerror = NTP_PHASE_LIMIT;
- time_esterror = NTP_PHASE_LIMIT;
+ /* Stop active adjtime() */
+ ntpdata->time_adjust = 0;
+ ntpdata->time_status |= STA_UNSYNC;
+ ntpdata->time_maxerror = NTP_PHASE_LIMIT;
+ ntpdata->time_esterror = NTP_PHASE_LIMIT;
- ntp_update_frequency();
+ ntp_update_frequency(ntpdata);
- tick_length = tick_length_base;
- time_offset = 0;
+ ntpdata->tick_length = ntpdata->tick_length_base;
+ ntpdata->time_offset = 0;
- ntp_next_leap_sec = TIME64_MAX;
+ ntpdata->ntp_next_leap_sec = TIME64_MAX;
/* Clear PPS state variables */
- pps_clear();
+ pps_clear(ntpdata);
+}
+
+/**
+ * ntp_clear - Clears the NTP state variables
+ */
+void ntp_clear(void)
+{
+ __ntp_clear(&tk_ntp_data);
}
u64 ntp_tick_length(void)
{
- return tick_length;
+ return tk_ntp_data.tick_length;
}
/**
@@ -379,16 +368,17 @@ u64 ntp_tick_length(void)
*/
ktime_t ntp_get_next_leap(void)
{
+ struct ntp_data *ntpdata = &tk_ntp_data;
ktime_t ret;
- if ((time_state == TIME_INS) && (time_status & STA_INS))
- return ktime_set(ntp_next_leap_sec, 0);
+ if ((ntpdata->time_state == TIME_INS) && (ntpdata->time_status & STA_INS))
+ return ktime_set(ntpdata->ntp_next_leap_sec, 0);
ret = KTIME_MAX;
return ret;
}
/*
- * this routine handles the overflow of the microsecond field
+ * This routine handles the overflow of the microsecond field
*
* The tricky bits of code to handle the accurate clock support
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
@@ -399,6 +389,7 @@ ktime_t ntp_get_next_leap(void)
*/
int second_overflow(time64_t secs)
{
+ struct ntp_data *ntpdata = &tk_ntp_data;
s64 delta;
int leap = 0;
s32 rem;
@@ -408,87 +399,84 @@ int second_overflow(time64_t secs)
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second.
*/
- switch (time_state) {
+ switch (ntpdata->time_state) {
case TIME_OK:
- if (time_status & STA_INS) {
- time_state = TIME_INS;
+ if (ntpdata->time_status & STA_INS) {
+ ntpdata->time_state = TIME_INS;
div_s64_rem(secs, SECS_PER_DAY, &rem);
- ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
- } else if (time_status & STA_DEL) {
- time_state = TIME_DEL;
+ ntpdata->ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
+ } else if (ntpdata->time_status & STA_DEL) {
+ ntpdata->time_state = TIME_DEL;
div_s64_rem(secs + 1, SECS_PER_DAY, &rem);
- ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
+ ntpdata->ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
}
break;
case TIME_INS:
- if (!(time_status & STA_INS)) {
- ntp_next_leap_sec = TIME64_MAX;
- time_state = TIME_OK;
- } else if (secs == ntp_next_leap_sec) {
+ if (!(ntpdata->time_status & STA_INS)) {
+ ntpdata->ntp_next_leap_sec = TIME64_MAX;
+ ntpdata->time_state = TIME_OK;
+ } else if (secs == ntpdata->ntp_next_leap_sec) {
leap = -1;
- time_state = TIME_OOP;
- printk(KERN_NOTICE
- "Clock: inserting leap second 23:59:60 UTC\n");
+ ntpdata->time_state = TIME_OOP;
+ pr_notice("Clock: inserting leap second 23:59:60 UTC\n");
}
break;
case TIME_DEL:
- if (!(time_status & STA_DEL)) {
- ntp_next_leap_sec = TIME64_MAX;
- time_state = TIME_OK;
- } else if (secs == ntp_next_leap_sec) {
+ if (!(ntpdata->time_status & STA_DEL)) {
+ ntpdata->ntp_next_leap_sec = TIME64_MAX;
+ ntpdata->time_state = TIME_OK;
+ } else if (secs == ntpdata->ntp_next_leap_sec) {
leap = 1;
- ntp_next_leap_sec = TIME64_MAX;
- time_state = TIME_WAIT;
- printk(KERN_NOTICE
- "Clock: deleting leap second 23:59:59 UTC\n");
+ ntpdata->ntp_next_leap_sec = TIME64_MAX;
+ ntpdata->time_state = TIME_WAIT;
+ pr_notice("Clock: deleting leap second 23:59:59 UTC\n");
}
break;
case TIME_OOP:
- ntp_next_leap_sec = TIME64_MAX;
- time_state = TIME_WAIT;
+ ntpdata->ntp_next_leap_sec = TIME64_MAX;
+ ntpdata->time_state = TIME_WAIT;
break;
case TIME_WAIT:
- if (!(time_status & (STA_INS | STA_DEL)))
- time_state = TIME_OK;
+ if (!(ntpdata->time_status & (STA_INS | STA_DEL)))
+ ntpdata->time_state = TIME_OK;
break;
}
-
/* Bump the maxerror field */
- time_maxerror += MAXFREQ / NSEC_PER_USEC;
- if (time_maxerror > NTP_PHASE_LIMIT) {
- time_maxerror = NTP_PHASE_LIMIT;
- time_status |= STA_UNSYNC;
+ ntpdata->time_maxerror += MAXFREQ / NSEC_PER_USEC;
+ if (ntpdata->time_maxerror > NTP_PHASE_LIMIT) {
+ ntpdata->time_maxerror = NTP_PHASE_LIMIT;
+ ntpdata->time_status |= STA_UNSYNC;
}
/* Compute the phase adjustment for the next second */
- tick_length = tick_length_base;
+ ntpdata->tick_length = ntpdata->tick_length_base;
- delta = ntp_offset_chunk(time_offset);
- time_offset -= delta;
- tick_length += delta;
+ delta = ntp_offset_chunk(ntpdata, ntpdata->time_offset);
+ ntpdata->time_offset -= delta;
+ ntpdata->tick_length += delta;
/* Check PPS signal */
- pps_dec_valid();
+ pps_dec_valid(ntpdata);
- if (!time_adjust)
+ if (!ntpdata->time_adjust)
goto out;
- if (time_adjust > MAX_TICKADJ) {
- time_adjust -= MAX_TICKADJ;
- tick_length += MAX_TICKADJ_SCALED;
+ if (ntpdata->time_adjust > MAX_TICKADJ) {
+ ntpdata->time_adjust -= MAX_TICKADJ;
+ ntpdata->tick_length += MAX_TICKADJ_SCALED;
goto out;
}
- if (time_adjust < -MAX_TICKADJ) {
- time_adjust += MAX_TICKADJ;
- tick_length -= MAX_TICKADJ_SCALED;
+ if (ntpdata->time_adjust < -MAX_TICKADJ) {
+ ntpdata->time_adjust += MAX_TICKADJ;
+ ntpdata->tick_length -= MAX_TICKADJ_SCALED;
goto out;
}
- tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
- << NTP_SCALE_SHIFT;
- time_adjust = 0;
+ ntpdata->tick_length += (s64)(ntpdata->time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
+ << NTP_SCALE_SHIFT;
+ ntpdata->time_adjust = 0;
out:
return leap;
@@ -611,6 +599,15 @@ static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_ns
}
#endif
+/**
+ * ntp_synced - Tells whether the NTP status is not UNSYNC
+ * Returns: true if not UNSYNC, false otherwise
+ */
+static inline bool ntp_synced(void)
+{
+ return !(tk_ntp_data.time_status & STA_UNSYNC);
+}
+
/*
* If we have an externally synchronized Linux clock, then update RTC clock
* accordingly every ~11 minutes. Generally RTCs can only store second
@@ -691,162 +688,156 @@ static inline void __init ntp_init_cmos_sync(void) { }
/*
* Propagate a new txc->status value into the NTP state:
*/
-static inline void process_adj_status(const struct __kernel_timex *txc)
+static inline void process_adj_status(struct ntp_data *ntpdata, const struct __kernel_timex *txc)
{
- if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
- time_state = TIME_OK;
- time_status = STA_UNSYNC;
- ntp_next_leap_sec = TIME64_MAX;
- /* restart PPS frequency calibration */
- pps_reset_freq_interval();
+ if ((ntpdata->time_status & STA_PLL) && !(txc->status & STA_PLL)) {
+ ntpdata->time_state = TIME_OK;
+ ntpdata->time_status = STA_UNSYNC;
+ ntpdata->ntp_next_leap_sec = TIME64_MAX;
+ /* Restart PPS frequency calibration */
+ pps_reset_freq_interval(ntpdata);
}
/*
* If we turn on PLL adjustments then reset the
* reference time to current time.
*/
- if (!(time_status & STA_PLL) && (txc->status & STA_PLL))
- time_reftime = __ktime_get_real_seconds();
+ if (!(ntpdata->time_status & STA_PLL) && (txc->status & STA_PLL))
+ ntpdata->time_reftime = __ktime_get_real_seconds();
/* only set allowed bits */
- time_status &= STA_RONLY;
- time_status |= txc->status & ~STA_RONLY;
+ ntpdata->time_status &= STA_RONLY;
+ ntpdata->time_status |= txc->status & ~STA_RONLY;
}
-
-static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
+static inline void process_adjtimex_modes(struct ntp_data *ntpdata, const struct __kernel_timex *txc,
s32 *time_tai)
{
if (txc->modes & ADJ_STATUS)
- process_adj_status(txc);
+ process_adj_status(ntpdata, txc);
if (txc->modes & ADJ_NANO)
- time_status |= STA_NANO;
+ ntpdata->time_status |= STA_NANO;
if (txc->modes & ADJ_MICRO)
- time_status &= ~STA_NANO;
+ ntpdata->time_status &= ~STA_NANO;
if (txc->modes & ADJ_FREQUENCY) {
- time_freq = txc->freq * PPM_SCALE;
- time_freq = min(time_freq, MAXFREQ_SCALED);
- time_freq = max(time_freq, -MAXFREQ_SCALED);
- /* update pps_freq */
- pps_set_freq(time_freq);
+ ntpdata->time_freq = txc->freq * PPM_SCALE;
+ ntpdata->time_freq = min(ntpdata->time_freq, MAXFREQ_SCALED);
+ ntpdata->time_freq = max(ntpdata->time_freq, -MAXFREQ_SCALED);
+ /* Update pps_freq */
+ pps_set_freq(ntpdata);
}
if (txc->modes & ADJ_MAXERROR)
- time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT);
+ ntpdata->time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT);
if (txc->modes & ADJ_ESTERROR)
- time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT);
+ ntpdata->time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT);
if (txc->modes & ADJ_TIMECONST) {
- time_constant = clamp(txc->constant, 0, MAXTC);
- if (!(time_status & STA_NANO))
- time_constant += 4;
- time_constant = clamp(time_constant, 0, MAXTC);
+ ntpdata->time_constant = clamp(txc->constant, 0, MAXTC);
+ if (!(ntpdata->time_status & STA_NANO))
+ ntpdata->time_constant += 4;
+ ntpdata->time_constant = clamp(ntpdata->time_constant, 0, MAXTC);
}
- if (txc->modes & ADJ_TAI &&
- txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET)
+ if (txc->modes & ADJ_TAI && txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
- ntp_update_offset(txc->offset);
+ ntp_update_offset(ntpdata, txc->offset);
if (txc->modes & ADJ_TICK)
- tick_usec = txc->tick;
+ ntpdata->tick_usec = txc->tick;
if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
- ntp_update_frequency();
+ ntp_update_frequency(ntpdata);
}
-
/*
- * adjtimex mainly allows reading (and writing, if superuser) of
+ * adjtimex() mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
s32 *time_tai, struct audit_ntp_data *ad)
{
+ struct ntp_data *ntpdata = &tk_ntp_data;
int result;
if (txc->modes & ADJ_ADJTIME) {
- long save_adjust = time_adjust;
+ long save_adjust = ntpdata->time_adjust;
if (!(txc->modes & ADJ_OFFSET_READONLY)) {
/* adjtime() is independent from ntp_adjtime() */
- time_adjust = txc->offset;
- ntp_update_frequency();
+ ntpdata->time_adjust = txc->offset;
+ ntp_update_frequency(ntpdata);
audit_ntp_set_old(ad, AUDIT_NTP_ADJUST, save_adjust);
- audit_ntp_set_new(ad, AUDIT_NTP_ADJUST, time_adjust);
+ audit_ntp_set_new(ad, AUDIT_NTP_ADJUST, ntpdata->time_adjust);
}
txc->offset = save_adjust;
} else {
/* If there are input parameters, then process them: */
if (txc->modes) {
- audit_ntp_set_old(ad, AUDIT_NTP_OFFSET, time_offset);
- audit_ntp_set_old(ad, AUDIT_NTP_FREQ, time_freq);
- audit_ntp_set_old(ad, AUDIT_NTP_STATUS, time_status);
+ audit_ntp_set_old(ad, AUDIT_NTP_OFFSET, ntpdata->time_offset);
+ audit_ntp_set_old(ad, AUDIT_NTP_FREQ, ntpdata->time_freq);
+ audit_ntp_set_old(ad, AUDIT_NTP_STATUS, ntpdata->time_status);
audit_ntp_set_old(ad, AUDIT_NTP_TAI, *time_tai);
- audit_ntp_set_old(ad, AUDIT_NTP_TICK, tick_usec);
+ audit_ntp_set_old(ad, AUDIT_NTP_TICK, ntpdata->tick_usec);
- process_adjtimex_modes(txc, time_tai);
+ process_adjtimex_modes(ntpdata, txc, time_tai);
- audit_ntp_set_new(ad, AUDIT_NTP_OFFSET, time_offset);
- audit_ntp_set_new(ad, AUDIT_NTP_FREQ, time_freq);
- audit_ntp_set_new(ad, AUDIT_NTP_STATUS, time_status);
+ audit_ntp_set_new(ad, AUDIT_NTP_OFFSET, ntpdata->time_offset);
+ audit_ntp_set_new(ad, AUDIT_NTP_FREQ, ntpdata->time_freq);
+ audit_ntp_set_new(ad, AUDIT_NTP_STATUS, ntpdata->time_status);
audit_ntp_set_new(ad, AUDIT_NTP_TAI, *time_tai);
- audit_ntp_set_new(ad, AUDIT_NTP_TICK, tick_usec);
+ audit_ntp_set_new(ad, AUDIT_NTP_TICK, ntpdata->tick_usec);
}
- txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
- NTP_SCALE_SHIFT);
- if (!(time_status & STA_NANO))
+ txc->offset = shift_right(ntpdata->time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT);
+ if (!(ntpdata->time_status & STA_NANO))
txc->offset = (u32)txc->offset / NSEC_PER_USEC;
}
- result = time_state; /* mostly `TIME_OK' */
- /* check for errors */
- if (is_error_status(time_status))
+ result = ntpdata->time_state;
+ if (is_error_status(ntpdata->time_status))
result = TIME_ERROR;
- txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
+ txc->freq = shift_right((ntpdata->time_freq >> PPM_SCALE_INV_SHIFT) *
PPM_SCALE_INV, NTP_SCALE_SHIFT);
- txc->maxerror = time_maxerror;
- txc->esterror = time_esterror;
- txc->status = time_status;
- txc->constant = time_constant;
+ txc->maxerror = ntpdata->time_maxerror;
+ txc->esterror = ntpdata->time_esterror;
+ txc->status = ntpdata->time_status;
+ txc->constant = ntpdata->time_constant;
txc->precision = 1;
txc->tolerance = MAXFREQ_SCALED / PPM_SCALE;
- txc->tick = tick_usec;
+ txc->tick = ntpdata->tick_usec;
txc->tai = *time_tai;
- /* fill PPS status fields */
- pps_fill_timex(txc);
+ /* Fill PPS status fields */
+ pps_fill_timex(ntpdata, txc);
txc->time.tv_sec = ts->tv_sec;
txc->time.tv_usec = ts->tv_nsec;
- if (!(time_status & STA_NANO))
+ if (!(ntpdata->time_status & STA_NANO))
txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC;
/* Handle leapsec adjustments */
- if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) {
- if ((time_state == TIME_INS) && (time_status & STA_INS)) {
+ if (unlikely(ts->tv_sec >= ntpdata->ntp_next_leap_sec)) {
+ if ((ntpdata->time_state == TIME_INS) && (ntpdata->time_status & STA_INS)) {
result = TIME_OOP;
txc->tai++;
txc->time.tv_sec--;
}
- if ((time_state == TIME_DEL) && (time_status & STA_DEL)) {
+ if ((ntpdata->time_state == TIME_DEL) && (ntpdata->time_status & STA_DEL)) {
result = TIME_WAIT;
txc->tai--;
txc->time.tv_sec++;
}
- if ((time_state == TIME_OOP) &&
- (ts->tv_sec == ntp_next_leap_sec)) {
+ if ((ntpdata->time_state == TIME_OOP) && (ts->tv_sec == ntpdata->ntp_next_leap_sec))
result = TIME_WAIT;
- }
}
return result;
@@ -854,17 +845,21 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
#ifdef CONFIG_NTP_PPS
-/* actually struct pps_normtime is good old struct timespec, but it is
+/*
+ * struct pps_normtime is basically a struct timespec, but it is
* semantically different (and it is the reason why it was invented):
* pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
- * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
+ * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC)
+ */
struct pps_normtime {
s64 sec; /* seconds */
long nsec; /* nanoseconds */
};
-/* normalize the timestamp so that nsec is in the
- ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
+/*
+ * Normalize the timestamp so that nsec is in the
+ * [ -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval
+ */
static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts)
{
struct pps_normtime norm = {
@@ -880,54 +875,57 @@ static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts)
return norm;
}
-/* get current phase correction and jitter */
-static inline long pps_phase_filter_get(long *jitter)
+/* Get current phase correction and jitter */
+static inline long pps_phase_filter_get(struct ntp_data *ntpdata, long *jitter)
{
- *jitter = pps_tf[0] - pps_tf[1];
+ *jitter = ntpdata->pps_tf[0] - ntpdata->pps_tf[1];
if (*jitter < 0)
*jitter = -*jitter;
/* TODO: test various filters */
- return pps_tf[0];
+ return ntpdata->pps_tf[0];
}
-/* add the sample to the phase filter */
-static inline void pps_phase_filter_add(long err)
+/* Add the sample to the phase filter */
+static inline void pps_phase_filter_add(struct ntp_data *ntpdata, long err)
{
- pps_tf[2] = pps_tf[1];
- pps_tf[1] = pps_tf[0];
- pps_tf[0] = err;
+ ntpdata->pps_tf[2] = ntpdata->pps_tf[1];
+ ntpdata->pps_tf[1] = ntpdata->pps_tf[0];
+ ntpdata->pps_tf[0] = err;
}
-/* decrease frequency calibration interval length.
- * It is halved after four consecutive unstable intervals.
+/*
+ * Decrease frequency calibration interval length. It is halved after four
+ * consecutive unstable intervals.
*/
-static inline void pps_dec_freq_interval(void)
+static inline void pps_dec_freq_interval(struct ntp_data *ntpdata)
{
- if (--pps_intcnt <= -PPS_INTCOUNT) {
- pps_intcnt = -PPS_INTCOUNT;
- if (pps_shift > PPS_INTMIN) {
- pps_shift--;
- pps_intcnt = 0;
+ if (--ntpdata->pps_intcnt <= -PPS_INTCOUNT) {
+ ntpdata->pps_intcnt = -PPS_INTCOUNT;
+ if (ntpdata->pps_shift > PPS_INTMIN) {
+ ntpdata->pps_shift--;
+ ntpdata->pps_intcnt = 0;
}
}
}
-/* increase frequency calibration interval length.
- * It is doubled after four consecutive stable intervals.
+/*
+ * Increase frequency calibration interval length. It is doubled after
+ * four consecutive stable intervals.
*/
-static inline void pps_inc_freq_interval(void)
+static inline void pps_inc_freq_interval(struct ntp_data *ntpdata)
{
- if (++pps_intcnt >= PPS_INTCOUNT) {
- pps_intcnt = PPS_INTCOUNT;
- if (pps_shift < PPS_INTMAX) {
- pps_shift++;
- pps_intcnt = 0;
+ if (++ntpdata->pps_intcnt >= PPS_INTCOUNT) {
+ ntpdata->pps_intcnt = PPS_INTCOUNT;
+ if (ntpdata->pps_shift < PPS_INTMAX) {
+ ntpdata->pps_shift++;
+ ntpdata->pps_intcnt = 0;
}
}
}
-/* update clock frequency based on MONOTONIC_RAW clock PPS signal
+/*
+ * Update clock frequency based on MONOTONIC_RAW clock PPS signal
* timestamps
*
* At the end of the calibration interval the difference between the
@@ -936,90 +934,88 @@ static inline void pps_inc_freq_interval(void)
* too long, the data are discarded.
* Returns the difference between old and new frequency values.
*/
-static long hardpps_update_freq(struct pps_normtime freq_norm)
+static long hardpps_update_freq(struct ntp_data *ntpdata, struct pps_normtime freq_norm)
{
long delta, delta_mod;
s64 ftemp;
- /* check if the frequency interval was too long */
- if (freq_norm.sec > (2 << pps_shift)) {
- time_status |= STA_PPSERROR;
- pps_errcnt++;
- pps_dec_freq_interval();
- printk_deferred(KERN_ERR
- "hardpps: PPSERROR: interval too long - %lld s\n",
- freq_norm.sec);
+ /* Check if the frequency interval was too long */
+ if (freq_norm.sec > (2 << ntpdata->pps_shift)) {
+ ntpdata->time_status |= STA_PPSERROR;
+ ntpdata->pps_errcnt++;
+ pps_dec_freq_interval(ntpdata);
+ printk_deferred(KERN_ERR "hardpps: PPSERROR: interval too long - %lld s\n",
+ freq_norm.sec);
return 0;
}
- /* here the raw frequency offset and wander (stability) is
- * calculated. If the wander is less than the wander threshold
- * the interval is increased; otherwise it is decreased.
+ /*
+ * Here the raw frequency offset and wander (stability) is
+ * calculated. If the wander is less than the wander threshold the
+ * interval is increased; otherwise it is decreased.
*/
ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT,
freq_norm.sec);
- delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
- pps_freq = ftemp;
+ delta = shift_right(ftemp - ntpdata->pps_freq, NTP_SCALE_SHIFT);
+ ntpdata->pps_freq = ftemp;
if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
- printk_deferred(KERN_WARNING
- "hardpps: PPSWANDER: change=%ld\n", delta);
- time_status |= STA_PPSWANDER;
- pps_stbcnt++;
- pps_dec_freq_interval();
- } else { /* good sample */
- pps_inc_freq_interval();
+ printk_deferred(KERN_WARNING "hardpps: PPSWANDER: change=%ld\n", delta);
+ ntpdata->time_status |= STA_PPSWANDER;
+ ntpdata->pps_stbcnt++;
+ pps_dec_freq_interval(ntpdata);
+ } else {
+ /* Good sample */
+ pps_inc_freq_interval(ntpdata);
}
- /* the stability metric is calculated as the average of recent
- * frequency changes, but is used only for performance
- * monitoring
+ /*
+ * The stability metric is calculated as the average of recent
+ * frequency changes, but is used only for performance monitoring
*/
delta_mod = delta;
if (delta_mod < 0)
delta_mod = -delta_mod;
- pps_stabil += (div_s64(((s64)delta_mod) <<
- (NTP_SCALE_SHIFT - SHIFT_USEC),
- NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN;
-
- /* if enabled, the system clock frequency is updated */
- if ((time_status & STA_PPSFREQ) != 0 &&
- (time_status & STA_FREQHOLD) == 0) {
- time_freq = pps_freq;
- ntp_update_frequency();
+ ntpdata->pps_stabil += (div_s64(((s64)delta_mod) << (NTP_SCALE_SHIFT - SHIFT_USEC),
+ NSEC_PER_USEC) - ntpdata->pps_stabil) >> PPS_INTMIN;
+
+ /* If enabled, the system clock frequency is updated */
+ if ((ntpdata->time_status & STA_PPSFREQ) && !(ntpdata->time_status & STA_FREQHOLD)) {
+ ntpdata->time_freq = ntpdata->pps_freq;
+ ntp_update_frequency(ntpdata);
}
return delta;
}
-/* correct REALTIME clock phase error against PPS signal */
-static void hardpps_update_phase(long error)
+/* Correct REALTIME clock phase error against PPS signal */
+static void hardpps_update_phase(struct ntp_data *ntpdata, long error)
{
long correction = -error;
long jitter;
- /* add the sample to the median filter */
- pps_phase_filter_add(correction);
- correction = pps_phase_filter_get(&jitter);
+ /* Add the sample to the median filter */
+ pps_phase_filter_add(ntpdata, correction);
+ correction = pps_phase_filter_get(ntpdata, &jitter);
- /* Nominal jitter is due to PPS signal noise. If it exceeds the
+ /*
+ * Nominal jitter is due to PPS signal noise. If it exceeds the
* threshold, the sample is discarded; otherwise, if so enabled,
* the time offset is updated.
*/
- if (jitter > (pps_jitter << PPS_POPCORN)) {
- printk_deferred(KERN_WARNING
- "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
- jitter, (pps_jitter << PPS_POPCORN));
- time_status |= STA_PPSJITTER;
- pps_jitcnt++;
- } else if (time_status & STA_PPSTIME) {
- /* correct the time using the phase offset */
- time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
- NTP_INTERVAL_FREQ);
- /* cancel running adjtime() */
- time_adjust = 0;
+ if (jitter > (ntpdata->pps_jitter << PPS_POPCORN)) {
+ printk_deferred(KERN_WARNING "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
+ jitter, (ntpdata->pps_jitter << PPS_POPCORN));
+ ntpdata->time_status |= STA_PPSJITTER;
+ ntpdata->pps_jitcnt++;
+ } else if (ntpdata->time_status & STA_PPSTIME) {
+ /* Correct the time using the phase offset */
+ ntpdata->time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
+ NTP_INTERVAL_FREQ);
+ /* Cancel running adjtime() */
+ ntpdata->time_adjust = 0;
}
- /* update jitter */
- pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN;
+ /* Update jitter */
+ ntpdata->pps_jitter += (jitter - ntpdata->pps_jitter) >> PPS_INTMIN;
}
/*
@@ -1037,60 +1033,62 @@ static void hardpps_update_phase(long error)
void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
struct pps_normtime pts_norm, freq_norm;
+ struct ntp_data *ntpdata = &tk_ntp_data;
pts_norm = pps_normalize_ts(*phase_ts);
- /* clear the error bits, they will be set again if needed */
- time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
+ /* Clear the error bits, they will be set again if needed */
+ ntpdata->time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
/* indicate signal presence */
- time_status |= STA_PPSSIGNAL;
- pps_valid = PPS_VALID;
+ ntpdata->time_status |= STA_PPSSIGNAL;
+ ntpdata->pps_valid = PPS_VALID;
- /* when called for the first time,
- * just start the frequency interval */
- if (unlikely(pps_fbase.tv_sec == 0)) {
- pps_fbase = *raw_ts;
+ /*
+ * When called for the first time, just start the frequency
+ * interval
+ */
+ if (unlikely(ntpdata->pps_fbase.tv_sec == 0)) {
+ ntpdata->pps_fbase = *raw_ts;
return;
}
- /* ok, now we have a base for frequency calculation */
- freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, pps_fbase));
-
- /* check that the signal is in the range
- * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
- if ((freq_norm.sec == 0) ||
- (freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
- (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
- time_status |= STA_PPSJITTER;
- /* restart the frequency calibration interval */
- pps_fbase = *raw_ts;
+ /* Ok, now we have a base for frequency calculation */
+ freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, ntpdata->pps_fbase));
+
+ /*
+ * Check that the signal is in the range
+ * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it
+ */
+ if ((freq_norm.sec == 0) || (freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
+ (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
+ ntpdata->time_status |= STA_PPSJITTER;
+ /* Restart the frequency calibration interval */
+ ntpdata->pps_fbase = *raw_ts;
printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n");
return;
}
- /* signal is ok */
-
- /* check if the current frequency interval is finished */
- if (freq_norm.sec >= (1 << pps_shift)) {
- pps_calcnt++;
- /* restart the frequency calibration interval */
- pps_fbase = *raw_ts;
- hardpps_update_freq(freq_norm);
+ /* Signal is ok. Check if the current frequency interval is finished */
+ if (freq_norm.sec >= (1 << ntpdata->pps_shift)) {
+ ntpdata->pps_calcnt++;
+ /* Restart the frequency calibration interval */
+ ntpdata->pps_fbase = *raw_ts;
+ hardpps_update_freq(ntpdata, freq_norm);
}
- hardpps_update_phase(pts_norm.nsec);
+ hardpps_update_phase(ntpdata, pts_norm.nsec);
}
#endif /* CONFIG_NTP_PPS */
static int __init ntp_tick_adj_setup(char *str)
{
- int rc = kstrtos64(str, 0, &ntp_tick_adj);
+ int rc = kstrtos64(str, 0, &tk_ntp_data.ntp_tick_adj);
if (rc)
return rc;
- ntp_tick_adj <<= NTP_SCALE_SHIFT;
+ tk_ntp_data.ntp_tick_adj <<= NTP_SCALE_SHIFT;
return 1;
}
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 6bcee4704059..50e8d04ab661 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -453,7 +453,6 @@ static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
struct cpu_timer *ctmr = &timer->it.cpu;
struct posix_cputimer_base *base;
- timer->it_active = 0;
if (!cpu_timer_dequeue(ctmr))
return;
@@ -494,19 +493,28 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
*/
WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
} else {
- if (timer->it.cpu.firing)
+ if (timer->it.cpu.firing) {
+ /*
+ * Prevent signal delivery. The timer cannot be dequeued
+ * because it is on the firing list which is not protected
+ * by sighand->lock. The delivery path is waiting for
+ * the timer lock. So go back, unlock and retry.
+ */
+ timer->it.cpu.firing = false;
ret = TIMER_RETRY;
- else
+ } else {
disarm_timer(timer, p);
-
+ }
unlock_task_sighand(p, &flags);
}
out:
rcu_read_unlock();
- if (!ret)
- put_pid(ctmr->pid);
+ if (!ret) {
+ put_pid(ctmr->pid);
+ timer->it_status = POSIX_TIMER_DISARMED;
+ }
return ret;
}
@@ -560,7 +568,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
struct cpu_timer *ctmr = &timer->it.cpu;
u64 newexp = cpu_timer_getexpires(ctmr);
- timer->it_active = 1;
+ timer->it_status = POSIX_TIMER_ARMED;
if (!cpu_timer_enqueue(&base->tqhead, ctmr))
return;
@@ -586,29 +594,20 @@ static void cpu_timer_fire(struct k_itimer *timer)
{
struct cpu_timer *ctmr = &timer->it.cpu;
- timer->it_active = 0;
- if (unlikely(timer->sigq == NULL)) {
+ timer->it_status = POSIX_TIMER_DISARMED;
+
+ if (unlikely(ctmr->nanosleep)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
*/
wake_up_process(timer->it_process);
cpu_timer_setexpires(ctmr, 0);
- } else if (!timer->it_interval) {
- /*
- * One-shot timer. Clear it as soon as it's fired.
- */
+ } else {
posix_timer_queue_signal(timer);
- cpu_timer_setexpires(ctmr, 0);
- } else if (posix_timer_queue_signal(timer)) {
- /*
- * The signal did not get queued because the signal
- * was ignored, so we won't get any callback to
- * reload the timer. But we need to keep it
- * ticking in case the signal is deliverable next time.
- */
- posix_cpu_timer_rearm(timer);
- ++timer->it_requeue_pending;
+ /* Disable oneshot timers */
+ if (!timer->it_interval)
+ cpu_timer_setexpires(ctmr, 0);
}
}
@@ -667,11 +666,17 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
old_expires = cpu_timer_getexpires(ctmr);
if (unlikely(timer->it.cpu.firing)) {
- timer->it.cpu.firing = -1;
+ /*
+ * Prevent signal delivery. The timer cannot be dequeued
+ * because it is on the firing list which is not protected
+ * by sighand->lock. The delivery path is waiting for
+ * the timer lock. So go back, unlock and retry.
+ */
+ timer->it.cpu.firing = false;
ret = TIMER_RETRY;
} else {
cpu_timer_dequeue(ctmr);
- timer->it_active = 0;
+ timer->it_status = POSIX_TIMER_DISARMED;
}
/*
@@ -745,7 +750,7 @@ static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *i
* - Timers which expired, but the signal has not yet been
* delivered
*/
- if (iv && ((timer->it_requeue_pending & REQUEUE_PENDING) || sigev_none))
+ if (iv && timer->it_status != POSIX_TIMER_ARMED)
expires = bump_cpu_timer(timer, now);
else
expires = cpu_timer_getexpires(&timer->it.cpu);
@@ -808,7 +813,7 @@ static u64 collect_timerqueue(struct timerqueue_head *head,
if (++i == MAX_COLLECTED || now < expires)
return expires;
- ctmr->firing = 1;
+ ctmr->firing = true;
/* See posix_cpu_timer_wait_running() */
rcu_assign_pointer(ctmr->handling, current);
cpu_timer_dequeue(ctmr);
@@ -1363,7 +1368,7 @@ static void handle_posix_cpu_timers(struct task_struct *tsk)
* timer call will interfere.
*/
list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
- int cpu_firing;
+ bool cpu_firing;
/*
* spin_lock() is sufficient here even independent of the
@@ -1375,13 +1380,13 @@ static void handle_posix_cpu_timers(struct task_struct *tsk)
spin_lock(&timer->it_lock);
list_del_init(&timer->it.cpu.elist);
cpu_firing = timer->it.cpu.firing;
- timer->it.cpu.firing = 0;
+ timer->it.cpu.firing = false;
/*
- * The firing flag is -1 if we collided with a reset
- * of the timer, which already reported this
- * almost-firing as an overrun. So don't generate an event.
+ * If the firing flag is cleared then this raced with a
+ * timer rearm/delete operation. So don't generate an
+ * event.
*/
- if (likely(cpu_firing >= 0))
+ if (likely(cpu_firing))
cpu_timer_fire(timer);
/* See posix_cpu_timer_wait_running() */
rcu_assign_pointer(timer->it.cpu.handling, NULL);
@@ -1478,6 +1483,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
timer.it_overrun = -1;
error = posix_cpu_timer_create(&timer);
timer.it_process = current;
+ timer.it.cpu.nanosleep = true;
if (!error) {
static struct itimerspec64 zero_it;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 4576aaed13b2..881a9ce96af7 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -233,11 +233,12 @@ __initcall(init_posix_timers);
* The siginfo si_overrun field and the return value of timer_getoverrun(2)
* are of type int. Clamp the overrun value to INT_MAX
*/
-static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
+static inline int timer_overrun_to_int(struct k_itimer *timr)
{
- s64 sum = timr->it_overrun_last + (s64)baseval;
+ if (timr->it_overrun_last > (s64)INT_MAX)
+ return INT_MAX;
- return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
+ return (int)timr->it_overrun_last;
}
static void common_hrtimer_rearm(struct k_itimer *timr)
@@ -249,62 +250,62 @@ static void common_hrtimer_rearm(struct k_itimer *timr)
hrtimer_restart(timer);
}
+static bool __posixtimer_deliver_signal(struct kernel_siginfo *info, struct k_itimer *timr)
+{
+ guard(spinlock)(&timr->it_lock);
+
+ /*
+ * Check if the timer is still alive or whether it got modified
+ * since the signal was queued. In either case, don't rearm and
+ * drop the signal.
+ */
+ if (timr->it_signal_seq != timr->it_sigqueue_seq || WARN_ON_ONCE(!timr->it_signal))
+ return false;
+
+ if (!timr->it_interval || WARN_ON_ONCE(timr->it_status != POSIX_TIMER_REQUEUE_PENDING))
+ return true;
+
+ timr->kclock->timer_rearm(timr);
+ timr->it_status = POSIX_TIMER_ARMED;
+ timr->it_overrun_last = timr->it_overrun;
+ timr->it_overrun = -1LL;
+ ++timr->it_signal_seq;
+ info->si_overrun = timer_overrun_to_int(timr);
+ return true;
+}
+
/*
- * This function is called from the signal delivery code if
- * info->si_sys_private is not zero, which indicates that the timer has to
- * be rearmed. Restart the timer and update info::si_overrun.
+ * This function is called from the signal delivery code. It decides
+ * whether the signal should be dropped and rearms interval timers. The
+ * timer can be unconditionally accessed as there is a reference held on
+ * it.
*/
-void posixtimer_rearm(struct kernel_siginfo *info)
+bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq)
{
- struct k_itimer *timr;
- unsigned long flags;
-
- timr = lock_timer(info->si_tid, &flags);
- if (!timr)
- return;
+ struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq);
+ bool ret;
- if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) {
- timr->kclock->timer_rearm(timr);
+ /*
+ * Release siglock to ensure proper locking order versus
+ * timr::it_lock. Keep interrupts disabled.
+ */
+ spin_unlock(&current->sighand->siglock);
- timr->it_active = 1;
- timr->it_overrun_last = timr->it_overrun;
- timr->it_overrun = -1LL;
- ++timr->it_requeue_pending;
+ ret = __posixtimer_deliver_signal(info, timr);
- info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
- }
+ /* Drop the reference which was acquired when the signal was queued */
+ posixtimer_putref(timr);
- unlock_timer(timr, flags);
+ spin_lock(&current->sighand->siglock);
+ return ret;
}
-int posix_timer_queue_signal(struct k_itimer *timr)
+void posix_timer_queue_signal(struct k_itimer *timr)
{
- int ret, si_private = 0;
- enum pid_type type;
-
lockdep_assert_held(&timr->it_lock);
- timr->it_active = 0;
- if (timr->it_interval)
- si_private = ++timr->it_requeue_pending;
-
- /*
- * FIXME: if ->sigq is queued we can race with
- * dequeue_signal()->posixtimer_rearm().
- *
- * If dequeue_signal() sees the "right" value of
- * si_sys_private it calls posixtimer_rearm().
- * We re-queue ->sigq and drop ->it_lock().
- * posixtimer_rearm() locks the timer
- * and re-schedules it while ->sigq is pending.
- * Not really bad, but not that we want.
- */
- timr->sigq->info.si_sys_private = si_private;
-
- type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
- ret = send_sigqueue(timr->sigq, timr->it_pid, type);
- /* If we failed to send the signal the timer stops. */
- return ret > 0;
+ timr->it_status = timr->it_interval ? POSIX_TIMER_REQUEUE_PENDING : POSIX_TIMER_DISARMED;
+ posixtimer_send_sigqueue(timr);
}
/*
@@ -317,62 +318,10 @@ int posix_timer_queue_signal(struct k_itimer *timr)
static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
{
struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);
- enum hrtimer_restart ret = HRTIMER_NORESTART;
- unsigned long flags;
-
- spin_lock_irqsave(&timr->it_lock, flags);
-
- if (posix_timer_queue_signal(timr)) {
- /*
- * The signal was not queued due to SIG_IGN. As a
- * consequence the timer is not going to be rearmed from
- * the signal delivery path. But as a real signal handler
- * can be installed later the timer must be rearmed here.
- */
- if (timr->it_interval != 0) {
- ktime_t now = hrtimer_cb_get_time(timer);
-
- /*
- * FIXME: What we really want, is to stop this
- * timer completely and restart it in case the
- * SIG_IGN is removed. This is a non trivial
- * change to the signal handling code.
- *
- * For now let timers with an interval less than a
- * jiffy expire every jiffy and recheck for a
- * valid signal handler.
- *
- * This avoids interrupt starvation in case of a
- * very small interval, which would expire the
- * timer immediately again.
- *
- * Moving now ahead of time by one jiffy tricks
- * hrtimer_forward() to expire the timer later,
- * while it still maintains the overrun accuracy
- * for the price of a slight inconsistency in the
- * timer_gettime() case. This is at least better
- * than a timer storm.
- *
- * Only required when high resolution timers are
- * enabled as the periodic tick based timers are
- * automatically aligned to the next tick.
- */
- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) {
- ktime_t kj = TICK_NSEC;
-
- if (timr->it_interval < kj)
- now = ktime_add(now, kj);
- }
-
- timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval);
- ret = HRTIMER_RESTART;
- ++timr->it_requeue_pending;
- timr->it_active = 1;
- }
- }
- unlock_timer(timr, flags);
- return ret;
+ guard(spinlock_irqsave)(&timr->it_lock);
+ posix_timer_queue_signal(timr);
+ return HRTIMER_NORESTART;
}
static struct pid *good_sigevent(sigevent_t * event)
@@ -399,32 +348,27 @@ static struct pid *good_sigevent(sigevent_t * event)
}
}
-static struct k_itimer * alloc_posix_timer(void)
+static struct k_itimer *alloc_posix_timer(void)
{
struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
if (!tmr)
return tmr;
- if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
+
+ if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) {
kmem_cache_free(posix_timers_cache, tmr);
return NULL;
}
- clear_siginfo(&tmr->sigq->info);
+ rcuref_init(&tmr->rcuref, 1);
return tmr;
}
-static void k_itimer_rcu_free(struct rcu_head *head)
-{
- struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
-
- kmem_cache_free(posix_timers_cache, tmr);
-}
-
-static void posix_timer_free(struct k_itimer *tmr)
+void posixtimer_free_timer(struct k_itimer *tmr)
{
put_pid(tmr->it_pid);
- sigqueue_free(tmr->sigq);
- call_rcu(&tmr->rcu, k_itimer_rcu_free);
+ if (tmr->sigq.ucounts)
+ dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING);
+ kfree_rcu(tmr, rcu);
}
static void posix_timer_unhash_and_free(struct k_itimer *tmr)
@@ -432,7 +376,7 @@ static void posix_timer_unhash_and_free(struct k_itimer *tmr)
spin_lock(&hash_lock);
hlist_del_rcu(&tmr->t_hash);
spin_unlock(&hash_lock);
- posix_timer_free(tmr);
+ posixtimer_putref(tmr);
}
static int common_timer_create(struct k_itimer *new_timer)
@@ -467,7 +411,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
*/
new_timer_id = posix_timer_add(new_timer);
if (new_timer_id < 0) {
- posix_timer_free(new_timer);
+ posixtimer_free_timer(new_timer);
return new_timer_id;
}
@@ -485,18 +429,23 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
goto out;
}
new_timer->it_sigev_notify = event->sigev_notify;
- new_timer->sigq->info.si_signo = event->sigev_signo;
- new_timer->sigq->info.si_value = event->sigev_value;
+ new_timer->sigq.info.si_signo = event->sigev_signo;
+ new_timer->sigq.info.si_value = event->sigev_value;
} else {
new_timer->it_sigev_notify = SIGEV_SIGNAL;
- new_timer->sigq->info.si_signo = SIGALRM;
- memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t));
- new_timer->sigq->info.si_value.sival_int = new_timer->it_id;
+ new_timer->sigq.info.si_signo = SIGALRM;
+ memset(&new_timer->sigq.info.si_value, 0, sizeof(sigval_t));
+ new_timer->sigq.info.si_value.sival_int = new_timer->it_id;
new_timer->it_pid = get_pid(task_tgid(current));
}
- new_timer->sigq->info.si_tid = new_timer->it_id;
- new_timer->sigq->info.si_code = SI_TIMER;
+ if (new_timer->it_sigev_notify & SIGEV_THREAD_ID)
+ new_timer->it_pid_type = PIDTYPE_PID;
+ else
+ new_timer->it_pid_type = PIDTYPE_TGID;
+
+ new_timer->sigq.info.si_tid = new_timer->it_id;
+ new_timer->sigq.info.si_code = SI_TIMER;
if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
error = -EFAULT;
@@ -580,7 +529,14 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
* 1) Set timr::it_signal to NULL with timr::it_lock held
* 2) Release timr::it_lock
* 3) Remove from the hash under hash_lock
- * 4) Call RCU for removal after the grace period
+ * 4) Put the reference count.
+ *
+ * The reference count might not drop to zero if timr::sigq is
+ * queued. In that case the signal delivery or flush will put the
+ * last reference count.
+ *
+ * When the reference count reaches zero, the timer is scheduled
+ * for RCU removal after the grace period.
*
* Holding rcu_read_lock() accross the lookup ensures that
* the timer cannot be freed.
@@ -647,10 +603,10 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
/* interval timer ? */
if (iv) {
cur_setting->it_interval = ktime_to_timespec64(iv);
- } else if (!timr->it_active) {
+ } else if (timr->it_status == POSIX_TIMER_DISARMED) {
/*
* SIGEV_NONE oneshot timers are never queued and therefore
- * timr->it_active is always false. The check below
+ * timr->it_status is always DISARMED. The check below
* vs. remaining time will handle this case.
*
* For all other timers there is nothing to update here, so
@@ -667,7 +623,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
* is a SIGEV_NONE timer move the expiry time forward by intervals,
* so expiry is > now.
*/
- if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none))
+ if (iv && timr->it_status != POSIX_TIMER_ARMED)
timr->it_overrun += kc->timer_forward(timr, now);
remaining = kc->timer_remaining(timr, now);
@@ -775,7 +731,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
if (!timr)
return -EINVAL;
- overrun = timer_overrun_to_int(timr, 0);
+ overrun = timer_overrun_to_int(timr);
unlock_timer(timr, flags);
return overrun;
@@ -867,8 +823,6 @@ void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_set
else
timer->it_interval = 0;
- /* Prevent reloading in case there is a signal pending */
- timer->it_requeue_pending = (timer->it_requeue_pending + 2) & ~REQUEUE_PENDING;
/* Reset overrun accounting */
timer->it_overrun_last = 0;
timer->it_overrun = -1LL;
@@ -886,8 +840,6 @@ int common_timer_set(struct k_itimer *timr, int flags,
if (old_setting)
common_timer_get(timr, old_setting);
- /* Prevent rearming by clearing the interval */
- timr->it_interval = 0;
/*
* Careful here. On SMP systems the timer expiry function could be
* active and spinning on timr->it_lock.
@@ -895,7 +847,7 @@ int common_timer_set(struct k_itimer *timr, int flags,
if (kc->timer_try_to_cancel(timr) < 0)
return TIMER_RETRY;
- timr->it_active = 0;
+ timr->it_status = POSIX_TIMER_DISARMED;
posix_timer_set_common(timr, new_setting);
/* Keep timer disarmed when it_value is zero */
@@ -908,7 +860,8 @@ int common_timer_set(struct k_itimer *timr, int flags,
sigev_none = timr->it_sigev_notify == SIGEV_NONE;
kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
- timr->it_active = !sigev_none;
+ if (!sigev_none)
+ timr->it_status = POSIX_TIMER_ARMED;
return 0;
}
@@ -936,6 +889,9 @@ retry:
if (old_spec64)
old_spec64->it_interval = ktime_to_timespec64(timr->it_interval);
+ /* Prevent signal delivery and rearming. */
+ timr->it_signal_seq++;
+
kc = timr->kclock;
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
@@ -1004,17 +960,31 @@ int common_timer_del(struct k_itimer *timer)
{
const struct k_clock *kc = timer->kclock;
- timer->it_interval = 0;
if (kc->timer_try_to_cancel(timer) < 0)
return TIMER_RETRY;
- timer->it_active = 0;
+ timer->it_status = POSIX_TIMER_DISARMED;
return 0;
}
+/*
+ * If the deleted timer is on the ignored list, remove it and
+ * drop the associated reference.
+ */
+static inline void posix_timer_cleanup_ignored(struct k_itimer *tmr)
+{
+ if (!hlist_unhashed(&tmr->ignored_list)) {
+ hlist_del_init(&tmr->ignored_list);
+ posixtimer_putref(tmr);
+ }
+}
+
static inline int timer_delete_hook(struct k_itimer *timer)
{
const struct k_clock *kc = timer->kclock;
+ /* Prevent signal delivery and rearming. */
+ timer->it_signal_seq++;
+
if (WARN_ON_ONCE(!kc || !kc->timer_del))
return -EINVAL;
return kc->timer_del(timer);
@@ -1040,12 +1010,18 @@ retry_delete:
spin_lock(&current->sighand->siglock);
hlist_del(&timer->list);
- spin_unlock(&current->sighand->siglock);
+ posix_timer_cleanup_ignored(timer);
/*
* A concurrent lookup could check timer::it_signal lockless. It
* will reevaluate with timer::it_lock held and observe the NULL.
+ *
+ * It must be written with siglock held so that the signal code
+ * observes timer->it_signal == NULL in do_sigaction(SIG_IGN),
+ * which prevents it from moving a pending signal of a deleted
+ * timer to the ignore list.
*/
WRITE_ONCE(timer->it_signal, NULL);
+ spin_unlock(&current->sighand->siglock);
unlock_timer(timer, flags);
posix_timer_unhash_and_free(timer);
@@ -1091,6 +1067,8 @@ retry_delete:
}
hlist_del(&timer->list);
+ posix_timer_cleanup_ignored(timer);
+
/*
* Setting timer::it_signal to NULL is technically not required
* here as nothing can access the timer anymore legitimately via
@@ -1123,6 +1101,19 @@ void exit_itimers(struct task_struct *tsk)
/* The timers are not longer accessible via tsk::signal */
while (!hlist_empty(&timers))
itimer_delete(hlist_entry(timers.first, struct k_itimer, list));
+
+ /*
+ * There should be no timers on the ignored list. itimer_delete() has
+ * mopped them up.
+ */
+ if (!WARN_ON_ONCE(!hlist_empty(&tsk->signal->ignored_posix_timers)))
+ return;
+
+ hlist_move_list(&tsk->signal->ignored_posix_timers, &timers);
+ while (!hlist_empty(&timers)) {
+ posix_timer_cleanup_ignored(hlist_entry(timers.first, struct k_itimer,
+ ignored_list));
+ }
}
SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index 4784ea65f685..61906f0688c1 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -1,6 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#define TIMER_RETRY 1
+enum posix_timer_state {
+ POSIX_TIMER_DISARMED,
+ POSIX_TIMER_ARMED,
+ POSIX_TIMER_REQUEUE_PENDING,
+};
+
struct k_clock {
int (*clock_getres)(const clockid_t which_clock,
struct timespec64 *tp);
@@ -36,7 +42,7 @@ extern const struct k_clock clock_process;
extern const struct k_clock clock_thread;
extern const struct k_clock alarm_clock;
-int posix_timer_queue_signal(struct k_itimer *timr);
+void posix_timer_queue_signal(struct k_itimer *timr);
void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
int common_timer_set(struct k_itimer *timr, int flags,
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 68d6c1190ac7..fcca4e72f1ef 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -71,16 +71,16 @@ static __always_inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
notrace struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
{
- *seq = raw_read_seqcount_latch(&cd.seq);
+ *seq = read_seqcount_latch(&cd.seq);
return cd.read_data + (*seq & 1);
}
notrace int sched_clock_read_retry(unsigned int seq)
{
- return raw_read_seqcount_latch_retry(&cd.seq, seq);
+ return read_seqcount_latch_retry(&cd.seq, seq);
}
-unsigned long long noinstr sched_clock_noinstr(void)
+static __always_inline unsigned long long __sched_clock(void)
{
struct clock_read_data *rd;
unsigned int seq;
@@ -98,11 +98,23 @@ unsigned long long noinstr sched_clock_noinstr(void)
return res;
}
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+ return __sched_clock();
+}
+
unsigned long long notrace sched_clock(void)
{
unsigned long long ns;
preempt_disable_notrace();
- ns = sched_clock_noinstr();
+ /*
+ * All of __sched_clock() is a seqcount_latch reader critical section,
+ * but relies on the raw helpers which are uninstrumented. For KCSAN,
+ * mark all accesses in __sched_clock() as atomic.
+ */
+ kcsan_nestable_atomic_begin();
+ ns = __sched_clock();
+ kcsan_nestable_atomic_end();
preempt_enable_notrace();
return ns;
}
@@ -119,17 +131,19 @@ unsigned long long notrace sched_clock(void)
*/
static void update_clock_read_data(struct clock_read_data *rd)
{
- /* update the backup (odd) copy with the new data */
- cd.read_data[1] = *rd;
-
/* steer readers towards the odd copy */
- raw_write_seqcount_latch(&cd.seq);
+ write_seqcount_latch_begin(&cd.seq);
/* now its safe for us to update the normal (even) copy */
cd.read_data[0] = *rd;
/* switch readers back to the even copy */
- raw_write_seqcount_latch(&cd.seq);
+ write_seqcount_latch(&cd.seq);
+
+ /* update the backup (odd) copy with the new data */
+ cd.read_data[1] = *rd;
+
+ write_seqcount_latch_end(&cd.seq);
}
/*
@@ -267,7 +281,7 @@ void __init generic_sched_clock_init(void)
*/
static u64 notrace suspended_sched_clock_read(void)
{
- unsigned int seq = raw_read_seqcount_latch(&cd.seq);
+ unsigned int seq = read_seqcount_latch(&cd.seq);
return cd.read_data[seq & 1].epoch_cyc;
}
diff --git a/kernel/time/sleep_timeout.c b/kernel/time/sleep_timeout.c
new file mode 100644
index 000000000000..dfe939f6e4ec
--- /dev/null
+++ b/kernel/time/sleep_timeout.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kernel internal schedule timeout and sleeping functions
+ */
+
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+
+#include "tick-internal.h"
+
+/*
+ * Since schedule_timeout()'s timer is defined on the stack, it must store
+ * the target task on the stack as well.
+ */
+struct process_timer {
+ struct timer_list timer;
+ struct task_struct *task;
+};
+
+static void process_timeout(struct timer_list *t)
+{
+ struct process_timer *timeout = from_timer(timeout, t, timer);
+
+ wake_up_process(timeout->task);
+}
+
+/**
+ * schedule_timeout - sleep until timeout
+ * @timeout: timeout value in jiffies
+ *
+ * Make the current task sleep until @timeout jiffies have elapsed.
+ * The function behavior depends on the current task state
+ * (see also set_current_state() description):
+ *
+ * %TASK_RUNNING - the scheduler is called, but the task does not sleep
+ * at all. That happens because sched_submit_work() does nothing for
+ * tasks in %TASK_RUNNING state.
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task or the current task is explicitly woken
+ * up.
+ *
+ * The current task state is guaranteed to be %TASK_RUNNING when this
+ * routine returns.
+ *
+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
+ * the CPU away without a bound on the timeout. In this case the return
+ * value will be %MAX_SCHEDULE_TIMEOUT.
+ *
+ * Returns: 0 when the timer has expired otherwise the remaining time in
+ * jiffies will be returned. In all cases the return value is guaranteed
+ * to be non-negative.
+ */
+signed long __sched schedule_timeout(signed long timeout)
+{
+ struct process_timer timer;
+ unsigned long expire;
+
+ switch (timeout) {
+ case MAX_SCHEDULE_TIMEOUT:
+ /*
+ * These two special cases are useful to be comfortable
+ * in the caller. Nothing more. We could take
+ * MAX_SCHEDULE_TIMEOUT from one of the negative value
+ * but I' d like to return a valid offset (>=0) to allow
+ * the caller to do everything it want with the retval.
+ */
+ schedule();
+ goto out;
+ default:
+ /*
+ * Another bit of PARANOID. Note that the retval will be
+ * 0 since no piece of kernel is supposed to do a check
+ * for a negative retval of schedule_timeout() (since it
+ * should never happens anyway). You just have the printk()
+ * that will tell you if something is gone wrong and where.
+ */
+ if (timeout < 0) {
+ pr_err("%s: wrong timeout value %lx\n", __func__, timeout);
+ dump_stack();
+ __set_current_state(TASK_RUNNING);
+ goto out;
+ }
+ }
+
+ expire = timeout + jiffies;
+
+ timer.task = current;
+ timer_setup_on_stack(&timer.timer, process_timeout, 0);
+ timer.timer.expires = expire;
+ add_timer(&timer.timer);
+ schedule();
+ del_timer_sync(&timer.timer);
+
+ /* Remove the timer from the object tracker */
+ destroy_timer_on_stack(&timer.timer);
+
+ timeout = expire - jiffies;
+
+ out:
+ return timeout < 0 ? 0 : timeout;
+}
+EXPORT_SYMBOL(schedule_timeout);
+
+/*
+ * __set_current_state() can be used in schedule_timeout_*() functions, because
+ * schedule_timeout() calls schedule() unconditionally.
+ */
+
+/**
+ * schedule_timeout_interruptible - sleep until timeout (interruptible)
+ * @timeout: timeout value in jiffies
+ *
+ * See schedule_timeout() for details.
+ *
+ * Task state is set to TASK_INTERRUPTIBLE before starting the timeout.
+ */
+signed long __sched schedule_timeout_interruptible(signed long timeout)
+{
+ __set_current_state(TASK_INTERRUPTIBLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_interruptible);
+
+/**
+ * schedule_timeout_killable - sleep until timeout (killable)
+ * @timeout: timeout value in jiffies
+ *
+ * See schedule_timeout() for details.
+ *
+ * Task state is set to TASK_KILLABLE before starting the timeout.
+ */
+signed long __sched schedule_timeout_killable(signed long timeout)
+{
+ __set_current_state(TASK_KILLABLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_killable);
+
+/**
+ * schedule_timeout_uninterruptible - sleep until timeout (uninterruptible)
+ * @timeout: timeout value in jiffies
+ *
+ * See schedule_timeout() for details.
+ *
+ * Task state is set to TASK_UNINTERRUPTIBLE before starting the timeout.
+ */
+signed long __sched schedule_timeout_uninterruptible(signed long timeout)
+{
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_uninterruptible);
+
+/**
+ * schedule_timeout_idle - sleep until timeout (idle)
+ * @timeout: timeout value in jiffies
+ *
+ * See schedule_timeout() for details.
+ *
+ * Task state is set to TASK_IDLE before starting the timeout. It is similar to
+ * schedule_timeout_uninterruptible(), except this task will not contribute to
+ * load average.
+ */
+signed long __sched schedule_timeout_idle(signed long timeout)
+{
+ __set_current_state(TASK_IDLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_idle);
+
+/**
+ * schedule_hrtimeout_range_clock - sleep until timeout
+ * @expires: timeout value (ktime_t)
+ * @delta: slack in expires timeout (ktime_t)
+ * @mode: timer mode
+ * @clock_id: timer clock to be used
+ *
+ * Details are explained in schedule_hrtimeout_range() function description as
+ * this function is commonly used.
+ */
+int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
+ const enum hrtimer_mode mode, clockid_t clock_id)
+{
+ struct hrtimer_sleeper t;
+
+ /*
+ * Optimize when a zero timeout value is given. It does not
+ * matter whether this is an absolute or a relative time.
+ */
+ if (expires && *expires == 0) {
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ /*
+ * A NULL parameter means "infinite"
+ */
+ if (!expires) {
+ schedule();
+ return -EINTR;
+ }
+
+ hrtimer_setup_sleeper_on_stack(&t, clock_id, mode);
+ hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+ hrtimer_sleeper_start_expires(&t, mode);
+
+ if (likely(t.task))
+ schedule();
+
+ hrtimer_cancel(&t.timer);
+ destroy_hrtimer_on_stack(&t.timer);
+
+ __set_current_state(TASK_RUNNING);
+
+ return !t.task ? 0 : -EINTR;
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
+
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires: timeout value (ktime_t)
+ * @delta: slack in expires timeout (ktime_t)
+ * @mode: timer mode
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly
+ * for regular (non RT/DL) tasks.
+ * The kernel give the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task or the current task is explicitly woken
+ * up.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns: 0 when the timer has expired. If the task was woken before the
+ * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
+ * by an explicit wakeup, it returns -EINTR.
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
+ const enum hrtimer_mode mode)
+{
+ return schedule_hrtimeout_range_clock(expires, delta, mode,
+ CLOCK_MONOTONIC);
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
+
+/**
+ * schedule_hrtimeout - sleep until timeout
+ * @expires: timeout value (ktime_t)
+ * @mode: timer mode
+ *
+ * See schedule_hrtimeout_range() for details. @delta argument of
+ * schedule_hrtimeout_range() is set to 0 and has therefore no impact.
+ */
+int __sched schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode)
+{
+ return schedule_hrtimeout_range(expires, 0, mode);
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout);
+
+/**
+ * msleep - sleep safely even with waitqueue interruptions
+ * @msecs: Requested sleep duration in milliseconds
+ *
+ * msleep() uses jiffy based timeouts for the sleep duration. Because of the
+ * design of the timer wheel, the maximum additional percentage delay (slack) is
+ * 12.5%. This is only valid for timers which will end up in level 1 or a higher
+ * level of the timer wheel. For explanation of those 12.5% please check the
+ * detailed description about the basics of the timer wheel.
+ *
+ * The slack of timers which will end up in level 0 depends on sleep duration
+ * (msecs) and HZ configuration and can be calculated in the following way (with
+ * the timer wheel design restriction that the slack is not less than 12.5%):
+ *
+ * ``slack = MSECS_PER_TICK / msecs``
+ *
+ * When the allowed slack of the callsite is known, the calculation could be
+ * turned around to find the minimal allowed sleep duration to meet the
+ * constraints. For example:
+ *
+ * * ``HZ=1000`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 1 / (1/4) = 4``:
+ * all sleep durations greater or equal 4ms will meet the constraints.
+ * * ``HZ=1000`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 1 / (1/8) = 8``:
+ * all sleep durations greater or equal 8ms will meet the constraints.
+ * * ``HZ=250`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 4 / (1/4) = 16``:
+ * all sleep durations greater or equal 16ms will meet the constraints.
+ * * ``HZ=250`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 4 / (1/8) = 32``:
+ * all sleep durations greater or equal 32ms will meet the constraints.
+ *
+ * See also the signal aware variant msleep_interruptible().
+ */
+void msleep(unsigned int msecs)
+{
+ unsigned long timeout = msecs_to_jiffies(msecs);
+
+ while (timeout)
+ timeout = schedule_timeout_uninterruptible(timeout);
+}
+EXPORT_SYMBOL(msleep);
+
+/**
+ * msleep_interruptible - sleep waiting for signals
+ * @msecs: Requested sleep duration in milliseconds
+ *
+ * See msleep() for some basic information.
+ *
+ * The difference between msleep() and msleep_interruptible() is that the sleep
+ * could be interrupted by a signal delivery and then returns early.
+ *
+ * Returns: The remaining time of the sleep duration transformed to msecs (see
+ * schedule_timeout() for details).
+ */
+unsigned long msleep_interruptible(unsigned int msecs)
+{
+ unsigned long timeout = msecs_to_jiffies(msecs);
+
+ while (timeout && !signal_pending(current))
+ timeout = schedule_timeout_interruptible(timeout);
+ return jiffies_to_msecs(timeout);
+}
+EXPORT_SYMBOL(msleep_interruptible);
+
+/**
+ * usleep_range_state - Sleep for an approximate time in a given state
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ * @state: State of the current task that will be while sleeping
+ *
+ * usleep_range_state() sleeps at least for the minimum specified time but not
+ * longer than the maximum specified amount of time. The range might reduce
+ * power usage by allowing hrtimers to coalesce an already scheduled interrupt
+ * with this hrtimer. In the worst case, an interrupt is scheduled for the upper
+ * bound.
+ *
+ * The sleeping task is set to the specified state before starting the sleep.
+ *
+ * In non-atomic context where the exact wakeup time is flexible, use
+ * usleep_range() or its variants instead of udelay(). The sleep improves
+ * responsiveness by avoiding the CPU-hogging busy-wait of udelay().
+ */
+void __sched usleep_range_state(unsigned long min, unsigned long max, unsigned int state)
+{
+ ktime_t exp = ktime_add_us(ktime_get(), min);
+ u64 delta = (u64)(max - min) * NSEC_PER_USEC;
+
+ if (WARN_ON_ONCE(max < min))
+ delta = 0;
+
+ for (;;) {
+ __set_current_state(state);
+ /* Do not return before the requested sleep time has elapsed */
+ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+ break;
+ }
+}
+EXPORT_SYMBOL(usleep_range_state);
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 5f2105e637bd..faac36de35b9 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -25,6 +25,7 @@ extern int tick_do_timer_cpu __read_mostly;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
extern void tick_check_new_device(struct clock_event_device *dev);
+extern void tick_offline_cpu(unsigned int cpu);
extern void tick_shutdown(unsigned int cpu);
extern void tick_suspend(void);
extern void tick_resume(void);
@@ -142,10 +143,8 @@ static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_
#endif /* !(BROADCAST && ONESHOT) */
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
-extern void tick_offline_cpu(unsigned int cpu);
extern void tick_broadcast_offline(unsigned int cpu);
#else
-static inline void tick_offline_cpu(unsigned int cpu) { }
static inline void tick_broadcast_offline(unsigned int cpu) { }
#endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f203f000da1a..fa058510af9c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -311,14 +311,6 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
return HRTIMER_RESTART;
}
-static void tick_sched_timer_cancel(struct tick_sched *ts)
-{
- if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
- hrtimer_cancel(&ts->sched_timer);
- else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
- tick_program_event(KTIME_MAX, 1);
-}
-
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
@@ -865,7 +857,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
static inline bool local_timer_softirq_pending(void)
{
- return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+ return local_timers_pending() & BIT(TIMER_SOFTIRQ);
}
/*
@@ -1061,7 +1053,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
- tick_sched_timer_cancel(ts);
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+ hrtimer_cancel(&ts->sched_timer);
+ else
+ tick_program_event(KTIME_MAX, 1);
return;
}
@@ -1610,21 +1605,13 @@ void tick_setup_sched_timer(bool hrtimer)
*/
void tick_sched_timer_dying(int cpu)
{
- struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
- struct clock_event_device *dev = td->evtdev;
ktime_t idle_sleeptime, iowait_sleeptime;
unsigned long idle_calls, idle_sleeps;
/* This must happen before hrtimers are migrated! */
- tick_sched_timer_cancel(ts);
-
- /*
- * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED,
- * make sure not to call low-res tick handler.
- */
- if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
- dev->event_handler = clockevents_handle_noop;
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+ hrtimer_cancel(&ts->sched_timer);
idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 642647f5046b..1b69caa87480 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -556,9 +556,9 @@ EXPORT_SYMBOL(ns_to_timespec64);
* - all other values are converted to jiffies by either multiplying
* the input value by a factor or dividing it with a factor and
* handling any 32-bit overflows.
- * for the details see __msecs_to_jiffies()
+ * for the details see _msecs_to_jiffies()
*
- * __msecs_to_jiffies() checks for the passed in value being a constant
+ * msecs_to_jiffies() checks for the passed in value being a constant
* via __builtin_constant_p() allowing gcc to eliminate most of the
* code, __msecs_to_jiffies() is called if the value passed does not
* allow constant folding and the actual conversion must be done at
@@ -866,7 +866,7 @@ struct timespec64 timespec64_add_safe(const struct timespec64 lhs,
*
* Handles compat or 32-bit modes.
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int get_timespec64(struct timespec64 *ts,
const struct __kernel_timespec __user *uts)
@@ -897,7 +897,7 @@ EXPORT_SYMBOL_GPL(get_timespec64);
* @ts: input &struct timespec64
* @uts: user's &struct __kernel_timespec
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int put_timespec64(const struct timespec64 *ts,
struct __kernel_timespec __user *uts)
@@ -944,7 +944,7 @@ static int __put_old_timespec32(const struct timespec64 *ts64,
*
* Handles X86_X32_ABI compatibility conversion.
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int get_old_timespec32(struct timespec64 *ts, const void __user *uts)
{
@@ -963,7 +963,7 @@ EXPORT_SYMBOL_GPL(get_old_timespec32);
*
* Handles X86_X32_ABI compatibility conversion.
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int put_old_timespec32(const struct timespec64 *ts, void __user *uts)
{
@@ -979,7 +979,7 @@ EXPORT_SYMBOL_GPL(put_old_timespec32);
* @it: destination &struct itimerspec64
* @uit: user's &struct __kernel_itimerspec
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int get_itimerspec64(struct itimerspec64 *it,
const struct __kernel_itimerspec __user *uit)
@@ -1002,7 +1002,7 @@ EXPORT_SYMBOL_GPL(get_itimerspec64);
* @it: input &struct itimerspec64
* @uit: user's &struct __kernel_itimerspec
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int put_itimerspec64(const struct itimerspec64 *it,
struct __kernel_itimerspec __user *uit)
@@ -1024,7 +1024,7 @@ EXPORT_SYMBOL_GPL(put_itimerspec64);
* @its: destination &struct itimerspec64
* @uits: user's &struct old_itimerspec32
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int get_old_itimerspec32(struct itimerspec64 *its,
const struct old_itimerspec32 __user *uits)
@@ -1043,7 +1043,7 @@ EXPORT_SYMBOL_GPL(get_old_itimerspec32);
* @its: input &struct itimerspec64
* @uits: user's &struct old_itimerspec32
*
- * Return: %0 on success or negative errno on error
+ * Return: 0 on success or negative errno on error
*/
int put_old_itimerspec32(const struct itimerspec64 *its,
struct old_itimerspec32 __user *uits)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7e6f409bf311..0ca85ff4fbb4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -30,8 +30,9 @@
#include "timekeeping_internal.h"
#define TK_CLEAR_NTP (1 << 0)
-#define TK_MIRROR (1 << 1)
-#define TK_CLOCK_WAS_SET (1 << 2)
+#define TK_CLOCK_WAS_SET (1 << 1)
+
+#define TK_UPDATE_ALL (TK_CLEAR_NTP | TK_CLOCK_WAS_SET)
enum timekeeping_adv_mode {
/* Update timekeeper when a tick has passed */
@@ -41,20 +42,18 @@ enum timekeeping_adv_mode {
TK_ADV_FREQ
};
-DEFINE_RAW_SPINLOCK(timekeeper_lock);
-
/*
* The most important data for readout fits into a single 64 byte
* cache line.
*/
-static struct {
+struct tk_data {
seqcount_raw_spinlock_t seq;
struct timekeeper timekeeper;
-} tk_core ____cacheline_aligned = {
- .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
-};
+ struct timekeeper shadow_timekeeper;
+ raw_spinlock_t lock;
+} ____cacheline_aligned;
-static struct timekeeper shadow_timekeeper;
+static struct tk_data tk_core;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
@@ -114,6 +113,36 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned = {
.base[1] = FAST_TK_INIT,
};
+unsigned long timekeeper_lock_irqsave(void)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&tk_core.lock, flags);
+ return flags;
+}
+
+void timekeeper_unlock_irqrestore(unsigned long flags)
+{
+ raw_spin_unlock_irqrestore(&tk_core.lock, flags);
+}
+
+/*
+ * Multigrain timestamps require tracking the latest fine-grained timestamp
+ * that has been issued, and never returning a coarse-grained timestamp that is
+ * earlier than that value.
+ *
+ * mg_floor represents the latest fine-grained time that has been handed out as
+ * a file timestamp on the system. This is tracked as a monotonic ktime_t, and
+ * converted to a realtime clock value on an as-needed basis.
+ *
+ * Maintaining mg_floor ensures the multigrain interfaces never issue a
+ * timestamp earlier than one that has been previously issued.
+ *
+ * The exception to this rule is when there is a backward realtime clock jump. If
+ * such an event occurs, a timestamp can appear to be earlier than a previous one.
+ */
+static __cacheline_aligned_in_smp atomic64_t mg_floor;
+
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
@@ -161,13 +190,15 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
tk->wall_to_monotonic = wtm;
set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
- tk->offs_real = timespec64_to_ktime(tmp);
- tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
+ /* Paired with READ_ONCE() in ktime_mono_to_any() */
+ WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp));
+ WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)));
}
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
- tk->offs_boot = ktime_add(tk->offs_boot, delta);
+ /* Paired with READ_ONCE() in ktime_mono_to_any() */
+ WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta));
/*
* Timespec representation for VDSO update to avoid 64bit division
* on every update.
@@ -184,7 +215,7 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
* the tkr's clocksource may change between the read reference, and the
* clock reference passed to the read function. This can cause crashes if
* the wrong clocksource is passed to the wrong read function.
- * This isn't necessary to use when holding the timekeeper_lock or doing
+ * This isn't necessary to use when holding the tk_core.lock or doing
* a read of the fast-timekeeper tkrs (which is protected by its own locking
* and update logic).
*/
@@ -195,97 +226,6 @@ static inline u64 tk_clock_read(const struct tk_read_base *tkr)
return clock->read(clock);
}
-#ifdef CONFIG_DEBUG_TIMEKEEPING
-#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
-
-static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
-{
-
- u64 max_cycles = tk->tkr_mono.clock->max_cycles;
- const char *name = tk->tkr_mono.clock->name;
-
- if (offset > max_cycles) {
- printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
- offset, name, max_cycles);
- printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
- } else {
- if (offset > (max_cycles >> 1)) {
- printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
- offset, name, max_cycles >> 1);
- printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
- }
- }
-
- if (tk->underflow_seen) {
- if (jiffies - tk->last_warning > WARNING_FREQ) {
- printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
- printk_deferred(" Your kernel is probably still fine.\n");
- tk->last_warning = jiffies;
- }
- tk->underflow_seen = 0;
- }
-
- if (tk->overflow_seen) {
- if (jiffies - tk->last_warning > WARNING_FREQ) {
- printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
- printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
- printk_deferred(" Your kernel is probably still fine.\n");
- tk->last_warning = jiffies;
- }
- tk->overflow_seen = 0;
- }
-}
-
-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles);
-
-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
-{
- struct timekeeper *tk = &tk_core.timekeeper;
- u64 now, last, mask, max, delta;
- unsigned int seq;
-
- /*
- * Since we're called holding a seqcount, the data may shift
- * under us while we're doing the calculation. This can cause
- * false positives, since we'd note a problem but throw the
- * results away. So nest another seqcount here to atomically
- * grab the points we are checking with.
- */
- do {
- seq = read_seqcount_begin(&tk_core.seq);
- now = tk_clock_read(tkr);
- last = tkr->cycle_last;
- mask = tkr->mask;
- max = tkr->clock->max_cycles;
- } while (read_seqcount_retry(&tk_core.seq, seq));
-
- delta = clocksource_delta(now, last, mask);
-
- /*
- * Try to catch underflows by checking if we are seeing small
- * mask-relative negative values.
- */
- if (unlikely((~delta & mask) < (mask >> 3)))
- tk->underflow_seen = 1;
-
- /* Check for multiplication overflows */
- if (unlikely(delta > max))
- tk->overflow_seen = 1;
-
- /* timekeeping_cycles_to_ns() handles both under and overflow */
- return timekeeping_cycles_to_ns(tkr, now);
-}
-#else
-static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
-{
-}
-static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
-{
- BUG();
-}
-#endif
-
/**
* tk_setup_internals - Set up internals to use clocksource clock.
*
@@ -390,19 +330,11 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c
return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}
-static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
+static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
}
-static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
-{
- if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING))
- return timekeeping_debug_get_ns(tkr);
-
- return __timekeeping_get_ns(tkr);
-}
-
/**
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
* @tkr: Timekeeping readout base from which we take the update
@@ -411,7 +343,7 @@ static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
* We want to use this from any context including NMI and tracing /
* instrumenting the timekeeping code itself.
*
- * Employ the latch technique; see @raw_write_seqcount_latch.
+ * Employ the latch technique; see @write_seqcount_latch.
*
* So if a NMI hits the update of base[0] then it will use base[1]
* which is still consistent. In the worst case this can result is a
@@ -424,16 +356,18 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
struct tk_read_base *base = tkf->base;
/* Force readers off to base[1] */
- raw_write_seqcount_latch(&tkf->seq);
+ write_seqcount_latch_begin(&tkf->seq);
/* Update base[0] */
memcpy(base, tkr, sizeof(*base));
/* Force readers back to base[0] */
- raw_write_seqcount_latch(&tkf->seq);
+ write_seqcount_latch(&tkf->seq);
/* Update base[1] */
memcpy(base + 1, base, sizeof(*base));
+
+ write_seqcount_latch_end(&tkf->seq);
}
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
@@ -443,11 +377,11 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
u64 now;
do {
- seq = raw_read_seqcount_latch(&tkf->seq);
+ seq = read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
- now += __timekeeping_get_ns(tkr);
- } while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
+ now += timekeeping_get_ns(tkr);
+ } while (read_seqcount_latch_retry(&tkf->seq, seq));
return now;
}
@@ -517,7 +451,7 @@ EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
* timekeeping_inject_sleeptime64()
* __timekeeping_inject_sleeptime(tk, delta);
* timestamp();
- * timekeeping_update(tk, TK_CLEAR_NTP...);
+ * timekeeping_update_staged(tkd, TK_CLEAR_NTP...);
*
* (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
* partially updated. Since the tk->offs_boot update is a rare event, this
@@ -562,7 +496,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
tkr = tkf->base + (seq & 0x01);
basem = ktime_to_ns(tkr->base);
baser = ktime_to_ns(tkr->base_real);
- delta = __timekeeping_get_ns(tkr);
+ delta = timekeeping_get_ns(tkr);
} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));
if (mono)
@@ -676,13 +610,11 @@ static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
int pvclock_gtod_register_notifier(struct notifier_block *nb)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long flags;
int ret;
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
+ guard(raw_spinlock_irqsave)(&tk_core.lock);
ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
update_pvclock_gtod(tk, true);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
return ret;
}
@@ -695,14 +627,8 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
*/
int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
{
- unsigned long flags;
- int ret;
-
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
-
- return ret;
+ guard(raw_spinlock_irqsave)(&tk_core.lock);
+ return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
}
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
@@ -718,6 +644,18 @@ static inline void tk_update_leap_state(struct timekeeper *tk)
}
/*
+ * Leap state update for both shadow and the real timekeeper
+ * Separate to spare a full memcpy() of the timekeeper.
+ */
+static void tk_update_leap_state_all(struct tk_data *tkd)
+{
+ write_seqcount_begin(&tkd->seq);
+ tk_update_leap_state(&tkd->shadow_timekeeper);
+ tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime;
+ write_seqcount_end(&tkd->seq);
+}
+
+/*
* Update the ktime_t based scalar nsec members of the timekeeper
*/
static inline void tk_update_ktime_data(struct timekeeper *tk)
@@ -750,9 +688,30 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}
-/* must hold timekeeper_lock */
-static void timekeeping_update(struct timekeeper *tk, unsigned int action)
+/*
+ * Restore the shadow timekeeper from the real timekeeper.
+ */
+static void timekeeping_restore_shadow(struct tk_data *tkd)
{
+ lockdep_assert_held(&tkd->lock);
+ memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
+}
+
+static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
+{
+ struct timekeeper *tk = &tk_core.shadow_timekeeper;
+
+ lockdep_assert_held(&tkd->lock);
+
+ /*
+ * Block out readers before running the updates below because that
+ * updates VDSO and other time related infrastructure. Not blocking
+ * the readers might let a reader see time going backwards when
+ * reading from the VDSO after the VDSO update and then reading in
+ * the kernel from the timekeeper before that got updated.
+ */
+ write_seqcount_begin(&tkd->seq);
+
if (action & TK_CLEAR_NTP) {
tk->ntp_error = 0;
ntp_clear();
@@ -770,14 +729,17 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
if (action & TK_CLOCK_WAS_SET)
tk->clock_was_set_seq++;
+
/*
- * The mirroring of the data to the shadow-timekeeper needs
- * to happen last here to ensure we don't over-write the
- * timekeeper structure on the next update with stale data
+ * Update the real timekeeper.
+ *
+ * We could avoid this memcpy() by switching pointers, but that has
+ * the downside that the reader side does not longer benefit from
+ * the cacheline optimized data layout of the timekeeper and requires
+ * another indirection.
*/
- if (action & TK_MIRROR)
- memcpy(&shadow_timekeeper, &tk_core.timekeeper,
- sizeof(tk_core.timekeeper));
+ memcpy(&tkd->timekeeper, tk, sizeof(*tk));
+ write_seqcount_end(&tkd->seq);
}
/**
@@ -930,6 +892,14 @@ ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
unsigned int seq;
ktime_t tconv;
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ /*
+ * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and
+ * tk_update_sleep_time().
+ */
+ return ktime_add(tmono, READ_ONCE(*offset));
+ }
+
do {
seq = read_seqcount_begin(&tk_core.seq);
tconv = ktime_add(tmono, *offset);
@@ -1060,6 +1030,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
unsigned int seq;
ktime_t base_raw;
ktime_t base_real;
+ ktime_t base_boot;
u64 nsec_raw;
u64 nsec_real;
u64 now;
@@ -1074,6 +1045,8 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
base_real = ktime_add(tk->tkr_mono.base,
tk_core.timekeeper.offs_real);
+ base_boot = ktime_add(tk->tkr_mono.base,
+ tk_core.timekeeper.offs_boot);
base_raw = tk->tkr_raw.base;
nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
@@ -1081,6 +1054,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
systime_snapshot->cycles = now;
systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
+ systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_snapshot);
@@ -1440,45 +1414,35 @@ EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base);
*/
int do_settimeofday64(const struct timespec64 *ts)
{
- struct timekeeper *tk = &tk_core.timekeeper;
struct timespec64 ts_delta, xt;
- unsigned long flags;
- int ret = 0;
if (!timespec64_valid_settod(ts))
return -EINVAL;
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
+ scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
- timekeeping_forward_now(tk);
+ timekeeping_forward_now(tks);
- xt = tk_xtime(tk);
- ts_delta = timespec64_sub(*ts, xt);
+ xt = tk_xtime(tks);
+ ts_delta = timespec64_sub(*ts, xt);
- if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
- ret = -EINVAL;
- goto out;
- }
-
- tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
-
- tk_set_xtime(tk, ts);
-out:
- timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
+ if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) {
+ timekeeping_restore_shadow(&tk_core);
+ return -EINVAL;
+ }
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta));
+ tk_set_xtime(tks, ts);
+ timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+ }
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL);
- if (!ret) {
- audit_tk_injoffset(ts_delta);
- add_device_randomness(ts, sizeof(*ts));
- }
-
- return ret;
+ audit_tk_injoffset(ts_delta);
+ add_device_randomness(ts, sizeof(*ts));
+ return 0;
}
EXPORT_SYMBOL(do_settimeofday64);
@@ -1490,40 +1454,31 @@ EXPORT_SYMBOL(do_settimeofday64);
*/
static int timekeeping_inject_offset(const struct timespec64 *ts)
{
- struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long flags;
- struct timespec64 tmp;
- int ret = 0;
-
if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
-
- timekeeping_forward_now(tk);
-
- /* Make sure the proposed value is valid */
- tmp = timespec64_add(tk_xtime(tk), *ts);
- if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
- !timespec64_valid_settod(&tmp)) {
- ret = -EINVAL;
- goto error;
- }
+ scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
+ struct timespec64 tmp;
- tk_xtime_add(tk, ts);
- tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
+ timekeeping_forward_now(tks);
-error: /* even if we error out, we forwarded the time, so call update */
- timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
+ /* Make sure the proposed value is valid */
+ tmp = timespec64_add(tk_xtime(tks), *ts);
+ if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 ||
+ !timespec64_valid_settod(&tmp)) {
+ timekeeping_restore_shadow(&tk_core);
+ return -EINVAL;
+ }
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ tk_xtime_add(tks, ts);
+ tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts));
+ timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+ }
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL);
-
- return ret;
+ return 0;
}
/*
@@ -1576,43 +1531,34 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
*/
static int change_clocksource(void *data)
{
- struct timekeeper *tk = &tk_core.timekeeper;
- struct clocksource *new, *old = NULL;
- unsigned long flags;
- bool change = false;
-
- new = (struct clocksource *) data;
+ struct clocksource *new = data, *old = NULL;
/*
- * If the cs is in module, get a module reference. Succeeds
- * for built-in code (owner == NULL) as well.
+ * If the clocksource is in a module, get a module reference.
+ * Succeeds for built-in code (owner == NULL) as well. Abort if the
+ * reference can't be acquired.
*/
- if (try_module_get(new->owner)) {
- if (!new->enable || new->enable(new) == 0)
- change = true;
- else
- module_put(new->owner);
- }
-
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
-
- timekeeping_forward_now(tk);
+ if (!try_module_get(new->owner))
+ return 0;
- if (change) {
- old = tk->tkr_mono.clock;
- tk_setup_internals(tk, new);
+ /* Abort if the device can't be enabled */
+ if (new->enable && new->enable(new) != 0) {
+ module_put(new->owner);
+ return 0;
}
- timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
+ scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ timekeeping_forward_now(tks);
+ old = tks->tkr_mono.clock;
+ tk_setup_internals(tks, new);
+ timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+ }
if (old) {
if (old->disable)
old->disable(old);
-
module_put(old->owner);
}
@@ -1737,6 +1683,12 @@ read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
*boot_offset = ns_to_timespec64(local_clock());
}
+static __init void tkd_basic_setup(struct tk_data *tkd)
+{
+ raw_spin_lock_init(&tkd->lock);
+ seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock);
+}
+
/*
* Flag reflecting whether timekeeping_resume() has injected sleeptime.
*
@@ -1761,9 +1713,10 @@ static bool persistent_clock_exists;
void __init timekeeping_init(void)
{
struct timespec64 wall_time, boot_offset, wall_to_mono;
- struct timekeeper *tk = &tk_core.timekeeper;
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
struct clocksource *clock;
- unsigned long flags;
+
+ tkd_basic_setup(&tk_core);
read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
if (timespec64_valid_settod(&wall_time) &&
@@ -1783,24 +1736,21 @@ void __init timekeeping_init(void)
*/
wall_to_mono = timespec64_sub(boot_offset, wall_time);
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
+ guard(raw_spinlock_irqsave)(&tk_core.lock);
+
ntp_init();
clock = clocksource_default_clock();
if (clock->enable)
clock->enable(clock);
- tk_setup_internals(tk, clock);
+ tk_setup_internals(tks, clock);
- tk_set_xtime(tk, &wall_time);
- tk->raw_sec = 0;
+ tk_set_xtime(tks, &wall_time);
+ tks->raw_sec = 0;
- tk_set_wall_to_mono(tk, wall_to_mono);
+ tk_set_wall_to_mono(tks, wall_to_mono);
- timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
-
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
}
/* time in seconds when suspend began for persistent clock */
@@ -1878,22 +1828,14 @@ bool timekeeping_rtc_skipsuspend(void)
*/
void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
{
- struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
-
- suspend_timing_needed = false;
+ scoped_guard(raw_spinlock_irqsave, &tk_core.lock) {
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
- timekeeping_forward_now(tk);
-
- __timekeeping_inject_sleeptime(tk, delta);
-
- timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
-
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ suspend_timing_needed = false;
+ timekeeping_forward_now(tks);
+ __timekeeping_inject_sleeptime(tks, delta);
+ timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
+ }
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
@@ -1905,20 +1847,19 @@ void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
*/
void timekeeping_resume(void)
{
- struct timekeeper *tk = &tk_core.timekeeper;
- struct clocksource *clock = tk->tkr_mono.clock;
- unsigned long flags;
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
+ struct clocksource *clock = tks->tkr_mono.clock;
struct timespec64 ts_new, ts_delta;
- u64 cycle_now, nsec;
bool inject_sleeptime = false;
+ u64 cycle_now, nsec;
+ unsigned long flags;
read_persistent_clock64(&ts_new);
clockevents_resume();
clocksource_resume();
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
+ raw_spin_lock_irqsave(&tk_core.lock, flags);
/*
* After system resumes, we need to calculate the suspended time and
@@ -1932,7 +1873,7 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
- cycle_now = tk_clock_read(&tk->tkr_mono);
+ cycle_now = tk_clock_read(&tks->tkr_mono);
nsec = clocksource_stop_suspend_timing(clock, cycle_now);
if (nsec > 0) {
ts_delta = ns_to_timespec64(nsec);
@@ -1944,18 +1885,17 @@ void timekeeping_resume(void)
if (inject_sleeptime) {
suspend_timing_needed = false;
- __timekeeping_inject_sleeptime(tk, &ts_delta);
+ __timekeeping_inject_sleeptime(tks, &ts_delta);
}
/* Re-base the last cycle value */
- tk->tkr_mono.cycle_last = cycle_now;
- tk->tkr_raw.cycle_last = cycle_now;
+ tks->tkr_mono.cycle_last = cycle_now;
+ tks->tkr_raw.cycle_last = cycle_now;
- tk->ntp_error = 0;
+ tks->ntp_error = 0;
timekeeping_suspended = 0;
- timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
+ raw_spin_unlock_irqrestore(&tk_core.lock, flags);
touch_softlockup_watchdog();
@@ -1967,11 +1907,11 @@ void timekeeping_resume(void)
int timekeeping_suspend(void)
{
- struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long flags;
- struct timespec64 delta, delta_delta;
- static struct timespec64 old_delta;
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
+ struct timespec64 delta, delta_delta;
+ static struct timespec64 old_delta;
struct clocksource *curr_clock;
+ unsigned long flags;
u64 cycle_now;
read_persistent_clock64(&timekeeping_suspend_time);
@@ -1986,9 +1926,8 @@ int timekeeping_suspend(void)
suspend_timing_needed = true;
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
- timekeeping_forward_now(tk);
+ raw_spin_lock_irqsave(&tk_core.lock, flags);
+ timekeeping_forward_now(tks);
timekeeping_suspended = 1;
/*
@@ -1996,8 +1935,8 @@ int timekeeping_suspend(void)
* just read from the current clocksource. Save this to potentially
* use in suspend timing.
*/
- curr_clock = tk->tkr_mono.clock;
- cycle_now = tk->tkr_mono.cycle_last;
+ curr_clock = tks->tkr_mono.clock;
+ cycle_now = tks->tkr_mono.cycle_last;
clocksource_start_suspend_timing(curr_clock, cycle_now);
if (persistent_clock_exists) {
@@ -2007,7 +1946,7 @@ int timekeeping_suspend(void)
* try to compensate so the difference in system time
* and persistent_clock time stays close to constant.
*/
- delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
+ delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time);
delta_delta = timespec64_sub(delta, old_delta);
if (abs(delta_delta.tv_sec) >= 2) {
/*
@@ -2022,10 +1961,9 @@ int timekeeping_suspend(void)
}
}
- timekeeping_update(tk, TK_MIRROR);
- halt_fast_timekeeper(tk);
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ timekeeping_update_from_shadow(&tk_core, 0);
+ halt_fast_timekeeper(tks);
+ raw_spin_unlock_irqrestore(&tk_core.lock, flags);
tick_suspend();
clocksource_suspend();
@@ -2130,16 +2068,17 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
*/
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
+ u64 ntp_tl = ntp_tick_length();
u32 mult;
/*
* Determine the multiplier from the current NTP tick length.
* Avoid expensive division when the tick length doesn't change.
*/
- if (likely(tk->ntp_tick == ntp_tick_length())) {
+ if (likely(tk->ntp_tick == ntp_tl)) {
mult = tk->tkr_mono.mult - tk->ntp_err_mult;
} else {
- tk->ntp_tick = ntp_tick_length();
+ tk->ntp_tick = ntp_tl;
mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
tk->xtime_remainder, tk->cycle_interval);
}
@@ -2278,28 +2217,24 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
*/
static bool timekeeping_advance(enum timekeeping_adv_mode mode)
{
+ struct timekeeper *tk = &tk_core.shadow_timekeeper;
struct timekeeper *real_tk = &tk_core.timekeeper;
- struct timekeeper *tk = &shadow_timekeeper;
- u64 offset;
- int shift = 0, maxshift;
unsigned int clock_set = 0;
- unsigned long flags;
+ int shift = 0, maxshift;
+ u64 offset;
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
+ guard(raw_spinlock_irqsave)(&tk_core.lock);
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
- goto out;
+ return false;
offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
/* Check if there's really nothing to do */
if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
- goto out;
-
- /* Do some additional sanity checking */
- timekeeping_check_update(tk, offset);
+ return false;
/*
* With NO_HZ we may have to accumulate many cycle_intervals
@@ -2315,8 +2250,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
shift = min(shift, maxshift);
while (offset >= tk->cycle_interval) {
- offset = logarithmic_accumulation(tk, offset, shift,
- &clock_set);
+ offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
if (offset < tk->cycle_interval<<shift)
shift--;
}
@@ -2330,23 +2264,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode)
*/
clock_set |= accumulate_nsecs_to_secs(tk);
- write_seqcount_begin(&tk_core.seq);
- /*
- * Update the real timekeeper.
- *
- * We could avoid this memcpy by switching pointers, but that
- * requires changes to all other timekeeper usage sites as
- * well, i.e. move the timekeeper pointer getter into the
- * spinlocked/seqcount protected sections. And we trade this
- * memcpy under the tk_core.seq against one before we start
- * updating.
- */
- timekeeping_update(tk, clock_set);
- memcpy(real_tk, tk, sizeof(*tk));
- /* The memcpy must come last. Do not put anything here! */
- write_seqcount_end(&tk_core.seq);
-out:
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ timekeeping_update_from_shadow(&tk_core, clock_set);
return !!clock_set;
}
@@ -2394,6 +2312,94 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts)
}
EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
+/**
+ * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor
+ * @ts: timespec64 to be filled
+ *
+ * Fetch the global mg_floor value, convert it to realtime and compare it
+ * to the current coarse-grained time. Fill @ts with whichever is
+ * latest. Note that this is a filesystem-specific interface and should be
+ * avoided outside of that context.
+ */
+void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+ u64 floor = atomic64_read(&mg_floor);
+ ktime_t f_real, offset, coarse;
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&tk_core.seq);
+ *ts = tk_xtime(tk);
+ offset = tk_core.timekeeper.offs_real;
+ } while (read_seqcount_retry(&tk_core.seq, seq));
+
+ coarse = timespec64_to_ktime(*ts);
+ f_real = ktime_add(floor, offset);
+ if (ktime_after(f_real, coarse))
+ *ts = ktime_to_timespec64(f_real);
+}
+
+/**
+ * ktime_get_real_ts64_mg - attempt to update floor value and return result
+ * @ts: pointer to the timespec to be set
+ *
+ * Get a monotonic fine-grained time value and attempt to swap it into
+ * mg_floor. If that succeeds then accept the new floor value. If it fails
+ * then another task raced in during the interim time and updated the
+ * floor. Since any update to the floor must be later than the previous
+ * floor, either outcome is acceptable.
+ *
+ * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(),
+ * and determining that the resulting coarse-grained timestamp did not effect
+ * a change in ctime. Any more recent floor value would effect a change to
+ * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure.
+ *
+ * @ts will be filled with the latest floor value, regardless of the outcome of
+ * the cmpxchg. Note that this is a filesystem specific interface and should be
+ * avoided outside of that context.
+ */
+void ktime_get_real_ts64_mg(struct timespec64 *ts)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+ ktime_t old = atomic64_read(&mg_floor);
+ ktime_t offset, mono;
+ unsigned int seq;
+ u64 nsecs;
+
+ do {
+ seq = read_seqcount_begin(&tk_core.seq);
+
+ ts->tv_sec = tk->xtime_sec;
+ mono = tk->tkr_mono.base;
+ nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ offset = tk_core.timekeeper.offs_real;
+ } while (read_seqcount_retry(&tk_core.seq, seq));
+
+ mono = ktime_add_ns(mono, nsecs);
+
+ /*
+ * Attempt to update the floor with the new time value. As any
+ * update must be later then the existing floor, and would effect
+ * a change to ctime from the perspective of the current task,
+ * accept the resulting floor value regardless of the outcome of
+ * the swap.
+ */
+ if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
+ ts->tv_nsec = 0;
+ timespec64_add_ns(ts, nsecs);
+ timekeeping_inc_mg_floor_swaps();
+ } else {
+ /*
+ * Another task changed mg_floor since "old" was fetched.
+ * "old" has been updated with the latest value of "mg_floor".
+ * That value is newer than the previous floor value, which
+ * is enough to effect a change to ctime. Accept it.
+ */
+ *ts = ktime_to_timespec64(ktime_add(old, offset));
+ }
+}
+
void ktime_get_coarse_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
@@ -2551,13 +2557,10 @@ EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
*/
int do_adjtimex(struct __kernel_timex *txc)
{
- struct timekeeper *tk = &tk_core.timekeeper;
struct audit_ntp_data ad;
bool offset_set = false;
bool clock_set = false;
struct timespec64 ts;
- unsigned long flags;
- s32 orig_tai, tai;
int ret;
/* Validate the data before disabling interrupts */
@@ -2568,6 +2571,7 @@ int do_adjtimex(struct __kernel_timex *txc)
if (txc->modes & ADJ_SETOFFSET) {
struct timespec64 delta;
+
delta.tv_sec = txc->time.tv_sec;
delta.tv_nsec = txc->time.tv_usec;
if (!(txc->modes & ADJ_NANO))
@@ -2585,21 +2589,21 @@ int do_adjtimex(struct __kernel_timex *txc)
ktime_get_real_ts64(&ts);
add_device_randomness(&ts, sizeof(ts));
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
+ scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
+ struct timekeeper *tks = &tk_core.shadow_timekeeper;
+ s32 orig_tai, tai;
- orig_tai = tai = tk->tai_offset;
- ret = __do_adjtimex(txc, &ts, &tai, &ad);
+ orig_tai = tai = tks->tai_offset;
+ ret = __do_adjtimex(txc, &ts, &tai, &ad);
- if (tai != orig_tai) {
- __timekeeping_set_tai_offset(tk, tai);
- timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
- clock_set = true;
+ if (tai != orig_tai) {
+ __timekeeping_set_tai_offset(tks, tai);
+ timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
+ clock_set = true;
+ } else {
+ tk_update_leap_state_all(&tk_core);
+ }
}
- tk_update_leap_state(tk);
-
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
audit_ntp_log(&ad);
@@ -2623,15 +2627,8 @@ int do_adjtimex(struct __kernel_timex *txc)
*/
void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
- write_seqcount_begin(&tk_core.seq);
-
+ guard(raw_spinlock_irqsave)(&tk_core.lock);
__hardpps(phase_ts, raw_ts);
-
- write_seqcount_end(&tk_core.seq);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
}
EXPORT_SYMBOL(hardpps);
#endif /* CONFIG_NTP_PPS */
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index b73e8850e58d..badeb222eab9 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -17,6 +17,9 @@
#define NUM_BINS 32
+/* Incremented every time mg_floor is updated */
+DEFINE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps);
+
static unsigned int sleep_time_bin[NUM_BINS] = {0};
static int tk_debug_sleep_time_show(struct seq_file *s, void *data)
@@ -53,3 +56,13 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
(s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
}
+unsigned long timekeeping_get_mg_floor_swaps(void)
+{
+ unsigned long sum = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ sum += data_race(per_cpu(timekeeping_mg_floor_swaps, cpu));
+
+ return sum;
+}
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 4ca2787d1642..63e600e943a7 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -10,12 +10,26 @@
* timekeeping debug functions
*/
#ifdef CONFIG_DEBUG_FS
+
+DECLARE_PER_CPU(unsigned long, timekeeping_mg_floor_swaps);
+
+static inline void timekeeping_inc_mg_floor_swaps(void)
+{
+ this_cpu_inc(timekeeping_mg_floor_swaps);
+}
+
extern void tk_debug_account_sleep_time(const struct timespec64 *t);
+
#else
+
#define tk_debug_account_sleep_time(x)
+
+static inline void timekeeping_inc_mg_floor_swaps(void)
+{
+}
+
#endif
-#ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
{
u64 ret = (now - last) & mask;
@@ -26,14 +40,9 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
*/
return ret & ~(mask >> 1) ? 0 : ret;
}
-#else
-static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
-{
- return (now - last) & mask;
-}
-#endif
/* Semi public for serialization of non timekeeper VDSO updates. */
-extern raw_spinlock_t timekeeper_lock;
+unsigned long timekeeper_lock_irqsave(void);
+void timekeeper_unlock_irqrestore(unsigned long flags);
#endif /* _TIMEKEEPING_INTERNAL_H */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 0fc9d066a7be..a5860bf6d16f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -37,7 +37,6 @@
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
-#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
@@ -2422,7 +2421,8 @@ static inline void __run_timers(struct timer_base *base)
static void __run_timer_base(struct timer_base *base)
{
- if (time_before(jiffies, base->next_expiry))
+ /* Can race against a remote CPU updating next_expiry under the lock */
+ if (time_before(jiffies, READ_ONCE(base->next_expiry)))
return;
timer_base_lock_expiry(base);
@@ -2499,7 +2499,7 @@ static void run_local_timers(void)
*/
if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) ||
(i == BASE_DEF && tmigr_requires_handle_remote())) {
- raise_softirq(TIMER_SOFTIRQ);
+ raise_timer_softirq(TIMER_SOFTIRQ);
return;
}
}
@@ -2526,141 +2526,6 @@ void update_process_times(int user_tick)
run_posix_cpu_timers();
}
-/*
- * Since schedule_timeout()'s timer is defined on the stack, it must store
- * the target task on the stack as well.
- */
-struct process_timer {
- struct timer_list timer;
- struct task_struct *task;
-};
-
-static void process_timeout(struct timer_list *t)
-{
- struct process_timer *timeout = from_timer(timeout, t, timer);
-
- wake_up_process(timeout->task);
-}
-
-/**
- * schedule_timeout - sleep until timeout
- * @timeout: timeout value in jiffies
- *
- * Make the current task sleep until @timeout jiffies have elapsed.
- * The function behavior depends on the current task state
- * (see also set_current_state() description):
- *
- * %TASK_RUNNING - the scheduler is called, but the task does not sleep
- * at all. That happens because sched_submit_work() does nothing for
- * tasks in %TASK_RUNNING state.
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
- * pass before the routine returns unless the current task is explicitly
- * woken up, (e.g. by wake_up_process()).
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task or the current task is explicitly woken
- * up.
- *
- * The current task state is guaranteed to be %TASK_RUNNING when this
- * routine returns.
- *
- * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
- * the CPU away without a bound on the timeout. In this case the return
- * value will be %MAX_SCHEDULE_TIMEOUT.
- *
- * Returns 0 when the timer has expired otherwise the remaining time in
- * jiffies will be returned. In all cases the return value is guaranteed
- * to be non-negative.
- */
-signed long __sched schedule_timeout(signed long timeout)
-{
- struct process_timer timer;
- unsigned long expire;
-
- switch (timeout)
- {
- case MAX_SCHEDULE_TIMEOUT:
- /*
- * These two special cases are useful to be comfortable
- * in the caller. Nothing more. We could take
- * MAX_SCHEDULE_TIMEOUT from one of the negative value
- * but I' d like to return a valid offset (>=0) to allow
- * the caller to do everything it want with the retval.
- */
- schedule();
- goto out;
- default:
- /*
- * Another bit of PARANOID. Note that the retval will be
- * 0 since no piece of kernel is supposed to do a check
- * for a negative retval of schedule_timeout() (since it
- * should never happens anyway). You just have the printk()
- * that will tell you if something is gone wrong and where.
- */
- if (timeout < 0) {
- printk(KERN_ERR "schedule_timeout: wrong timeout "
- "value %lx\n", timeout);
- dump_stack();
- __set_current_state(TASK_RUNNING);
- goto out;
- }
- }
-
- expire = timeout + jiffies;
-
- timer.task = current;
- timer_setup_on_stack(&timer.timer, process_timeout, 0);
- __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
- schedule();
- del_timer_sync(&timer.timer);
-
- /* Remove the timer from the object tracker */
- destroy_timer_on_stack(&timer.timer);
-
- timeout = expire - jiffies;
-
- out:
- return timeout < 0 ? 0 : timeout;
-}
-EXPORT_SYMBOL(schedule_timeout);
-
-/*
- * We can use __set_current_state() here because schedule_timeout() calls
- * schedule() unconditionally.
- */
-signed long __sched schedule_timeout_interruptible(signed long timeout)
-{
- __set_current_state(TASK_INTERRUPTIBLE);
- return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_interruptible);
-
-signed long __sched schedule_timeout_killable(signed long timeout)
-{
- __set_current_state(TASK_KILLABLE);
- return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_killable);
-
-signed long __sched schedule_timeout_uninterruptible(signed long timeout)
-{
- __set_current_state(TASK_UNINTERRUPTIBLE);
- return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-
-/*
- * Like schedule_timeout_uninterruptible(), except this task will not contribute
- * to load average.
- */
-signed long __sched schedule_timeout_idle(signed long timeout)
-{
- __set_current_state(TASK_IDLE);
- return schedule_timeout(timeout);
-}
-EXPORT_SYMBOL(schedule_timeout_idle);
-
#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
{
@@ -2757,59 +2622,3 @@ void __init init_timers(void)
posix_cputimers_init_work();
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
-
-/**
- * msleep - sleep safely even with waitqueue interruptions
- * @msecs: Time in milliseconds to sleep for
- */
-void msleep(unsigned int msecs)
-{
- unsigned long timeout = msecs_to_jiffies(msecs);
-
- while (timeout)
- timeout = schedule_timeout_uninterruptible(timeout);
-}
-
-EXPORT_SYMBOL(msleep);
-
-/**
- * msleep_interruptible - sleep waiting for signals
- * @msecs: Time in milliseconds to sleep for
- */
-unsigned long msleep_interruptible(unsigned int msecs)
-{
- unsigned long timeout = msecs_to_jiffies(msecs);
-
- while (timeout && !signal_pending(current))
- timeout = schedule_timeout_interruptible(timeout);
- return jiffies_to_msecs(timeout);
-}
-
-EXPORT_SYMBOL(msleep_interruptible);
-
-/**
- * usleep_range_state - Sleep for an approximate time in a given state
- * @min: Minimum time in usecs to sleep
- * @max: Maximum time in usecs to sleep
- * @state: State of the current task that will be while sleeping
- *
- * In non-atomic context where the exact wakeup time is flexible, use
- * usleep_range_state() instead of udelay(). The sleep improves responsiveness
- * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
- * power usage by allowing hrtimers to take advantage of an already-
- * scheduled interrupt instead of scheduling a new one just for this sleep.
- */
-void __sched usleep_range_state(unsigned long min, unsigned long max,
- unsigned int state)
-{
- ktime_t exp = ktime_add_us(ktime_get(), min);
- u64 delta = (u64)(max - min) * NSEC_PER_USEC;
-
- for (;;) {
- __set_current_state(state);
- /* Do not return before the requested sleep time has elapsed */
- if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
- break;
- }
-}
-EXPORT_SYMBOL(usleep_range_state);
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 9193d6133e5d..05d383143165 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -119,7 +119,7 @@ void update_vsyscall(struct timekeeper *tk)
if (clock_mode != VDSO_CLOCKMODE_NONE)
update_vdso_data(vdata, tk);
- __arch_update_vsyscall(vdata, tk);
+ __arch_update_vsyscall(vdata);
vdso_write_end(vdata);
@@ -151,9 +151,8 @@ void update_vsyscall_tz(void)
unsigned long vdso_update_begin(void)
{
struct vdso_data *vdata = __arch_get_k_vdso_data();
- unsigned long flags;
+ unsigned long flags = timekeeper_lock_irqsave();
- raw_spin_lock_irqsave(&timekeeper_lock, flags);
vdso_write_begin(vdata);
return flags;
}
@@ -172,5 +171,5 @@ void vdso_update_end(unsigned long flags)
vdso_write_end(vdata);
__arch_sync_vdso_data(vdata);
- raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ timekeeper_unlock_irqrestore(flags);
}