From 184c6873874c350bfb0b74f9e08ec8d89750d603 Mon Sep 17 00:00:00 2001 From: Ned Bass Date: Fri, 1 Nov 2013 13:37:58 -0700 Subject: [PATCH] Emulate illumos interface cv_timedwait_hires() Needed for Illumos #3582. This interface is supposed to support a variable-resolution timeout with nanosecond granularity. This implementation rounds up to microsecond resolution, as nanosecond-precision timing is rarely needed for real-world performance tuning and may incur unnecessary busy-waiting. usleep_range() is used if available; otherwise udelay() or msleep() are used depending on the length of the delay interval. Add flags from sys/callo.h as these are used to control the behavior of cv_timedwait_hires(). Specifically, CALLOUT_FLAG_ABSOLUTE Normally, the expiration passed to the timeout API functions is an expiration interval. If this flag is specified, then it is interpreted as the expiration time itself. CALLOUT_FLAG_ROUNDUP Round up the expiration time to the next resolution boundary. If this flag is not specified, the expiration time is rounded down. 
References: https://www.illumos.org/issues/3582 illumos/illumos-gate@0689f76 Signed-off-by: Brian Behlendorf Closes #304 --- config/spl-build.m4 | 23 ++++++++++ include/linux/Makefile.am | 1 + include/linux/delay_compat.h | 47 +++++++++++++++++++++ include/sys/Makefile.am | 1 + include/sys/callo.h | 52 +++++++++++++++++++++++ include/sys/condvar.h | 4 ++ include/sys/time.h | 3 ++ module/spl/spl-condvar.c | 81 ++++++++++++++++++++++++++++++++++++ 8 files changed, 212 insertions(+) create mode 100644 include/linux/delay_compat.h create mode 100644 include/sys/callo.h diff --git a/config/spl-build.m4 b/config/spl-build.m4 index f54c5b1..b0e3348 100644 --- a/config/spl-build.m4 +++ b/config/spl-build.m4 @@ -93,6 +93,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [ SPL_AC_RWSEM_SPINLOCK_IS_RAW SPL_AC_SCHED_RT_HEADER SPL_AC_2ARGS_VFS_GETATTR + SPL_AC_USLEEP_RANGE ]) AC_DEFUN([SPL_AC_MODULE_SYMVERS], [ @@ -2400,3 +2401,25 @@ AC_DEFUN([SPL_AC_2ARGS_VFS_GETATTR], [ ]) ]) ]) + +dnl # +dnl # 2.6.36 API compatibility. +dnl # Added usleep_range timer. +dnl # usleep_range is a finer precision implementation of msleep +dnl # designed to be a drop-in replacement for udelay where a precise +dnl # sleep / busy-wait is unnecessary. 
+dnl # +AC_DEFUN([SPL_AC_USLEEP_RANGE], [ + AC_MSG_CHECKING([whether usleep_range() is available]) + SPL_LINUX_TRY_COMPILE([ + #include <linux/delay.h> + ],[ + usleep_range(0, 0); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_USLEEP_RANGE, 1, + [usleep_range is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/include/linux/Makefile.am b/include/linux/Makefile.am index 730f769..59f2ec5 100644 --- a/include/linux/Makefile.am +++ b/include/linux/Makefile.am @@ -3,6 +3,7 @@ COMMON_H = KERNEL_H = \ $(top_srcdir)/include/linux/bitops_compat.h \ $(top_srcdir)/include/linux/compiler_compat.h \ + $(top_srcdir)/include/linux/delay_compat.h \ $(top_srcdir)/include/linux/file_compat.h \ $(top_srcdir)/include/linux/kallsyms_compat.h \ $(top_srcdir)/include/linux/list_compat.h \ diff --git a/include/linux/delay_compat.h b/include/linux/delay_compat.h new file mode 100644 index 0000000..fc9ff66 --- /dev/null +++ b/include/linux/delay_compat.h @@ -0,0 +1,47 @@ +/*****************************************************************************\ + * Copyright (C) 2007-2013 Lawrence Livermore National Security, LLC. + * Copyright (C) 2007 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * UCRL-CODE-235197 + * + * This file is part of the SPL, Solaris Porting Layer. + * For details, see . + * + * The SPL is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * The SPL is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with the SPL. If not, see . 
+\*****************************************************************************/ + +#ifndef _SPL_DELAY_COMPAT_H +#define _SPL_DELAY_COMPAT_H + +#include <linux/delay.h> +#include <linux/time.h> + +/* usleep_range() was introduced in 2.6.36; older kernels get this fallback, which busy-waits with udelay() for short delays and sleeps with msleep() once 'min' (microseconds) reaches MAX_UDELAY_MS milliseconds. 'max' is ignored. */ +#ifndef HAVE_USLEEP_RANGE + +static inline void +usleep_range(unsigned long min, unsigned long max) +{ + unsigned int min_ms = min / USEC_PER_MSEC; + + if (min_ms >= MAX_UDELAY_MS) + msleep(min_ms); + else + udelay(min); +} + +#endif /* HAVE_USLEEP_RANGE */ + +#endif /* _SPL_DELAY_COMPAT_H */ diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index 0e86a28..9d82636 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -13,6 +13,7 @@ KERNEL_H = \ $(top_srcdir)/include/sys/buf.h \ $(top_srcdir)/include/sys/byteorder.h \ $(top_srcdir)/include/sys/callb.h \ + $(top_srcdir)/include/sys/callo.h \ $(top_srcdir)/include/sys/cmn_err.h \ $(top_srcdir)/include/sys/compress.h \ $(top_srcdir)/include/sys/condvar.h \ diff --git a/include/sys/callo.h b/include/sys/callo.h new file mode 100644 index 0000000..0d9fbcb --- /dev/null +++ b/include/sys/callo.h @@ -0,0 +1,52 @@ +/*****************************************************************************\ + * Copyright (C) 2007-2013 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * UCRL-CODE-235197 + * + * This file is part of the SPL, Solaris Porting Layer. + * For details, see . + * + * The SPL is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * The SPL is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with the SPL. If not, see . +\*****************************************************************************/ + +#ifndef _SPL_CALLO_H +#define _SPL_CALLO_H + +/* + * Callout flags: + * + * CALLOUT_FLAG_ROUNDUP + * Round up the expiration time to the next resolution boundary. + * If this flag is not specified, the expiration time is rounded down. + * CALLOUT_FLAG_ABSOLUTE + * Normally, the expiration passed to the timeout API functions is an + * expiration interval. If this flag is specified, then it is + * interpreted as the expiration time itself. + * CALLOUT_FLAG_HRESTIME + * Normally, callouts are not affected by changes to system time + * (hrestime). This flag is used to create a callout that is affected + * by system time. If system time changes, these timers must be + * handled in a special way (see callout.c). These are used by condition + * variables and LWP timers that need this behavior. + * CALLOUT_FLAG_32BIT + * Legacy interfaces timeout() and realtime_timeout() pass this flag + * to timeout_generic() to indicate that a 32-bit ID should be allocated. 
+ */ +#define CALLOUT_FLAG_ROUNDUP 0x1 +#define CALLOUT_FLAG_ABSOLUTE 0x2 +#define CALLOUT_FLAG_HRESTIME 0x4 +#define CALLOUT_FLAG_32BIT 0x8 + +#endif /* _SPL_CALLO_H */ diff --git a/include/sys/condvar.h b/include/sys/condvar.h index c825bd2..c9f2bea 100644 --- a/include/sys/condvar.h +++ b/include/sys/condvar.h @@ -27,8 +27,10 @@ #include #include +#include #include #include +#include /* * The kcondvar_t struct is protected by mutex taken externally before @@ -56,6 +58,8 @@ extern void __cv_wait_interruptible(kcondvar_t *cvp, kmutex_t *mp); extern clock_t __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time); extern clock_t __cv_timedwait_interruptible(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time); +extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, + hrtime_t tim, hrtime_t res, int flag); extern void __cv_signal(kcondvar_t *cvp); extern void __cv_broadcast(kcondvar_t *cvp); diff --git a/include/sys/time.h b/include/sys/time.h index f8d78d1..d8e81c9 100644 --- a/include/sys/time.h +++ b/include/sys/time.h @@ -47,6 +47,9 @@ #define MICROSEC 1000000 #define NANOSEC 1000000000 +#define MSEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MILLISEC)) +#define NSEC2MSEC(n) ((n) / (NANOSEC / MILLISEC)) + /* Already defined in include/linux/time.h */ #undef CLOCK_THREAD_CPUTIME_ID #undef CLOCK_REALTIME diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index 60cf726..283648a 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -226,6 +226,87 @@ __cv_timedwait_interruptible(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) } EXPORT_SYMBOL(__cv_timedwait_interruptible); +/* + * 'expire_time' argument is an absolute clock time in nanoseconds. + * Return value is time left (expire_time - now) or -1 if timeout occurred. 
+ */ +static clock_t +__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, + hrtime_t expire_time, int state) +{ + DEFINE_WAIT(wait); + hrtime_t time_left, now; + unsigned long time_left_us; + SENTRY; + + ASSERT(cvp); + ASSERT(mp); + ASSERT(cvp->cv_magic == CV_MAGIC); + ASSERT(mutex_owned(mp)); + atomic_inc(&cvp->cv_refs); + + if (cvp->cv_mutex == NULL) + cvp->cv_mutex = mp; + + /* Ensure the same mutex is used by all callers */ + ASSERT(cvp->cv_mutex == mp); + + now = gethrtime(); + time_left = expire_time - now; + if (time_left <= 0) { + atomic_dec(&cvp->cv_refs); + SRETURN(-1); + } + time_left_us = (time_left + NSEC_PER_USEC - 1) / NSEC_PER_USEC; /* round up to whole microseconds so the sleep is never requested short of expire_time */ + + prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); + atomic_inc(&cvp->cv_waiters); + + /* Mutex should be dropped after prepare_to_wait(); this + * ensures we're linked in to the waiters list and avoids the + * race where 'cvp->cv_waiters > 0' but the list is empty. */ + mutex_exit(mp); + /* Allow a 100 us range to give kernel an opportunity to coalesce + * interrupts */ + usleep_range(time_left_us, time_left_us + 100); + mutex_enter(mp); + + /* No more waiters, so a different mutex could be used */ + if (atomic_dec_and_test(&cvp->cv_waiters)) { + cvp->cv_mutex = NULL; + wake_up(&cvp->cv_destroy); + } + + finish_wait(&cvp->cv_event, &wait); + atomic_dec(&cvp->cv_refs); + + time_left = expire_time - gethrtime(); + SRETURN(time_left > 0 ? time_left : -1); +} + +/* + * Compatibility wrapper for the cv_timedwait_hires() Illumos interface. + */ +clock_t +cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, + hrtime_t res, int flag) +{ + if (res > 1) { + /* + * Align expiration to the specified resolution. + */ + if (flag & CALLOUT_FLAG_ROUNDUP) + tim += res - 1; + tim = (tim / res) * res; + } + + if (!(flag & CALLOUT_FLAG_ABSOLUTE)) + tim += gethrtime(); + + return __cv_timedwait_hires(cvp, mp, tim, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(cv_timedwait_hires); + void __cv_signal(kcondvar_t *cvp) { -- 2.40.0