Needed for Illumos #3582. This interface is supposed to support
a variable-resolution timeout with nanosecond granularity. This
implementation rounds up to microsecond resolution, as nanosecond-
precision timing is rarely needed for real-world performance
tuning and may incur unnecessary busy-waiting. usleep_range() is
used if available, otherwise udelay() or msleep() are used
depending on the length of the delay interval.
Add flags from sys/callo.h as these are used to control the behavior of
cv_timedwait_hires(). Specifically,
CALLOUT_FLAG_ABSOLUTE
Normally, the expiration passed to the timeout API functions is
an expiration interval. If this flag is specified, then it is
interpreted as the expiration time itself.
CALLOUT_FLAG_ROUNDUP
Round up the expiration time to the next resolution boundary. If this
flag is not specified, the expiration time is rounded down.
References:
https://www.illumos.org/issues/3582
illumos/illumos-gate@0689f76
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #304
SPL_AC_RWSEM_SPINLOCK_IS_RAW
SPL_AC_SCHED_RT_HEADER
SPL_AC_2ARGS_VFS_GETATTR
+ SPL_AC_USLEEP_RANGE
])
AC_DEFUN([SPL_AC_MODULE_SYMVERS], [
])
])
])
+
+dnl #
+dnl # 2.6.36 API compatibility.
+dnl # Added usleep_range timer.
+dnl # usleep_range is a finer precision implementation of msleep
+dnl # designed to be a drop-in replacement for udelay where a precise
+dnl # sleep / busy-wait is unnecessary.
+dnl #
+dnl # Defines HAVE_USLEEP_RANGE when a test program that includes
+dnl # <linux/delay.h> and calls usleep_range(0, 0) compiles; otherwise
+dnl # nothing is defined.
+dnl #
+AC_DEFUN([SPL_AC_USLEEP_RANGE], [
+ AC_MSG_CHECKING([whether usleep_range() is available])
+ SPL_LINUX_TRY_COMPILE([
+ #include <linux/delay.h>
+ ],[
+ usleep_range(0, 0);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_USLEEP_RANGE, 1,
+ [usleep_range is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
KERNEL_H = \
$(top_srcdir)/include/linux/bitops_compat.h \
$(top_srcdir)/include/linux/compiler_compat.h \
+ $(top_srcdir)/include/linux/delay_compat.h \
$(top_srcdir)/include/linux/file_compat.h \
$(top_srcdir)/include/linux/kallsyms_compat.h \
$(top_srcdir)/include/linux/list_compat.h \
--- /dev/null
+/*****************************************************************************\
+ * Copyright (C) 2007-2013 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ * For details, see <http://zfsonlinux.org/>.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#ifndef _SPL_DELAY_COMPAT_H
+#define _SPL_DELAY_COMPAT_H
+
+#include <linux/delay.h>
+#include <linux/time.h>
+
+/* usleep_range() introduced in 2.6.36 */
+#ifndef HAVE_USLEEP_RANGE
+
+/*
+ * Fallback usleep_range() for kernels that do not provide one.
+ *
+ *	min - minimum delay, in microseconds
+ *	max - upper bound, in microseconds; not used by this fallback
+ *
+ * Delays of at least MAX_UDELAY_MS milliseconds are handed to msleep()
+ * (truncated to whole milliseconds); anything shorter busy-waits in
+ * udelay().  MAX_UDELAY_MS is expressed in milliseconds, so it must be
+ * compared against the delay converted to milliseconds, not against the
+ * raw microsecond value.
+ */
+static inline void
+usleep_range(unsigned long min, unsigned long max)
+{
+	unsigned long min_ms = min / USEC_PER_MSEC;
+
+	if (min_ms >= MAX_UDELAY_MS)
+		msleep(min_ms);
+	else
+		udelay(min);
+}
+
+#endif /* HAVE_USLEEP_RANGE */
+
+#endif /* _SPL_DELAY_COMPAT_H */
$(top_srcdir)/include/sys/buf.h \
$(top_srcdir)/include/sys/byteorder.h \
$(top_srcdir)/include/sys/callb.h \
+ $(top_srcdir)/include/sys/callo.h \
$(top_srcdir)/include/sys/cmn_err.h \
$(top_srcdir)/include/sys/compress.h \
$(top_srcdir)/include/sys/condvar.h \
--- /dev/null
+/*****************************************************************************\
+ * Copyright (C) 2007-2013 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ * For details, see <http://zfsonlinux.org/>.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#ifndef _SPL_CALLO_H
+#define _SPL_CALLO_H
+
+/*
+ * Callout flags:
+ *
+ * CALLOUT_FLAG_ROUNDUP
+ * Round up the expiration time to the next resolution boundary.
+ * If this flag is not specified, the expiration time is rounded down.
+ * CALLOUT_FLAG_ABSOLUTE
+ * Normally, the expiration passed to the timeout API functions is an
+ * expiration interval. If this flag is specified, then it is
+ * interpreted as the expiration time itself.
+ * CALLOUT_FLAG_HRESTIME
+ * Normally, callouts are not affected by changes to system time
+ * (hrestime). This flag is used to create a callout that is affected
+ * by system time. If system time changes, these timers must be
+ * handled in a special way (see callout.c). These are used by condition
+ * variables and LWP timers that need this behavior.
+ * CALLOUT_FLAG_32BIT
+ * Legacy interfaces timeout() and realtime_timeout() pass this flag
+ * to timeout_generic() to indicate that a 32-bit ID should be allocated.
+ *
+ * NOTE(review): cv_timedwait_hires() tests only CALLOUT_FLAG_ROUNDUP and
+ * CALLOUT_FLAG_ABSOLUTE; the remaining flags appear to be provided for
+ * source compatibility only -- confirm against other users.
+ */
+#define CALLOUT_FLAG_ROUNDUP 0x1
+#define CALLOUT_FLAG_ABSOLUTE 0x2
+#define CALLOUT_FLAG_HRESTIME 0x4
+#define CALLOUT_FLAG_32BIT 0x8
+
+#endif /* _SPL_CALLO_H */
#include <linux/module.h>
#include <linux/wait.h>
+#include <linux/delay_compat.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
+#include <sys/callo.h>
/*
* The kcondvar_t struct is protected by mutex taken externally before
extern clock_t __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time);
extern clock_t __cv_timedwait_interruptible(kcondvar_t *cvp, kmutex_t *mp,
clock_t exp_time);
+extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp,
+ hrtime_t tim, hrtime_t res, int flag);
extern void __cv_signal(kcondvar_t *cvp);
extern void __cv_broadcast(kcondvar_t *cvp);
#define MICROSEC 1000000
#define NANOSEC 1000000000
+/*
+ * Millisecond <-> nanosecond conversions.  MSEC2NSEC() casts its argument
+ * to hrtime_t before multiplying; NSEC2MSEC() applies no cast and divides
+ * its argument as given.
+ */
+#define MSEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MILLISEC))
+#define NSEC2MSEC(n) ((n) / (NANOSEC / MILLISEC))
+
/* Already defined in include/linux/time.h */
#undef CLOCK_THREAD_CPUTIME_ID
#undef CLOCK_REALTIME
}
EXPORT_SYMBOL(__cv_timedwait_interruptible);
+/*
+ * Wait on a condition variable until an absolute high-resolution deadline.
+ *
+ *	cvp         - condition variable to wait on
+ *	mp          - mutex protecting 'cvp'; must be held by the caller, it is
+ *	              dropped for the duration of the sleep and re-taken
+ *	              before returning
+ *	expire_time - absolute expiration time in nanoseconds, on the same
+ *	              clock as gethrtime()
+ *	state       - task state handed to prepare_to_wait_exclusive()
+ *
+ * Returns the time left (expire_time - now) in nanoseconds, or -1 if the
+ * deadline has passed.
+ *
+ * NOTE(review): the remaining time is an hrtime_t returned as clock_t;
+ * confirm clock_t is wide enough on every supported build.
+ */
+static clock_t
+__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp,
+	hrtime_t expire_time, int state)
+{
+	DEFINE_WAIT(wait);
+	hrtime_t time_left, now;
+	unsigned long time_left_us;
+	SENTRY;
+
+	ASSERT(cvp);
+	ASSERT(mp);
+	ASSERT(cvp->cv_magic == CV_MAGIC);
+	ASSERT(mutex_owned(mp));
+	atomic_inc(&cvp->cv_refs);
+
+	if (cvp->cv_mutex == NULL)
+		cvp->cv_mutex = mp;
+
+	/* Ensure the same mutex is used by all callers */
+	ASSERT(cvp->cv_mutex == mp);
+
+	now = gethrtime();
+	time_left = expire_time - now;
+	if (time_left <= 0) {
+		atomic_dec(&cvp->cv_refs);
+		SRETURN(-1);
+	}
+
+	/*
+	 * Round the remaining time up to whole microseconds.  Truncating
+	 * here would let the sleep end up to 999 ns before the deadline, in
+	 * which case an expired wait would be reported as time remaining.
+	 */
+	time_left_us = time_left / NSEC_PER_USEC;
+	if ((hrtime_t)time_left_us * NSEC_PER_USEC < time_left)
+		time_left_us++;
+
+	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
+	atomic_inc(&cvp->cv_waiters);
+
+	/*
+	 * Mutex should be dropped after prepare_to_wait() this
+	 * ensures we're linked in to the waiters list and avoids the
+	 * race where 'cvp->cv_waiters > 0' but the list is empty.
+	 */
+	mutex_exit(mp);
+
+	/*
+	 * Allow a 100 us range to give the kernel an opportunity to
+	 * coalesce interrupts.
+	 *
+	 * NOTE(review): whether a wake_up() on cv_event ends this sleep
+	 * early depends on how usleep_range() is implemented; the
+	 * udelay()-based compatibility fallback busy-waits and cannot be
+	 * woken -- confirm this is acceptable for callers.
+	 */
+	usleep_range(time_left_us, time_left_us + 100);
+	mutex_enter(mp);
+
+	/* No more waiters a different mutex could be used */
+	if (atomic_dec_and_test(&cvp->cv_waiters)) {
+		cvp->cv_mutex = NULL;
+		wake_up(&cvp->cv_destroy);
+	}
+
+	finish_wait(&cvp->cv_event, &wait);
+	atomic_dec(&cvp->cv_refs);
+
+	time_left = expire_time - gethrtime();
+	SRETURN(time_left > 0 ? time_left : -1);
+}
+
+/*
+ * Illumos-compatible cv_timedwait_hires() entry point.
+ *
+ *	cvp  - condition variable to wait on
+ *	mp   - mutex associated with 'cvp'
+ *	tim  - expiration in nanoseconds; an interval unless
+ *	       CALLOUT_FLAG_ABSOLUTE is set in 'flag'
+ *	res  - resolution in nanoseconds; values of 1 or less disable
+ *	       alignment
+ *	flag - CALLOUT_FLAG_* bits; only ROUNDUP and ABSOLUTE are examined
+ *
+ * 'tim' is aligned to a multiple of 'res' first (upwards with
+ * CALLOUT_FLAG_ROUNDUP, downwards otherwise) and only then, for a
+ * relative request, offset by the current gethrtime().  The wait itself
+ * is uninterruptible and the result of __cv_timedwait_hires() is
+ * returned unchanged.
+ */
+clock_t
+cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+	hrtime_t res, int flag)
+{
+	hrtime_t deadline = tim;
+
+	if (res > 1) {
+		/* Bias by (res - 1) so the division below rounds up. */
+		hrtime_t bias = (flag & CALLOUT_FLAG_ROUNDUP) ? res - 1 : 0;
+
+		deadline = ((deadline + bias) / res) * res;
+	}
+
+	/* Relative requests are measured from the current time. */
+	if ((flag & CALLOUT_FLAG_ABSOLUTE) == 0)
+		deadline += gethrtime();
+
+	return __cv_timedwait_hires(cvp, mp, deadline, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(cv_timedwait_hires);
+
void
__cv_signal(kcondvar_t *cvp)
{