Peter Marheine submitted this change.

View Change

Approvals: build bot (Jenkins): Verified Brian Norris: Looks good to me, but someone else must approve Anastasia Klimchuk: Looks good to me, approved
udelay: only use OS time for delays, except on DOS

As proposed on the mailing list ("RFC: remove the calibrated delay
loop" [1]), this removes the calibrated delay loop and uses OS-based
timing functions for all delays because the calibrated delay loop can
delay for shorter times than intended.

When sleeping this now uses nanosleep() unconditionally, since usleep
was only used on DOS (where DJGPP lacks nanosleep). When busy-looping,
it uses clock_gettime() with CLOCK_MONOTONIC or CLOCK_REALTIME depending
on availability, and gettimeofday() otherwise.

The calibrated delay loop is retained for DOS only, because timer
resolution on DJGPP is only about 50 milliseconds. Since typical delays
in flashrom are around 10 microseconds, using OS timing there would
regress performance by around 500x. The old implementation is reused
with some branches removed based on the knowledge that timer resolution
will not be better than about 50 milliseconds.

Tested by reading and writing flash on several Intel and AMD systems:

* Lenovo P920 (Intel C620, read/verify only)
* "nissa" chromebook (Intel Alder Lake-N)
* "zork" chromebook (AMD Zen+)

[1]: https://mail.coreboot.org/hyperkitty/list/flashrom@flashrom.org/thread/HFH6UHPAKA4JDL4YKPSQPO72KXSSRGME/

Signed-off-by: Peter Marheine <pmarheine@chromium.org>
Change-Id: I7ac5450d194a475143698d65d64d8bcd2fd25e3f
Reviewed-on: https://review.coreboot.org/c/flashrom/+/81545
Reviewed-by: Anastasia Klimchuk <aklm@chromium.org>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
---
M Makefile
M include/programmer.h
M libflashrom.c
M meson.build
M tests/meson.build
M tests/tests.c
M tests/tests.h
A tests/udelay.c
M udelay.c
A udelay_dos.c
10 files changed, 268 insertions(+), 178 deletions(-)

diff --git a/Makefile b/Makefile
index 1c0904b..9b94e22 100644
--- a/Makefile
+++ b/Makefile
@@ -388,9 +388,14 @@
###############################################################################
# Library code.

-LIB_OBJS = libflashrom.o layout.o erasure_layout.o flashrom.o udelay.o parallel.o programmer.o programmer_table.o \
+LIB_OBJS = libflashrom.o layout.o erasure_layout.o flashrom.o parallel.o programmer.o programmer_table.o \
helpers.o helpers_fileio.o ich_descriptors.o fmap.o platform/endian_$(ENDIAN).o platform/memaccess.o

+ifeq ($(TARGET_OS), DOS)
+ LIB_OBJS += udelay_dos.o
+else
+ LIB_OBJS += udelay.o
+endif

###############################################################################
# Frontend related stuff.
diff --git a/include/programmer.h b/include/programmer.h
index 15df0d4..939c8de 100644
--- a/include/programmer.h
+++ b/include/programmer.h
@@ -219,8 +219,6 @@
#endif

/* udelay.c */
-void myusec_delay(unsigned int usecs);
-void myusec_calibrate_delay(void);
void internal_sleep(unsigned int usecs);
void default_delay(unsigned int usecs);

diff --git a/libflashrom.c b/libflashrom.c
index 4a59e2a..7c885cf 100644
--- a/libflashrom.c
+++ b/libflashrom.c
@@ -36,7 +36,6 @@
{
if (perform_selfcheck && selfcheck())
return 1;
- myusec_calibrate_delay();
return 0;
}

diff --git a/meson.build b/meson.build
index 7c46de4..c4d7717 100644
--- a/meson.build
+++ b/meson.build
@@ -100,13 +100,19 @@
'sst49lfxxxc.c',
'sst_fwhub.c',
'stm50.c',
- 'udelay.c',
'w29ee011.c',
'w39.c',
'writeprotect.c',
'writeprotect_ranges.c',
)

+# Select an appropriate delay implementation for the target OS
+delay_src = files('udelay.c')
+if target_machine.system() == 'dos'
+ delay_src = files('udelay_dos.c')
+endif
+srcs += delay_src
+
# check for required symbols
if cc.has_function('clock_gettime')
add_project_arguments('-DHAVE_CLOCK_GETTIME=1', language : 'c')
diff --git a/tests/meson.build b/tests/meson.build
index fd05166..e8de4b7 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -25,6 +25,7 @@
'selfcheck.c',
'io_real.c',
'erase_func_algo.c',
+ 'udelay.c',
)

if not programmer.get('dummy').get('active')
diff --git a/tests/tests.c b/tests/tests.c
index c38a66c..aa8353a 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -504,6 +504,11 @@
};
ret |= cmocka_run_group_tests_name("chip.c tests", chip_tests, NULL, NULL);

+ const struct CMUnitTest delay_tests[] = {
+ cmocka_unit_test(udelay_test_short),
+ };
+ ret |= cmocka_run_group_tests_name("udelay.c tests", delay_tests, NULL, NULL);
+
size_t n_erase_tests;
struct CMUnitTest *erase_func_algo_tests = get_erase_func_algo_tests(&n_erase_tests);
ret |= _cmocka_run_group_tests("erase_func_algo.c tests", erase_func_algo_tests, n_erase_tests, NULL, NULL);
diff --git a/tests/tests.h b/tests/tests.h
index af5f968..c22da12 100644
--- a/tests/tests.h
+++ b/tests/tests.h
@@ -110,4 +110,7 @@
void erase_function_algo_test_success(void **state);
void write_function_algo_test_success(void **state);

+/* udelay.c */
+void udelay_test_short(void **state);
+
#endif /* TESTS_H */
diff --git a/tests/udelay.c b/tests/udelay.c
new file mode 100644
index 0000000..277a15d
--- /dev/null
+++ b/tests/udelay.c
@@ -0,0 +1,51 @@
+/*
+ * This file is part of the flashrom project.
+ *
+ * Copyright 2024 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <include/test.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "programmer.h"
+#include "tests.h"
+
+static uint64_t now_us(void) {
+#if HAVE_CLOCK_GETTIME == 1
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (ts.tv_nsec / 1000) + (ts.tv_sec * 1000000);
+#else
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return tv.tv_usec + (tv.tv_sec * 1000000);
+#endif
+}
+
+/*
+ * A short delay should delay for at least as long as requested,
+ * and more than 10x as long would be worrisome.
+ *
+ * This test could fail spuriously on a heavily-loaded system, or if we need
+ * to use gettimeofday() and a time change (such as DST) occurs during the
+ * test.
+ */
+void udelay_test_short(void **state) {
+ uint64_t start = now_us();
+ default_delay(100);
+ uint64_t elapsed = now_us() - start;
+
+ assert_in_range(elapsed, 100, 1000);
+}
diff --git a/udelay.c b/udelay.c
index cb7a9db..453cb34 100644
--- a/udelay.c
+++ b/udelay.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2000 Silicon Integrated System Corporation
* Copyright (C) 2009,2010 Carl-Daniel Hailfinger
+ * Copyright (C) 2024 Google LLC
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -27,20 +28,25 @@
#include "flash.h"
#include "programmer.h"

-static bool use_clock_gettime = false;
-
#if HAVE_CLOCK_GETTIME == 1

-#ifdef _POSIX_MONOTONIC_CLOCK
-static clockid_t clock_id = CLOCK_MONOTONIC;
-#else
-static clockid_t clock_id = CLOCK_REALTIME;
-#endif
-
static void clock_usec_delay(int usecs)
{
+ static clockid_t clock_id =
+#ifdef _POSIX_MONOTONIC_CLOCK
+ CLOCK_MONOTONIC;
+#else
+ CLOCK_REALTIME;
+#endif
+
struct timespec now;
- clock_gettime(clock_id, &now);
+ if (clock_gettime(clock_id, &now)) {
+ /* Fall back to realtime clock if monotonic doesn't work */
+ if (clock_id != CLOCK_REALTIME && errno == EINVAL) {
+ clock_id = CLOCK_REALTIME;
+ clock_gettime(clock_id, &now);
+ }
+ }

const long end_nsec = now.tv_nsec + usecs * 1000L;
const struct timespec end = {
@@ -52,183 +58,35 @@
} while (now.tv_sec < end.tv_sec || (now.tv_sec == end.tv_sec && now.tv_nsec < end.tv_nsec));
}

-static int clock_check_res(void)
-{
- struct timespec res;
- if (!clock_getres(clock_id, &res)) {
- if (res.tv_sec == 0 && res.tv_nsec <= 100) {
- msg_pinfo("Using clock_gettime for delay loops (clk_id: %d, resolution: %ldns).\n",
- (int)clock_id, res.tv_nsec);
- use_clock_gettime = true;
- return 1;
- }
- } else if (clock_id != CLOCK_REALTIME && errno == EINVAL) {
- /* Try again with CLOCK_REALTIME. */
- clock_id = CLOCK_REALTIME;
- return clock_check_res();
- }
- return 0;
-}
#else

-static inline void clock_usec_delay(int usecs) {}
-static inline int clock_check_res(void) { return 0; }
-
-#endif /* HAVE_CLOCK_GETTIME == 1 */
-
-/* loops per microsecond */
-static unsigned long micro = 1;
-
-__attribute__ ((noinline)) void myusec_delay(unsigned int usecs)
-{
- unsigned long i;
- for (i = 0; i < usecs * micro; i++) {
- /* Make sure the compiler doesn't optimize the loop away. */
- __asm__ volatile ("" : : "rm" (i) );
- }
-}
-
-static unsigned long measure_os_delay_resolution(void)
-{
- unsigned long timeusec;
+static void clock_usec_delay(unsigned int usecs) {
struct timeval start, end;
- unsigned long counter = 0;
+ unsigned long elapsed = 0;

gettimeofday(&start, NULL);
- timeusec = 0;

- while (!timeusec && (++counter < 1000000000)) {
+ while (elapsed < usecs) {
gettimeofday(&end, NULL);
- timeusec = 1000000 * (end.tv_sec - start.tv_sec) +
+ elapsed = 1000000 * (end.tv_sec - start.tv_sec) +
(end.tv_usec - start.tv_usec);
/* Protect against time going forward too much. */
if ((end.tv_sec > start.tv_sec) &&
((end.tv_sec - start.tv_sec) >= LONG_MAX / 1000000 - 1))
- timeusec = 0;
+ elapsed = 0;
/* Protect against time going backwards during leap seconds. */
- if ((end.tv_sec < start.tv_sec) || (timeusec > LONG_MAX))
- timeusec = 0;
+ if ((end.tv_sec < start.tv_sec) || (elapsed > LONG_MAX))
+ elapsed = 0;
}
- return timeusec;
}

-static unsigned long measure_delay(unsigned int usecs)
-{
- unsigned long timeusec;
- struct timeval start, end;
-
- gettimeofday(&start, NULL);
- myusec_delay(usecs);
- gettimeofday(&end, NULL);
- timeusec = 1000000 * (end.tv_sec - start.tv_sec) +
- (end.tv_usec - start.tv_usec);
- /* Protect against time going forward too much. */
- if ((end.tv_sec > start.tv_sec) &&
- ((end.tv_sec - start.tv_sec) >= LONG_MAX / 1000000 - 1))
- timeusec = LONG_MAX;
- /* Protect against time going backwards during leap seconds. */
- if ((end.tv_sec < start.tv_sec) || (timeusec > LONG_MAX))
- timeusec = 1;
-
- return timeusec;
-}
-
-void myusec_calibrate_delay(void)
-{
- if (clock_check_res())
- return;
-
- unsigned long count = 1000;
- unsigned long timeusec, resolution;
- int i, tries = 0;
-
- msg_pinfo("Calibrating delay loop... ");
- resolution = measure_os_delay_resolution();
- if (resolution) {
- msg_pdbg("OS timer resolution is %lu usecs, ", resolution);
- } else {
- msg_pinfo("OS timer resolution is unusable. ");
- }
-
-recalibrate:
- count = 1000;
- while (1) {
- timeusec = measure_delay(count);
- if (timeusec > 1000000 / 4)
- break;
- if (count >= ULONG_MAX / 2) {
- msg_pinfo("timer loop overflow, reduced precision. ");
- break;
- }
- count *= 2;
- }
- tries ++;
-
- /* Avoid division by zero, but in that case the loop is shot anyway. */
- if (!timeusec)
- timeusec = 1;
-
- /* Compute rounded up number of loops per microsecond. */
- micro = (count * micro) / timeusec + 1;
- msg_pdbg("%luM loops per second, ", micro);
-
- /* Did we try to recalibrate less than 5 times? */
- if (tries < 5) {
- /* Recheck our timing to make sure we weren't just hitting
- * a scheduler delay or something similar.
- */
- for (i = 0; i < 4; i++) {
- if (resolution && (resolution < 10)) {
- timeusec = measure_delay(100);
- } else if (resolution &&
- (resolution < ULONG_MAX / 200)) {
- timeusec = measure_delay(resolution * 10) *
- 100 / (resolution * 10);
- } else {
- /* This workaround should be active for broken
- * OS and maybe libpayload. The criterion
- * here is horrible or non-measurable OS timer
- * resolution which will result in
- * measure_delay(100)=0 whereas a longer delay
- * (1000 ms) may be sufficient
- * to get a nonzero time measurement.
- */
- timeusec = measure_delay(1000000) / 10000;
- }
- if (timeusec < 90) {
- msg_pdbg("delay more than 10%% too short (got "
- "%lu%% of expected delay), "
- "recalculating... ", timeusec);
- goto recalibrate;
- }
- }
- } else {
- msg_perr("delay loop is unreliable, trying to continue ");
- }
-
- /* We're interested in the actual precision. */
- timeusec = measure_delay(10);
- msg_pdbg("10 myus = %ld us, ", timeusec);
- timeusec = measure_delay(100);
- msg_pdbg("100 myus = %ld us, ", timeusec);
- timeusec = measure_delay(1000);
- msg_pdbg("1000 myus = %ld us, ", timeusec);
- timeusec = measure_delay(10000);
- msg_pdbg("10000 myus = %ld us, ", timeusec);
- timeusec = measure_delay(resolution * 4);
- msg_pdbg("%ld myus = %ld us, ", resolution * 4, timeusec);
-
- msg_pinfo("OK.\n");
-}
+#endif /* HAVE_CLOCK_GETTIME == 1 */

/* Not very precise sleep. */
void internal_sleep(unsigned int usecs)
{
#if IS_WINDOWS
Sleep((usecs + 999) / 1000);
-#elif defined(__DJGPP__)
- sleep(usecs / 1000000);
- usleep(usecs % 1000000);
#else
nanosleep(&(struct timespec){usecs / 1000000, (usecs * 1000) % 1000000000UL}, NULL);
#endif
@@ -240,21 +98,14 @@
/* If the delay is >0.1 s, use internal_sleep because timing does not need to be so precise. */
if (usecs > 100000) {
internal_sleep(usecs);
- } else if (use_clock_gettime) {
- clock_usec_delay(usecs);
} else {
- myusec_delay(usecs);
+ clock_usec_delay(usecs);
}
}

#else
#include <libpayload.h>

-void myusec_calibrate_delay(void)
-{
- get_cpu_speed();
-}
-
void default_delay(unsigned int usecs)
{
udelay(usecs);
diff --git a/udelay_dos.c b/udelay_dos.c
new file mode 100644
index 0000000..4a29f73
--- /dev/null
+++ b/udelay_dos.c
@@ -0,0 +1,171 @@
+/*
+ * This file is part of the flashrom project.
+ *
+ * Copyright (C) 2000 Silicon Integrated System Corporation
+ * Copyright (C) 2009,2010 Carl-Daniel Hailfinger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "flash.h"
+#include "programmer.h"
+
+/* loops per microsecond */
+static unsigned long micro = 1;
+
+__attribute__ ((noinline)) void myusec_delay(unsigned int usecs)
+{
+ unsigned long i;
+ for (i = 0; i < usecs * micro; i++) {
+ /* Make sure the compiler doesn't optimize the loop away. */
+ __asm__ volatile ("" : : "rm" (i) );
+ }
+}
+
+static unsigned long measure_os_delay_resolution(void)
+{
+ unsigned long timeusec;
+ struct timeval start, end;
+ unsigned long counter = 0;
+
+ gettimeofday(&start, NULL);
+ timeusec = 0;
+
+ while (!timeusec && (++counter < 1000000000)) {
+ gettimeofday(&end, NULL);
+ timeusec = 1000000 * (end.tv_sec - start.tv_sec) +
+ (end.tv_usec - start.tv_usec);
+ /* Protect against time going forward too much. */
+ if ((end.tv_sec > start.tv_sec) &&
+ ((end.tv_sec - start.tv_sec) >= LONG_MAX / 1000000 - 1))
+ timeusec = 0;
+ /* Protect against time going backwards during leap seconds. */
+ if ((end.tv_sec < start.tv_sec) || (timeusec > LONG_MAX))
+ timeusec = 0;
+ }
+ return timeusec;
+}
+
+static unsigned long measure_delay(unsigned int usecs)
+{
+ unsigned long timeusec;
+ struct timeval start, end;
+
+ gettimeofday(&start, NULL);
+ myusec_delay(usecs);
+ gettimeofday(&end, NULL);
+ timeusec = 1000000 * (end.tv_sec - start.tv_sec) +
+ (end.tv_usec - start.tv_usec);
+ /* Protect against time going forward too much. */
+ if ((end.tv_sec > start.tv_sec) &&
+ ((end.tv_sec - start.tv_sec) >= LONG_MAX / 1000000 - 1))
+ timeusec = LONG_MAX;
+ /* Protect against time going backwards during leap seconds. */
+ if ((end.tv_sec < start.tv_sec) || (timeusec > LONG_MAX))
+ timeusec = 1;
+
+ return timeusec;
+}
+
+static void myusec_calibrate_delay(void)
+{
+ unsigned long count = 1000;
+ unsigned long timeusec, resolution;
+ int i, tries = 0;
+
+ msg_pinfo("Calibrating delay loop... ");
+ /* Timing resolution on DJGPP is about 50ms, but measure it precisely. */
+ resolution = measure_os_delay_resolution();
+ if (resolution) {
+ msg_pdbg("OS timer resolution is %lu usecs, ", resolution);
+ } else {
+ msg_pinfo("OS timer resolution is unusable. ");
+ }
+
+recalibrate:
+ count = 1000;
+ while (1) {
+ timeusec = measure_delay(count);
+ if (timeusec > 1000000 / 4)
+ break;
+ if (count >= ULONG_MAX / 2) {
+ msg_pinfo("timer loop overflow, reduced precision. ");
+ break;
+ }
+ count *= 2;
+ }
+ tries ++;
+
+ /* Avoid division by zero, but in that case the loop is shot anyway. */
+ if (!timeusec)
+ timeusec = 1;
+
+ /* Compute rounded up number of loops per microsecond. */
+ micro = (count * micro) / timeusec + 1;
+ msg_pdbg("%luM loops per second, ", micro);
+
+ /* Did we try to recalibrate less than 5 times? */
+ if (tries < 5) {
+ /* Recheck our timing to make sure we weren't just hitting
+ * a scheduler delay or something similar.
+ */
+ for (i = 0; i < 4; i++) {
+ timeusec = measure_delay(resolution * 10) *
+ 100 / (resolution * 10);
+
+ if (timeusec < 90) {
+ msg_pdbg("delay more than 10%% too short (got "
+ "%lu%% of expected delay), "
+ "recalculating... ", timeusec);
+ goto recalibrate;
+ }
+ }
+ } else {
+ msg_perr("delay loop is unreliable, trying to continue ");
+ }
+
+ /* We're interested in the actual precision. */
+ timeusec = measure_delay(resolution * 4);
+ msg_pdbg("%ld myus = %ld us, ", resolution * 4, timeusec);
+
+ msg_pinfo("OK.\n");
+}
+
+/* Not very precise sleep. */
+void internal_sleep(unsigned int usecs)
+{
+ sleep(usecs / 1000000);
+ usleep(usecs % 1000000);
+}
+
+/* Precise delay. */
+void default_delay(unsigned int usecs)
+{
+ static bool calibrated = false;
+
+ /* If the delay is >0.1 s, use internal_sleep because timing does not need to be so precise. */
+ if (usecs > 100000) {
+ internal_sleep(usecs);
+ } else {
+ if (!calibrated) {
+ myusec_calibrate_delay();
+ calibrated = true;
+ }
+ myusec_delay(usecs);
+ }
+}

To view, visit change 81545. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: flashrom
Gerrit-Branch: main
Gerrit-Change-Id: I7ac5450d194a475143698d65d64d8bcd2fd25e3f
Gerrit-Change-Number: 81545
Gerrit-PatchSet: 10
Gerrit-Owner: Peter Marheine <pmarheine@chromium.org>
Gerrit-Reviewer: Anastasia Klimchuk <aklm@chromium.org>
Gerrit-Reviewer: Brian Norris <briannorris@chromium.org>
Gerrit-Reviewer: Peter Marheine <pmarheine@chromium.org>
Gerrit-Reviewer: Thomas Heijligen <src@posteo.de>
Gerrit-Reviewer: build bot (Jenkins) <no-reply@coreboot.org>
Gerrit-MessageType: merged