From 9535680992da4509b22ba0bc9e3da58a21248e71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Lyson=C4=9Bk?= <olysonek@redhat.com>
Date: Thu, 6 Feb 2020 17:39:22 +0100
Subject: [PATCH] Add accelerator-performance profile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Resolves: rhbz#1795604
Signed-off-by: Ondřej Lysoněk <olysonek@redhat.com>
---
man/tuned-profiles.7 | 6 +++
profiles/accelerator-performance/tuned.conf | 60 +++++++++++++++++++++
2 files changed, 66 insertions(+)
create mode 100644 profiles/accelerator-performance/tuned.conf
diff --git a/man/tuned-profiles.7 b/man/tuned-profiles.7
index 60a5dc9..aaf140b 100644
--- a/man/tuned-profiles.7
+++ b/man/tuned-profiles.7
@@ -77,6 +77,12 @@ mechanisms and enables sysctl settings that improve the throughput performance
of your disk and network IO. CPU governor is set to performance and CPU energy
performance bias is set to performance. Disk readahead values are increased.
+.TP
+.BI "accelerator\-performance"
+This profile contains the same tuning as the throughput\-performance profile.
+Additionally, it locks the CPU to low C states so that the latency is less than
+100us. This improves the performance of certain accelerators, such as GPUs.
+
.TP
.BI "latency\-performance"
Profile for low latency performance tuning. Disables power saving mechanisms.
diff --git a/profiles/accelerator-performance/tuned.conf b/profiles/accelerator-performance/tuned.conf
new file mode 100644
index 0000000..8068b3b
--- /dev/null
+++ b/profiles/accelerator-performance/tuned.conf
@@ -0,0 +1,60 @@
+#
+# tuned configuration
+#
+
+[main]
+summary=Throughput performance based tuning with disabled higher latency STOP states
+
+[cpu]
+governor=performance
+energy_perf_bias=performance
+min_perf_pct=100
+force_latency=99
+
+[disk]
+readahead=>4096
+
+[sysctl]
+# ktune sysctl settings for rhel6 servers, maximizing i/o throughput
+#
+# Minimal preemption granularity for CPU-bound tasks:
+# (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
+kernel.sched_min_granularity_ns = 10000000
+
+# SCHED_OTHER wake-up granularity.
+# (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
+#
+# This option delays the preemption effects of decoupled workloads
+# and reduces their over-scheduling. Synchronous workloads will still
+# have immediate wakeup/sleep latencies.
+kernel.sched_wakeup_granularity_ns = 15000000
+
+# If a workload mostly uses anonymous memory and it hits this limit, the entire
+# working set is buffered for I/O, and any more write buffering would require
+# swapping, so it's time to throttle writes until I/O can catch up. Workloads
+# that mostly use file mappings may be able to use even higher values.
+#
+# The generator of dirty data starts writeback at this percentage (system default
+# is 20%)
+vm.dirty_ratio = 40
+
+# Start background writeback (via writeback threads) at this percentage (system
+# default is 10%)
+vm.dirty_background_ratio = 10
+
+# PID allocation wrap value. When the kernel's next PID value
+# reaches this value, it wraps back to a minimum PID value.
+# PIDs of value pid_max or larger are not allocated.
+#
+# A suggested value for pid_max is 1024 * <# of cpu cores/threads in system>
+# e.g., for a box with 32 cpus the default of 32768 is reasonable; for 64 cpus,
+# 65536; for 4096 cpus, 4194304 (which is the upper limit possible).
+#kernel.pid_max = 65536
+
+# The swappiness parameter controls the tendency of the kernel to move
+# processes out of physical memory and onto the swap disk.
+# 0 tells the kernel to avoid swapping processes out of physical memory
+# for as long as possible
+# 100 tells the kernel to aggressively swap processes out of physical memory
+# and move them to swap cache
+vm.swappiness=10
--
2.24.1