indexbus_platform_ops/linux/
preflight.rs1use std::path::{Path, PathBuf};
4
5use crate::errors::{Error, Result};
6
/// Findings collected by a preflight run.
#[derive(Debug, Default)]
pub struct PreflightReport {
    /// Informational `(key, value)` pairs recorded by the individual checks.
    pub observations: Vec<(String, String)>,
    /// Findings that do not influence [`PreflightReport::is_ok`].
    pub warnings: Vec<String>,
    /// Findings that make [`PreflightReport::is_ok`] return `false`.
    pub errors: Vec<String>,
}

impl PreflightReport {
    /// Returns `true` when no errors were recorded; warnings are not considered.
    pub fn is_ok(&self) -> bool {
        let Self { errors, .. } = self;
        errors.is_empty()
    }
}
24
/// Parameters for a preflight run.
#[derive(Debug, Clone)]
pub struct PreflightInput {
    /// Directory whose existence, writability and filesystem type are checked.
    pub shm_dir: PathBuf,
    /// When `true`, the THP, CPU governor and memlock findings are recorded as errors
    /// instead of warnings.
    pub strict: bool,
}
33
34pub fn run(input: &PreflightInput) -> Result<PreflightReport> {
36 let mut report = PreflightReport::default();
37
38 report.observations.push((
39 "shm_dir".to_string(),
40 format!(
41 "{} (exists={})",
42 input.shm_dir.display(),
43 input.shm_dir.exists()
44 ),
45 ));
46
47 if !input.shm_dir.exists() {
49 report.errors.push(format!(
50 "shm_dir does not exist: {}",
51 input.shm_dir.display()
52 ));
53 } else if let Err(e) = write_test_dir(&input.shm_dir) {
54 report.errors.push(format!(
55 "shm_dir is not writable: {} ({e})",
56 input.shm_dir.display()
57 ));
58 }
59
60 if let Err(e) = check_thp(
62 input.strict,
63 &mut report.observations,
64 &mut report.warnings,
65 &mut report.errors,
66 ) {
67 report.warnings.push(format!("THP check failed: {e}"));
68 }
69
70 if let Err(e) = check_cpu_governor(
71 input.strict,
72 &mut report.observations,
73 &mut report.warnings,
74 &mut report.errors,
75 ) {
76 report
77 .warnings
78 .push(format!("CPU governor check failed: {e}"));
79 }
80
81 if let Err(e) = check_memlock(
82 input.strict,
83 &mut report.observations,
84 &mut report.warnings,
85 &mut report.errors,
86 ) {
87 report.warnings.push(format!("memlock check failed: {e}"));
88 }
89
90 if let Err(e) = check_hugetlbfs(
91 &input.shm_dir,
92 &mut report.observations,
93 &mut report.warnings,
94 ) {
95 report.warnings.push(format!("hugetlbfs check failed: {e}"));
96 }
97
98 if let Err(e) = check_irq_policy(input.strict, &mut report.observations, &mut report.warnings) {
99 report
100 .warnings
101 .push(format!("IRQ/isolation check failed: {e}"));
102 }
103
104 Ok(report)
105}
106
107fn write_test_dir(dir: &Path) -> Result<()> {
108 let path = dir.join(".indexbus_preflight_write_test");
109
110 let mut f = std::fs::OpenOptions::new()
111 .create(true)
112 .write(true)
113 .truncate(true)
114 .open(&path)?;
115
116 const HUGETLBFS_MAGIC: u64 = 0x9584_58f6;
119
120 let mut s: libc::statfs = unsafe { std::mem::zeroed() };
121 let c_path = std::ffi::CString::new(dir.to_string_lossy().as_bytes())
122 .map_err(|e| Error::msg(format!("invalid shm_dir path for CString: {e}")))?;
123 let rc = unsafe { libc::statfs(c_path.as_ptr(), &mut s as *mut libc::statfs) };
124
125 if rc == 0 && (s.f_type as u64) == HUGETLBFS_MAGIC {
126 const TWO_MIB: u64 = 2 * 1024 * 1024;
127 f.set_len(TWO_MIB)?;
128 } else {
129 use std::io::Write;
130 f.write_all(b"ok\n")?;
131 }
132
133 drop(f);
134 let _ = std::fs::remove_file(&path);
135 Ok(())
136}
137
138fn read_first_line(path: impl AsRef<Path>) -> Result<String> {
139 let s = std::fs::read_to_string(path.as_ref())?;
140 Ok(s.lines().next().unwrap_or("").trim().to_string())
141}
142
143fn check_thp(
144 strict: bool,
145 observations: &mut Vec<(String, String)>,
146 warnings: &mut Vec<String>,
147 errors: &mut Vec<String>,
148) -> Result<()> {
149 let enabled =
151 read_first_line("/sys/kernel/mm/transparent_hugepage/enabled").unwrap_or_default();
152 let defrag = read_first_line("/sys/kernel/mm/transparent_hugepage/defrag").unwrap_or_default();
153
154 observations.push((
155 "thp".to_string(),
156 format!("enabled='{enabled}' defrag='{defrag}'"),
157 ));
158
159 let enabled_is_bad = enabled.contains("[always]");
161 let defrag_is_bad = defrag.contains("[always]");
162
163 if enabled_is_bad || defrag_is_bad {
164 let msg = format!(
165 "THP looks enabled (enabled='{enabled}', defrag='{defrag}'); for low-latency consider: echo never | sudo tee /sys/kernel/mm/transparent_hugepage/enabled and /defrag"
166 );
167 if strict {
168 errors.push(msg);
169 } else {
170 warnings.push(msg);
171 }
172 }
173
174 Ok(())
175}
176
177fn check_cpu_governor(
178 strict: bool,
179 observations: &mut Vec<(String, String)>,
180 warnings: &mut Vec<String>,
181 errors: &mut Vec<String>,
182) -> Result<()> {
183 let gov_path = "/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor";
185 let gov = std::fs::read_to_string(gov_path).unwrap_or_default();
186 let gov = gov.trim();
187
188 if !gov.is_empty() {
189 observations.push(("cpu_governor".to_string(), gov.to_string()));
190 }
191
192 if gov.is_empty() {
193 warnings.push(
194 "CPU governor not readable (cpufreq may be unavailable in this environment)"
195 .to_string(),
196 );
197 return Ok(());
198 }
199
200 if gov != "performance" {
201 let msg = format!(
202 "CPU governor is '{gov}' (expected 'performance' for lowest jitter); consider: sudo cpupower frequency-set -g performance"
203 );
204 if strict {
205 errors.push(msg);
206 } else {
207 warnings.push(msg);
208 }
209 }
210
211 Ok(())
212}
213
214fn check_memlock(
215 strict: bool,
216 observations: &mut Vec<(String, String)>,
217 warnings: &mut Vec<String>,
218 errors: &mut Vec<String>,
219) -> Result<()> {
220 let (cur, max) = rlimit_memlock()?;
223
224 observations.push(("memlock".to_string(), format!("cur={cur} max={max}")));
225
226 if cur < 1024 * 1024 * 64 {
227 let msg = format!(
228 "memlock ulimit is low (cur={cur}, max={max}); if using mlockall, set LimitMEMLOCK=infinity or `ulimit -l unlimited`"
229 );
230 if strict {
231 errors.push(msg);
232 } else {
233 warnings.push(msg);
234 }
235 }
236
237 Ok(())
238}
239
240fn rlimit_memlock() -> Result<(u64, u64)> {
241 let mut lim = libc::rlimit {
242 rlim_cur: 0,
243 rlim_max: 0,
244 };
245
246 let rc = unsafe { libc::getrlimit(libc::RLIMIT_MEMLOCK, &mut lim as *mut libc::rlimit) };
247 if rc != 0 {
248 return Err(Error::msg(format!(
249 "getrlimit(RLIMIT_MEMLOCK) failed: {}",
250 std::io::Error::last_os_error()
251 )));
252 }
253
254 Ok((lim.rlim_cur, lim.rlim_max))
255}
256
257fn check_hugetlbfs(
258 shm_dir: &Path,
259 observations: &mut Vec<(String, String)>,
260 warnings: &mut Vec<String>,
261) -> Result<()> {
262 let mut s: libc::statfs = unsafe { std::mem::zeroed() };
264 let cstr = std::ffi::CString::new(shm_dir.to_string_lossy().as_bytes())
265 .map_err(|e| Error::msg(format!("invalid shm_dir path for CString: {e}")))?;
266 let rc = unsafe { libc::statfs(cstr.as_ptr(), &mut s as *mut libc::statfs) };
267 if rc != 0 {
268 warnings.push(format!(
269 "statfs({}) failed: {}",
270 shm_dir.display(),
271 std::io::Error::last_os_error()
272 ));
273 return Ok(());
274 }
275
276 let is_hugetlbfs = (s.f_type as u64) == 0x9584_58f6;
278
279 observations.push(("hugetlbfs".to_string(), format!("{}", is_hugetlbfs)));
280 if !is_hugetlbfs {
281 let msg = format!(
282 "shm_dir ({}) does not appear to be hugetlbfs (ok, but for tighter latency you may mount hugetlbfs and point shm_dir there)",
283 shm_dir.display()
284 );
285 warnings.push(msg);
287 }
288
289 Ok(())
290}
291
292fn check_irq_policy(
293 _strict: bool,
294 observations: &mut Vec<(String, String)>,
295 warnings: &mut Vec<String>,
296) -> Result<()> {
297 let cmdline = std::fs::read_to_string("/proc/cmdline").unwrap_or_default();
299 let has_isol = cmdline.contains("isolcpus=");
300 let has_nohz = cmdline.contains("nohz_full=");
301 let has_rcu = cmdline.contains("rcu_nocbs=");
302
303 observations.push((
304 "kernel_isolation_flags".to_string(),
305 format!("isolcpus={has_isol} nohz_full={has_nohz} rcu_nocbs={has_rcu}"),
306 ));
307
308 if !has_isol && !has_nohz && !has_rcu {
309 let msg = "no kernel CPU isolation flags detected (ok if using cpusets/CPUAffinity); ensure IRQ affinity keeps noisy interrupts off your isolated CPU set".to_string();
310 warnings.push(msg);
312 }
313
314 let mut irqbalance_active = None;
316
317 if let Ok(out) = std::process::Command::new("systemctl")
318 .args(["is-active", "irqbalance"])
319 .output()
320 {
321 if out.status.success() {
322 let s = String::from_utf8_lossy(&out.stdout);
323 irqbalance_active = Some(s.trim() == "active");
324 }
325 }
326
327 if irqbalance_active.is_none() {
328 if let Ok(out) = std::process::Command::new("pgrep")
329 .args(["-x", "irqbalance"])
330 .output()
331 {
332 irqbalance_active = Some(out.status.success());
333 }
334 }
335
336 if let Some(active) = irqbalance_active {
337 observations.push(("irqbalance".to_string(), format!("active={active}")));
338 if active {
339 let msg = "irqbalance appears active; for isolated CPU sets ensure IRQ affinity and irqbalance config don't place interrupts on your isolated CPUs".to_string();
340 warnings.push(msg);
341 }
342 } else {
343 observations.push(("irqbalance".to_string(), "unknown".to_string()));
344 warnings.push(
345 "unable to determine irqbalance status (systemctl/pgrep unavailable?)".to_string(),
346 );
347 }
348
349 Ok(())
350}
351
#[cfg(test)]
mod tests {
    use super::*;

    /// A `shm_dir` that does not exist must be reported as an error.
    #[test]
    fn preflight_errors_when_shm_dir_missing() {
        // The pid plus a monotonic timestamp make the path unique, so it should not exist.
        let unique = format!(
            "/tmp/indexbus_missing_shm_{}_{}",
            std::process::id(),
            crate::time::monotonic_now_ns()
        );
        let input = PreflightInput {
            shm_dir: std::path::PathBuf::from(unique),
            strict: true,
        };

        let report = run(&input).unwrap();

        assert!(!report.is_ok());
        let reported_missing = report
            .errors
            .iter()
            .any(|msg| msg.contains("shm_dir does not exist"));
        assert!(reported_missing);
    }
}