refactor: don't duplicate AMD GPU temperature retrieval (#1682)

* some file/struct renaming

* refactor: don't get AMD gpu temperatures twice
This commit is contained in:
Clement Tsang 2025-02-23 01:21:12 -05:00 committed by GitHub
parent f7d070f944
commit d2177ed022
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 135 additions and 201 deletions

View File

@ -104,6 +104,7 @@ That said, these are more guidelines rather than hardset rules, though the proje
- `--colors` is now `--theme` - `--colors` is now `--theme`
- [#1513](https://github.com/ClementTsang/bottom/pull/1513): Table headers are now bold by default. - [#1513](https://github.com/ClementTsang/bottom/pull/1513): Table headers are now bold by default.
- [#1515](https://github.com/ClementTsang/bottom/pull/1515): Show the config path in the error message if unable to read/create a config. - [#1515](https://github.com/ClementTsang/bottom/pull/1515): Show the config path in the error message if unable to read/create a config.
- [#1682](https://github.com/ClementTsang/bottom/pull/1682): On Linux, temperature sensor labels now always have their first letter capitalized (e.g. "k10temp: tctl" -> "k10temp: Tctl").
### Bug Fixes ### Bug Fixes

View File

@ -8,6 +8,11 @@ pub mod nvidia;
#[cfg(all(target_os = "linux", feature = "gpu"))] #[cfg(all(target_os = "linux", feature = "gpu"))]
pub mod amd; pub mod amd;
#[cfg(target_os = "linux")]
mod linux {
pub mod utils;
}
#[cfg(feature = "battery")] #[cfg(feature = "battery")]
pub mod batteries; pub mod batteries;
pub mod cpu; pub mod cpu;
@ -370,18 +375,9 @@ impl DataCollector {
} }
#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
if let Some(data) = amd::get_amd_vecs( if let Some(data) =
&self.filters.temp_filter, amd::get_amd_vecs(&self.widgets_to_harvest, self.last_collection_time)
&self.widgets_to_harvest, {
self.last_collection_time,
) {
if let Some(mut temp) = data.temperature {
if let Some(sensors) = &mut self.data.temperature_sensors {
sensors.append(&mut temp);
} else {
self.data.temperature_sensors = Some(temp);
}
}
if let Some(mut mem) = data.memory { if let Some(mut mem) = data.memory {
local_gpu.append(&mut mem); local_gpu.append(&mut mem);
} }

View File

@ -1,4 +1,4 @@
mod amdgpu_marketing; mod amd_gpu_marketing;
use std::{ use std::{
fs::{self, read_to_string}, fs::{self, read_to_string},
@ -10,30 +10,23 @@ use std::{
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
use crate::{ use crate::{app::layout_manager::UsedWidgets, collection::memory::MemData};
app::{filter::Filter, layout_manager::UsedWidgets},
collection::{memory::MemData, temperature::TempSensorData}, use super::linux::utils::is_device_awake;
};
// TODO: May be able to clean up some of these, Option<Vec> for example is a bit redundant. // TODO: May be able to clean up some of these, Option<Vec> for example is a bit redundant.
pub struct AMDGPUData { pub struct AmdGpuData {
pub memory: Option<Vec<(String, MemData)>>, pub memory: Option<Vec<(String, MemData)>>,
pub temperature: Option<Vec<TempSensorData>>,
pub procs: Option<(u64, Vec<HashMap<u32, (u64, u32)>>)>, pub procs: Option<(u64, Vec<HashMap<u32, (u64, u32)>>)>,
} }
pub struct AMDGPUMemory { pub struct AmdGpuMemory {
pub total: u64, pub total: u64,
pub used: u64, pub used: u64,
} }
pub struct AMDGPUTemperature {
pub name: String,
pub temperature: f32,
}
#[derive(Debug, Clone, Default, Eq, PartialEq)] #[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct AMDGPUProc { pub struct AmdGpuProc {
pub vram_usage: u64, pub vram_usage: u64,
pub gfx_usage: u64, pub gfx_usage: u64,
pub dma_usage: u64, pub dma_usage: u64,
@ -46,7 +39,7 @@ pub struct AMDGPUProc {
} }
// needs previous state for usage calculation // needs previous state for usage calculation
static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AMDGPUProc>>>> = static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AmdGpuProc>>>> =
LazyLock::new(|| Mutex::new(HashMap::new())); LazyLock::new(|| Mutex::new(HashMap::new()));
fn get_amd_devs() -> Option<Vec<PathBuf>> { fn get_amd_devs() -> Option<Vec<PathBuf>> {
@ -62,7 +55,18 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {
// test if it has a valid vendor path // test if it has a valid vendor path
let device_path = path.path(); let device_path = path.path();
let test_path = device_path.join("vendor"); if !device_path.is_dir() {
continue;
}
// Skip if asleep to avoid wakeups.
if !is_device_awake(&device_path) {
continue;
}
// This will exist for GPUs but not others, this is how we find their kernel
// name.
let test_path = device_path.join("drm");
if test_path.as_path().exists() { if test_path.as_path().exists() {
devices.push(device_path); devices.push(device_path);
} }
@ -75,7 +79,7 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {
} }
} }
fn get_amd_name(device_path: &Path) -> Option<String> { pub fn get_amd_name(device_path: &Path) -> Option<String> {
// get revision and device ids from sysfs // get revision and device ids from sysfs
let rev_path = device_path.join("revision"); let rev_path = device_path.join("revision");
let dev_path = device_path.join("device"); let dev_path = device_path.join("device");
@ -107,13 +111,13 @@ fn get_amd_name(device_path: &Path) -> Option<String> {
} }
// if it exists in our local database, use that name // if it exists in our local database, use that name
amdgpu_marketing::AMDGPU_MARKETING_NAME amd_gpu_marketing::AMD_GPU_MARKETING_NAME
.iter() .iter()
.find(|(did, rid, _)| (did, rid) == (&device_id, &revision_id)) .find(|(did, rid, _)| (did, rid) == (&device_id, &revision_id))
.map(|tuple| tuple.2.to_string()) .map(|tuple| tuple.2.to_string())
} }
fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> { fn get_amd_vram(device_path: &Path) -> Option<AmdGpuMemory> {
// get vram memory info from sysfs // get vram memory info from sysfs
let vram_total_path = device_path.join("mem_info_vram_total"); let vram_total_path = device_path.join("mem_info_vram_total");
let vram_used_path = device_path.join("mem_info_vram_used"); let vram_used_path = device_path.join("mem_info_vram_used");
@ -136,93 +140,12 @@ fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> {
return None; return None;
}; };
Some(AMDGPUMemory { Some(AmdGpuMemory {
total: vram_total, total: vram_total,
used: vram_used, used: vram_used,
}) })
} }
fn get_amd_temp(device_path: &Path) -> Option<Vec<AMDGPUTemperature>> {
let mut temperatures = Vec::new();
// get hardware monitoring sensor info from sysfs
let hwmon_root = device_path.join("hwmon");
let Ok(hwmon_paths) = fs::read_dir(hwmon_root) else {
return None;
};
for hwmon_dir in hwmon_paths {
let Ok(hwmon_dir) = hwmon_dir else {
continue;
};
let hwmon_binding = hwmon_dir.path();
let hwmon_path = hwmon_binding.as_path();
let Ok(hwmon_sensors) = fs::read_dir(hwmon_path) else {
continue;
};
for hwmon_sensor_ent in hwmon_sensors {
let Ok(hwmon_sensor_ent) = hwmon_sensor_ent else {
continue;
};
let hwmon_sensor_path = hwmon_sensor_ent.path();
let hwmon_sensor_binding = hwmon_sensor_ent.file_name();
let Some(hwmon_sensor_name) = hwmon_sensor_binding.to_str() else {
continue;
};
// temperature sensors are temp{number}_{input,label}
if !hwmon_sensor_name.starts_with("temp") || !hwmon_sensor_name.ends_with("_input") {
continue; // filename does not start with temp or ends with input
}
// construct label path
let hwmon_sensor_label_name = hwmon_sensor_name.replace("_input", "_label");
let hwmon_sensor_label_path = hwmon_path.join(hwmon_sensor_label_name);
// read and remove newlines
let Ok(mut hwmon_sensor_data) = read_to_string(hwmon_sensor_path) else {
continue;
};
let Ok(mut hwmon_sensor_label) = read_to_string(hwmon_sensor_label_path) else {
continue;
};
hwmon_sensor_data = hwmon_sensor_data.trim_end().to_string();
hwmon_sensor_label = hwmon_sensor_label.trim_end().to_string();
let Ok(hwmon_sensor) = hwmon_sensor_data.parse::<u64>() else {
continue;
};
// uppercase first character
if hwmon_sensor_label.is_ascii() {
let (hwmon_sensor_label_head, hwmon_sensor_label_tail) =
hwmon_sensor_label.split_at(1);
hwmon_sensor_label =
hwmon_sensor_label_head.to_uppercase() + hwmon_sensor_label_tail;
}
// 1 C is reported as 1000
temperatures.push(AMDGPUTemperature {
name: hwmon_sensor_label,
temperature: (hwmon_sensor as f32) / 1000.0f32,
});
}
}
if temperatures.is_empty() {
None
} else {
Some(temperatures)
}
}
// from amdgpu_top: https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L114 // from amdgpu_top: https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L114
fn diff_usage(pre: u64, cur: u64, interval: &Duration) -> u64 { fn diff_usage(pre: u64, cur: u64, interval: &Duration) -> u64 {
use std::ops::Mul; use std::ops::Mul;
@ -300,7 +223,7 @@ fn get_amdgpu_drm(device_path: &Path) -> Option<Vec<PathBuf>> {
} }
} }
fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> { fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AmdGpuProc>> {
let mut fdinfo = HashMap::new(); let mut fdinfo = HashMap::new();
let drm_paths = get_amdgpu_drm(device_path)?; let drm_paths = get_amdgpu_drm(device_path)?;
@ -336,7 +259,7 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
continue; continue;
}; };
let mut usage: AMDGPUProc = Default::default(); let mut usage: AmdGpuProc = Default::default();
let mut observed_ids: HashSet<usize> = HashSet::new(); let mut observed_ids: HashSet<usize> = HashSet::new();
@ -401,20 +324,17 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
Some(fdinfo) Some(fdinfo)
} }
pub fn get_amd_vecs( pub fn get_amd_vecs(widgets_to_harvest: &UsedWidgets, prev_time: Instant) -> Option<AmdGpuData> {
filter: &Option<Filter>, widgets_to_harvest: &UsedWidgets, prev_time: Instant,
) -> Option<AMDGPUData> {
let device_path_list = get_amd_devs()?; let device_path_list = get_amd_devs()?;
let interval = Instant::now().duration_since(prev_time); let interval = Instant::now().duration_since(prev_time);
let num_gpu = device_path_list.len(); let num_gpu = device_path_list.len();
let mut temp_vec = Vec::with_capacity(num_gpu);
let mut mem_vec = Vec::with_capacity(num_gpu); let mut mem_vec = Vec::with_capacity(num_gpu);
let mut proc_vec = Vec::with_capacity(num_gpu); let mut proc_vec = Vec::with_capacity(num_gpu);
let mut total_mem = 0; let mut total_mem = 0;
for device_path in device_path_list { for device_path in device_path_list {
let device_name = let device_name = get_amd_name(&device_path)
get_amd_name(&device_path).unwrap_or(amdgpu_marketing::AMDGPU_DEFAULT_NAME.to_string()); .unwrap_or(amd_gpu_marketing::AMDGPU_DEFAULT_NAME.to_string());
if let Some(mem) = get_amd_vram(&device_path) { if let Some(mem) = get_amd_vram(&device_path) {
if widgets_to_harvest.use_mem { if widgets_to_harvest.use_mem {
@ -432,18 +352,6 @@ pub fn get_amd_vecs(
total_mem += mem.total total_mem += mem.total
} }
// TODO: Not sure if this overlaps with the existing generic temperature code.
if widgets_to_harvest.use_temp && Filter::optional_should_keep(filter, &device_name) {
if let Some(temperatures) = get_amd_temp(&device_path) {
for info in temperatures {
temp_vec.push(TempSensorData {
name: format!("{} {}", device_name, info.name),
temperature: Some(info.temperature),
});
}
}
}
if widgets_to_harvest.use_proc { if widgets_to_harvest.use_proc {
if let Some(procs) = get_amd_fdinfo(&device_path) { if let Some(procs) = get_amd_fdinfo(&device_path) {
let mut proc_info = PROC_DATA.lock().unwrap(); let mut proc_info = PROC_DATA.lock().unwrap();
@ -497,9 +405,8 @@ pub fn get_amd_vecs(
} }
} }
Some(AMDGPUData { Some(AmdGpuData {
memory: (!mem_vec.is_empty()).then_some(mem_vec), memory: (!mem_vec.is_empty()).then_some(mem_vec),
temperature: (!temp_vec.is_empty()).then_some(temp_vec),
procs: (!proc_vec.is_empty()).then_some((total_mem, proc_vec)), procs: (!proc_vec.is_empty()).then_some((total_mem, proc_vec)),
}) })
} }

View File

@ -2,7 +2,7 @@
pub const AMDGPU_DEFAULT_NAME: &str = "AMD Radeon Graphics"; pub const AMDGPU_DEFAULT_NAME: &str = "AMD Radeon Graphics";
pub const AMDGPU_MARKETING_NAME: &[(u32, u32, &str)] = &[ pub const AMD_GPU_MARKETING_NAME: &[(u32, u32, &str)] = &[
(0x6798, 0x00, "AMD Radeon R9 200 / HD 7900 Series"), (0x6798, 0x00, "AMD Radeon R9 200 / HD 7900 Series"),
(0x6799, 0x00, "AMD Radeon HD 7900 Series"), (0x6799, 0x00, "AMD Radeon HD 7900 Series"),
(0x679A, 0x00, "AMD Radeon HD 7900 Series"), (0x679A, 0x00, "AMD Radeon HD 7900 Series"),

View File

@ -0,0 +1,30 @@
use std::{fs, path::Path};
/// Whether the temperature should *actually* be read during enumeration.
/// Will return false if the state is not D0/unknown, or if it does not support
/// `device/power_state`.
///
/// `path` is a path to the device itself (e.g. `/sys/class/hwmon/hwmon1/device`).
#[inline]
pub fn is_device_awake(device: &Path) -> bool {
// Whether the temperature should *actually* be read during enumeration.
// Set to false if the device is in ACPI D3cold.
// Documented at https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power_state
let power_state = device.join("power_state");
if power_state.exists() {
if let Ok(state) = fs::read_to_string(power_state) {
let state = state.trim();
// The zenpower3 kernel module (incorrectly?) reports "unknown", causing this
// check to fail and temperatures to appear as zero instead of
// having the file not exist.
//
// Their self-hosted git instance has disabled sign up, so this bug cant be
// reported either.
state == "D0" || state == "unknown"
} else {
true
}
} else {
true
}
}

View File

@ -1,7 +1,7 @@
//! Data collection for temperature metrics. //! Data collection for temperature metrics.
//! //!
//! For Linux and macOS, this is handled by Heim. //! For Linux, this is handled by custom code.
//! For Windows, this is handled by sysinfo. //! For everything else, this is handled by sysinfo.
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] { if #[cfg(target_os = "linux")] {

View File

@ -9,12 +9,15 @@ use anyhow::Result;
use hashbrown::{HashMap, HashSet}; use hashbrown::{HashMap, HashSet};
use super::TempSensorData; use super::TempSensorData;
use crate::app::filter::Filter; use crate::{app::filter::Filter, collection::linux::utils::is_device_awake};
#[cfg(feature = "gpu")]
use crate::collection::amd::get_amd_name;
const EMPTY_NAME: &str = "Unknown"; const EMPTY_NAME: &str = "Unknown";
/// Returned results from grabbing hwmon/coretemp temperature sensor /// Returned results from grabbing hwmon/coretemp temperature sensor
/// values/names. /// values or names.
struct HwmonResults { struct HwmonResults {
temperatures: Vec<TempSensorData>, temperatures: Vec<TempSensorData>,
num_hwmon: usize, num_hwmon: usize,
@ -115,40 +118,42 @@ fn counted_name(seen_names: &mut HashMap<String, u32>, name: String) -> String {
} }
} }
#[inline] fn uppercase_first_letter(s: &mut str) {
if let Some(r) = s.get_mut(0..1) {
r.make_ascii_uppercase();
}
}
fn finalize_name( fn finalize_name(
hwmon_name: Option<String>, sensor_label: Option<String>, hwmon_name: Option<String>, sensor_label: Option<String>,
fallback_sensor_name: &Option<String>, seen_names: &mut HashMap<String, u32>, fallback_sensor_name: &Option<String>, seen_names: &mut HashMap<String, u32>,
) -> String { ) -> String {
let candidate_name = match (hwmon_name, sensor_label) { let candidate_name = match (hwmon_name, sensor_label) {
(Some(name), Some(label)) => match (name.is_empty(), label.is_empty()) { (Some(name), Some(mut label)) => match (name.is_empty(), label.is_empty()) {
(false, false) => { (false, false) => {
uppercase_first_letter(&mut label);
format!("{name}: {label}") format!("{name}: {label}")
} }
(true, false) => match fallback_sensor_name { (true, false) => {
Some(fallback) if !fallback.is_empty() => { uppercase_first_letter(&mut label);
if label.is_empty() {
fallback.to_owned() // We assume label must not be empty.
} else { match fallback_sensor_name {
Some(fallback) if !fallback.is_empty() => {
format!("{fallback}: {label}") format!("{fallback}: {label}")
} }
_ => label,
} }
_ => { }
if label.is_empty() {
EMPTY_NAME.to_string()
} else {
label
}
}
},
(false, true) => name.to_owned(), (false, true) => name.to_owned(),
(true, true) => EMPTY_NAME.to_string(), (true, true) => EMPTY_NAME.to_string(),
}, },
(None, Some(label)) => match fallback_sensor_name { (None, Some(mut label)) => match fallback_sensor_name {
Some(fallback) if !fallback.is_empty() => { Some(fallback) if !fallback.is_empty() => {
if label.is_empty() { if label.is_empty() {
fallback.to_owned() fallback.to_owned()
} else { } else {
uppercase_first_letter(&mut label);
format!("{fallback}: {label}") format!("{fallback}: {label}")
} }
} }
@ -156,6 +161,7 @@ fn finalize_name(
if label.is_empty() { if label.is_empty() {
EMPTY_NAME.to_string() EMPTY_NAME.to_string()
} else { } else {
uppercase_first_letter(&mut label);
label label
} }
} }
@ -176,34 +182,6 @@ fn finalize_name(
counted_name(seen_names, candidate_name) counted_name(seen_names, candidate_name)
} }
/// Whether the temperature should *actually* be read during enumeration.
/// Will return false if the state is not D0/unknown, or if it does not support
/// `device/power_state`.
#[inline]
fn is_device_awake(path: &Path) -> bool {
// Whether the temperature should *actually* be read during enumeration.
// Set to false if the device is in ACPI D3cold.
// Documented at https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power_state
let device = path.join("device");
let power_state = device.join("power_state");
if power_state.exists() {
if let Ok(state) = fs::read_to_string(power_state) {
let state = state.trim();
// The zenpower3 kernel module (incorrectly?) reports "unknown", causing this
// check to fail and temperatures to appear as zero instead of
// having the file not exist.
//
// Their self-hosted git instance has disabled sign up, so this bug cant be
// reported either.
state == "D0" || state == "unknown"
} else {
true
}
} else {
true
}
}
/// Get temperature sensors from the linux sysfs interface `/sys/class/hwmon` /// Get temperature sensors from the linux sysfs interface `/sys/class/hwmon`
/// and `/sys/devices/platform/coretemp.*`. It returns all found temperature /// and `/sys/devices/platform/coretemp.*`. It returns all found temperature
/// sensors, and the number of checked hwmon directories (not coretemp /// sensors, and the number of checked hwmon directories (not coretemp
@ -243,8 +221,9 @@ fn hwmon_temperatures(filter: &Option<Filter>) -> HwmonResults {
// also allow easy cancellation/timeouts. // also allow easy cancellation/timeouts.
for file_path in dirs { for file_path in dirs {
let sensor_name = read_to_string_lossy(file_path.join("name")); let sensor_name = read_to_string_lossy(file_path.join("name"));
let device = file_path.join("device");
if !is_device_awake(&file_path) { if !is_device_awake(&device) {
let name = finalize_name(None, None, &sensor_name, &mut seen_names); let name = finalize_name(None, None, &sensor_name, &mut seen_names);
temperatures.push(TempSensorData { temperatures.push(TempSensorData {
name, name,
@ -284,23 +263,44 @@ fn hwmon_temperatures(filter: &Option<Filter>) -> HwmonResults {
if drm.exists() { if drm.exists() {
// This should never actually be empty. If it is though, we'll fall back to // This should never actually be empty. If it is though, we'll fall back to
// the sensor name later on. // the sensor name later on.
let mut gpu = None;
if let Ok(cards) = drm.read_dir() { #[cfg(feature = "gpu")]
for card in cards.flatten() { {
if let Some(name) = card.file_name().to_str() { if let Some(amd_gpu_name) = get_amd_name(&device) {
if name.starts_with("card") { Some(amd_gpu_name)
gpu = Some(humanize_name( } else if let Ok(cards) = drm.read_dir() {
name.trim().to_string(), cards.flatten().find_map(|card| {
sensor_name.as_ref(), card.file_name().to_str().and_then(|name| {
)); name.starts_with("card").then(|| {
break; humanize_name(
} name.trim().to_string(),
} sensor_name.as_ref(),
)
})
})
})
} else {
None
} }
} }
gpu #[cfg(not(feature = "gpu"))]
{
if let Ok(cards) = drm.read_dir() {
cards.flatten().find_map(|card| {
card.file_name().to_str().and_then(|name| {
name.starts_with("card").then(|| {
humanize_name(
name.trim().to_string(),
sensor_name.as_ref(),
)
})
})
})
} else {
None
}
}
} else { } else {
// This little mess is to account for stuff like k10temp. This is needed // This little mess is to account for stuff like k10temp. This is needed
// because the `device` symlink points to `nvme*` // because the `device` symlink points to `nvme*`
@ -419,7 +419,7 @@ mod tests {
&Some("test".to_string()), &Some("test".to_string()),
&mut seen_names &mut seen_names
), ),
"hwmon: sensor" "hwmon: Sensor"
); );
assert_eq!( assert_eq!(
@ -439,7 +439,7 @@ mod tests {
&Some("test".to_string()), &Some("test".to_string()),
&mut seen_names &mut seen_names
), ),
"test: sensor" "test: Sensor"
); );
assert_eq!( assert_eq!(
@ -449,7 +449,7 @@ mod tests {
&Some("test".to_string()), &Some("test".to_string()),
&mut seen_names &mut seen_names
), ),
"hwmon: sensor (1)" "hwmon: Sensor (1)"
); );
assert_eq!( assert_eq!(