refactor: don't duplicate AMD GPU temperature retrieval (#1682)

* some file/struct renaming

* refactor: don't get AMD gpu temperatures twice
This commit is contained in:
Clement Tsang 2025-02-23 01:21:12 -05:00 committed by GitHub
parent f7d070f944
commit d2177ed022
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 135 additions and 201 deletions

View File

@ -104,6 +104,7 @@ That said, these are more guidelines rather than hardset rules, though the proje
- `--colors` is now `--theme`
- [#1513](https://github.com/ClementTsang/bottom/pull/1513): Table headers are now bold by default.
- [#1515](https://github.com/ClementTsang/bottom/pull/1515): Show the config path in the error message if unable to read/create a config.
- [#1682](https://github.com/ClementTsang/bottom/pull/1682): On Linux, temperature sensor labels now always have their first letter capitalized (e.g. "k10temp: tctl" -> "k10temp: Tctl").
### Bug Fixes

View File

@ -8,6 +8,11 @@ pub mod nvidia;
#[cfg(all(target_os = "linux", feature = "gpu"))]
pub mod amd;
#[cfg(target_os = "linux")]
mod linux {
pub mod utils;
}
#[cfg(feature = "battery")]
pub mod batteries;
pub mod cpu;
@ -370,18 +375,9 @@ impl DataCollector {
}
#[cfg(target_os = "linux")]
if let Some(data) = amd::get_amd_vecs(
&self.filters.temp_filter,
&self.widgets_to_harvest,
self.last_collection_time,
) {
if let Some(mut temp) = data.temperature {
if let Some(sensors) = &mut self.data.temperature_sensors {
sensors.append(&mut temp);
} else {
self.data.temperature_sensors = Some(temp);
}
}
if let Some(data) =
amd::get_amd_vecs(&self.widgets_to_harvest, self.last_collection_time)
{
if let Some(mut mem) = data.memory {
local_gpu.append(&mut mem);
}

View File

@ -1,4 +1,4 @@
mod amdgpu_marketing;
mod amd_gpu_marketing;
use std::{
fs::{self, read_to_string},
@ -10,30 +10,23 @@ use std::{
use hashbrown::{HashMap, HashSet};
use crate::{
app::{filter::Filter, layout_manager::UsedWidgets},
collection::{memory::MemData, temperature::TempSensorData},
};
use crate::{app::layout_manager::UsedWidgets, collection::memory::MemData};
use super::linux::utils::is_device_awake;
// TODO: May be able to clean up some of these, Option<Vec> for example is a bit redundant.
pub struct AMDGPUData {
pub struct AmdGpuData {
pub memory: Option<Vec<(String, MemData)>>,
pub temperature: Option<Vec<TempSensorData>>,
pub procs: Option<(u64, Vec<HashMap<u32, (u64, u32)>>)>,
}
pub struct AMDGPUMemory {
pub struct AmdGpuMemory {
pub total: u64,
pub used: u64,
}
pub struct AMDGPUTemperature {
pub name: String,
pub temperature: f32,
}
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct AMDGPUProc {
pub struct AmdGpuProc {
pub vram_usage: u64,
pub gfx_usage: u64,
pub dma_usage: u64,
@ -46,7 +39,7 @@ pub struct AMDGPUProc {
}
// needs previous state for usage calculation
static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AMDGPUProc>>>> =
static PROC_DATA: LazyLock<Mutex<HashMap<PathBuf, HashMap<u32, AmdGpuProc>>>> =
LazyLock::new(|| Mutex::new(HashMap::new()));
fn get_amd_devs() -> Option<Vec<PathBuf>> {
@ -62,7 +55,18 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {
// test if it has a valid vendor path
let device_path = path.path();
let test_path = device_path.join("vendor");
if !device_path.is_dir() {
continue;
}
// Skip if asleep to avoid wakeups.
if !is_device_awake(&device_path) {
continue;
}
// This will exist for GPUs but not others, this is how we find their kernel
// name.
let test_path = device_path.join("drm");
if test_path.as_path().exists() {
devices.push(device_path);
}
@ -75,7 +79,7 @@ fn get_amd_devs() -> Option<Vec<PathBuf>> {
}
}
fn get_amd_name(device_path: &Path) -> Option<String> {
pub fn get_amd_name(device_path: &Path) -> Option<String> {
// get revision and device ids from sysfs
let rev_path = device_path.join("revision");
let dev_path = device_path.join("device");
@ -107,13 +111,13 @@ fn get_amd_name(device_path: &Path) -> Option<String> {
}
// if it exists in our local database, use that name
amdgpu_marketing::AMDGPU_MARKETING_NAME
amd_gpu_marketing::AMD_GPU_MARKETING_NAME
.iter()
.find(|(did, rid, _)| (did, rid) == (&device_id, &revision_id))
.map(|tuple| tuple.2.to_string())
}
fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> {
fn get_amd_vram(device_path: &Path) -> Option<AmdGpuMemory> {
// get vram memory info from sysfs
let vram_total_path = device_path.join("mem_info_vram_total");
let vram_used_path = device_path.join("mem_info_vram_used");
@ -136,93 +140,12 @@ fn get_amd_vram(device_path: &Path) -> Option<AMDGPUMemory> {
return None;
};
Some(AMDGPUMemory {
Some(AmdGpuMemory {
total: vram_total,
used: vram_used,
})
}
fn get_amd_temp(device_path: &Path) -> Option<Vec<AMDGPUTemperature>> {
let mut temperatures = Vec::new();
// get hardware monitoring sensor info from sysfs
let hwmon_root = device_path.join("hwmon");
let Ok(hwmon_paths) = fs::read_dir(hwmon_root) else {
return None;
};
for hwmon_dir in hwmon_paths {
let Ok(hwmon_dir) = hwmon_dir else {
continue;
};
let hwmon_binding = hwmon_dir.path();
let hwmon_path = hwmon_binding.as_path();
let Ok(hwmon_sensors) = fs::read_dir(hwmon_path) else {
continue;
};
for hwmon_sensor_ent in hwmon_sensors {
let Ok(hwmon_sensor_ent) = hwmon_sensor_ent else {
continue;
};
let hwmon_sensor_path = hwmon_sensor_ent.path();
let hwmon_sensor_binding = hwmon_sensor_ent.file_name();
let Some(hwmon_sensor_name) = hwmon_sensor_binding.to_str() else {
continue;
};
// temperature sensors are temp{number}_{input,label}
if !hwmon_sensor_name.starts_with("temp") || !hwmon_sensor_name.ends_with("_input") {
continue; // filename does not start with temp or ends with input
}
// construct label path
let hwmon_sensor_label_name = hwmon_sensor_name.replace("_input", "_label");
let hwmon_sensor_label_path = hwmon_path.join(hwmon_sensor_label_name);
// read and remove newlines
let Ok(mut hwmon_sensor_data) = read_to_string(hwmon_sensor_path) else {
continue;
};
let Ok(mut hwmon_sensor_label) = read_to_string(hwmon_sensor_label_path) else {
continue;
};
hwmon_sensor_data = hwmon_sensor_data.trim_end().to_string();
hwmon_sensor_label = hwmon_sensor_label.trim_end().to_string();
let Ok(hwmon_sensor) = hwmon_sensor_data.parse::<u64>() else {
continue;
};
// uppercase first character
if hwmon_sensor_label.is_ascii() {
let (hwmon_sensor_label_head, hwmon_sensor_label_tail) =
hwmon_sensor_label.split_at(1);
hwmon_sensor_label =
hwmon_sensor_label_head.to_uppercase() + hwmon_sensor_label_tail;
}
// 1 C is reported as 1000
temperatures.push(AMDGPUTemperature {
name: hwmon_sensor_label,
temperature: (hwmon_sensor as f32) / 1000.0f32,
});
}
}
if temperatures.is_empty() {
None
} else {
Some(temperatures)
}
}
// from amdgpu_top: https://github.com/Umio-Yasuno/amdgpu_top/blob/c961cf6625c4b6d63fda7f03348323048563c584/crates/libamdgpu_top/src/stat/fdinfo/proc_info.rs#L114
fn diff_usage(pre: u64, cur: u64, interval: &Duration) -> u64 {
use std::ops::Mul;
@ -300,7 +223,7 @@ fn get_amdgpu_drm(device_path: &Path) -> Option<Vec<PathBuf>> {
}
}
fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AmdGpuProc>> {
let mut fdinfo = HashMap::new();
let drm_paths = get_amdgpu_drm(device_path)?;
@ -336,7 +259,7 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
continue;
};
let mut usage: AMDGPUProc = Default::default();
let mut usage: AmdGpuProc = Default::default();
let mut observed_ids: HashSet<usize> = HashSet::new();
@ -401,20 +324,17 @@ fn get_amd_fdinfo(device_path: &Path) -> Option<HashMap<u32, AMDGPUProc>> {
Some(fdinfo)
}
pub fn get_amd_vecs(
filter: &Option<Filter>, widgets_to_harvest: &UsedWidgets, prev_time: Instant,
) -> Option<AMDGPUData> {
pub fn get_amd_vecs(widgets_to_harvest: &UsedWidgets, prev_time: Instant) -> Option<AmdGpuData> {
let device_path_list = get_amd_devs()?;
let interval = Instant::now().duration_since(prev_time);
let num_gpu = device_path_list.len();
let mut temp_vec = Vec::with_capacity(num_gpu);
let mut mem_vec = Vec::with_capacity(num_gpu);
let mut proc_vec = Vec::with_capacity(num_gpu);
let mut total_mem = 0;
for device_path in device_path_list {
let device_name =
get_amd_name(&device_path).unwrap_or(amdgpu_marketing::AMDGPU_DEFAULT_NAME.to_string());
let device_name = get_amd_name(&device_path)
.unwrap_or(amd_gpu_marketing::AMDGPU_DEFAULT_NAME.to_string());
if let Some(mem) = get_amd_vram(&device_path) {
if widgets_to_harvest.use_mem {
@ -432,18 +352,6 @@ pub fn get_amd_vecs(
total_mem += mem.total
}
// TODO: Not sure if this overlaps with the existing generic temperature code.
if widgets_to_harvest.use_temp && Filter::optional_should_keep(filter, &device_name) {
if let Some(temperatures) = get_amd_temp(&device_path) {
for info in temperatures {
temp_vec.push(TempSensorData {
name: format!("{} {}", device_name, info.name),
temperature: Some(info.temperature),
});
}
}
}
if widgets_to_harvest.use_proc {
if let Some(procs) = get_amd_fdinfo(&device_path) {
let mut proc_info = PROC_DATA.lock().unwrap();
@ -497,9 +405,8 @@ pub fn get_amd_vecs(
}
}
Some(AMDGPUData {
Some(AmdGpuData {
memory: (!mem_vec.is_empty()).then_some(mem_vec),
temperature: (!temp_vec.is_empty()).then_some(temp_vec),
procs: (!proc_vec.is_empty()).then_some((total_mem, proc_vec)),
})
}

View File

@ -2,7 +2,7 @@
pub const AMDGPU_DEFAULT_NAME: &str = "AMD Radeon Graphics";
pub const AMDGPU_MARKETING_NAME: &[(u32, u32, &str)] = &[
pub const AMD_GPU_MARKETING_NAME: &[(u32, u32, &str)] = &[
(0x6798, 0x00, "AMD Radeon R9 200 / HD 7900 Series"),
(0x6799, 0x00, "AMD Radeon HD 7900 Series"),
(0x679A, 0x00, "AMD Radeon HD 7900 Series"),

View File

@ -0,0 +1,30 @@
use std::{fs, path::Path};
/// Whether the temperature should *actually* be read during enumeration.
/// Will return false if the state is not D0/unknown, or if it does not support
/// `device/power_state`.
///
/// `path` is a path to the device itself (e.g. `/sys/class/hwmon/hwmon1/device`).
#[inline]
pub fn is_device_awake(device: &Path) -> bool {
// Whether the temperature should *actually* be read during enumeration.
// Set to false if the device is in ACPI D3cold.
// Documented at https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power_state
let power_state = device.join("power_state");
if power_state.exists() {
if let Ok(state) = fs::read_to_string(power_state) {
let state = state.trim();
// The zenpower3 kernel module (incorrectly?) reports "unknown", causing this
// check to fail and temperatures to appear as zero instead of
// having the file not exist.
//
// Their self-hosted git instance has disabled sign up, so this bug cant be
// reported either.
state == "D0" || state == "unknown"
} else {
true
}
} else {
true
}
}

View File

@ -1,7 +1,7 @@
//! Data collection for temperature metrics.
//!
//! For Linux and macOS, this is handled by Heim.
//! For Windows, this is handled by sysinfo.
//! For Linux, this is handled by custom code.
//! For everything else, this is handled by sysinfo.
cfg_if::cfg_if! {
if #[cfg(target_os = "linux")] {

View File

@ -9,12 +9,15 @@ use anyhow::Result;
use hashbrown::{HashMap, HashSet};
use super::TempSensorData;
use crate::app::filter::Filter;
use crate::{app::filter::Filter, collection::linux::utils::is_device_awake};
#[cfg(feature = "gpu")]
use crate::collection::amd::get_amd_name;
const EMPTY_NAME: &str = "Unknown";
/// Returned results from grabbing hwmon/coretemp temperature sensor
/// values/names.
/// values or names.
struct HwmonResults {
temperatures: Vec<TempSensorData>,
num_hwmon: usize,
@ -115,40 +118,42 @@ fn counted_name(seen_names: &mut HashMap<String, u32>, name: String) -> String {
}
}
#[inline]
fn uppercase_first_letter(s: &mut str) {
if let Some(r) = s.get_mut(0..1) {
r.make_ascii_uppercase();
}
}
fn finalize_name(
hwmon_name: Option<String>, sensor_label: Option<String>,
fallback_sensor_name: &Option<String>, seen_names: &mut HashMap<String, u32>,
) -> String {
let candidate_name = match (hwmon_name, sensor_label) {
(Some(name), Some(label)) => match (name.is_empty(), label.is_empty()) {
(Some(name), Some(mut label)) => match (name.is_empty(), label.is_empty()) {
(false, false) => {
uppercase_first_letter(&mut label);
format!("{name}: {label}")
}
(true, false) => match fallback_sensor_name {
Some(fallback) if !fallback.is_empty() => {
if label.is_empty() {
fallback.to_owned()
} else {
(true, false) => {
uppercase_first_letter(&mut label);
// We assume label must not be empty.
match fallback_sensor_name {
Some(fallback) if !fallback.is_empty() => {
format!("{fallback}: {label}")
}
_ => label,
}
_ => {
if label.is_empty() {
EMPTY_NAME.to_string()
} else {
label
}
}
},
}
(false, true) => name.to_owned(),
(true, true) => EMPTY_NAME.to_string(),
},
(None, Some(label)) => match fallback_sensor_name {
(None, Some(mut label)) => match fallback_sensor_name {
Some(fallback) if !fallback.is_empty() => {
if label.is_empty() {
fallback.to_owned()
} else {
uppercase_first_letter(&mut label);
format!("{fallback}: {label}")
}
}
@ -156,6 +161,7 @@ fn finalize_name(
if label.is_empty() {
EMPTY_NAME.to_string()
} else {
uppercase_first_letter(&mut label);
label
}
}
@ -176,34 +182,6 @@ fn finalize_name(
counted_name(seen_names, candidate_name)
}
/// Whether the temperature should *actually* be read during enumeration.
/// Will return false if the state is not D0/unknown, or if it does not support
/// `device/power_state`.
#[inline]
fn is_device_awake(path: &Path) -> bool {
// Whether the temperature should *actually* be read during enumeration.
// Set to false if the device is in ACPI D3cold.
// Documented at https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power_state
let device = path.join("device");
let power_state = device.join("power_state");
if power_state.exists() {
if let Ok(state) = fs::read_to_string(power_state) {
let state = state.trim();
// The zenpower3 kernel module (incorrectly?) reports "unknown", causing this
// check to fail and temperatures to appear as zero instead of
// having the file not exist.
//
// Their self-hosted git instance has disabled sign up, so this bug cant be
// reported either.
state == "D0" || state == "unknown"
} else {
true
}
} else {
true
}
}
/// Get temperature sensors from the linux sysfs interface `/sys/class/hwmon`
/// and `/sys/devices/platform/coretemp.*`. It returns all found temperature
/// sensors, and the number of checked hwmon directories (not coretemp
@ -243,8 +221,9 @@ fn hwmon_temperatures(filter: &Option<Filter>) -> HwmonResults {
// also allow easy cancellation/timeouts.
for file_path in dirs {
let sensor_name = read_to_string_lossy(file_path.join("name"));
let device = file_path.join("device");
if !is_device_awake(&file_path) {
if !is_device_awake(&device) {
let name = finalize_name(None, None, &sensor_name, &mut seen_names);
temperatures.push(TempSensorData {
name,
@ -284,23 +263,44 @@ fn hwmon_temperatures(filter: &Option<Filter>) -> HwmonResults {
if drm.exists() {
// This should never actually be empty. If it is though, we'll fall back to
// the sensor name later on.
let mut gpu = None;
if let Ok(cards) = drm.read_dir() {
for card in cards.flatten() {
if let Some(name) = card.file_name().to_str() {
if name.starts_with("card") {
gpu = Some(humanize_name(
name.trim().to_string(),
sensor_name.as_ref(),
));
break;
}
}
#[cfg(feature = "gpu")]
{
if let Some(amd_gpu_name) = get_amd_name(&device) {
Some(amd_gpu_name)
} else if let Ok(cards) = drm.read_dir() {
cards.flatten().find_map(|card| {
card.file_name().to_str().and_then(|name| {
name.starts_with("card").then(|| {
humanize_name(
name.trim().to_string(),
sensor_name.as_ref(),
)
})
})
})
} else {
None
}
}
gpu
#[cfg(not(feature = "gpu"))]
{
if let Ok(cards) = drm.read_dir() {
cards.flatten().find_map(|card| {
card.file_name().to_str().and_then(|name| {
name.starts_with("card").then(|| {
humanize_name(
name.trim().to_string(),
sensor_name.as_ref(),
)
})
})
})
} else {
None
}
}
} else {
// This little mess is to account for stuff like k10temp. This is needed
// because the `device` symlink points to `nvme*`
@ -419,7 +419,7 @@ mod tests {
&Some("test".to_string()),
&mut seen_names
),
"hwmon: sensor"
"hwmon: Sensor"
);
assert_eq!(
@ -439,7 +439,7 @@ mod tests {
&Some("test".to_string()),
&mut seen_names
),
"test: sensor"
"test: Sensor"
);
assert_eq!(
@ -449,7 +449,7 @@ mod tests {
&Some("test".to_string()),
&mut seen_names
),
"hwmon: sensor (1)"
"hwmon: Sensor (1)"
);
assert_eq!(