possible nvml fix

This commit is contained in:
Clement Tsang 2024-01-05 01:08:10 -05:00
parent d7c614b75b
commit 426ea091cb
No known key found for this signature in database
GPG Key ID: B17834EA2182446B
3 changed files with 66 additions and 39 deletions

View File

@ -2,14 +2,17 @@ use std::sync::OnceLock;
use hashbrown::HashMap;
use nvml_wrapper::{
enum_wrappers::device::TemperatureSensor, enums::device::UsedGpuMemory, error::NvmlError, Nvml,
enum_wrappers::device::{PerformanceState, TemperatureSensor},
enums::device::UsedGpuMemory,
error::NvmlError,
Nvml,
};
use crate::{
app::{filter::Filter, layout_manager::UsedWidgets},
data_collection::{
memory::MemHarvest,
temperature::{is_temp_filtered, TempHarvest, TemperatureType},
temperature::{TempHarvest, TemperatureType},
},
};
@ -53,17 +56,42 @@ pub fn get_nvidia_vecs(
));
}
}
if widgets_to_harvest.use_temp && is_temp_filtered(filter, &name) {
if let Ok(temperature) = device.temperature(TemperatureSensor::Gpu) {
let temperature = temp_type.convert_temp_unit(temperature as f32);
temp_vec.push(TempHarvest {
name: name.clone(),
temperature: TemperatureReading::Value(temperature),
});
if widgets_to_harvest.use_temp
&& filter
.as_ref()
.map(|filter| filter.keep_entry(&name))
.unwrap_or(true)
{
// Following https://docs.nvidia.com/gameworks/content/gameworkslibrary/coresdk/nvapi/group__gpupstate.html,
// it seems like performance state P12 and the numerically higher states (P13 through P15) are "minimum idle power consumption".
match device.performance_state() {
Ok(PerformanceState::Fifteen)
| Ok(PerformanceState::Fourteen)
| Ok(PerformanceState::Thirteen)
| Ok(PerformanceState::Twelve) => {
temp_vec.push(TempHarvest {
name,
temperature: TemperatureReading::Off,
});
}
_ => {
if let Ok(temperature) =
device.temperature(TemperatureSensor::Gpu)
{
let temperature =
temp_type.convert_temp_unit(temperature as f32);
temp_vec.push(TempHarvest {
name,
temperature: TemperatureReading::Value(temperature),
});
}
}
}
}
}
if widgets_to_harvest.use_proc {
let mut procs = HashMap::new();
if let Ok(gpu_procs) = device.process_utilization_stats(None) {
@ -73,6 +101,7 @@ pub fn get_nvidia_vecs(
procs.insert(pid, (0, gpu_util));
}
}
if let Ok(compute_procs) = device.running_compute_processes() {
for proc in compute_procs {
let pid = proc.pid;
@ -87,7 +116,8 @@ pub fn get_nvidia_vecs(
}
}
}
// Use the legacy API too but prefer newer API results
// Use the legacy API too, but prefer newer API results
if let Ok(graphics_procs) = device.running_graphics_processes_v2() {
for proc in graphics_procs {
let pid = proc.pid;
@ -102,6 +132,7 @@ pub fn get_nvidia_vecs(
}
}
}
if let Ok(graphics_procs) = device.running_graphics_processes() {
for proc in graphics_procs {
let pid = proc.pid;
@ -116,9 +147,11 @@ pub fn get_nvidia_vecs(
}
}
}
if !procs.is_empty() {
proc_vec.push(procs);
}
// running total for proc %
if let Ok(mem) = device.memory_info() {
total_mem += mem.total;

View File

@ -13,8 +13,6 @@ cfg_if::cfg_if! {
}
}
use crate::app::filter::Filter;
#[derive(Default, Debug, Clone)]
pub enum TemperatureReading {
Value(f32),
@ -56,21 +54,6 @@ impl TemperatureType {
}
}
/// Evaluates `text` (a sensor/device name) against an optional `filter`.
///
/// * With no filter at all, the result is always `true`.
/// * If any pattern in `filter.list` matches `text`, the result is
///   `!filter.is_list_ignored`.
/// * If no pattern matches, the result is `filter.is_list_ignored`.
pub fn is_temp_filtered(filter: &Option<Filter>, text: &str) -> bool {
    match filter {
        None => true,
        Some(active) => {
            // Stops at the first matching pattern, like the early `break` would.
            let matched = active.list.iter().any(|pattern| pattern.is_match(text));
            if matched {
                !active.is_list_ignored
            } else {
                active.is_list_ignored
            }
        }
    }
}
#[cfg(test)]
mod test {
use crate::data_collection::temperature::TemperatureType;

View File

@ -8,7 +8,7 @@ use std::{
use anyhow::Result;
use hashbrown::{HashMap, HashSet};
use super::{is_temp_filtered, TempHarvest, TemperatureReading, TemperatureType};
use super::{TempHarvest, TemperatureReading, TemperatureType};
use crate::{app::filter::Filter, utils::error::BottomError};
const EMPTY_NAME: &str = "Unknown";
@ -184,8 +184,12 @@ fn finalize_name(
/// If neither is found, it will always return true and be treated as "awake".
#[inline]
fn is_device_awake(path: &Path) -> bool {
// XXX: Should we initialize all devices that support runtime_status_path here,
// in a map, and take `power/autosuspend_delay_ms` into account?
// Try checking `power/runtime_status` if it exists! For more information, see
// https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power
// https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-power and
// https://gitlab.com/mission-center-devs/mission-center/-/issues/30#note_1697130114
let runtime_status_path = path.join("power/runtime_status");
if runtime_status_path.exists() {
if let Ok(status) = fs::read_to_string(runtime_status_path) {
@ -337,15 +341,18 @@ fn hwmon_temperatures(temp_type: &TemperatureType, filter: &Option<Filter>) -> H
let name = finalize_name(hwmon_name, sensor_label, &sensor_name, &mut seen_names);
// TODO: It's possible we may want to move the filter check further up to avoid probing hwmon if not needed?
if is_temp_filtered(filter, &name) {
if let Ok(temp_celsius) = parse_temp(&temp_path) {
temperatures.push(TempHarvest {
name,
temperature: TemperatureReading::Value(
temp_type.convert_temp_unit(temp_celsius),
),
});
}
if filter
.as_ref()
.map(|filter| filter.keep_entry(&name))
.unwrap_or(true)
{
let temperature = if let Ok(temp_celsius) = parse_temp(&temp_path) {
TemperatureReading::Value(temp_type.convert_temp_unit(temp_celsius))
} else {
TemperatureReading::Unavailable
};
temperatures.push(TempHarvest { name, temperature });
}
}
}
@ -388,7 +395,11 @@ fn add_thermal_zone_temperatures(
name
};
if is_temp_filtered(filter, &name) {
if filter
.as_ref()
.map(|filter| filter.keep_entry(&name))
.unwrap_or(true)
{
let temp_path = file_path.join("temp");
if let Ok(temp_celsius) = parse_temp(&temp_path) {
let name = counted_name(&mut seen_names, name);