From ac5e2ce4a28ccc9689f8ba43fb7dfc3c819a3366 Mon Sep 17 00:00:00 2001 From: Clement Tsang <34804052+ClementTsang@users.noreply.github.com> Date: Mon, 5 Dec 2022 04:21:37 -0500 Subject: [PATCH] bug: fix incorrect text width calculation (#925) * bug: fix incorrect text calculation * actual fix, add tests * appease clippy * add link to inspiration --- src/components/data_table/sortable.rs | 8 +- src/utils/gen_util.rs | 243 +++++++++++++++--- src/widgets/cpu_graph.rs | 12 +- src/widgets/disk_table.rs | 16 +- src/widgets/process_table/proc_widget_data.rs | 4 +- src/widgets/process_table/sort_table.rs | 6 +- src/widgets/temperature_table.rs | 6 +- 7 files changed, 235 insertions(+), 60 deletions(-) diff --git a/src/components/data_table/sortable.rs b/src/components/data_table/sortable.rs index 06f3a86b..28262358 100644 --- a/src/components/data_table/sortable.rs +++ b/src/components/data_table/sortable.rs @@ -8,7 +8,7 @@ use super::{ ColumnHeader, ColumnWidthBounds, DataTable, DataTableColumn, DataTableProps, DataTableState, DataTableStyling, DataToCell, }; -use crate::utils::gen_util::truncate_text; +use crate::utils::gen_util::truncate_to_text; /// Denotes the sort order. #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -61,7 +61,7 @@ pub trait SortType: private::Sealed { if width == 0 { None } else { - Some(truncate_text(&c.header(), width)) + Some(truncate_to_text(&c.header(), width)) } })) } @@ -100,9 +100,9 @@ impl SortType for Sortable { SortOrder::Ascending => UP_ARROW, SortOrder::Descending => DOWN_ARROW, }; - Some(truncate_text(&concat_string!(c.header(), arrow), width)) + Some(truncate_to_text(&concat_string!(c.header(), arrow), width)) } else { - Some(truncate_text(&c.header(), width)) + Some(truncate_to_text(&c.header(), width)) } }), ) diff --git a/src/utils/gen_util.rs b/src/utils/gen_util.rs index 60311105..02b8bfd4 100644 --- a/src/utils/gen_util.rs +++ b/src/utils/gen_util.rs @@ -2,6 +2,7 @@ use std::cmp::Ordering; use tui::text::{Span, Spans, Text}; use unicode_segmentation::UnicodeSegmentation; +use unicode_width::UnicodeWidthStr; pub const KILO_LIMIT: u64 = 1000; pub const MEGA_LIMIT: u64 = 1_000_000; @@ -96,42 +97,61 @@ pub fn get_decimal_prefix(quantity: u64, unit: &str) -> (f64, String) { } /// Truncates text if it is too long, and adds an ellipsis at the end if needed. -pub fn truncate_text<'a, U: Into>(content: &str, width: U) -> Text<'a> { - let width = width.into(); - let mut graphemes = UnicodeSegmentation::graphemes(content, true); - let grapheme_len = { - let (_, upper) = graphemes.size_hint(); - match upper { - Some(upper) => upper, - None => graphemes.clone().count(), // Don't think this ever fires. - } - }; - - let text = if grapheme_len > width { - let mut text = String::with_capacity(width); - // Truncate with ellipsis. - - // Use a hack to reduce the size to size `width`. Think of it like removing - // The last `grapheme_len - width` graphemes, which reduces the length to - // `width` long. - // - // This is a way to get around the currently experimental`advance_back_by`. - graphemes.nth_back(grapheme_len - width); - - text.push_str(graphemes.as_str()); - text.push('…'); - - text - } else { - content.to_string() - }; - - // TODO: [OPT] maybe add interning here? +pub fn truncate_to_text<'a, U: Into>(content: &str, width: U) -> Text<'a> { Text { - lines: vec![Spans(vec![Span::raw(text)])], + lines: vec![Spans(vec![Span::raw(truncate_str(content, width))])], } } +/// Truncates a string with an ellipsis character. +/// +/// NB: This probably does not handle EVERY case, but I think it handles most cases +/// we will use this function for fine... hopefully. +fn truncate_str>(content: &str, width: U) -> String { + let width = width.into(); + let mut text = String::with_capacity(width); + + if width > 0 { + let mut curr_width = 0; + let mut early_break = false; + + // This tracks the length of the last added string - note this does NOT match the grapheme *width*. + let mut last_added_str_len = 0; + + // Cases to handle: + // - Completes adding the entire string. + // - Adds a character up to the boundary, then fails. + // - Adds a character not up to the boundary, then fails. + // Inspired by https://tomdebruijn.com/posts/rust-string-length-width-calculations/ + for g in UnicodeSegmentation::graphemes(content, true) { + let g_width = if g.contains('\u{200d}') { + 2 + } else { + UnicodeWidthStr::width(g) + }; + + if curr_width + g_width <= width { + curr_width += g_width; + last_added_str_len = g.len(); + text.push_str(g); + } else { + early_break = true; + break; + } + } + + if early_break { + if curr_width == width { + // Remove the last grapheme cluster added. + text.truncate(text.len() - last_added_str_len); + } + text.push('…'); + } + } + + text +} + #[inline] pub const fn sort_partial_fn(is_descending: bool) -> fn(T, T) -> Ordering { if is_descending { @@ -180,7 +200,162 @@ mod test { } #[test] - fn test_truncation() { - // TODO: Add tests for `truncate_text` + fn test_truncate() { + let cpu_header = "CPU(c)▲"; + + assert_eq!( + truncate_str(cpu_header, 8_usize), + cpu_header, + "should match base string as there is enough room" + ); + + assert_eq!( + truncate_str(cpu_header, 7_usize), + cpu_header, + "should match base string as there is enough room" + ); + + assert_eq!(truncate_str(cpu_header, 6_usize), "CPU(c…"); + assert_eq!(truncate_str(cpu_header, 5_usize), "CPU(…"); + assert_eq!(truncate_str(cpu_header, 4_usize), "CPU…"); + assert_eq!(truncate_str(cpu_header, 1_usize), "…"); + assert_eq!(truncate_str(cpu_header, 0_usize), ""); + } + + #[test] + fn test_truncate_cjk() { + let cjk = "施氏食獅史"; + + assert_eq!( + truncate_str(cjk, 11_usize), + cjk, + "should match base string as there is enough room" + ); + + assert_eq!( + truncate_str(cjk, 10_usize), + cjk, + "should match base string as there is enough room" + ); + + assert_eq!(truncate_str(cjk, 9_usize), "施氏食獅…"); + assert_eq!(truncate_str(cjk, 8_usize), "施氏食…"); + assert_eq!(truncate_str(cjk, 2_usize), "…"); + assert_eq!(truncate_str(cjk, 1_usize), "…"); + assert_eq!(truncate_str(cjk, 0_usize), ""); + } + + #[test] + fn test_truncate_mixed() { + let test = "Test (施氏食獅史) Test"; + + assert_eq!( + truncate_str(test, 30_usize), + test, + "should match base string as there is enough room" + ); + + assert_eq!( + truncate_str(test, 22_usize), + test, + "should match base string as there is just enough room" + ); + + assert_eq!( + truncate_str(test, 21_usize), + "Test (施氏食獅史) Te…", + "should truncate the t and replace the s with ellipsis" + ); + + assert_eq!(truncate_str(test, 18_usize), "Test (施氏食獅史)…"); + assert_eq!(truncate_str(test, 17_usize), "Test (施氏食獅史…"); + assert_eq!(truncate_str(test, 16_usize), "Test (施氏食獅…"); + assert_eq!(truncate_str(test, 15_usize), "Test (施氏食獅…"); + assert_eq!(truncate_str(test, 14_usize), "Test (施氏食…"); + assert_eq!(truncate_str(test, 13_usize), "Test (施氏食…"); + assert_eq!(truncate_str(test, 8_usize), "Test (…"); + assert_eq!(truncate_str(test, 7_usize), "Test (…"); + assert_eq!(truncate_str(test, 6_usize), "Test …"); + } + + #[test] + fn test_truncate_flags() { + let flag = "🇨🇦"; + assert_eq!(truncate_str(flag, 3_usize), flag); + assert_eq!(truncate_str(flag, 2_usize), flag); + assert_eq!(truncate_str(flag, 1_usize), "…"); + assert_eq!(truncate_str(flag, 0_usize), ""); + + let flag_text = "oh 🇨🇦"; + assert_eq!(truncate_str(flag_text, 6_usize), flag_text); + assert_eq!(truncate_str(flag_text, 5_usize), flag_text); + assert_eq!(truncate_str(flag_text, 4_usize), "oh …"); + + let flag_text_wrap = "!🇨🇦!"; + assert_eq!(truncate_str(flag_text_wrap, 6_usize), flag_text_wrap); + assert_eq!(truncate_str(flag_text_wrap, 4_usize), flag_text_wrap); + assert_eq!(truncate_str(flag_text_wrap, 3_usize), "!…"); + assert_eq!(truncate_str(flag_text_wrap, 2_usize), "!…"); + assert_eq!(truncate_str(flag_text_wrap, 1_usize), "…"); + + let flag_cjk = "加拿大🇨🇦"; + assert_eq!(truncate_str(flag_cjk, 9_usize), flag_cjk); + assert_eq!(truncate_str(flag_cjk, 8_usize), flag_cjk); + assert_eq!(truncate_str(flag_cjk, 7_usize), "加拿大…"); + assert_eq!(truncate_str(flag_cjk, 6_usize), "加拿…"); + assert_eq!(truncate_str(flag_cjk, 5_usize), "加拿…"); + assert_eq!(truncate_str(flag_cjk, 4_usize), "加…"); + + let flag_mix = "🇨🇦加gaa拿naa大daai🇨🇦"; + assert_eq!(truncate_str(flag_mix, 20_usize), flag_mix); + assert_eq!(truncate_str(flag_mix, 19_usize), "🇨🇦加gaa拿naa大daai…"); + assert_eq!(truncate_str(flag_mix, 18_usize), "🇨🇦加gaa拿naa大daa…"); + assert_eq!(truncate_str(flag_mix, 17_usize), "🇨🇦加gaa拿naa大da…"); + assert_eq!(truncate_str(flag_mix, 15_usize), "🇨🇦加gaa拿naa大…"); + assert_eq!(truncate_str(flag_mix, 14_usize), "🇨🇦加gaa拿naa…"); + assert_eq!(truncate_str(flag_mix, 13_usize), "🇨🇦加gaa拿naa…"); + assert_eq!(truncate_str(flag_mix, 3_usize), "🇨🇦…"); + assert_eq!(truncate_str(flag_mix, 2_usize), "…"); + assert_eq!(truncate_str(flag_mix, 1_usize), "…"); + assert_eq!(truncate_str(flag_mix, 0_usize), ""); + } + + /// This might not be the best way to handle it, but this at least tests that it doesn't crash... + #[test] + fn test_truncate_hindi() { + // cSpell:disable + let test = "हिन्दी"; + assert_eq!(truncate_str(test, 10_usize), test); + assert_eq!(truncate_str(test, 6_usize), "हिन्दी"); + assert_eq!(truncate_str(test, 5_usize), "हिन्दी"); + assert_eq!(truncate_str(test, 4_usize), "हिन्…"); + assert_eq!(truncate_str(test, 3_usize), "हि…"); + assert_eq!(truncate_str(test, 2_usize), "…"); + assert_eq!(truncate_str(test, 1_usize), "…"); + assert_eq!(truncate_str(test, 0_usize), ""); + // cSpell:enable + } + + #[test] + fn test_truncate_emoji() { + let heart = "❤️"; + assert_eq!(truncate_str(heart, 2_usize), heart); + assert_eq!(truncate_str(heart, 1_usize), heart); + assert_eq!(truncate_str(heart, 0_usize), ""); + + let emote = "💎"; + assert_eq!(truncate_str(emote, 2_usize), emote); + assert_eq!(truncate_str(emote, 1_usize), "…"); + assert_eq!(truncate_str(emote, 0_usize), ""); + + let family = "👨‍👨‍👧‍👦"; + assert_eq!(truncate_str(family, 2_usize), family); + assert_eq!(truncate_str(family, 1_usize), "…"); + assert_eq!(truncate_str(family, 0_usize), ""); + + let scientist = "👩‍🔬"; + assert_eq!(truncate_str(scientist, 2_usize), scientist); + assert_eq!(truncate_str(scientist, 1_usize), "…"); + assert_eq!(truncate_str(scientist, 0_usize), ""); } } diff --git a/src/widgets/cpu_graph.rs b/src/widgets/cpu_graph.rs index 7f0c1495..b3d72c78 100644 --- a/src/widgets/cpu_graph.rs +++ b/src/widgets/cpu_graph.rs @@ -11,7 +11,7 @@ use crate::{ DataToCell, }, data_conversion::CpuWidgetData, - utils::gen_util::truncate_text, + utils::gen_util::truncate_to_text, }; #[derive(Default)] @@ -88,7 +88,7 @@ impl DataToCell for CpuWidgetTableData { // it is too small. match &self { CpuWidgetTableData::All => match column { - CpuWidgetColumn::CPU => Some(truncate_text("All", calculated_width)), + CpuWidgetColumn::CPU => Some(truncate_to_text("All", calculated_width)), CpuWidgetColumn::Use => None, }, CpuWidgetTableData::Entry { @@ -100,13 +100,13 @@ impl DataToCell for CpuWidgetTableData { None } else { match data_type { - CpuDataType::Avg => Some(truncate_text("AVG", calculated_width)), + CpuDataType::Avg => Some(truncate_to_text("AVG", calculated_width)), CpuDataType::Cpu(index) => { let index_str = index.to_string(); let text = if calculated_width < CPU_HIDE_BREAKPOINT { - truncate_text(&index_str, calculated_width) + truncate_to_text(&index_str, calculated_width) } else { - truncate_text( + truncate_to_text( &concat_string!("CPU", index_str), calculated_width, ) @@ -117,7 +117,7 @@ impl DataToCell for CpuWidgetTableData { } } } - CpuWidgetColumn::Use => Some(truncate_text( + CpuWidgetColumn::Use => Some(truncate_to_text( &format!("{:.0}%", last_entry.round()), calculated_width, )), diff --git a/src/widgets/disk_table.rs b/src/widgets/disk_table.rs index 1d4cc604..b2a6db74 100644 --- a/src/widgets/disk_table.rs +++ b/src/widgets/disk_table.rs @@ -10,7 +10,7 @@ use crate::{ ColumnHeader, DataTableColumn, DataTableProps, DataTableStyling, DataToCell, SortColumn, SortDataTable, SortDataTableProps, SortOrder, SortsRow, }, - utils::gen_util::{get_decimal_bytes, sort_partial_fn, truncate_text}, + utils::gen_util::{get_decimal_bytes, sort_partial_fn, truncate_to_text}, }; #[derive(Clone, Debug)] @@ -84,13 +84,13 @@ impl ColumnHeader for DiskWidgetColumn { impl DataToCell for DiskWidgetData { fn to_cell<'a>(&'a self, column: &DiskWidgetColumn, calculated_width: u16) -> Option> { let text = match column { - DiskWidgetColumn::Disk => truncate_text(&self.name, calculated_width), - DiskWidgetColumn::Mount => truncate_text(&self.mount_point, calculated_width), - DiskWidgetColumn::Used => truncate_text(&self.usage(), calculated_width), - DiskWidgetColumn::Free => truncate_text(&self.free_space(), calculated_width), - DiskWidgetColumn::Total => truncate_text(&self.total_space(), calculated_width), - DiskWidgetColumn::IoRead => truncate_text(&self.io_read, calculated_width), - DiskWidgetColumn::IoWrite => truncate_text(&self.io_write, calculated_width), + DiskWidgetColumn::Disk => truncate_to_text(&self.name, calculated_width), + DiskWidgetColumn::Mount => truncate_to_text(&self.mount_point, calculated_width), + DiskWidgetColumn::Used => truncate_to_text(&self.usage(), calculated_width), + DiskWidgetColumn::Free => truncate_to_text(&self.free_space(), calculated_width), + DiskWidgetColumn::Total => truncate_to_text(&self.total_space(), calculated_width), + DiskWidgetColumn::IoRead => truncate_to_text(&self.io_read, calculated_width), + DiskWidgetColumn::IoWrite => truncate_to_text(&self.io_write, calculated_width), }; Some(text) diff --git a/src/widgets/process_table/proc_widget_data.rs b/src/widgets/process_table/proc_widget_data.rs index c067f92b..bc61de20 100644 --- a/src/widgets/process_table/proc_widget_data.rs +++ b/src/widgets/process_table/proc_widget_data.rs @@ -12,7 +12,7 @@ use crate::{ canvas::Painter, components::data_table::{DataTableColumn, DataToCell}, data_conversion::{binary_byte_string, dec_bytes_per_second_string, dec_bytes_string}, - utils::gen_util::truncate_text, + utils::gen_util::truncate_to_text, Pid, }; @@ -222,7 +222,7 @@ impl ProcWidgetData { impl DataToCell for ProcWidgetData { fn to_cell<'a>(&'a self, column: &ProcColumn, calculated_width: u16) -> Option> { // TODO: Optimize the string allocations here... - Some(truncate_text( + Some(truncate_to_text( &match column { ProcColumn::CpuPercent => { format!("{:.1}%", self.cpu_usage_percent) diff --git a/src/widgets/process_table/sort_table.rs b/src/widgets/process_table/sort_table.rs index 5ace0ecb..24febb29 100644 --- a/src/widgets/process_table/sort_table.rs +++ b/src/widgets/process_table/sort_table.rs @@ -4,7 +4,7 @@ use tui::text::Text; use crate::{ components::data_table::{ColumnHeader, DataTableColumn, DataToCell}, - utils::gen_util::truncate_text, + utils::gen_util::truncate_to_text, }; pub struct SortTableColumn; @@ -17,7 +17,7 @@ impl ColumnHeader for SortTableColumn { impl DataToCell for &'static str { fn to_cell<'a>(&'a self, _column: &SortTableColumn, calculated_width: u16) -> Option> { - Some(truncate_text(self, calculated_width)) + Some(truncate_to_text(self, calculated_width)) } fn column_widths>(data: &[Self], _columns: &[C]) -> Vec @@ -30,7 +30,7 @@ impl DataToCell for &'static str { impl DataToCell for Cow<'static, str> { fn to_cell<'a>(&'a self, _column: &SortTableColumn, calculated_width: u16) -> Option> { - Some(truncate_text(self, calculated_width)) + Some(truncate_to_text(self, calculated_width)) } fn column_widths>(data: &[Self], _columns: &[C]) -> Vec diff --git a/src/widgets/temperature_table.rs b/src/widgets/temperature_table.rs index b7ee16a2..42103dd2 100644 --- a/src/widgets/temperature_table.rs +++ b/src/widgets/temperature_table.rs @@ -11,7 +11,7 @@ use crate::{ ColumnHeader, DataTableColumn, DataTableProps, DataTableStyling, DataToCell, SortColumn, SortDataTable, SortDataTableProps, SortOrder, SortsRow, }, - utils::gen_util::{sort_partial_fn, truncate_text}, + utils::gen_util::{sort_partial_fn, truncate_to_text}, }; #[derive(Clone, Debug)] @@ -50,8 +50,8 @@ impl TempWidgetData { impl DataToCell for TempWidgetData { fn to_cell<'a>(&'a self, column: &TempWidgetColumn, calculated_width: u16) -> Option> { Some(match column { - TempWidgetColumn::Sensor => truncate_text(&self.sensor, calculated_width), - TempWidgetColumn::Temp => truncate_text(&self.temperature(), calculated_width), + TempWidgetColumn::Sensor => truncate_to_text(&self.sensor, calculated_width), + TempWidgetColumn::Temp => truncate_to_text(&self.temperature(), calculated_width), }) }