diff --git a/Cargo.lock b/Cargo.lock index 29b3902d..92c6dfca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -189,6 +189,7 @@ dependencies = [ "thiserror", "time", "toml_edit", + "unicode-ellipsis", "unicode-segmentation", "unicode-width", "windows 0.56.0", @@ -1476,6 +1477,16 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "unicode-ellipsis" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c05e96c2778c2a4df66c381f3d68d143a7c863e6b7573c3654e116d4788c943" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/Cargo.toml b/Cargo.toml index d3979b2d..7b9a1a40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,6 +96,7 @@ thiserror = "1.0.59" time = { version = "0.3.36", features = ["formatting", "macros"] } toml_edit = { version = "0.22.12", features = ["serde"] } tui = { version = "0.26.2", package = "ratatui" } +unicode-ellipsis = "0.1.4" unicode-segmentation = "1.11.0" unicode-width = "0.1.12" diff --git a/src/app/states.rs b/src/app/states.rs index ee69d90c..0088e53d 100644 --- a/src/app/states.rs +++ b/src/app/states.rs @@ -2,12 +2,12 @@ use std::{ops::Range, time::Instant}; use hashbrown::HashMap; use indexmap::IndexMap; +use unicode_ellipsis::grapheme_width; use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete, UnicodeSegmentation}; use crate::{ app::{layout_manager::BottomWidgetType, query::*}, constants, - utils::strings::str_width, widgets::{ BatteryWidgetState, CpuWidgetState, DiskTableWidget, MemWidgetState, NetWidgetState, ProcWidgetState, TempWidgetState, @@ -250,7 +250,7 @@ impl AppSearchState { for (index, grapheme) in UnicodeSegmentation::grapheme_indices(self.current_search_query.as_str(), true) { - let width = str_width(grapheme); + let width = grapheme_width(grapheme); let end = curr_offset + width; self.size_mappings.insert(index, curr_offset..end); diff --git a/src/utils/general.rs b/src/utils/general.rs index 6f3d14b4..789c73f5 100644 --- a/src/utils/general.rs +++ b/src/utils/general.rs @@ -88,4 +88,22 @@ mod test { let val: usize = 100; assert_eq!(val.clamp_lower(50), 100); } + + #[test] + fn test_sort_partial_fn() { + let mut x = vec![9, 5, 20, 15, 10, 5]; + let mut y = vec![1.0, 15.0, -1.0, -100.0, -100.1, 16.15, -100.0]; + + x.sort_by(|a, b| sort_partial_fn(false)(a, b)); + assert_eq!(x, vec![5, 5, 9, 10, 15, 20]); + + x.sort_by(|a, b| sort_partial_fn(true)(a, b)); + assert_eq!(x, vec![20, 15, 10, 9, 5, 5]); + + y.sort_by(|a, b| sort_partial_fn(false)(a, b)); + assert_eq!(y, vec![-100.1, -100.0, -100.0, -1.0, 1.0, 15.0, 16.15]); + + y.sort_by(|a, b| sort_partial_fn(true)(a, b)); + assert_eq!(y, vec![16.15, 15.0, 1.0, -1.0, -100.0, -100.0, -100.1]); + } } diff --git a/src/utils/strings.rs b/src/utils/strings.rs index b7c54188..1d970c9f 100644 --- a/src/utils/strings.rs +++ b/src/utils/strings.rs @@ -1,169 +1,12 @@ -use std::num::NonZeroUsize; - use tui::text::Text; -use unicode_segmentation::UnicodeSegmentation; -use unicode_width::UnicodeWidthStr; +use unicode_ellipsis::truncate_str; /// Truncates text if it is too long, and adds an ellipsis at the end if needed. /// /// TODO: Maybe cache results from this function for some cases? e.g. columns #[inline] pub fn truncate_to_text<'a, U: Into>(content: &str, width: U) -> Text<'a> { - Text::raw(truncate_str(content, width)) -} - -/// Returns the width of a str `s`. This takes into account some things like -/// joiners when calculating width. -#[inline] -pub fn str_width(s: &str) -> usize { - UnicodeSegmentation::graphemes(s, true) - .map(|g| { - if g.contains('\u{200d}') { - 2 - } else { - UnicodeWidthStr::width(g) - } - }) - .sum() -} - -/// Returns the "width" of grapheme `g`. This takes into account some things like -/// joiners when calculating width. -/// -/// Note that while you *can* pass in an entire string, the point is to check -/// individual graphemes (e.g. `"a"`, `"💎"`, `"大"`, `"🇨🇦"`). -#[inline] -fn grapheme_width(g: &str) -> usize { - if g.contains('\u{200d}') { - 2 - } else { - UnicodeWidthStr::width(g) - } -} - -enum AsciiIterationResult { - Complete, - Remaining(usize), -} - -/// Greedily add characters to the output until a non-ASCII grapheme is found, or -/// the output is `width` long. -#[inline] -fn greedy_ascii_add(content: &str, width: NonZeroUsize) -> (String, AsciiIterationResult) { - let width: usize = width.into(); - - const SIZE_OF_ELLIPSIS: usize = 3; - let mut text = Vec::with_capacity(width - 1 + SIZE_OF_ELLIPSIS); - - let s = content.as_bytes(); - - let mut current_index = 0; - - while current_index < width - 1 { - let current_byte = s[current_index]; - if current_byte.is_ascii() { - text.push(current_byte); - current_index += 1; - } else { - debug_assert!(text.is_ascii()); - - let current_index = AsciiIterationResult::Remaining(current_index); - - // SAFETY: This conversion is safe to do unchecked, we only push ASCII characters up to - // this point. - let current_text = unsafe { String::from_utf8_unchecked(text) }; - - return (current_text, current_index); - } - } - - // If we made it all the way through, then we probably hit the width limit. - debug_assert!(text.is_ascii()); - - let current_index = if s[current_index].is_ascii() { - let mut ellipsis = [0; SIZE_OF_ELLIPSIS]; - '…'.encode_utf8(&mut ellipsis); - text.extend_from_slice(&ellipsis); - AsciiIterationResult::Complete - } else { - AsciiIterationResult::Remaining(current_index) - }; - - // SAFETY: This conversion is safe to do unchecked, we only push ASCII characters up to - // this point. - let current_text = unsafe { String::from_utf8_unchecked(text) }; - - (current_text, current_index) -} - -/// Truncates a string to the specified width with an ellipsis character. -/// -/// NB: This probably does not handle EVERY case, but I think it handles most cases -/// we will use this function for fine... hopefully. -/// -/// TODO: Maybe fuzz this function? -/// TODO: Maybe release this as a lib? Testing against Fish's script [here](https://github.com/ridiculousfish/widecharwidth) might be useful. -#[inline] -fn truncate_str>(content: &str, width: U) -> String { - let width = width.into(); - - if content.len() <= width { - // If the entire string fits in the width, then we just - // need to copy the entire string over. - - content.to_owned() - } else if let Some(nz_width) = NonZeroUsize::new(width) { - // What we are essentially doing is optimizing for the case that - // most, if not all of the string is ASCII. As such: - // - Step through each byte until (width - 1) is hit or we find a non-ascii - // byte. - // - If the byte is ascii, then add it. - // - // If we didn't get a complete truncated string, then continue on treating the rest as graphemes. - - let (mut text, res) = greedy_ascii_add(content, nz_width); - match res { - AsciiIterationResult::Complete => text, - AsciiIterationResult::Remaining(current_index) => { - let mut curr_width = text.len(); - let mut early_break = false; - - // This tracks the length of the last added string - note this does NOT match the grapheme *width*. - // Since the previous characters are always ASCII, this is always initialized as 1, unless the string - // is empty. - let mut last_added_str_len = if text.is_empty() { 0 } else { 1 }; - - // Cases to handle: - // - Completes adding the entire string. - // - Adds a character up to the boundary, then fails. - // - Adds a character not up to the boundary, then fails. - // Inspired by https://tomdebruijn.com/posts/rust-string-length-width-calculations/ - for g in UnicodeSegmentation::graphemes(&content[current_index..], true) { - let g_width = grapheme_width(g); - - if curr_width + g_width <= width { - curr_width += g_width; - last_added_str_len = g.len(); - text.push_str(g); - } else { - early_break = true; - break; - } - } - - if early_break { - if curr_width == width { - // Remove the last grapheme cluster added. - text.truncate(text.len() - last_added_str_len); - } - text.push('…'); - } - text - } - } - } else { - String::default() - } + Text::raw(truncate_str(content, width.into())) } /// Checks that the first string is equal to any of the other ones in a ASCII case-insensitive match. @@ -191,261 +34,6 @@ macro_rules! multi_eq_ignore_ascii_case { #[cfg(test)] mod tests { - use crate::utils::general::sort_partial_fn; - - use super::*; - - #[test] - fn test_sort_partial_fn() { - let mut x = vec![9, 5, 20, 15, 10, 5]; - let mut y = vec![1.0, 15.0, -1.0, -100.0, -100.1, 16.15, -100.0]; - - x.sort_by(|a, b| sort_partial_fn(false)(a, b)); - assert_eq!(x, vec![5, 5, 9, 10, 15, 20]); - - x.sort_by(|a, b| sort_partial_fn(true)(a, b)); - assert_eq!(x, vec![20, 15, 10, 9, 5, 5]); - - y.sort_by(|a, b| sort_partial_fn(false)(a, b)); - assert_eq!(y, vec![-100.1, -100.0, -100.0, -1.0, 1.0, 15.0, 16.15]); - - y.sort_by(|a, b| sort_partial_fn(true)(a, b)); - assert_eq!(y, vec![16.15, 15.0, 1.0, -1.0, -100.0, -100.0, -100.1]); - } - - #[test] - fn test_truncate_str() { - let cpu_header = "CPU(c)▲"; - - assert_eq!( - truncate_str(cpu_header, 8_usize), - cpu_header, - "should match base string as there is extra room" - ); - - assert_eq!( - truncate_str(cpu_header, 7_usize), - cpu_header, - "should match base string as there is enough room" - ); - - assert_eq!(truncate_str(cpu_header, 6_usize), "CPU(c…"); - assert_eq!(truncate_str(cpu_header, 5_usize), "CPU(…"); - assert_eq!(truncate_str(cpu_header, 4_usize), "CPU…"); - assert_eq!(truncate_str(cpu_header, 1_usize), "…"); - assert_eq!(truncate_str(cpu_header, 0_usize), ""); - } - - #[test] - fn test_truncate_ascii() { - let content = "0123456"; - - assert_eq!( - truncate_str(content, 8_usize), - content, - "should match base string as there is extra room" - ); - - assert_eq!( - truncate_str(content, 7_usize), - content, - "should match base string as there is enough room" - ); - - assert_eq!(truncate_str(content, 6_usize), "01234…"); - assert_eq!(truncate_str(content, 5_usize), "0123…"); - assert_eq!(truncate_str(content, 4_usize), "012…"); - assert_eq!(truncate_str(content, 1_usize), "…"); - assert_eq!(truncate_str(content, 0_usize), ""); - } - - #[test] - fn test_truncate_cjk() { - let cjk = "施氏食獅史"; - - assert_eq!( - truncate_str(cjk, 11_usize), - cjk, - "should match base string as there is extra room" - ); - - assert_eq!( - truncate_str(cjk, 10_usize), - cjk, - "should match base string as there is enough room" - ); - - assert_eq!(truncate_str(cjk, 9_usize), "施氏食獅…"); - assert_eq!(truncate_str(cjk, 8_usize), "施氏食…"); - assert_eq!(truncate_str(cjk, 2_usize), "…"); - assert_eq!(truncate_str(cjk, 1_usize), "…"); - assert_eq!(truncate_str(cjk, 0_usize), ""); - - let cjk_2 = "你好嗎"; - assert_eq!(truncate_str(cjk_2, 5_usize), "你好…"); - assert_eq!(truncate_str(cjk_2, 4_usize), "你…"); - assert_eq!(truncate_str(cjk_2, 3_usize), "你…"); - assert_eq!(truncate_str(cjk_2, 2_usize), "…"); - assert_eq!(truncate_str(cjk_2, 1_usize), "…"); - assert_eq!(truncate_str(cjk_2, 0_usize), ""); - } - - #[test] - fn test_truncate_mixed_one() { - let test = "Test (施氏食獅史) Test"; - - assert_eq!( - truncate_str(test, 30_usize), - test, - "should match base string as there is extra room" - ); - - assert_eq!( - truncate_str(test, 22_usize), - test, - "should match base string as there is just enough room" - ); - - assert_eq!( - truncate_str(test, 21_usize), - "Test (施氏食獅史) Te…", - "should truncate the t and replace the s with ellipsis" - ); - - assert_eq!(truncate_str(test, 20_usize), "Test (施氏食獅史) T…"); - assert_eq!(truncate_str(test, 19_usize), "Test (施氏食獅史) …"); - assert_eq!(truncate_str(test, 18_usize), "Test (施氏食獅史)…"); - assert_eq!(truncate_str(test, 17_usize), "Test (施氏食獅史…"); - assert_eq!(truncate_str(test, 16_usize), "Test (施氏食獅…"); - assert_eq!(truncate_str(test, 15_usize), "Test (施氏食獅…"); - assert_eq!(truncate_str(test, 14_usize), "Test (施氏食…"); - assert_eq!(truncate_str(test, 13_usize), "Test (施氏食…"); - assert_eq!(truncate_str(test, 8_usize), "Test (…"); - assert_eq!(truncate_str(test, 7_usize), "Test (…"); - assert_eq!(truncate_str(test, 6_usize), "Test …"); - assert_eq!(truncate_str(test, 5_usize), "Test…"); - assert_eq!(truncate_str(test, 4_usize), "Tes…"); - } - - #[test] - fn test_truncate_mixed_two() { - let test = "Test (施氏abc食abc獅史) Test"; - - assert_eq!( - truncate_str(test, 30_usize), - test, - "should match base string as there is extra room" - ); - - assert_eq!( - truncate_str(test, 28_usize), - test, - "should match base string as there is just enough room" - ); - - assert_eq!(truncate_str(test, 26_usize), "Test (施氏abc食abc獅史) T…"); - assert_eq!(truncate_str(test, 21_usize), "Test (施氏abc食abc獅…"); - assert_eq!(truncate_str(test, 20_usize), "Test (施氏abc食abc…"); - assert_eq!(truncate_str(test, 16_usize), "Test (施氏abc食…"); - assert_eq!(truncate_str(test, 15_usize), "Test (施氏abc…"); - assert_eq!(truncate_str(test, 14_usize), "Test (施氏abc…"); - assert_eq!(truncate_str(test, 11_usize), "Test (施氏…"); - assert_eq!(truncate_str(test, 10_usize), "Test (施…"); - } - - #[test] - fn test_truncate_flags() { - let flag = "🇨🇦"; - assert_eq!(truncate_str(flag, 3_usize), flag); - assert_eq!(truncate_str(flag, 2_usize), flag); - assert_eq!(truncate_str(flag, 1_usize), "…"); - assert_eq!(truncate_str(flag, 0_usize), ""); - - let flag_text = "oh 🇨🇦"; - assert_eq!(truncate_str(flag_text, 6_usize), flag_text); - assert_eq!(truncate_str(flag_text, 5_usize), flag_text); - assert_eq!(truncate_str(flag_text, 4_usize), "oh …"); - - let flag_text_wrap = "!🇨🇦!"; - assert_eq!(truncate_str(flag_text_wrap, 6_usize), flag_text_wrap); - assert_eq!(truncate_str(flag_text_wrap, 4_usize), flag_text_wrap); - assert_eq!(truncate_str(flag_text_wrap, 3_usize), "!…"); - assert_eq!(truncate_str(flag_text_wrap, 2_usize), "!…"); - assert_eq!(truncate_str(flag_text_wrap, 1_usize), "…"); - - let flag_cjk = "加拿大🇨🇦"; - assert_eq!(truncate_str(flag_cjk, 9_usize), flag_cjk); - assert_eq!(truncate_str(flag_cjk, 8_usize), flag_cjk); - assert_eq!(truncate_str(flag_cjk, 7_usize), "加拿大…"); - assert_eq!(truncate_str(flag_cjk, 6_usize), "加拿…"); - assert_eq!(truncate_str(flag_cjk, 5_usize), "加拿…"); - assert_eq!(truncate_str(flag_cjk, 4_usize), "加…"); - - let flag_mix = "🇨🇦加gaa拿naa大daai🇨🇦"; - assert_eq!(truncate_str(flag_mix, 20_usize), flag_mix); - assert_eq!(truncate_str(flag_mix, 19_usize), "🇨🇦加gaa拿naa大daai…"); - assert_eq!(truncate_str(flag_mix, 18_usize), "🇨🇦加gaa拿naa大daa…"); - assert_eq!(truncate_str(flag_mix, 17_usize), "🇨🇦加gaa拿naa大da…"); - assert_eq!(truncate_str(flag_mix, 15_usize), "🇨🇦加gaa拿naa大…"); - assert_eq!(truncate_str(flag_mix, 14_usize), "🇨🇦加gaa拿naa…"); - assert_eq!(truncate_str(flag_mix, 13_usize), "🇨🇦加gaa拿naa…"); - assert_eq!(truncate_str(flag_mix, 3_usize), "🇨🇦…"); - assert_eq!(truncate_str(flag_mix, 2_usize), "…"); - assert_eq!(truncate_str(flag_mix, 1_usize), "…"); - assert_eq!(truncate_str(flag_mix, 0_usize), ""); - } - - /// This might not be the best way to handle it, but this at least tests that it doesn't crash... - #[test] - fn test_truncate_hindi() { - // cSpell:disable - let test = "हिन्दी"; - assert_eq!(truncate_str(test, 10_usize), test); - assert_eq!(truncate_str(test, 6_usize), "हिन्दी"); - assert_eq!(truncate_str(test, 5_usize), "हिन्दी"); - assert_eq!(truncate_str(test, 4_usize), "हिन्…"); - assert_eq!(truncate_str(test, 3_usize), "हि…"); - assert_eq!(truncate_str(test, 2_usize), "…"); - assert_eq!(truncate_str(test, 1_usize), "…"); - assert_eq!(truncate_str(test, 0_usize), ""); - // cSpell:enable - } - - #[test] - fn truncate_emoji() { - let heart_1 = "♥"; - assert_eq!(truncate_str(heart_1, 2_usize), heart_1); - assert_eq!(truncate_str(heart_1, 1_usize), heart_1); - assert_eq!(truncate_str(heart_1, 0_usize), ""); - - let heart_2 = "❤"; - assert_eq!(truncate_str(heart_2, 2_usize), heart_2); - assert_eq!(truncate_str(heart_2, 1_usize), heart_2); - assert_eq!(truncate_str(heart_2, 0_usize), ""); - - // This one has a U+FE0F modifier at the end, and is thus considered "emoji-presentation", - // see https://github.com/fish-shell/fish-shell/issues/10461#issuecomment-2079624670. - // This shouldn't really be a common issue in a terminal but eh. - let heart_emoji_pres = "❤️"; - assert_eq!(truncate_str(heart_emoji_pres, 2_usize), heart_emoji_pres); - assert_eq!(truncate_str(heart_emoji_pres, 1_usize), "…"); - assert_eq!(truncate_str(heart_emoji_pres, 0_usize), ""); - - let emote = "💎"; - assert_eq!(truncate_str(emote, 2_usize), emote); - assert_eq!(truncate_str(emote, 1_usize), "…"); - assert_eq!(truncate_str(emote, 0_usize), ""); - - let family = "👨‍👨‍👧‍👦"; - assert_eq!(truncate_str(family, 2_usize), family); - assert_eq!(truncate_str(family, 1_usize), "…"); - assert_eq!(truncate_str(family, 0_usize), ""); - - let scientist = "👩‍🔬"; - assert_eq!(truncate_str(scientist, 2_usize), scientist); - assert_eq!(truncate_str(scientist, 1_usize), "…"); - assert_eq!(truncate_str(scientist, 0_usize), ""); - } #[test] fn test_multi_eq_ignore_ascii_case() {