mirror of
https://github.com/alexpasmantier/television.git
synced 2025-06-07 20:15:23 +00:00
tests and docs for strings.rs
This commit is contained in:
parent
6e794e07b7
commit
19b8cb5068
2
TODO.md
2
TODO.md
@ -36,7 +36,7 @@
|
|||||||
- [ ] more syntaxes for the previewer https://www.sublimetext.com/docs/syntax.html#include-syntax
|
- [ ] more syntaxes for the previewer https://www.sublimetext.com/docs/syntax.html#include-syntax
|
||||||
- [ ] more preview colorschemes
|
- [ ] more preview colorschemes
|
||||||
|
|
||||||
## features
|
## feature ideas
|
||||||
|
|
||||||
- [x] environment variables
|
- [x] environment variables
|
||||||
- [x] aliases
|
- [x] aliases
|
||||||
|
@ -1,15 +1,59 @@
|
|||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
|
|
||||||
|
/// Returns the index of the next character boundary in the given string.
|
||||||
|
///
|
||||||
|
/// If the given index is already a character boundary, it is returned as is.
|
||||||
|
/// If the given index is out of bounds, the length of the string is returned.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::next_char_boundary;
|
||||||
|
///
|
||||||
|
/// let s = "Hello, World!";
|
||||||
|
/// assert_eq!(next_char_boundary(s, 0), 0);
|
||||||
|
/// assert_eq!(next_char_boundary(s, 1), 1);
|
||||||
|
/// assert_eq!(next_char_boundary(s, 13), 13);
|
||||||
|
/// assert_eq!(next_char_boundary(s, 30), 13);
|
||||||
|
///
|
||||||
|
/// let s = "👋🌍!";
|
||||||
|
/// assert_eq!(next_char_boundary(s, 0), 0);
|
||||||
|
/// assert_eq!(next_char_boundary(s, 1), 4);
|
||||||
|
/// assert_eq!(next_char_boundary(s, 4), 4);
|
||||||
|
/// assert_eq!(next_char_boundary(s, 7), 8);
|
||||||
|
/// assert_eq!(next_char_boundary(s, 8), 8);
|
||||||
|
/// ```
|
||||||
pub fn next_char_boundary(s: &str, start: usize) -> usize {
|
pub fn next_char_boundary(s: &str, start: usize) -> usize {
|
||||||
let mut i = start;
|
let mut i = start;
|
||||||
let len = s.len();
|
let len = s.len();
|
||||||
while !s.is_char_boundary(i) && i < len - 1 {
|
if i >= len {
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
while !s.is_char_boundary(i) && i < len {
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
i
|
i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the index of the previous character boundary in the given string.
|
||||||
|
///
|
||||||
|
/// If the given index is already a character boundary, it is returned as is.
|
||||||
|
/// If the given index is out of bounds, the length of the string is returned.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::prev_char_boundary;
|
||||||
|
///
|
||||||
|
/// let s = "Hello, World!";
|
||||||
|
/// assert_eq!(prev_char_boundary(s, 0), 0);
|
||||||
|
/// assert_eq!(prev_char_boundary(s, 1), 1);
|
||||||
|
/// assert_eq!(prev_char_boundary(s, 5), 5);
|
||||||
|
///
|
||||||
|
/// let s = "👋🌍!";
|
||||||
|
/// assert_eq!(prev_char_boundary(s, 0), 0);
|
||||||
|
/// assert_eq!(prev_char_boundary(s, 4), 4);
|
||||||
|
/// assert_eq!(prev_char_boundary(s, 6), 4);
|
||||||
|
/// ```
|
||||||
pub fn prev_char_boundary(s: &str, start: usize) -> usize {
|
pub fn prev_char_boundary(s: &str, start: usize) -> usize {
|
||||||
let mut i = start;
|
let mut i = start;
|
||||||
while !s.is_char_boundary(i) && i > 0 {
|
while !s.is_char_boundary(i) && i > 0 {
|
||||||
@ -18,6 +62,24 @@ pub fn prev_char_boundary(s: &str, start: usize) -> usize {
|
|||||||
i
|
i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a slice of the given string that starts and ends at character boundaries.
|
||||||
|
///
|
||||||
|
/// If the given start index is greater than the end index, or if either index is out of bounds,
|
||||||
|
/// an empty string is returned.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::slice_at_char_boundaries;
|
||||||
|
///
|
||||||
|
/// let s = "Hello, World!";
|
||||||
|
/// assert_eq!(slice_at_char_boundaries(s, 0, 0), "");
|
||||||
|
/// assert_eq!(slice_at_char_boundaries(s, 0, 1), "H");
|
||||||
|
///
|
||||||
|
/// let s = "👋🌍!";
|
||||||
|
/// assert_eq!(slice_at_char_boundaries(s, 0, 0), "");
|
||||||
|
/// assert_eq!(slice_at_char_boundaries(s, 0, 2), "👋");
|
||||||
|
/// assert_eq!(slice_at_char_boundaries(s, 0, 5), "👋🌍");
|
||||||
|
/// ```
|
||||||
pub fn slice_at_char_boundaries(
|
pub fn slice_at_char_boundaries(
|
||||||
s: &str,
|
s: &str,
|
||||||
start_byte_index: usize,
|
start_byte_index: usize,
|
||||||
@ -33,14 +95,33 @@ pub fn slice_at_char_boundaries(
|
|||||||
..next_char_boundary(s, end_byte_index)]
|
..next_char_boundary(s, end_byte_index)]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a slice of the given string that starts at the beginning and ends at a character
|
||||||
|
/// boundary.
|
||||||
|
///
|
||||||
|
/// If the given index is out of bounds, the whole string is returned.
|
||||||
|
/// If the given index is already a character boundary, the string up to that index is returned.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::slice_up_to_char_boundary;
|
||||||
|
///
|
||||||
|
/// let s = "Hello, World!";
|
||||||
|
/// assert_eq!(slice_up_to_char_boundary(s, 0), "");
|
||||||
|
/// assert_eq!(slice_up_to_char_boundary(s, 1), "H");
|
||||||
|
/// assert_eq!(slice_up_to_char_boundary(s, 13), "Hello, World!");
|
||||||
|
///
|
||||||
|
/// let s = "👋🌍!";
|
||||||
|
/// assert_eq!(slice_up_to_char_boundary(s, 0), "");
|
||||||
|
/// assert_eq!(slice_up_to_char_boundary(s, 1), "👋");
|
||||||
|
/// assert_eq!(slice_up_to_char_boundary(s, 4), "👋");
|
||||||
|
/// assert_eq!(slice_up_to_char_boundary(s, 7), "👋🌍");
|
||||||
|
/// ```
|
||||||
pub fn slice_up_to_char_boundary(s: &str, byte_index: usize) -> &str {
|
pub fn slice_up_to_char_boundary(s: &str, byte_index: usize) -> &str {
|
||||||
let mut char_index = byte_index;
|
&s[..next_char_boundary(s, byte_index)]
|
||||||
while !s.is_char_boundary(char_index) {
|
|
||||||
char_index -= 1;
|
|
||||||
}
|
|
||||||
&s[..char_index]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attempts to parse a UTF-8 character from the given byte slice.
|
||||||
fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
|
fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
|
||||||
let str_from_utf8 = |seq| std::str::from_utf8(seq).ok();
|
let str_from_utf8 = |seq| std::str::from_utf8(seq).ok();
|
||||||
|
|
||||||
@ -56,6 +137,7 @@ fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
|
/// The Unicode symbol to use for non-printable characters.
|
||||||
static ref NULL_SYMBOL: char = char::from_u32(0x2400).unwrap();
|
static ref NULL_SYMBOL: char = char::from_u32(0x2400).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,6 +153,35 @@ const NULL_CHARACTER: char = '\x00';
|
|||||||
const UNIT_SEPARATOR_CHARACTER: char = '\u{001F}';
|
const UNIT_SEPARATOR_CHARACTER: char = '\u{001F}';
|
||||||
const APPLICATION_PROGRAM_COMMAND_CHARACTER: char = '\u{009F}';
|
const APPLICATION_PROGRAM_COMMAND_CHARACTER: char = '\u{009F}';
|
||||||
|
|
||||||
|
/// Replaces non-printable characters in the given byte slice with default printable characters.
|
||||||
|
///
|
||||||
|
/// The tab width is used to determine how many spaces to replace a tab character with.
|
||||||
|
/// The default printable character for non-printable characters is the Unicode symbol for NULL.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::replace_non_printable;
|
||||||
|
///
|
||||||
|
/// let input = b"Hello, World!";
|
||||||
|
/// let output = replace_non_printable(input, 2);
|
||||||
|
/// assert_eq!(output, "Hello, World!");
|
||||||
|
///
|
||||||
|
/// let input = b"Hello\tWorld!";
|
||||||
|
/// let output = replace_non_printable(input, 2);
|
||||||
|
/// assert_eq!(output, "Hello  World!");
|
||||||
|
///
|
||||||
|
/// let input = b"Hello\nWorld!";
|
||||||
|
/// let output = replace_non_printable(input, 2);
|
||||||
|
/// assert_eq!(output, "HelloWorld!");
|
||||||
|
///
|
||||||
|
/// let input = b"Hello\x00World!";
|
||||||
|
/// let output = replace_non_printable(input, 2);
|
||||||
|
/// assert_eq!(output, "Hello␀World!");
|
||||||
|
///
|
||||||
|
/// let input = b"Hello\x7FWorld!";
|
||||||
|
/// let output = replace_non_printable(input, 2);
|
||||||
|
/// assert_eq!(output, "Hello␀World!");
|
||||||
|
/// ```
|
||||||
pub fn replace_non_printable(input: &[u8], tab_width: usize) -> String {
|
pub fn replace_non_printable(input: &[u8], tab_width: usize) -> String {
|
||||||
let mut output = String::new();
|
let mut output = String::new();
|
||||||
|
|
||||||
@ -120,10 +231,30 @@ pub fn replace_non_printable(input: &[u8], tab_width: usize) -> String {
|
|||||||
/// based on a sample of its contents.
|
/// based on a sample of its contents.
|
||||||
pub const PRINTABLE_ASCII_THRESHOLD: f32 = 0.7;
|
pub const PRINTABLE_ASCII_THRESHOLD: f32 = 0.7;
|
||||||
|
|
||||||
|
/// Returns the proportion of printable ASCII characters in the given buffer.
|
||||||
|
///
|
||||||
|
/// This really is a cheap way to determine if a buffer is likely to be a text file.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::proportion_of_printable_ascii_characters;
|
||||||
|
///
|
||||||
|
/// let buffer = b"Hello, World!";
|
||||||
|
/// let proportion = proportion_of_printable_ascii_characters(buffer);
|
||||||
|
/// assert_eq!(proportion, 1.0);
|
||||||
|
///
|
||||||
|
/// let buffer = b"Hello, World!\x00";
|
||||||
|
/// let proportion = proportion_of_printable_ascii_characters(buffer);
|
||||||
|
/// assert_eq!(proportion, 0.9285714);
|
||||||
|
///
|
||||||
|
/// let buffer = b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F";
|
||||||
|
/// let proportion = proportion_of_printable_ascii_characters(buffer);
|
||||||
|
/// assert_eq!(proportion, 0.0);
|
||||||
|
/// ```
|
||||||
pub fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
|
pub fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
|
||||||
let mut printable = 0;
|
let mut printable: usize = 0;
|
||||||
for &byte in buffer {
|
for &byte in buffer {
|
||||||
if byte > 32 && byte < 127 {
|
if (32..127).contains(&byte) {
|
||||||
printable += 1;
|
printable += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -132,6 +263,27 @@ pub fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
|
|||||||
|
|
||||||
const MAX_LINE_LENGTH: usize = 300;
|
const MAX_LINE_LENGTH: usize = 300;
|
||||||
|
|
||||||
|
/// Preprocesses a line of text for display.
|
||||||
|
///
|
||||||
|
/// This function trims the line, replaces non-printable characters, and truncates the line if it
|
||||||
|
/// is too long.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::preprocess_line;
|
||||||
|
///
|
||||||
|
/// let line = "Hello, World!";
|
||||||
|
/// let processed = preprocess_line(line);
|
||||||
|
/// assert_eq!(processed, "Hello, World!");
|
||||||
|
///
|
||||||
|
/// let line = "\x00World\x7F!";
|
||||||
|
/// let processed = preprocess_line(line);
|
||||||
|
/// assert_eq!(processed, "␀World␀!");
|
||||||
|
///
|
||||||
|
/// let line = "a".repeat(400);
|
||||||
|
/// let processed = preprocess_line(&line);
|
||||||
|
/// assert_eq!(processed.len(), 300);
|
||||||
|
/// ```
|
||||||
pub fn preprocess_line(line: &str) -> String {
|
pub fn preprocess_line(line: &str) -> String {
|
||||||
replace_non_printable(
|
replace_non_printable(
|
||||||
{
|
{
|
||||||
@ -147,6 +299,20 @@ pub fn preprocess_line(line: &str) -> String {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Shrink a string to a maximum length, adding an ellipsis in the middle.
|
||||||
|
///
|
||||||
|
/// If the string is no longer than the maximum length, it is returned as is.
|
||||||
|
/// If the string is longer than the maximum length, it is shortened and an ellipsis is added in
|
||||||
|
/// the middle.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// ```
|
||||||
|
/// use strings::shrink_with_ellipsis;
|
||||||
|
///
|
||||||
|
/// let s = "Hello, World!";
|
||||||
|
/// assert_eq!(shrink_with_ellipsis(s, 13), "Hello, World!");
|
||||||
|
/// assert_eq!(shrink_with_ellipsis(s, 6), "He…d!");
|
||||||
|
/// ```
|
||||||
pub fn shrink_with_ellipsis(s: &str, max_length: usize) -> String {
|
pub fn shrink_with_ellipsis(s: &str, max_length: usize) -> String {
|
||||||
if s.len() <= max_length {
|
if s.len() <= max_length {
|
||||||
return s.to_string();
|
return s.to_string();
|
||||||
@ -163,6 +329,79 @@ pub fn shrink_with_ellipsis(s: &str, max_length: usize) -> String {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
fn test_next_char_boundary(input: &str, start: usize, expected: usize) {
|
||||||
|
let actual = next_char_boundary(input, start);
|
||||||
|
assert_eq!(actual, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_next_char_boundary_ascii() {
|
||||||
|
test_next_char_boundary("Hello, World!", 0, 0);
|
||||||
|
test_next_char_boundary("Hello, World!", 1, 1);
|
||||||
|
test_next_char_boundary("Hello, World!", 13, 13);
|
||||||
|
test_next_char_boundary("Hello, World!", 30, 13);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_next_char_boundary_emoji() {
|
||||||
|
test_next_char_boundary("👋🌍!", 0, 0);
|
||||||
|
test_next_char_boundary("👋🌍!", 1, 4);
|
||||||
|
test_next_char_boundary("👋🌍!", 4, 4);
|
||||||
|
test_next_char_boundary("👋🌍!", 8, 8);
|
||||||
|
test_next_char_boundary("👋🌍!", 7, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_previous_char_boundary(
|
||||||
|
input: &str,
|
||||||
|
start: usize,
|
||||||
|
expected: usize,
|
||||||
|
) {
|
||||||
|
let actual = prev_char_boundary(input, start);
|
||||||
|
assert_eq!(actual, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_previous_char_boundary_ascii() {
|
||||||
|
test_previous_char_boundary("Hello, World!", 0, 0);
|
||||||
|
test_previous_char_boundary("Hello, World!", 1, 1);
|
||||||
|
test_previous_char_boundary("Hello, World!", 5, 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_previous_char_boundary_emoji() {
|
||||||
|
test_previous_char_boundary("👋🌍!", 0, 0);
|
||||||
|
test_previous_char_boundary("👋🌍!", 4, 4);
|
||||||
|
test_previous_char_boundary("👋🌍!", 6, 4);
|
||||||
|
test_previous_char_boundary("👋🌍!", 8, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_slice_at_char_boundaries(
|
||||||
|
input: &str,
|
||||||
|
start: usize,
|
||||||
|
end: usize,
|
||||||
|
expected: &str,
|
||||||
|
) {
|
||||||
|
let actual = slice_at_char_boundaries(input, start, end);
|
||||||
|
assert_eq!(actual, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_slice_at_char_boundaries_ascii() {
|
||||||
|
test_slice_at_char_boundaries("Hello, World!", 0, 0, "");
|
||||||
|
test_slice_at_char_boundaries("Hello, World!", 0, 1, "H");
|
||||||
|
test_slice_at_char_boundaries("Hello, World!", 0, 13, "Hello, World!");
|
||||||
|
test_slice_at_char_boundaries("Hello, World!", 0, 30, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_slice_at_char_boundaries_emoji() {
|
||||||
|
test_slice_at_char_boundaries("👋🌍!", 0, 0, "");
|
||||||
|
test_slice_at_char_boundaries("👋🌍!", 0, 4, "👋");
|
||||||
|
test_slice_at_char_boundaries("👋🌍!", 0, 8, "👋🌍");
|
||||||
|
test_slice_at_char_boundaries("👋🌍!", 0, 7, "👋🌍");
|
||||||
|
test_slice_at_char_boundaries("👋🌍!", 0, 9, "👋🌍!");
|
||||||
|
}
|
||||||
|
|
||||||
fn test_replace_non_printable(input: &str, expected: &str) {
|
fn test_replace_non_printable(input: &str, expected: &str) {
|
||||||
let actual = replace_non_printable(input.as_bytes(), 2);
|
let actual = replace_non_printable(input.as_bytes(), 2);
|
||||||
assert_eq!(actual, expected);
|
assert_eq!(actual, expected);
|
||||||
@ -207,4 +446,41 @@ mod tests {
|
|||||||
fn test_replace_non_printable_start_txt() {
|
fn test_replace_non_printable_start_txt() {
|
||||||
test_replace_non_printable("Àì", "Àì␀");
|
test_replace_non_printable("Àì", "Àì␀");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn test_proportion_of_printable_ascii_characters(
|
||||||
|
input: &str,
|
||||||
|
expected: f32,
|
||||||
|
) {
|
||||||
|
let actual =
|
||||||
|
proportion_of_printable_ascii_characters(input.as_bytes());
|
||||||
|
assert_eq!(actual, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_proportion_of_printable_ascii_characters_ascii() {
|
||||||
|
test_proportion_of_printable_ascii_characters("Hello, World!", 1.0);
|
||||||
|
test_proportion_of_printable_ascii_characters(
|
||||||
|
"Hello, World!\x00",
|
||||||
|
0.9285714,
|
||||||
|
);
|
||||||
|
test_proportion_of_printable_ascii_characters(
|
||||||
|
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
|
||||||
|
0.0,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_preprocess_line(input: &str, expected: &str) {
|
||||||
|
let actual = preprocess_line(input);
|
||||||
|
assert_eq!(actual, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_preprocess_line_cases() {
|
||||||
|
test_preprocess_line("Hello, World!", "Hello, World!");
|
||||||
|
test_preprocess_line("Hello, World!\n", "Hello, World!");
|
||||||
|
test_preprocess_line("Hello, World!\x00", "Hello, World!");
|
||||||
|
test_preprocess_line("Hello, World!\x7F", "Hello, World!␀");
|
||||||
|
test_preprocess_line("Hello, World!\u{FEFF}", "Hello, World!");
|
||||||
|
test_preprocess_line(&"a".repeat(400), &"a".repeat(300));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user