refactoring matcher

This commit is contained in:
Alexandre Pasmantier 2024-11-05 00:31:23 +01:00
parent 19b8cb5068
commit 635ea8a774
9 changed files with 493 additions and 540 deletions

View File

@ -1,12 +1,9 @@
use std::sync::Arc;
use crate::fuzzy::matcher::{Config, Injector, Matcher};
use devicons::FileIcon;
use nucleo::{Config, Injector, Nucleo};
use tracing::debug;
use crate::channels::OnAir;
use crate::entry::Entry;
use crate::fuzzy::MATCHER;
use crate::previewers::PreviewType;
use crate::utils::indices::sep_name_and_value_indices;
use crate::utils::strings::preprocess_line;
@ -24,12 +21,8 @@ impl Alias {
}
pub struct Channel {
matcher: Nucleo<Alias>,
last_pattern: String,
matcher: Matcher<Alias>,
file_icon: FileIcon,
result_count: u32,
total_count: u32,
running: bool,
}
const NUM_THREADS: usize = 1;
@ -54,26 +47,15 @@ fn get_raw_aliases(shell: &str) -> Vec<String> {
impl Channel {
pub fn new() -> Self {
let matcher = Nucleo::new(
Config::DEFAULT,
Arc::new(|| {}),
Some(NUM_THREADS),
1,
);
let matcher = Matcher::new(Config::default().n_threads(NUM_THREADS));
let injector = matcher.injector();
tokio::spawn(load_aliases(injector));
Self {
matcher,
last_pattern: String::new(),
file_icon: FileIcon::from(FILE_ICON_STR),
result_count: 0,
total_count: 0,
running: false,
}
}
const MATCHER_TICK_TIMEOUT: u64 = 2;
}
impl Default for Channel {
@ -84,59 +66,29 @@ impl Default for Channel {
impl OnAir for Channel {
fn find(&mut self, pattern: &str) {
if pattern != self.last_pattern {
self.matcher.pattern.reparse(
0,
pattern,
nucleo::pattern::CaseMatching::Smart,
nucleo::pattern::Normalization::Smart,
pattern.starts_with(&self.last_pattern),
);
self.last_pattern = pattern.to_string();
}
self.matcher.find(pattern);
}
fn results(&mut self, num_entries: u32, offset: u32) -> Vec<Entry> {
let status = self.matcher.tick(Self::MATCHER_TICK_TIMEOUT);
let snapshot = self.matcher.snapshot();
if status.changed {
self.result_count = snapshot.matched_item_count();
self.total_count = snapshot.item_count();
}
self.running = status.running;
let mut col_indices = Vec::new();
let mut matcher = MATCHER.lock();
let icon = self.file_icon;
snapshot
.matched_items(
offset
..(num_entries + offset)
.min(snapshot.matched_item_count()),
)
.map(move |item| {
snapshot.pattern().column_pattern(0).indices(
item.matcher_columns[0].slice(..),
&mut matcher,
&mut col_indices,
);
col_indices.sort_unstable();
col_indices.dedup();
self.matcher.tick();
self.matcher
.results(num_entries, offset)
.into_iter()
.map(|item| {
let (
name_indices,
value_indices,
should_add_name_indices,
should_add_value_indices,
) = sep_name_and_value_indices(
&mut col_indices,
u32::try_from(item.data.name.len()).unwrap(),
&mut item.match_indices.iter().map(|i| i.0).collect(),
u32::try_from(item.inner.name.len()).unwrap(),
);
let mut entry =
Entry::new(item.data.name.clone(), PreviewType::EnvVar)
.with_value(item.data.value.clone())
.with_icon(icon);
Entry::new(item.inner.name.clone(), PreviewType::EnvVar)
.with_value(item.inner.value.clone())
.with_icon(self.file_icon);
if should_add_name_indices {
entry = entry.with_name_match_ranges(
@ -159,24 +111,23 @@ impl OnAir for Channel {
}
fn get_result(&self, index: u32) -> Option<Entry> {
let snapshot = self.matcher.snapshot();
snapshot.get_matched_item(index).map(|item| {
Entry::new(item.data.name.clone(), PreviewType::EnvVar)
.with_value(item.data.value.clone())
self.matcher.get_result(index).map(|item| {
Entry::new(item.inner.name.clone(), PreviewType::EnvVar)
.with_value(item.inner.value.clone())
.with_icon(self.file_icon)
})
}
fn result_count(&self) -> u32 {
self.result_count
self.matcher.matched_item_count
}
fn total_count(&self) -> u32 {
self.total_count
self.matcher.total_item_count
}
fn running(&self) -> bool {
self.running
self.matcher.status.running
}
fn shutdown(&self) {}
@ -204,7 +155,7 @@ async fn load_aliases(injector: Injector<Alias>) {
None
})
.for_each(|alias| {
let _ = injector.push(alias.clone(), |_, cols| {
let () = injector.push(alias.clone(), |_, cols| {
cols[0] = (alias.name.clone() + &alias.value).into();
});
});

View File

@ -1,17 +1,13 @@
use devicons::FileIcon;
use nucleo::{
pattern::{CaseMatching, Normalization},
Config, Nucleo,
};
use std::sync::Arc;
use super::OnAir;
use crate::entry::Entry;
use crate::fuzzy::MATCHER;
use crate::fuzzy::matcher::{Config, Matcher};
use crate::previewers::PreviewType;
use crate::utils::indices::sep_name_and_value_indices;
use crate::utils::strings::preprocess_line;
#[derive(Debug, Clone)]
struct EnvVar {
name: String,
value: String,
@ -19,12 +15,8 @@ struct EnvVar {
#[allow(clippy::module_name_repetitions)]
pub struct Channel {
matcher: Nucleo<EnvVar>,
last_pattern: String,
matcher: Matcher<EnvVar>,
file_icon: FileIcon,
result_count: u32,
total_count: u32,
running: bool,
}
const NUM_THREADS: usize = 1;
@ -32,15 +24,10 @@ const FILE_ICON_STR: &str = "config";
impl Channel {
pub fn new() -> Self {
let matcher = Nucleo::new(
Config::DEFAULT,
Arc::new(|| {}),
Some(NUM_THREADS),
1,
);
let matcher = Matcher::new(Config::default().n_threads(NUM_THREADS));
let injector = matcher.injector();
for (name, value) in std::env::vars() {
let _ = injector.push(
let () = injector.push(
EnvVar {
name: preprocess_line(&name),
value: preprocess_line(&value),
@ -52,15 +39,9 @@ impl Channel {
}
Channel {
matcher,
last_pattern: String::new(),
file_icon: FileIcon::from(FILE_ICON_STR),
result_count: 0,
total_count: 0,
running: false,
}
}
const MATCHER_TICK_TIMEOUT: u64 = 2;
}
impl Default for Channel {
@ -71,59 +52,29 @@ impl Default for Channel {
impl OnAir for Channel {
fn find(&mut self, pattern: &str) {
if pattern != self.last_pattern {
self.matcher.pattern.reparse(
0,
pattern,
CaseMatching::Smart,
Normalization::Smart,
pattern.starts_with(&self.last_pattern),
);
self.last_pattern = pattern.to_string();
}
self.matcher.find(pattern);
}
fn results(&mut self, num_entries: u32, offset: u32) -> Vec<Entry> {
let status = self.matcher.tick(Self::MATCHER_TICK_TIMEOUT);
let snapshot = self.matcher.snapshot();
if status.changed {
self.result_count = snapshot.matched_item_count();
self.total_count = snapshot.item_count();
}
self.running = status.running;
let mut col_indices = Vec::new();
let mut matcher = MATCHER.lock();
let icon = self.file_icon;
snapshot
.matched_items(
offset
..(num_entries + offset)
.min(snapshot.matched_item_count()),
)
.map(move |item| {
snapshot.pattern().column_pattern(0).indices(
item.matcher_columns[0].slice(..),
&mut matcher,
&mut col_indices,
);
col_indices.sort_unstable();
col_indices.dedup();
self.matcher.tick();
self.matcher
.results(num_entries, offset)
.into_iter()
.map(|item| {
let (
name_indices,
value_indices,
should_add_name_indices,
should_add_value_indices,
) = sep_name_and_value_indices(
&mut col_indices,
u32::try_from(item.data.name.len()).unwrap(),
&mut item.match_indices.iter().map(|i| i.0).collect(),
u32::try_from(item.inner.name.len()).unwrap(),
);
let mut entry =
Entry::new(item.data.name.clone(), PreviewType::EnvVar)
.with_value(item.data.value.clone())
.with_icon(icon);
Entry::new(item.inner.name.clone(), PreviewType::EnvVar)
.with_value(item.inner.value.clone())
.with_icon(self.file_icon);
if should_add_name_indices {
entry = entry.with_name_match_ranges(
@ -146,26 +97,23 @@ impl OnAir for Channel {
}
fn get_result(&self, index: u32) -> Option<Entry> {
let snapshot = self.matcher.snapshot();
snapshot.get_matched_item(index).map(|item| {
let name = item.data.name.clone();
let value = item.data.value.clone();
Entry::new(name, PreviewType::EnvVar)
.with_value(value)
self.matcher.get_result(index).map(|item| {
Entry::new(item.inner.name.clone(), PreviewType::EnvVar)
.with_value(item.inner.value.clone())
.with_icon(self.file_icon)
})
}
fn result_count(&self) -> u32 {
self.result_count
self.matcher.matched_item_count
}
fn total_count(&self) -> u32 {
self.total_count
self.matcher.total_item_count
}
fn running(&self) -> bool {
self.running
self.matcher.status.running
}
fn shutdown(&self) {}

View File

@ -1,23 +1,15 @@
use super::{OnAir, TelevisionChannel};
use crate::channels::{OnAir, TelevisionChannel};
use crate::entry::Entry;
use crate::fuzzy::MATCHER;
use crate::fuzzy::matcher::{Config, Injector, Matcher};
use crate::previewers::PreviewType;
use crate::utils::files::{walk_builder, DEFAULT_NUM_THREADS};
use crate::utils::strings::preprocess_line;
use devicons::FileIcon;
use nucleo::{
pattern::{CaseMatching, Normalization},
Config, Injector, Nucleo,
};
use std::collections::HashSet;
use std::{path::PathBuf, sync::Arc};
use std::path::PathBuf;
pub struct Channel {
matcher: Nucleo<String>,
last_pattern: String,
result_count: u32,
total_count: u32,
running: bool,
matcher: Matcher<String>,
crawl_handle: tokio::task::JoinHandle<()>,
// PERF: cache results (to make deleting characters smoother) with
// a shallow stack of sub-patterns as keys (e.g. "a", "ab", "abc")
@ -25,25 +17,14 @@ pub struct Channel {
impl Channel {
pub fn new(paths: Vec<PathBuf>) -> Self {
let matcher = Nucleo::new(
Config::DEFAULT.match_paths(),
Arc::new(|| {}),
None,
1,
);
let matcher = Matcher::new(Config::default().match_paths(true));
// start loading files in the background
let crawl_handle = tokio::spawn(load_files(paths, matcher.injector()));
Channel {
matcher,
last_pattern: String::new(),
result_count: 0,
total_count: 0,
running: false,
crawl_handle,
}
}
const MATCHER_TICK_TIMEOUT: u64 = 2;
}
impl Default for Channel {
@ -91,74 +72,41 @@ impl From<&mut TelevisionChannel> for Channel {
impl OnAir for Channel {
fn find(&mut self, pattern: &str) {
if pattern != self.last_pattern {
self.matcher.pattern.reparse(
0,
pattern,
CaseMatching::Smart,
Normalization::Smart,
pattern.starts_with(&self.last_pattern),
);
self.last_pattern = pattern.to_string();
}
self.matcher.find(pattern);
}
fn results(&mut self, num_entries: u32, offset: u32) -> Vec<Entry> {
let status = self.matcher.tick(Self::MATCHER_TICK_TIMEOUT);
let snapshot = self.matcher.snapshot();
if status.changed {
self.result_count = snapshot.matched_item_count();
self.total_count = snapshot.item_count();
}
self.running = status.running;
let mut indices = Vec::new();
let mut matcher = MATCHER.lock();
snapshot
.matched_items(
offset
..(num_entries + offset)
.min(snapshot.matched_item_count()),
)
.map(move |item| {
snapshot.pattern().column_pattern(0).indices(
item.matcher_columns[0].slice(..),
&mut matcher,
&mut indices,
);
indices.sort_unstable();
indices.dedup();
let indices = indices.drain(..);
let path = item.matcher_columns[0].to_string();
self.matcher.tick();
self.matcher
.results(num_entries, offset)
.into_iter()
.map(|item| {
let path = item.matched_string;
Entry::new(path.clone(), PreviewType::Files)
.with_name_match_ranges(
indices.map(|i| (i, i + 1)).collect(),
)
.with_name_match_ranges(item.match_indices)
.with_icon(FileIcon::from(&path))
})
.collect()
}
fn get_result(&self, index: u32) -> Option<Entry> {
let snapshot = self.matcher.snapshot();
snapshot.get_matched_item(index).map(|item| {
let path = item.matcher_columns[0].to_string();
self.matcher.get_result(index).map(|item| {
let path = item.matched_string;
Entry::new(path.clone(), PreviewType::Files)
.with_icon(FileIcon::from(&path))
})
}
fn result_count(&self) -> u32 {
self.result_count
self.matcher.matched_item_count
}
fn total_count(&self) -> u32 {
self.total_count
self.matcher.total_item_count
}
fn running(&self) -> bool {
self.running
self.matcher.status.running
}
fn shutdown(&self) {
@ -192,7 +140,7 @@ async fn load_files(paths: Vec<PathBuf>, injector: Injector<String>) {
.unwrap_or(entry.path())
.to_string_lossy(),
);
let _ = injector.push(file_path, |e, cols| {
let () = injector.push(file_path, |e, cols| {
cols[0] = e.clone().into();
});
}

View File

@ -1,42 +1,29 @@
use devicons::FileIcon;
use directories::BaseDirs;
use ignore::overrides::OverrideBuilder;
use nucleo::{
pattern::{CaseMatching, Normalization},
Config, Nucleo,
};
use std::{path::PathBuf, sync::Arc};
use std::path::PathBuf;
use tokio::task::JoinHandle;
use tracing::debug;
use crate::{
entry::Entry,
fuzzy::MATCHER,
previewers::PreviewType,
utils::files::{walk_builder, DEFAULT_NUM_THREADS},
};
use crate::channels::OnAir;
use crate::fuzzy::matcher::{Config, Injector, Matcher};
use crate::utils::strings::preprocess_line;
pub struct Channel {
matcher: Nucleo<String>,
last_pattern: String,
result_count: u32,
total_count: u32,
running: bool,
matcher: Matcher<String>,
icon: FileIcon,
crawl_handle: JoinHandle<()>,
}
impl Channel {
pub fn new() -> Self {
let matcher = Nucleo::new(
Config::DEFAULT.match_paths(),
Arc::new(|| {}),
None,
1,
);
let matcher = Matcher::new(Config::default().match_paths(true));
let base_dirs = BaseDirs::new().unwrap();
let crawl_handle = tokio::spawn(crawl_for_repos(
base_dirs.home_dir().to_path_buf(),
@ -44,16 +31,10 @@ impl Channel {
));
Channel {
matcher,
last_pattern: String::new(),
result_count: 0,
total_count: 0,
running: false,
icon: FileIcon::from("git"),
crawl_handle,
}
}
const MATCHER_TICK_TIMEOUT: u64 = 2;
}
impl Default for Channel {
@ -64,75 +45,41 @@ impl Default for Channel {
impl OnAir for Channel {
fn find(&mut self, pattern: &str) {
if pattern != self.last_pattern {
self.matcher.pattern.reparse(
0,
pattern,
CaseMatching::Smart,
Normalization::Smart,
pattern.starts_with(&self.last_pattern),
);
self.last_pattern = pattern.to_string();
}
self.matcher.find(pattern);
}
fn results(&mut self, num_entries: u32, offset: u32) -> Vec<Entry> {
let status = self.matcher.tick(Self::MATCHER_TICK_TIMEOUT);
let snapshot = self.matcher.snapshot();
if status.changed {
self.result_count = snapshot.matched_item_count();
self.total_count = snapshot.item_count();
}
self.running = status.running;
let mut indices = Vec::new();
let mut matcher = MATCHER.lock();
let icon = self.icon;
snapshot
.matched_items(
offset
..(num_entries + offset)
.min(snapshot.matched_item_count()),
)
.map(move |item| {
snapshot.pattern().column_pattern(0).indices(
item.matcher_columns[0].slice(..),
&mut matcher,
&mut indices,
);
indices.sort_unstable();
indices.dedup();
let indices = indices.drain(..);
let path = item.matcher_columns[0].to_string();
self.matcher.tick();
self.matcher
.results(num_entries, offset)
.into_iter()
.map(|item| {
let path = item.matched_string;
Entry::new(path.clone(), PreviewType::Directory)
.with_name_match_ranges(
indices.map(|i| (i, i + 1)).collect(),
)
.with_icon(icon)
.with_name_match_ranges(item.match_indices)
.with_icon(self.icon)
})
.collect()
}
fn get_result(&self, index: u32) -> Option<Entry> {
let snapshot = self.matcher.snapshot();
snapshot.get_matched_item(index).map(|item| {
let path = item.matcher_columns[0].to_string();
self.matcher.get_result(index).map(|item| {
let path = item.matched_string;
Entry::new(path.clone(), PreviewType::Directory)
.with_icon(self.icon)
})
}
fn result_count(&self) -> u32 {
self.result_count
self.matcher.matched_item_count
}
fn total_count(&self) -> u32 {
self.total_count
self.matcher.total_item_count
}
fn running(&self) -> bool {
self.running
self.matcher.status.running
}
fn shutdown(&self) {
@ -189,10 +136,7 @@ fn get_ignored_paths() -> Vec<PathBuf> {
ignored_paths
}
#[allow(clippy::unused_async)]
async fn crawl_for_repos(
starting_point: PathBuf,
injector: nucleo::Injector<String>,
) {
async fn crawl_for_repos(starting_point: PathBuf, injector: Injector<String>) {
let mut walker_overrides_builder = OverrideBuilder::new(&starting_point);
walker_overrides_builder.add(".git").unwrap();
let walker = walk_builder(
@ -214,7 +158,7 @@ async fn crawl_for_repos(
&entry.path().parent().unwrap().to_string_lossy(),
);
debug!("Found git repo: {:?}", parent_path);
let _ = injector.push(parent_path, |e, cols| {
let () = injector.push(parent_path, |e, cols| {
cols[0] = e.clone().into();
});
return ignore::WalkState::Skip;

View File

@ -1,51 +1,30 @@
use std::sync::Arc;
use clap::ValueEnum;
use devicons::FileIcon;
use nucleo::{
pattern::{CaseMatching, Normalization},
Config, Nucleo,
};
use crate::channels::{TelevisionChannel, UnitChannel};
use crate::{
channels::{CliTvChannel, OnAir},
entry::Entry,
fuzzy::MATCHER,
fuzzy::matcher::{Config, Matcher},
previewers::PreviewType,
};
pub struct RemoteControl {
matcher: Nucleo<String>,
last_pattern: String,
result_count: u32,
total_count: u32,
running: bool,
matcher: Matcher<String>,
}
const NUM_THREADS: usize = 1;
impl RemoteControl {
pub fn new(channels: Vec<UnitChannel>) -> Self {
let matcher = Nucleo::new(
Config::DEFAULT,
Arc::new(|| {}),
Some(NUM_THREADS),
1,
);
let matcher = Matcher::new(Config::default().n_threads(NUM_THREADS));
let injector = matcher.injector();
for channel in channels {
let _ = injector.push(channel.to_string(), |e, cols| {
let () = injector.push(channel.to_string(), |e, cols| {
cols[0] = e.clone().into();
});
}
RemoteControl {
matcher,
last_pattern: String::new(),
result_count: 0,
total_count: 0,
running: false,
}
RemoteControl { matcher }
}
pub fn with_transitions_from(
@ -53,8 +32,6 @@ impl RemoteControl {
) -> Self {
Self::new(television_channel.available_transitions())
}
const MATCHER_TICK_TIMEOUT: u64 = 2;
}
impl Default for RemoteControl {
@ -70,80 +47,45 @@ impl Default for RemoteControl {
const TV_ICON: FileIcon = FileIcon {
icon: '📺',
color: "#ffffff",
color: "#000000",
};
impl OnAir for RemoteControl {
fn find(&mut self, pattern: &str) {
if pattern != self.last_pattern {
self.matcher.pattern.reparse(
0,
pattern,
CaseMatching::Smart,
Normalization::Smart,
pattern.starts_with(&self.last_pattern),
);
self.last_pattern = pattern.to_string();
}
self.matcher.find(pattern);
}
fn results(&mut self, num_entries: u32, offset: u32) -> Vec<Entry> {
let status = self.matcher.tick(Self::MATCHER_TICK_TIMEOUT);
let snapshot = self.matcher.snapshot();
if status.changed {
self.result_count = snapshot.matched_item_count();
self.total_count = snapshot.item_count();
}
self.running = status.running;
let mut indices = Vec::new();
let mut matcher = MATCHER.lock();
snapshot
.matched_items(
offset
..(num_entries + offset)
.min(snapshot.matched_item_count()),
)
.map(move |item| {
snapshot.pattern().column_pattern(0).indices(
item.matcher_columns[0].slice(..),
&mut matcher,
&mut indices,
);
indices.sort_unstable();
indices.dedup();
let indices = indices.drain(..);
let name = item.matcher_columns[0].to_string();
Entry::new(name, PreviewType::Basic)
.with_name_match_ranges(
indices.map(|i| (i, i + 1)).collect(),
)
self.matcher.tick();
self.matcher
.results(num_entries, offset)
.into_iter()
.map(|item| {
let path = item.matched_string;
Entry::new(path.clone(), PreviewType::Basic)
.with_name_match_ranges(item.match_indices)
.with_icon(TV_ICON)
})
.collect()
}
fn get_result(&self, index: u32) -> Option<Entry> {
let snapshot = self.matcher.snapshot();
snapshot.get_matched_item(index).map(|item| {
let name = item.matcher_columns[0].to_string();
// TODO: Add new Previewer for Channel selection which displays a
// short description of the channel
Entry::new(name.clone(), PreviewType::Basic).with_icon(TV_ICON)
self.matcher.get_result(index).map(|item| {
let path = item.matched_string;
Entry::new(path.clone(), PreviewType::Basic).with_icon(TV_ICON)
})
}
fn result_count(&self) -> u32 {
self.result_count
self.matcher.matched_item_count
}
fn total_count(&self) -> u32 {
self.total_count
self.matcher.total_item_count
}
fn running(&self) -> bool {
self.running
self.matcher.status.running
}
fn shutdown(&self) {}

View File

@ -1,22 +1,17 @@
use std::io::BufRead;
use std::path::Path;
use std::{io::BufRead, sync::Arc};
use devicons::FileIcon;
use nucleo::{Config, Nucleo};
use super::OnAir;
use crate::entry::Entry;
use crate::fuzzy::MATCHER;
use crate::fuzzy::matcher::{Config, Matcher};
use crate::previewers::PreviewType;
use crate::utils::strings::preprocess_line;
pub struct Channel {
matcher: Nucleo<String>,
last_pattern: String,
result_count: u32,
total_count: u32,
matcher: Matcher<String>,
icon: FileIcon,
running: bool,
}
const NUM_THREADS: usize = 2;
@ -27,29 +22,18 @@ impl Channel {
for line in std::io::stdin().lock().lines().map_while(Result::ok) {
lines.push(preprocess_line(&line));
}
let matcher = Nucleo::new(
Config::DEFAULT,
Arc::new(|| {}),
Some(NUM_THREADS),
1,
);
let matcher = Matcher::new(Config::default().n_threads(NUM_THREADS));
let injector = matcher.injector();
for line in &lines {
let _ = injector.push(line.clone(), |e, cols| {
let () = injector.push(line.clone(), |e, cols| {
cols[0] = e.clone().into();
});
}
Self {
matcher,
last_pattern: String::new(),
result_count: 0,
total_count: 0,
icon: FileIcon::from("nu"),
running: false,
}
}
const MATCHER_TICK_TIMEOUT: u64 = 2;
}
impl Default for Channel {
@ -59,95 +43,57 @@ impl Default for Channel {
}
impl OnAir for Channel {
// maybe this could be sort of automatic with a blanket impl (making Finder generic over
// its matcher type or something)
fn find(&mut self, pattern: &str) {
if pattern != self.last_pattern {
self.matcher.pattern.reparse(
0,
pattern,
nucleo::pattern::CaseMatching::Smart,
nucleo::pattern::Normalization::Smart,
pattern.starts_with(&self.last_pattern),
);
self.last_pattern = pattern.to_string();
}
self.matcher.find(pattern);
}
fn results(&mut self, num_entries: u32, offset: u32) -> Vec<Entry> {
let status = self.matcher.tick(Self::MATCHER_TICK_TIMEOUT);
let snapshot = self.matcher.snapshot();
if status.changed {
self.result_count = snapshot.matched_item_count();
self.total_count = snapshot.item_count();
}
self.running = status.running;
let mut indices = Vec::new();
let mut matcher = MATCHER.lock();
let icon = self.icon;
snapshot
.matched_items(
offset
..(num_entries + offset)
.min(snapshot.matched_item_count()),
)
.map(move |item| {
snapshot.pattern().column_pattern(0).indices(
item.matcher_columns[0].slice(..),
&mut matcher,
&mut indices,
);
indices.sort_unstable();
indices.dedup();
let indices = indices.drain(..);
let content = item.matcher_columns[0].to_string();
let path = Path::new(&content);
self.matcher.tick();
self.matcher
.results(num_entries, offset)
.into_iter()
.map(|item| {
let path = Path::new(&item.matched_string);
let icon = if path.try_exists().unwrap_or(false) {
FileIcon::from(path)
} else {
icon
self.icon
};
Entry::new(content.clone(), PreviewType::Basic)
.with_name_match_ranges(
indices.map(|i| (i, i + 1)).collect(),
)
Entry::new(item.matched_string, PreviewType::Basic)
.with_name_match_ranges(item.match_indices)
.with_icon(icon)
})
.collect()
}
fn get_result(&self, index: u32) -> Option<Entry> {
let snapshot = self.matcher.snapshot();
snapshot.get_matched_item(index).map(|item| {
let content = item.matcher_columns[0].to_string();
self.matcher.get_result(index).map(|item| {
let path = Path::new(&item.matched_string);
// if we recognize a file path, use a file icon
// and set the preview type to "Files"
let path = Path::new(&content);
if path.is_file() {
Entry::new(content.clone(), PreviewType::Files)
Entry::new(item.matched_string.clone(), PreviewType::Files)
.with_icon(FileIcon::from(path))
} else if path.is_dir() {
Entry::new(content.clone(), PreviewType::Directory)
Entry::new(item.matched_string.clone(), PreviewType::Directory)
.with_icon(FileIcon::from(path))
} else {
Entry::new(content.clone(), PreviewType::Basic)
Entry::new(item.matched_string.clone(), PreviewType::Basic)
.with_icon(self.icon)
}
})
}
fn result_count(&self) -> u32 {
self.result_count
self.matcher.matched_item_count
}
fn total_count(&self) -> u32 {
self.total_count
self.matcher.total_item_count
}
fn running(&self) -> bool {
self.running
self.matcher.status.running
}
fn shutdown(&self) {}

View File

@ -1,9 +1,5 @@
use devicons::FileIcon;
use ignore::WalkState;
use nucleo::{
pattern::{CaseMatching, Normalization},
Config, Injector, Nucleo,
};
use std::{
fs::File,
io::{BufRead, Read, Seek},
@ -13,7 +9,7 @@ use std::{
use tracing::{debug, warn};
use super::{OnAir, TelevisionChannel};
use crate::previewers::PreviewType;
use crate::utils::strings::PRINTABLE_ASCII_THRESHOLD;
use crate::utils::{
files::{is_not_text, walk_builder, DEFAULT_NUM_THREADS},
strings::preprocess_line,
@ -21,9 +17,12 @@ use crate::utils::{
use crate::{
entry::Entry, utils::strings::proportion_of_printable_ascii_characters,
};
use crate::{fuzzy::MATCHER, utils::strings::PRINTABLE_ASCII_THRESHOLD};
use crate::{
fuzzy::matcher::{Config, Injector, Matcher},
previewers::PreviewType,
};
#[derive(Debug)]
#[derive(Debug, Clone)]
struct CandidateLine {
path: PathBuf,
line: String,
@ -42,17 +41,13 @@ impl CandidateLine {
#[allow(clippy::module_name_repetitions)]
pub struct Channel {
matcher: Nucleo<CandidateLine>,
last_pattern: String,
result_count: u32,
total_count: u32,
running: bool,
matcher: Matcher<CandidateLine>,
crawl_handle: tokio::task::JoinHandle<()>,
}
impl Channel {
pub fn new(directories: Vec<PathBuf>) -> Self {
let matcher = Nucleo::new(Config::DEFAULT, Arc::new(|| {}), None, 1);
let matcher = Matcher::new(Config::default());
// start loading files in the background
let crawl_handle = tokio::spawn(crawl_for_candidates(
directories,
@ -60,21 +55,12 @@ impl Channel {
));
Channel {
matcher,
last_pattern: String::new(),
result_count: 0,
total_count: 0,
running: false,
crawl_handle,
}
}
fn from_file_paths(file_paths: Vec<PathBuf>) -> Self {
let matcher = Nucleo::new(
Config::DEFAULT.match_paths(),
Arc::new(|| {}),
None,
1,
);
let matcher = Matcher::new(Config::default());
let injector = matcher.injector();
let current_dir = std::env::current_dir().unwrap();
let crawl_handle = tokio::spawn(async move {
@ -93,21 +79,12 @@ impl Channel {
Channel {
matcher,
last_pattern: String::new(),
result_count: 0,
total_count: 0,
running: false,
crawl_handle,
}
}
fn from_text_entries(entries: Vec<Entry>) -> Self {
let matcher = Nucleo::new(
Config::DEFAULT.match_paths(),
Arc::new(|| {}),
None,
1,
);
let matcher = Matcher::new(Config::default());
let injector = matcher.injector();
let load_handle = tokio::spawn(async move {
for entry in entries.into_iter().take(MAX_LINES_IN_MEM) {
@ -126,15 +103,9 @@ impl Channel {
Channel {
matcher,
last_pattern: String::new(),
result_count: 0,
total_count: 0,
running: false,
crawl_handle: load_handle,
}
}
const MATCHER_TICK_TIMEOUT: u64 = 2;
}
impl Default for Channel {
@ -192,86 +163,55 @@ impl From<&mut TelevisionChannel> for Channel {
impl OnAir for Channel {
fn find(&mut self, pattern: &str) {
if pattern != self.last_pattern {
self.matcher.pattern.reparse(
0,
pattern,
CaseMatching::Smart,
Normalization::Smart,
pattern.starts_with(&self.last_pattern),
);
self.last_pattern = pattern.to_string();
}
self.matcher.find(pattern);
}
fn results(&mut self, num_entries: u32, offset: u32) -> Vec<Entry> {
let status = self.matcher.tick(Self::MATCHER_TICK_TIMEOUT);
let snapshot = self.matcher.snapshot();
if status.changed {
self.result_count = snapshot.matched_item_count();
self.total_count = snapshot.item_count();
}
self.running = status.running;
let mut indices = Vec::new();
let mut matcher = MATCHER.lock();
snapshot
.matched_items(
offset
..(num_entries + offset)
.min(snapshot.matched_item_count()),
)
.map(move |item| {
snapshot.pattern().column_pattern(0).indices(
item.matcher_columns[0].slice(..),
&mut matcher,
&mut indices,
);
indices.sort_unstable();
indices.dedup();
let indices = indices.drain(..);
let line = item.matcher_columns[0].to_string();
self.matcher.tick();
self.matcher
.results(num_entries, offset)
.into_iter()
.map(|item| {
let line = item.matched_string;
let display_path =
item.data.path.to_string_lossy().to_string();
item.inner.path.to_string_lossy().to_string();
Entry::new(
display_path.clone() + &item.data.line_number.to_string(),
display_path.clone() + &item.inner.line_number.to_string(),
PreviewType::Files,
)
.with_display_name(display_path)
.with_value(line)
.with_value_match_ranges(indices.map(|i| (i, i + 1)).collect())
.with_icon(FileIcon::from(item.data.path.as_path()))
.with_line_number(item.data.line_number)
.with_value_match_ranges(item.match_indices)
.with_icon(FileIcon::from(item.inner.path.as_path()))
.with_line_number(item.inner.line_number)
})
.collect()
}
fn get_result(&self, index: u32) -> Option<Entry> {
let snapshot = self.matcher.snapshot();
snapshot.get_matched_item(index).map(|item| {
let display_path = item.data.path.to_string_lossy().to_string();
self.matcher.get_result(index).map(|item| {
let display_path = item.inner.path.to_string_lossy().to_string();
Entry::new(display_path.clone(), PreviewType::Files)
.with_display_name(
display_path.clone()
+ ":"
+ &item.data.line_number.to_string(),
+ &item.inner.line_number.to_string(),
)
.with_icon(FileIcon::from(item.data.path.as_path()))
.with_line_number(item.data.line_number)
.with_icon(FileIcon::from(item.inner.path.as_path()))
.with_line_number(item.inner.line_number)
})
}
fn result_count(&self) -> u32 {
self.result_count
self.matcher.matched_item_count
}
fn total_count(&self) -> u32 {
self.total_count
self.matcher.total_item_count
}
fn running(&self) -> bool {
self.running
self.matcher.status.running
}
fn shutdown(&self) {
@ -310,9 +250,9 @@ async fn crawl_for_candidates(
let current_dir = std::env::current_dir().unwrap();
let mut walker =
walk_builder(&directories[0], *DEFAULT_NUM_THREADS, None, None);
for path in directories[1..].iter() {
directories[1..].iter().for_each(|path| {
walker.add(path);
}
});
let lines_in_mem = Arc::new(AtomicUsize::new(0));
@ -393,7 +333,7 @@ fn try_inject_lines(
line,
line_number,
);
let _ = injector.push(candidate, |c, cols| {
let () = injector.push(candidate, |c, cols| {
cols[0] = c.line.clone().into();
});
injected_lines += 1;

View File

@ -1,6 +1,8 @@
use parking_lot::Mutex;
use std::ops::DerefMut;
pub mod matcher;
pub struct LazyMutex<T> {
inner: Mutex<Option<T>>,
init: fn() -> T,

View File

@ -0,0 +1,332 @@
use std::sync::Arc;
use super::MATCHER;
/// Timeout used when ticking the matcher.
///
/// NOTE(review): presumably expressed in milliseconds, as the `timeout`
/// argument of `nucleo::Nucleo::tick` is; the use site is outside this view,
/// confirm before relying on the unit.
const MATCHER_TICK_TIMEOUT: u64 = 2;
/// A single item returned by the fuzzy matcher.
///
/// It bundles the item itself, the dimension it was matched against
/// (rendered as a `String`) and the indices of the matched characters.
///
/// Each entry of `match_indices` is a `(start, end)` pair where `start` is the
/// index of the first character in the match and `end` is the index of the
/// character right after the last character in the match.
#[derive(Debug, Clone)]
pub struct MatchedItem<I>
where
    I: Sync + Send + Clone + 'static,
{
    /// The item this match refers to.
    pub inner: I,
    /// The string the item was matched against.
    pub matched_string: String,
    /// The matched character ranges, as `(start, end)` pairs (`end` exclusive).
    pub match_indices: Vec<(u32, u32)>,
}
/// The status of the fuzzy matcher.
///
/// For now this only carries a flag telling whether the matcher is still
/// running in the background.
/// It mostly serves as a way to communicate the state of the matcher to the
/// front-end so that it can display a loading indicator.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Status {
    /// `true` while the matcher is running in the background.
    pub running: bool,
}
impl From<nucleo::Status> for Status {
fn from(status: nucleo::Status) -> Self {
Self {
running: status.running,
}
}
}
/// The configuration of the fuzzy matcher.
///
/// This contains the number of threads to use, whether to ignore case, whether
/// to prefer prefix matches, and whether to optimize for matching paths.
///
/// The default configuration uses the default configuration of the `Nucleo`
/// fuzzy matcher, e.g. case-insensitive matching, no preference for prefix
/// matches, and no optimization for matching paths as well as using the
/// default number of threads (which corresponds to the number of available logical
/// cores on the current machine).
///
/// Values are meant to be built by chaining the setter methods:
/// `Config::default().n_threads(1).match_paths(true)`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Config {
    /// Number of threads to use; `None` leaves the choice to the matcher.
    pub n_threads: Option<usize>,
    /// Whether matching ignores case.
    pub ignore_case: bool,
    /// Whether prefix matches are preferred.
    pub prefer_prefix: bool,
    /// Whether matching is optimized for paths.
    pub match_paths: bool,
}

impl Default for Config {
    /// Case-insensitive matching, no prefix preference, no path optimization
    /// and no explicit thread count.
    fn default() -> Self {
        Self {
            n_threads: None,
            ignore_case: true,
            prefer_prefix: false,
            match_paths: false,
        }
    }
}

impl Config {
    /// Set the number of threads to use.
    #[must_use]
    pub fn n_threads(mut self, n_threads: usize) -> Self {
        self.n_threads = Some(n_threads);
        self
    }

    /// Set whether to ignore case.
    #[must_use]
    pub fn ignore_case(mut self, ignore_case: bool) -> Self {
        self.ignore_case = ignore_case;
        self
    }

    /// Set whether to prefer prefix matches.
    #[must_use]
    pub fn prefer_prefix(mut self, prefer_prefix: bool) -> Self {
        self.prefer_prefix = prefer_prefix;
        self
    }

    /// Set whether to optimize for matching paths.
    #[must_use]
    pub fn match_paths(mut self, match_paths: bool) -> Self {
        self.match_paths = match_paths;
        self
    }
}
impl From<&Config> for nucleo::Config {
fn from(config: &Config) -> Self {
let mut matcher_config = nucleo::Config::DEFAULT;
matcher_config.ignore_case = config.ignore_case;
matcher_config.prefer_prefix = config.prefer_prefix;
if config.match_paths {
matcher_config = matcher_config.match_paths();
}
matcher_config
}
}
/// A handle for feeding items of type `I` into the fuzzy matcher.
///
/// This is a thin wrapper around the `Injector` type of the `Nucleo` fuzzy
/// matcher.
///
/// Items are added through `push`, which takes the item itself together with
/// a closure that derives, from that item, the string to match against.
#[derive(Clone)]
pub struct Injector<I: Sync + Send + Clone + 'static> {
    /// The wrapped `nucleo` injector.
    inner: nucleo::Injector<I>,
}
impl<I> Injector<I>
where
I: Sync + Send + Clone + 'static,
{
pub fn new(inner: nucleo::Injector<I>) -> Self {
Self { inner }
}
/// Push an item into the fuzzy matcher.
///
/// The closure `f` should produce the string to match against based on the
/// item.
///
/// # Example
/// ```
/// let config = Config::default();
/// let matcher = Matcher::new(config);
///
/// let injector = matcher.injector();
/// injector.push(
/// ("some string", 3, "some other string"),
/// // Say we want to match against the third element of the tuple
/// |s, cols| cols[0] = s.2.into()
/// );
/// ```
pub fn push<F>(&self, item: I, f: F)
where
F: FnOnce(&I, &mut [nucleo::Utf32String]),
{
self.inner.push(item, f);
}
}
/// A fuzzy matcher for items of type `I`.
///
/// `I` must be `Sync`, `Send`, `Clone`, and `'static`.
/// This wraps the `Nucleo` fuzzy matcher and restricts it to matching on a
/// single dimension (one column).
///
/// Use it to search the injected items for a given pattern and to retrieve
/// the matching items along with the indices of the matched characters.
pub struct Matcher<I: Sync + Send + Clone + 'static> {
    /// The wrapped `nucleo` matcher.
    inner: nucleo::Nucleo<I>,
    /// Total number of items in the matcher, as of the last call to `results`.
    pub total_item_count: u32,
    /// Number of items matching the pattern, as of the last call to `results`.
    pub matched_item_count: u32,
    /// Status reported by the last call to `tick`.
    pub status: Status,
    /// The pattern most recently passed to `find`.
    pub last_pattern: String,
}
impl<I> Matcher<I>
where
    I: Sync + Send + Clone + 'static,
{
    /// Create a new fuzzy matcher with the given configuration.
    ///
    /// The underlying `Nucleo` instance is created with a single match column
    /// and a no-op notification callback; counters start at zero and the last
    /// pattern is empty.
    pub fn new(config: Config) -> Self {
        Self {
            inner: nucleo::Nucleo::new(
                (&config).into(),
                Arc::new(|| {}),
                config.n_threads,
                1,
            ),
            total_item_count: 0,
            matched_item_count: 0,
            status: Status::default(),
            last_pattern: String::new(),
        }
    }

    /// Tick the fuzzy matcher.
    ///
    /// This should be called periodically to update the state of the matcher.
    /// The status reported by the underlying matcher is stored in
    /// `self.status`.
    pub fn tick(&mut self) {
        self.status = self.inner.tick(MATCHER_TICK_TIMEOUT).into();
    }

    /// Get an injector that can be used to push items into the fuzzy matcher.
    ///
    /// This can be used at any time to push items into the fuzzy matcher.
    ///
    /// # Example
    /// ```ignore
    /// let config = Config::default();
    /// let matcher = Matcher::new(config);
    /// let injector = matcher.injector();
    ///
    /// injector.push(
    ///     ("some string", 3, "some other string"),
    ///     // Say we want to match against the third element of the tuple
    ///     |s, cols| cols[0] = s.2.into()
    /// );
    /// ```
    pub fn injector(&self) -> Injector<I> {
        Injector::new(self.inner.injector())
    }

    /// Find items that match the given pattern.
    ///
    /// This should be called whenever the pattern changes.
    /// The `Matcher` keeps track of the last pattern and only reparses the
    /// pattern if it has changed, allowing for more efficient matching when
    /// `self.last_pattern` is a prefix of the new `pattern`.
    pub fn find(&mut self, pattern: &str) {
        if pattern != self.last_pattern {
            self.inner.pattern.reparse(
                0,
                pattern,
                nucleo::pattern::CaseMatching::Smart,
                nucleo::pattern::Normalization::Smart,
                // The new pattern merely extends the previous one.
                pattern.starts_with(&self.last_pattern),
            );
            // Reuse the existing allocation instead of building a new String.
            pattern.clone_into(&mut self.last_pattern);
        }
    }

    /// Get the matched items.
    ///
    /// This should be called to retrieve the matched items after calling
    /// `find`. It also refreshes `total_item_count` and `matched_item_count`
    /// from the current snapshot.
    ///
    /// The `num_entries` parameter specifies the maximum number of entries to
    /// return, and the `offset` parameter specifies the offset of the first
    /// entry to return. The requested window is clamped to the number of
    /// matched items, so an `offset` past the end yields an empty vector
    /// instead of panicking.
    ///
    /// The returned items are `MatchedItem`s that contain the matched item, the
    /// dimension against which it was matched, represented as a string, and the
    /// indices of the matched characters.
    ///
    /// # Example
    /// ```ignore
    /// let config = Config::default();
    /// let mut matcher = Matcher::new(config);
    /// matcher.find("some pattern");
    ///
    /// let results = matcher.results(10, 0);
    /// for item in results {
    ///     println!("{:?}", item);
    ///     // Do something with the matched item
    ///     // ...
    ///     // Do something with the matched indices
    ///     // ...
    /// }
    /// ```
    pub fn results(
        &mut self,
        num_entries: u32,
        offset: u32,
    ) -> Vec<MatchedItem<I>> {
        let snapshot = self.inner.snapshot();
        self.total_item_count = snapshot.item_count();
        self.matched_item_count = snapshot.matched_item_count();

        // `matched_items` panics on a range that exceeds the matched item
        // count (or is inverted), so clamp both ends and avoid overflowing
        // `offset + num_entries`.
        let end = offset
            .saturating_add(num_entries)
            .min(self.matched_item_count);
        let start = offset.min(end);

        // Scratch buffer reused across items to collect match positions.
        let mut col_indices = Vec::new();
        let mut matcher = MATCHER.lock();
        snapshot
            .matched_items(start..end)
            .map(move |item| {
                snapshot.pattern().column_pattern(0).indices(
                    item.matcher_columns[0].slice(..),
                    &mut matcher,
                    &mut col_indices,
                );
                col_indices.sort_unstable();
                col_indices.dedup();
                // Draining empties the buffer for the next item.
                let indices = col_indices.drain(..);
                let matched_string = item.matcher_columns[0].to_string();
                MatchedItem {
                    inner: item.data.clone(),
                    matched_string,
                    // Each matched character becomes a one-wide range.
                    match_indices: indices.map(|i| (i, i + 1)).collect(),
                }
            })
            .collect()
    }

    /// Get a single matched item.
    ///
    /// Returns `None` if there is no matched item at `index`. The returned
    /// item's `match_indices` is always empty; use `results` when the match
    /// positions are needed.
    ///
    /// # Example
    /// ```ignore
    /// let config = Config::default();
    /// let mut matcher = Matcher::new(config);
    /// matcher.find("some pattern");
    ///
    /// if let Some(item) = matcher.get_result(0) {
    ///     println!("{:?}", item);
    ///     // Do something with the matched item
    ///     // ...
    /// }
    /// ```
    pub fn get_result(&self, index: u32) -> Option<MatchedItem<I>> {
        let snapshot = self.inner.snapshot();
        snapshot.get_matched_item(index).map(|item| {
            let matched_string = item.matcher_columns[0].to_string();
            MatchedItem {
                inner: item.data.clone(),
                matched_string,
                match_indices: Vec::new(),
            }
        })
    }
}