Make unicode-segmentation a hard dependency
Run cargo lints / Lint on ${{ matrix.build }} (linux-amd64, ubuntu-latest, stable, x86_64-unknown-linux-gnu) (pull_request) Successful in 7m48s
Details
Run Tests / Test on ${{ matrix.build }} (linux-amd64, ubuntu-latest, stable, x86_64-unknown-linux-gnu) (pull_request) Successful in 15m42s
Details
Cargo manifest lints / Lint Cargo manifests on ${{ matrix.build }} (linux-amd64, ubuntu-latest, stable, x86_64-unknown-linux-gnu) (pull_request) Successful in 11m5s
Details
Run cargo lints / Lint on ${{ matrix.build }} (linux-amd64, ubuntu-latest, stable, x86_64-unknown-linux-gnu) (pull_request) Successful in 7m48s
Details
Run Tests / Test on ${{ matrix.build }} (linux-amd64, ubuntu-latest, stable, x86_64-unknown-linux-gnu) (pull_request) Successful in 15m42s
Details
Cargo manifest lints / Lint Cargo manifests on ${{ matrix.build }} (linux-amd64, ubuntu-latest, stable, x86_64-unknown-linux-gnu) (pull_request) Successful in 11m5s
Details
meli/melib are UTF-8 software, so we should have proper Unicode support. A compile-time env var, `UNICODE_REGENERATE_TABLES`, is added to force network access and rebuild the cached Unicode tables. Signed-off-by: Manos Pitsidianakis <manos@pitsidianak.is> pull/377/head
parent
07072e2e3f
commit
ae96038fbf
5
BUILD.md
5
BUILD.md
|
@ -3,7 +3,7 @@
|
|||
For a quick start, build and install locally:
|
||||
|
||||
```sh
|
||||
PREFIX=~/.local make install
|
||||
PREFIX=~/.local make install
|
||||
```
|
||||
|
||||
Available subcommands for `make` are listed with `make help`.
|
||||
|
@ -34,6 +34,9 @@ Some functionality is held behind "feature gates", or compile-time flags. The fo
|
|||
Since its actual use in the code is very limited, it is not recommended to use this (off by default).
|
||||
- `static` and `*-static` bundle C libraries in dependencies so that you don't need them installed in your system (on by default).
|
||||
|
||||
Though not a feature, the presence of the environment variable `UNICODE_REGENERATE_TABLES` at compile time of the `melib` crate will force the regeneration of the Unicode tables.
|
||||
Otherwise the tables are included with the source code, and there's no real reason to regenerate them unless you intend to modify the code or update to a new Unicode version.
|
||||
|
||||
## Build Debian package (*deb*)
|
||||
|
||||
Building with Debian's packaged cargo might require the installation of these two packages: `librust-openssl-sys-dev librust-libdbus-sys-dev`
|
||||
|
|
|
@ -14,10 +14,7 @@ path = "fuzz_targets/envelope_parse.rs"
|
|||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.3"
|
||||
|
||||
[dependencies.melib]
|
||||
path = "../melib"
|
||||
features = ["unicode-algorithms"]
|
||||
melib = { path = "../melib" }
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
|
|
|
@ -31,7 +31,7 @@ indexmap = { version = "^1.6", features = ["serde-1"] }
|
|||
libc = { version = "0.2.125", default-features = false, features = ["extra_traits"] }
|
||||
libz-sys = { version = "1.1", features = ["static"], optional = true }
|
||||
linkify = { version = "^0.8", default-features = false }
|
||||
melib = { path = "../melib", version = "0.8.5-rc.3", features = ["unicode-algorithms"] }
|
||||
melib = { path = "../melib", version = "0.8.5-rc.3", features = [] }
|
||||
nix = { version = "0.27", default-features = false, features = ["signal", "poll", "term", "ioctl", "process"] }
|
||||
notify = { version = "4.0.1", default-features = false } # >:c
|
||||
num_cpus = "1.12.0"
|
||||
|
|
|
@ -50,7 +50,7 @@ serde_path_to_error = { version = "0.1" }
|
|||
smallvec = { version = "^1.5.0", features = ["serde"] }
|
||||
smol = "1.0.0"
|
||||
socket2 = { version = "0.5", features = [] }
|
||||
unicode-segmentation = { version = "1.2.1", default-features = false, optional = true }
|
||||
unicode-segmentation = { version = "1.2.1", default-features = false }
|
||||
url = { version = "2.4", optional = true }
|
||||
uuid = { version = "^1", features = ["serde", "v4", "v5"] }
|
||||
xdg = "2.1.0"
|
||||
|
@ -77,9 +77,6 @@ sqlite3 = ["rusqlite"]
|
|||
sqlite3-static = ["sqlite3", "rusqlite/bundled-full"]
|
||||
tls = ["native-tls"]
|
||||
tls-static = ["tls", "native-tls/vendored"]
|
||||
text-processing = []
|
||||
unicode-algorithms = ["text-processing", "unicode-segmentation"]
|
||||
unicode-algorithms-cached = ["text-processing", "unicode-segmentation"]
|
||||
vcard = []
|
||||
|
||||
[build-dependencies]
|
||||
|
|
|
@ -22,24 +22,6 @@ Library for handling mail.
|
|||
|------------------------------|-------------------------------------|--------------------------|
|
||||
| `sqlite` | `rusqlite` | Used in IMAP cache. |
|
||||
|------------------------------|-------------------------------------|--------------------------|
|
||||
| `unicode-algorithms` | `unicode-segmentation` | Linebreaking algo etc |
|
||||
| | | For a fresh clean build, |
|
||||
| | | Network access is |
|
||||
| | | required to fetch data |
|
||||
| | | from Unicode's website. |
|
||||
|------------------------------|-------------------------------------|--------------------------|
|
||||
| `unicode-algorithms-cached` | `unicode-segmentation` | Linebreaking algo etc |
|
||||
| | | but it uses a cached |
|
||||
| | | version of Unicode data |
|
||||
| | | which might be stale. |
|
||||
| | | |
|
||||
| | | Use this feature instead |
|
||||
| | | of the previous one for |
|
||||
| | | building without network |
|
||||
| | | access. |
|
||||
|------------------------------|-------------------------------------|--------------------------|
|
||||
| `unicode-algorithms` | `unicode-segmentation` | |
|
||||
|------------------------------|-------------------------------------|--------------------------|
|
||||
| `vcard` | | vcard parsing |
|
||||
|------------------------------|-------------------------------------|--------------------------|
|
||||
| `gpgme` | | GPG use with libgpgme |
|
||||
|
|
|
@ -21,15 +21,14 @@
|
|||
|
||||
#![allow(clippy::needless_range_loop)]
|
||||
|
||||
#[cfg(any(feature = "unicode-algorithms", feature = "unicode-algorithms-cached"))]
|
||||
include!("src/text/types.rs");
|
||||
|
||||
fn main() -> Result<(), std::io::Error> {
|
||||
#[cfg(any(feature = "unicode-algorithms", feature = "unicode-algorithms-cached"))]
|
||||
{
|
||||
const MOD_PATH: &str = "src/text/tables.rs";
|
||||
println!("cargo:rerun-if-env-changed=UNICODE_REGENERATE_TABLES");
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
println!("cargo:rerun-if-changed={}", MOD_PATH);
|
||||
println!("cargo:rerun-if-changed={MOD_PATH}");
|
||||
/* Line break tables */
|
||||
use std::{
|
||||
fs::File,
|
||||
|
@ -54,7 +53,7 @@ fn main() -> Result<(), std::io::Error> {
|
|||
);
|
||||
return Ok(());
|
||||
}
|
||||
if cfg!(feature = "unicode-algorithms-cached") {
|
||||
if std::env::var("UNICODE_REGENERATE_TABLES").is_err() {
|
||||
const CACHED_MODULE: &[u8] = include_bytes!(concat!("./src/text/tables.rs.gz"));
|
||||
|
||||
let mut gz = GzDecoder::new(CACHED_MODULE);
|
||||
|
|
|
@ -20,14 +20,12 @@
|
|||
*/
|
||||
|
||||
use super::*;
|
||||
#[cfg(feature = "text-processing")]
|
||||
use crate::text::grapheme_clusters::TextProcessing;
|
||||
|
||||
pub fn encode_header(value: &str) -> String {
|
||||
let mut ret = String::with_capacity(value.len());
|
||||
let mut is_current_window_ascii = true;
|
||||
let mut current_window_start = 0;
|
||||
#[cfg(feature = "text-processing")]
|
||||
{
|
||||
let graphemes = value.graphemes_indices();
|
||||
for (idx, g) in graphemes {
|
||||
|
@ -81,63 +79,6 @@ pub fn encode_header(value: &str) -> String {
|
|||
}
|
||||
}
|
||||
}
|
||||
#[cfg(not(feature = "text-processing"))]
|
||||
{
|
||||
/* [ref:VERIFY] [ref:TODO]: test this. If it works as fine as the one above, there's no need to
|
||||
* keep the above implementation. */
|
||||
for (i, g) in value.char_indices() {
|
||||
match (g.is_ascii(), is_current_window_ascii) {
|
||||
(true, true) => {
|
||||
ret.push(g);
|
||||
}
|
||||
(true, false) => {
|
||||
/* If !g.is_whitespace()
|
||||
*
|
||||
* Whitespaces inside encoded tokens must be greedily taken,
|
||||
* instead of splitting each non-ascii word into separate encoded tokens. */
|
||||
if !g.is_whitespace() && value.is_char_boundary(i) {
|
||||
ret.push_str(&format!(
|
||||
"=?UTF-8?B?{}?=",
|
||||
BASE64_MIME
|
||||
.encode(value[current_window_start..i].as_bytes())
|
||||
.trim()
|
||||
));
|
||||
if i != value.len() - 1 {
|
||||
ret.push(' ');
|
||||
}
|
||||
is_current_window_ascii = true;
|
||||
current_window_start = i;
|
||||
ret.push(g);
|
||||
}
|
||||
}
|
||||
(false, true) => {
|
||||
current_window_start = i;
|
||||
is_current_window_ascii = false;
|
||||
}
|
||||
/* RFC2047 recommends:
|
||||
* 'While there is no limit to the length of a multiple-line header field, each
|
||||
* line of a header field that contains one or more
|
||||
* 'encoded-word's is limited to 76 characters.'
|
||||
* This is a rough compliance.
|
||||
*/
|
||||
(false, false)
|
||||
if value.is_char_boundary(i) && value[current_window_start..i].len() > 76 =>
|
||||
{
|
||||
ret.push_str(&format!(
|
||||
"=?UTF-8?B?{}?=",
|
||||
BASE64_MIME
|
||||
.encode(value[current_window_start..i].as_bytes())
|
||||
.trim()
|
||||
));
|
||||
if i != value.len() - 1 {
|
||||
ret.push(' ');
|
||||
}
|
||||
current_window_start = i;
|
||||
}
|
||||
(false, false) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* If the last part of the header value is encoded, it won't be pushed inside
|
||||
* the previous for block */
|
||||
if !is_current_window_ascii {
|
||||
|
|
|
@ -132,7 +132,6 @@ pub mod dbg {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "text-processing")]
|
||||
pub mod text;
|
||||
|
||||
pub use utils::{
|
||||
|
|
|
@ -29,12 +29,12 @@
|
|||
|
||||
*/
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
use super::{
|
||||
types::Reflow,
|
||||
wcwidth::{wcwidth, CodePointsIter},
|
||||
};
|
||||
extern crate unicode_segmentation;
|
||||
use self::unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
pub trait TextProcessing: UnicodeSegmentation + CodePointsIter {
|
||||
fn split_graphemes(&self) -> Vec<&str> {
|
||||
|
|
|
@ -19,12 +19,11 @@
|
|||
* along with meli. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use std::{cmp::Ordering, collections::VecDeque, iter::Peekable, str::FromStr};
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
use LineBreakClass::*;
|
||||
|
||||
use self::unicode_segmentation::UnicodeSegmentation;
|
||||
use super::{
|
||||
grapheme_clusters::TextProcessing,
|
||||
tables::LINE_BREAK_RULES,
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
* along with meli. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
pub mod grapheme_clusters;
|
||||
pub mod line_break;
|
||||
pub mod search;
|
||||
|
@ -43,8 +45,6 @@ impl Truncate for &str {
|
|||
return;
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(*self, true)
|
||||
.take(new_len)
|
||||
.last()
|
||||
|
@ -58,8 +58,6 @@ impl Truncate for &str {
|
|||
return self;
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(*self, true)
|
||||
.take(new_len)
|
||||
.last()
|
||||
|
@ -75,8 +73,6 @@ impl Truncate for &str {
|
|||
return "";
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((first, _)) = UnicodeSegmentation::grapheme_indices(*self, true).nth(skip_len) {
|
||||
&self[first..]
|
||||
} else {
|
||||
|
@ -90,8 +86,6 @@ impl Truncate for &str {
|
|||
return;
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((first, _)) = UnicodeSegmentation::grapheme_indices(*self, true).nth(skip_len) {
|
||||
*self = &self[first..];
|
||||
}
|
||||
|
@ -104,8 +98,6 @@ impl Truncate for String {
|
|||
return;
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(self.as_str(), true)
|
||||
.take(new_len)
|
||||
.last()
|
||||
|
@ -119,8 +111,6 @@ impl Truncate for String {
|
|||
return self;
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((last, _)) = UnicodeSegmentation::grapheme_indices(self.as_str(), true)
|
||||
.take(new_len)
|
||||
.last()
|
||||
|
@ -136,8 +126,6 @@ impl Truncate for String {
|
|||
return "";
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((first, _)) =
|
||||
UnicodeSegmentation::grapheme_indices(self.as_str(), true).nth(skip_len)
|
||||
{
|
||||
|
@ -153,8 +141,6 @@ impl Truncate for String {
|
|||
return;
|
||||
}
|
||||
|
||||
extern crate unicode_segmentation;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
if let Some((first, _)) =
|
||||
UnicodeSegmentation::grapheme_indices(self.as_str(), true).nth(skip_len)
|
||||
{
|
||||
|
|
|
@ -52,7 +52,6 @@ pub use iterators::*;
|
|||
use smallvec::SmallVec;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[cfg(feature = "text-processing")]
|
||||
use crate::text::grapheme_clusters::*;
|
||||
|
||||
type Envelopes = Arc<RwLock<HashMap<EnvelopeHash, Envelope>>>;
|
||||
|
@ -1223,16 +1222,11 @@ impl Threads {
|
|||
}
|
||||
let ma = &envelopes[&a.unwrap()];
|
||||
let mb = &envelopes[&b.unwrap()];
|
||||
#[cfg(feature = "text-processing")]
|
||||
{
|
||||
ma.subject()
|
||||
.split_graphemes()
|
||||
.cmp(&mb.subject().split_graphemes())
|
||||
}
|
||||
#[cfg(not(feature = "text-processing"))]
|
||||
{
|
||||
ma.subject().cmp(&mb.subject())
|
||||
}
|
||||
}
|
||||
(SortField::Subject, SortOrder::Asc) => {
|
||||
let a = &self.thread_nodes[&self.thread_ref(*a).root()].message();
|
||||
|
@ -1252,18 +1246,12 @@ impl Threads {
|
|||
}
|
||||
let ma = &envelopes[&a.unwrap()];
|
||||
let mb = &envelopes[&b.unwrap()];
|
||||
#[cfg(feature = "text-processing")]
|
||||
{
|
||||
mb.subject()
|
||||
.as_ref()
|
||||
.split_graphemes()
|
||||
.cmp(&ma.subject().split_graphemes())
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "text-processing"))]
|
||||
{
|
||||
mb.subject().as_ref().cmp(&ma.subject())
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -1303,16 +1291,11 @@ impl Threads {
|
|||
}
|
||||
let ma = &envelopes[&a.unwrap()];
|
||||
let mb = &envelopes[&b.unwrap()];
|
||||
#[cfg(feature = "text-processing")]
|
||||
{
|
||||
ma.subject()
|
||||
.split_graphemes()
|
||||
.cmp(&mb.subject().split_graphemes())
|
||||
}
|
||||
#[cfg(not(feature = "text-processing"))]
|
||||
{
|
||||
ma.subject().cmp(&mb.subject())
|
||||
}
|
||||
}
|
||||
(SortField::Subject, SortOrder::Asc) => {
|
||||
let a = &self.thread_nodes[a].message();
|
||||
|
@ -1332,18 +1315,12 @@ impl Threads {
|
|||
}
|
||||
let ma = &envelopes[&a.unwrap()];
|
||||
let mb = &envelopes[&b.unwrap()];
|
||||
#[cfg(feature = "text-processing")]
|
||||
{
|
||||
mb.subject()
|
||||
.as_ref()
|
||||
.split_graphemes()
|
||||
.cmp(&ma.subject().split_graphemes())
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "text-processing"))]
|
||||
{
|
||||
mb.subject().as_ref().cmp(&ma.subject())
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -1379,16 +1356,11 @@ impl Threads {
|
|||
}
|
||||
let ma = &envelopes[&a.unwrap()];
|
||||
let mb = &envelopes[&b.unwrap()];
|
||||
#[cfg(feature = "text-processing")]
|
||||
{
|
||||
ma.subject()
|
||||
.split_graphemes()
|
||||
.cmp(&mb.subject().split_graphemes())
|
||||
}
|
||||
#[cfg(not(feature = "text-processing"))]
|
||||
{
|
||||
ma.subject().cmp(&mb.subject())
|
||||
}
|
||||
}
|
||||
(SortField::Subject, SortOrder::Asc) => {
|
||||
let a = &self.thread_nodes[a].message();
|
||||
|
@ -1408,18 +1380,12 @@ impl Threads {
|
|||
}
|
||||
let ma = &envelopes[&a.unwrap()];
|
||||
let mb = &envelopes[&b.unwrap()];
|
||||
#[cfg(feature = "text-processing")]
|
||||
{
|
||||
mb.subject()
|
||||
.as_ref()
|
||||
.split_graphemes()
|
||||
.cmp(&ma.subject().split_graphemes())
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "text-processing"))]
|
||||
{
|
||||
mb.subject().as_ref().cmp(&ma.subject())
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ required-features = ["melib/imap"]
|
|||
[dependencies]
|
||||
crossbeam = { version = "^0.8" }
|
||||
meli = { path = "../meli", version = "0.8" }
|
||||
melib = { path = "../melib", version = "0.8", features = ["debug-tracing", "unicode-algorithms"] }
|
||||
melib = { path = "../melib", version = "0.8", features = ["debug-tracing" ] }
|
||||
nix = { version = "^0.24", default-features = false }
|
||||
signal-hook = { version = "^0.3", default-features = false, features = ["iterator"] }
|
||||
signal-hook-registry = { version = "1.2.0", default-features = false }
|
||||
|
|
Loading…
Reference in New Issue