use std::io::{BufWriter, Stdout, Write};
use std::{cmp, mem};
use crate::FmtOptions;
use crate::parasplit::{ParaWords, Paragraph, WordInfo};
struct BreakArgs<'a> {
opts: &'a FmtOptions,
init_len: usize,
indent_str: &'a str,
indent_len: usize,
uniform: bool,
ostream: &'a mut BufWriter<Stdout>,
}
impl BreakArgs<'_> {
fn compute_width(&self, winfo: &WordInfo, posn: usize, fresh: bool) -> usize {
if fresh {
0
} else {
let post = winfo.after_tab;
match winfo.before_tab {
None => post,
Some(pre) => {
post + ((pre + posn) / self.opts.tabwidth + 1) * self.opts.tabwidth - posn
}
}
}
}
}
pub fn break_lines(
para: &Paragraph,
opts: &FmtOptions,
ostream: &mut BufWriter<Stdout>,
) -> std::io::Result<()> {
let p_indent = ¶.indent_str;
let p_indent_len = para.indent_len;
let p_words = ParaWords::new(opts, para);
let mut p_words_words = p_words.words();
let Some(winfo) = p_words_words.next() else {
return ostream.write_all(b"\n");
};
let p_init_len = winfo.word_nchars
+ if opts.crown || opts.tagged {
ostream.write_all(para.init_str.as_bytes())?;
para.init_len
} else if !para.mail_header {
ostream.write_all(p_indent.as_bytes())?;
p_indent_len
} else {
0
};
ostream.write_all(winfo.word.as_bytes())?;
let uniform = para.mail_header || opts.uniform;
let mut break_args = BreakArgs {
opts,
init_len: p_init_len,
indent_str: p_indent,
indent_len: p_indent_len,
uniform,
ostream,
};
if opts.quick || para.mail_header {
break_simple(p_words_words, &mut break_args)
} else {
break_knuth_plass(p_words_words, &mut break_args)
}
}
fn break_simple<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
mut iter: T,
args: &mut BreakArgs<'a>,
) -> std::io::Result<()> {
iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| {
accum_words_simple(args, l, prev_punct, winfo)
})?;
args.ostream.write_all(b"\n")
}
fn accum_words_simple<'a>(
args: &mut BreakArgs<'a>,
l: usize,
prev_punct: bool,
winfo: &'a WordInfo<'a>,
) -> std::io::Result<(usize, bool)> {
let wlen = winfo.word_nchars + args.compute_width(winfo, l, false);
let slen = compute_slen(
args.uniform,
winfo.new_line,
winfo.sentence_start,
prev_punct,
);
if l + wlen + slen > args.opts.width {
write_newline(args.indent_str, args.ostream)?;
write_with_spaces(&winfo.word[winfo.word_start..], 0, args.ostream)?;
Ok((args.indent_len + winfo.word_nchars, winfo.ends_punct))
} else {
write_with_spaces(winfo.word, slen, args.ostream)?;
Ok((l + wlen + slen, winfo.ends_punct))
}
}
fn break_knuth_plass<'a, T: Clone + Iterator<Item = &'a WordInfo<'a>>>(
mut iter: T,
args: &mut BreakArgs<'a>,
) -> std::io::Result<()> {
let breakpoints = find_kp_breakpoints(iter.clone(), args);
let result: std::io::Result<(bool, bool)> = breakpoints.iter().rev().try_fold(
(false, false),
|(mut prev_punct, mut fresh), &(next_break, break_before)| {
if fresh {
write_newline(args.indent_str, args.ostream)?;
}
for winfo in &mut iter {
let (slen, word) = slice_if_fresh(
fresh,
winfo.word,
winfo.word_start,
args.uniform,
winfo.new_line,
winfo.sentence_start,
prev_punct,
);
fresh = false;
prev_punct = winfo.ends_punct;
if std::ptr::eq(winfo, next_break) {
if break_before {
write_newline(args.indent_str, args.ostream)?;
write_with_spaces(&winfo.word[winfo.word_start..], 0, args.ostream)?;
} else {
write_with_spaces(word, slen, args.ostream)?;
fresh = true;
}
break;
}
write_with_spaces(word, slen, args.ostream)?;
}
Ok((prev_punct, fresh))
},
);
let (mut prev_punct, mut fresh) = result?;
for winfo in iter {
if fresh {
write_newline(args.indent_str, args.ostream)?;
}
let (slen, word) = slice_if_fresh(
fresh,
winfo.word,
winfo.word_start,
args.uniform,
winfo.new_line,
winfo.sentence_start,
prev_punct,
);
prev_punct = winfo.ends_punct;
fresh = false;
write_with_spaces(word, slen, args.ostream)?;
}
args.ostream.write_all(b"\n")
}
struct LineBreak<'a> {
prev: usize,
linebreak: Option<&'a WordInfo<'a>>,
break_before: bool,
demerits: i64,
prev_rat: f32,
length: usize,
fresh: bool,
}
#[allow(clippy::cognitive_complexity)]
fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
iter: T,
args: &BreakArgs<'a>,
) -> Vec<(&'a WordInfo<'a>, bool)> {
let mut iter = iter.peekable();
let mut linebreaks = vec![LineBreak {
prev: 0,
linebreak: None,
break_before: false,
demerits: 0,
prev_rat: 0.0,
length: args.init_len,
fresh: false,
}];
let mut active_breaks = vec![0];
let mut next_active_breaks = vec![];
let stretch = args.opts.width - args.opts.goal;
let minlength = if args.opts.goal <= 10 {
1
} else {
args.opts.goal.max(stretch + 1) - stretch
};
let mut new_linebreaks = vec![];
let mut is_sentence_start = false;
let mut least_demerits = 0;
while let Some(w) = iter.next() {
let (is_last_word, is_sentence_end) = match iter.peek() {
None => (true, true),
Some(&&WordInfo {
sentence_start: st,
new_line: nl,
..
}) => (false, st || (nl && w.ends_punct)),
};
let slen = compute_slen(args.uniform, w.new_line, is_sentence_start, false);
let mut ld_new = i64::MAX;
let mut ld_next = i64::MAX;
let mut ld_idx = 0;
new_linebreaks.clear();
next_active_breaks.clear();
#[allow(clippy::explicit_iter_loop)]
for &i in active_breaks.iter() {
let active = &mut linebreaks[i];
active.demerits -= least_demerits;
if active.demerits < ld_next {
ld_next = active.demerits;
ld_idx = i;
}
let tlen = w.word_nchars
+ args.compute_width(w, active.length, active.fresh)
+ slen
+ active.length;
if tlen <= args.opts.width {
next_active_breaks.push(i);
active.fresh = false;
active.length = tlen;
if tlen >= minlength {
let (new_demerits, new_ratio) = if is_last_word {
(0, 0.0)
} else {
compute_demerits(
args.opts.goal as isize - tlen as isize,
stretch,
w.word_nchars,
active.prev_rat,
)
};
let total_demerits = new_demerits + active.demerits;
if new_demerits < BAD_INFTY_SQ
&& total_demerits < ld_new
&& active.demerits.signum() <= new_demerits.signum()
{
ld_new = total_demerits;
new_linebreaks.push(LineBreak {
prev: i,
linebreak: Some(w),
break_before: false,
demerits: total_demerits,
prev_rat: new_ratio,
length: args.indent_len,
fresh: true,
});
}
}
}
}
match new_linebreaks.pop() {
None => (),
Some(lb) => {
next_active_breaks.push(linebreaks.len());
linebreaks.push(lb);
}
}
if next_active_breaks.is_empty() {
let new_break =
restart_active_breaks(args, &linebreaks[ld_idx], ld_idx, w, slen, minlength);
next_active_breaks.push(linebreaks.len());
linebreaks.push(new_break);
least_demerits = 0;
} else {
least_demerits = cmp::max(ld_next, 0);
}
mem::swap(&mut active_breaks, &mut next_active_breaks);
is_sentence_start = is_sentence_end;
}
build_best_path(&linebreaks, &active_breaks)
}
fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> {
active
.iter()
.min_by_key(|&&a| paths[a].demerits)
.map(|&(mut best_idx)| {
let mut breakwords = vec![];
loop {
let next_best = &paths[best_idx];
match next_best.linebreak {
None => return breakwords,
Some(prev) => {
breakwords.push((prev, next_best.break_before));
best_idx = next_best.prev;
}
}
}
})
.unwrap_or_default()
}
const BAD_INFTY: i64 = 10_000_000;
const BAD_INFTY_SQ: i64 = BAD_INFTY * BAD_INFTY;
const BAD_MULT: f32 = 200.0;
const DR_MULT: f32 = 600.0;
const DL_MULT: f32 = 10.0;
fn compute_demerits(delta_len: isize, stretch: usize, wlen: usize, prev_rat: f32) -> (i64, f32) {
let ratio = if delta_len == 0 {
0.0f32
} else {
delta_len as f32 / stretch as f32
};
let bad_linelen = if ratio.abs() > 1.0f32 {
BAD_INFTY
} else {
(BAD_MULT * ratio.powi(3).abs()) as i64
};
let bad_wordlen = if wlen >= stretch {
0
} else {
(DL_MULT
* ((stretch - wlen) as f32 / (stretch - 1) as f32)
.powi(3)
.abs()) as i64
};
let bad_delta_r = (DR_MULT * ((ratio - prev_rat) / 2.0).powi(3).abs()) as i64;
let demerits = i64::pow(1 + bad_linelen + bad_wordlen + bad_delta_r, 2);
(demerits, ratio)
}
fn restart_active_breaks<'a>(
args: &BreakArgs<'a>,
active: &LineBreak<'a>,
act_idx: usize,
w: &'a WordInfo<'a>,
slen: usize,
min: usize,
) -> LineBreak<'a> {
let (break_before, line_length) = if active.fresh {
(false, args.indent_len)
} else {
let wlen = w.word_nchars + args.compute_width(w, active.length, active.fresh);
let underlen = min as isize - active.length as isize;
let overlen = (wlen + slen + active.length) as isize - args.opts.width as isize;
if overlen > underlen {
(true, args.indent_len + w.word_nchars)
} else {
(false, args.indent_len)
}
};
LineBreak {
prev: act_idx,
linebreak: Some(w),
break_before,
demerits: 0, prev_rat: if break_before { 1.0 } else { -1.0 },
length: line_length,
fresh: !break_before,
}
}
fn compute_slen(uniform: bool, newline: bool, start: bool, punct: bool) -> usize {
if uniform || newline {
if start || (newline && punct) { 2 } else { 1 }
} else {
0
}
}
fn slice_if_fresh(
fresh: bool,
word: &str,
start: usize,
uniform: bool,
newline: bool,
sstart: bool,
punct: bool,
) -> (usize, &str) {
if fresh {
(0, &word[start..])
} else {
(compute_slen(uniform, newline, sstart, punct), word)
}
}
fn write_newline(indent: &str, ostream: &mut BufWriter<Stdout>) -> std::io::Result<()> {
ostream.write_all(b"\n")?;
ostream.write_all(indent.as_bytes())
}
fn write_with_spaces(
word: &str,
slen: usize,
ostream: &mut BufWriter<Stdout>,
) -> std::io::Result<()> {
if slen == 2 {
ostream.write_all(b" ")?;
} else if slen == 1 {
ostream.write_all(b" ")?;
}
ostream.write_all(word.as_bytes())
}