use std::cmp::Ordering;
use std::fs::{File, metadata};
use std::io::{self, BufRead, BufReader, Read, Stdin, stdin};
use uucore::error::{FromIo, UResult, USimpleError};
use uucore::fs::paths_refer_to_same_file;
use uucore::line_ending::LineEnding;
use uucore::{format_usage, help_about, help_usage};
use clap::{Arg, ArgAction, ArgMatches, Command};
const ABOUT: &str = help_about!("comm.md");
const USAGE: &str = help_usage!("comm.md");
mod options {
pub const COLUMN_1: &str = "1";
pub const COLUMN_2: &str = "2";
pub const COLUMN_3: &str = "3";
pub const DELIMITER: &str = "output-delimiter";
pub const DELIMITER_DEFAULT: &str = "\t";
pub const FILE_1: &str = "FILE1";
pub const FILE_2: &str = "FILE2";
pub const TOTAL: &str = "total";
pub const ZERO_TERMINATED: &str = "zero-terminated";
pub const CHECK_ORDER: &str = "check-order";
pub const NO_CHECK_ORDER: &str = "nocheck-order";
}
#[derive(Debug, Clone, Copy)]
enum FileNumber {
One,
Two,
}
impl FileNumber {
fn as_str(&self) -> &'static str {
match self {
FileNumber::One => "1",
FileNumber::Two => "2",
}
}
}
struct OrderChecker {
last_line: Vec<u8>,
file_num: FileNumber,
check_order: bool,
has_error: bool,
}
enum Input {
Stdin(Stdin),
FileIn(BufReader<File>),
}
struct LineReader {
line_ending: LineEnding,
input: Input,
}
impl LineReader {
fn new(input: Input, line_ending: LineEnding) -> Self {
Self { line_ending, input }
}
fn read_line(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let line_ending = self.line_ending.into();
let result = match &mut self.input {
Input::Stdin(r) => r.lock().read_until(line_ending, buf),
Input::FileIn(r) => r.read_until(line_ending, buf),
};
if !buf.ends_with(&[line_ending]) {
buf.push(line_ending);
}
result
}
}
impl OrderChecker {
fn new(file_num: FileNumber, check_order: bool) -> Self {
Self {
last_line: Vec::new(),
file_num,
check_order,
has_error: false,
}
}
fn verify_order(&mut self, current_line: &[u8]) -> bool {
if self.last_line.is_empty() {
self.last_line = current_line.to_vec();
return true;
}
let is_ordered = current_line >= &self.last_line;
if !is_ordered && !self.has_error {
eprintln!(
"comm: file {} is not in sorted order",
self.file_num.as_str()
);
self.has_error = true;
}
self.last_line = current_line.to_vec();
is_ordered || !self.check_order
}
}
pub fn are_files_identical(path1: &str, path2: &str) -> io::Result<bool> {
let metadata1 = metadata(path1)?;
let metadata2 = metadata(path2)?;
if metadata1.len() != metadata2.len() {
return Ok(false);
}
let file1 = File::open(path1)?;
let file2 = File::open(path2)?;
let mut reader1 = BufReader::new(file1);
let mut reader2 = BufReader::new(file2);
let mut buffer1 = [0; 8192];
let mut buffer2 = [0; 8192];
loop {
let bytes1 = reader1.read(&mut buffer1)?;
let bytes2 = reader2.read(&mut buffer2)?;
if bytes1 != bytes2 {
return Ok(false);
}
if bytes1 == 0 {
return Ok(true);
}
if buffer1[..bytes1] != buffer2[..bytes2] {
return Ok(false);
}
}
}
fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));
let delim_col_2 = delim.repeat(width_col_1);
let delim_col_3 = delim.repeat(width_col_1 + width_col_2);
let ra = &mut Vec::new();
let mut na = a.read_line(ra);
let rb = &mut Vec::new();
let mut nb = b.read_line(rb);
let mut total_col_1 = 0;
let mut total_col_2 = 0;
let mut total_col_3 = 0;
let check_order = opts.get_flag(options::CHECK_ORDER);
let no_check_order = opts.get_flag(options::NO_CHECK_ORDER);
let should_check_order = !no_check_order
&& (check_order
|| if let (Some(file1), Some(file2)) = (
opts.get_one::<String>(options::FILE_1),
opts.get_one::<String>(options::FILE_2),
) {
!(paths_refer_to_same_file(file1, file2, true)
|| are_files_identical(file1, file2).unwrap_or(false))
} else {
true
});
let mut checker1 = OrderChecker::new(FileNumber::One, check_order);
let mut checker2 = OrderChecker::new(FileNumber::Two, check_order);
let mut input_error = false;
while na.is_ok() || nb.is_ok() {
let ord = match (na.is_ok(), nb.is_ok()) {
(false, true) => Ordering::Greater,
(true, false) => Ordering::Less,
(true, true) => match (&na, &nb) {
(&Ok(0), &Ok(0)) => break,
(&Ok(0), _) => Ordering::Greater,
(_, &Ok(0)) => Ordering::Less,
_ => ra.cmp(&rb),
},
_ => unreachable!(),
};
match ord {
Ordering::Less => {
if should_check_order && !checker1.verify_order(ra) {
break;
}
if !opts.get_flag(options::COLUMN_1) {
print!("{}", String::from_utf8_lossy(ra));
}
ra.clear();
na = a.read_line(ra);
total_col_1 += 1;
}
Ordering::Greater => {
if should_check_order && !checker2.verify_order(rb) {
break;
}
if !opts.get_flag(options::COLUMN_2) {
print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
}
rb.clear();
nb = b.read_line(rb);
total_col_2 += 1;
}
Ordering::Equal => {
if should_check_order && (!checker1.verify_order(ra) || !checker2.verify_order(rb))
{
break;
}
if !opts.get_flag(options::COLUMN_3) {
print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
}
ra.clear();
rb.clear();
na = a.read_line(ra);
nb = b.read_line(rb);
total_col_3 += 1;
}
}
if (checker1.has_error || checker2.has_error) && !input_error && !check_order {
input_error = true;
}
}
if opts.get_flag(options::TOTAL) {
let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}");
}
if should_check_order && (checker1.has_error || checker2.has_error) {
if input_error {
eprintln!("comm: input is not in sorted order");
}
Err(USimpleError::new(1, ""))
} else {
Ok(())
}
}
fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
if name == "-" {
Ok(LineReader::new(Input::Stdin(stdin()), line_ending))
} else {
if metadata(name)?.is_dir() {
return Err(io::Error::other("Is a directory"));
}
let f = File::open(name)?;
Ok(LineReader::new(
Input::FileIn(BufReader::new(f)),
line_ending,
))
}
}
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app().try_get_matches_from(args)?;
let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
let filename1 = matches.get_one::<String>(options::FILE_1).unwrap();
let filename2 = matches.get_one::<String>(options::FILE_2).unwrap();
let mut f1 = open_file(filename1, line_ending).map_err_context(|| filename1.to_string())?;
let mut f2 = open_file(filename2, line_ending).map_err_context(|| filename2.to_string())?;
let all_delimiters = matches
.get_many::<String>(options::DELIMITER)
.unwrap()
.map(String::from)
.collect::<Vec<_>>();
for delim in &all_delimiters[1..] {
if delim != &all_delimiters[0] {
return Err(USimpleError::new(
1,
"multiple conflicting output delimiters specified",
));
}
}
let delim = match &*all_delimiters[0] {
"" => "\0",
delim => delim,
};
comm(&mut f1, &mut f2, delim, &matches)
}
pub fn uu_app() -> Command {
Command::new(uucore::util_name())
.version(uucore::crate_version!())
.about(ABOUT)
.override_usage(format_usage(USAGE))
.infer_long_args(true)
.args_override_self(true)
.arg(
Arg::new(options::COLUMN_1)
.short('1')
.help("suppress column 1 (lines unique to FILE1)")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::COLUMN_2)
.short('2')
.help("suppress column 2 (lines unique to FILE2)")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::COLUMN_3)
.short('3')
.help("suppress column 3 (lines that appear in both files)")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::DELIMITER)
.long(options::DELIMITER)
.help("separate columns with STR")
.value_name("STR")
.default_value(options::DELIMITER_DEFAULT)
.allow_hyphen_values(true)
.action(ArgAction::Append)
.hide_default_value(true),
)
.arg(
Arg::new(options::ZERO_TERMINATED)
.long(options::ZERO_TERMINATED)
.short('z')
.overrides_with(options::ZERO_TERMINATED)
.help("line delimiter is NUL, not newline")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::FILE_1)
.required(true)
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::FILE_2)
.required(true)
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::TOTAL)
.long(options::TOTAL)
.help("output a summary")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::CHECK_ORDER)
.long(options::CHECK_ORDER)
.help("check that the input is correctly sorted, even if all input lines are pairable")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::NO_CHECK_ORDER)
.long(options::NO_CHECK_ORDER)
.help("do not check that the input is correctly sorted")
.action(ArgAction::SetTrue)
.conflicts_with(options::CHECK_ORDER),
)
}