1use std::cmp::Ordering;
9use std::fs::{metadata, File};
10use std::io::{self, stdin, BufRead, BufReader, Read, Stdin};
11use uucore::error::{FromIo, UResult, USimpleError};
12use uucore::fs::paths_refer_to_same_file;
13use uucore::line_ending::LineEnding;
14use uucore::{format_usage, help_about, help_usage};
15
16use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
17
18const ABOUT: &str = help_about!("comm.md");
19const USAGE: &str = help_usage!("comm.md");
20
/// Identifiers of the command-line arguments registered in `uu_app` and
/// looked up again in `uumain` and `comm`.
mod options {
    /// `-1` flag: suppress column 1.
    pub const COLUMN_1: &str = "1";
    /// `-2` flag: suppress column 2.
    pub const COLUMN_2: &str = "2";
    /// `-3` flag: suppress column 3.
    pub const COLUMN_3: &str = "3";
    /// `--output-delimiter` option: the string placed between columns.
    pub const DELIMITER: &str = "output-delimiter";
    /// Delimiter used when `--output-delimiter` is not given (a tab).
    pub const DELIMITER_DEFAULT: &str = "\t";
    /// First positional argument.
    pub const FILE_1: &str = "FILE1";
    /// Second positional argument.
    pub const FILE_2: &str = "FILE2";
    /// `--total` flag: print a summary line.
    pub const TOTAL: &str = "total";
    /// `--zero-terminated` / `-z` flag: lines end with NUL instead of newline.
    pub const ZERO_TERMINATED: &str = "zero-terminated";
    /// `--check-order` flag.
    pub const CHECK_ORDER: &str = "check-order";
    /// `--nocheck-order` flag (conflicts with `--check-order`).
    pub const NO_CHECK_ORDER: &str = "nocheck-order";
}
34
/// Identifies which of the two input files a diagnostic refers to.
#[derive(Debug, Clone, Copy)]
enum FileNumber {
    One,
    Two,
}

impl FileNumber {
    /// Returns the file's ordinal as it appears in diagnostics: `"1"` or `"2"`.
    fn as_str(&self) -> &'static str {
        match self {
            FileNumber::One => "1",
            FileNumber::Two => "2",
        }
    }
}
49
/// Tracks whether the lines of one input file arrive in sorted order.
struct OrderChecker {
    /// The line most recently passed to `verify_order`; empty before the first call.
    last_line: Vec<u8>,
    /// Which input file this checker belongs to (used in the diagnostic).
    file_num: FileNumber,
    /// When `true`, an out-of-order line makes `verify_order` return `false`.
    check_order: bool,
    /// Set once an out-of-order line has been reported, so it is reported only once.
    has_error: bool,
}
56
/// The source a `LineReader` pulls bytes from.
enum Input {
    /// Standard input; it is locked for each read.
    Stdin(Stdin),
    /// A regular file wrapped in a buffered reader.
    FileIn(BufReader<File>),
}
61
/// Reads terminator-delimited records from an `Input`.
struct LineReader {
    /// The record terminator; converted to a single byte when reading.
    line_ending: LineEnding,
    /// Where the bytes come from.
    input: Input,
}
66
67impl LineReader {
68 fn new(input: Input, line_ending: LineEnding) -> Self {
69 Self { line_ending, input }
70 }
71
72 fn read_line(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
73 let line_ending = self.line_ending.into();
74
75 let result = match &mut self.input {
76 Input::Stdin(r) => r.lock().read_until(line_ending, buf),
77 Input::FileIn(r) => r.read_until(line_ending, buf),
78 };
79
80 if !buf.ends_with(&[line_ending]) {
81 buf.push(line_ending);
82 }
83
84 result
85 }
86}
87
88impl OrderChecker {
89 fn new(file_num: FileNumber, check_order: bool) -> Self {
90 Self {
91 last_line: Vec::new(),
92 file_num,
93 check_order,
94 has_error: false,
95 }
96 }
97
98 fn verify_order(&mut self, current_line: &[u8]) -> bool {
99 if self.last_line.is_empty() {
100 self.last_line = current_line.to_vec();
101 return true;
102 }
103
104 let is_ordered = current_line >= &self.last_line;
105 if !is_ordered && !self.has_error {
106 eprintln!(
107 "comm: file {} is not in sorted order",
108 self.file_num.as_str()
109 );
110 self.has_error = true;
111 }
112
113 self.last_line = current_line.to_vec();
114 is_ordered || !self.check_order
115 }
116}
117
/// Reports whether the files at `path1` and `path2` have identical contents.
///
/// The sizes reported by the file metadata are compared first; only when they
/// match are the contents read and compared chunk by chunk.
///
/// # Errors
///
/// Returns any I/O error raised while querying metadata, opening, or reading
/// either file.
pub fn are_files_identical(path1: &str, path2: &str) -> io::Result<bool> {
    const CHUNK_SIZE: usize = 8192;

    /// Reads until `buf` is full or end-of-file is reached and returns the
    /// number of bytes stored. A single `read` call may return fewer bytes
    /// than requested, so one call per chunk is not enough.
    fn read_full<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(count) => filled += count,
                Err(ref err) if err.kind() == io::ErrorKind::Interrupted => {}
                Err(err) => return Err(err),
            }
        }
        Ok(filled)
    }

    let len1 = std::fs::metadata(path1)?.len();
    let len2 = std::fs::metadata(path2)?.len();
    if len1 != len2 {
        return Ok(false);
    }

    let mut file1 = File::open(path1)?;
    let mut file2 = File::open(path2)?;

    let mut chunk1 = [0_u8; CHUNK_SIZE];
    let mut chunk2 = [0_u8; CHUNK_SIZE];

    loop {
        let filled1 = read_full(&mut file1, &mut chunk1)?;
        let filled2 = read_full(&mut file2, &mut chunk2)?;

        // Slices of different lengths compare unequal, so this also covers
        // one file ending before the other.
        if chunk1[..filled1] != chunk2[..filled2] {
            return Ok(false);
        }

        // A partially filled chunk means both files reached end-of-file.
        if filled1 < CHUNK_SIZE {
            return Ok(true);
        }
    }
}
154
155fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
156 let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
157 let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));
158
159 let delim_col_2 = delim.repeat(width_col_1);
160 let delim_col_3 = delim.repeat(width_col_1 + width_col_2);
161
162 let ra = &mut Vec::new();
163 let mut na = a.read_line(ra);
164 let rb = &mut Vec::new();
165 let mut nb = b.read_line(rb);
166
167 let mut total_col_1 = 0;
168 let mut total_col_2 = 0;
169 let mut total_col_3 = 0;
170
171 let check_order = opts.get_flag(options::CHECK_ORDER);
172 let no_check_order = opts.get_flag(options::NO_CHECK_ORDER);
173
174 let should_check_order = !no_check_order
176 && (check_order
177 || if let (Some(file1), Some(file2)) = (
178 opts.get_one::<String>(options::FILE_1),
179 opts.get_one::<String>(options::FILE_2),
180 ) {
181 !(paths_refer_to_same_file(file1, file2, true)
182 || are_files_identical(file1, file2).unwrap_or(false))
183 } else {
184 true
185 });
186
187 let mut checker1 = OrderChecker::new(FileNumber::One, check_order);
188 let mut checker2 = OrderChecker::new(FileNumber::Two, check_order);
189 let mut input_error = false;
190
191 while na.is_ok() || nb.is_ok() {
192 let ord = match (na.is_ok(), nb.is_ok()) {
193 (false, true) => Ordering::Greater,
194 (true, false) => Ordering::Less,
195 (true, true) => match (&na, &nb) {
196 (&Ok(0), &Ok(0)) => break,
197 (&Ok(0), _) => Ordering::Greater,
198 (_, &Ok(0)) => Ordering::Less,
199 _ => ra.cmp(&rb),
200 },
201 _ => unreachable!(),
202 };
203
204 match ord {
205 Ordering::Less => {
206 if should_check_order && !checker1.verify_order(ra) {
207 break;
208 }
209 if !opts.get_flag(options::COLUMN_1) {
210 print!("{}", String::from_utf8_lossy(ra));
211 }
212 ra.clear();
213 na = a.read_line(ra);
214 total_col_1 += 1;
215 }
216 Ordering::Greater => {
217 if should_check_order && !checker2.verify_order(rb) {
218 break;
219 }
220 if !opts.get_flag(options::COLUMN_2) {
221 print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
222 }
223 rb.clear();
224 nb = b.read_line(rb);
225 total_col_2 += 1;
226 }
227 Ordering::Equal => {
228 if should_check_order && (!checker1.verify_order(ra) || !checker2.verify_order(rb))
229 {
230 break;
231 }
232 if !opts.get_flag(options::COLUMN_3) {
233 print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
234 }
235 ra.clear();
236 rb.clear();
237 na = a.read_line(ra);
238 nb = b.read_line(rb);
239 total_col_3 += 1;
240 }
241 }
242
243 if (checker1.has_error || checker2.has_error) && !input_error && !check_order {
245 input_error = true;
246 }
247 }
248
249 if opts.get_flag(options::TOTAL) {
250 let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
251 print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}");
252 }
253
254 if should_check_order && (checker1.has_error || checker2.has_error) {
255 if input_error {
257 eprintln!("comm: input is not in sorted order");
258 }
259 Err(USimpleError::new(1, ""))
260 } else {
261 Ok(())
262 }
263}
264
265fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
266 if name == "-" {
267 Ok(LineReader::new(Input::Stdin(stdin()), line_ending))
268 } else {
269 if metadata(name)?.is_dir() {
270 return Err(io::Error::new(io::ErrorKind::Other, "Is a directory"));
271 }
272 let f = File::open(name)?;
273 Ok(LineReader::new(
274 Input::FileIn(BufReader::new(f)),
275 line_ending,
276 ))
277 }
278}
279
280#[uucore::main]
281pub fn uumain(args: impl uucore::Args) -> UResult<()> {
282 let matches = uu_app().try_get_matches_from(args)?;
283 let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
284 let filename1 = matches.get_one::<String>(options::FILE_1).unwrap();
285 let filename2 = matches.get_one::<String>(options::FILE_2).unwrap();
286 let mut f1 = open_file(filename1, line_ending).map_err_context(|| filename1.to_string())?;
287 let mut f2 = open_file(filename2, line_ending).map_err_context(|| filename2.to_string())?;
288
289 let all_delimiters = matches
291 .get_many::<String>(options::DELIMITER)
292 .unwrap()
293 .map(String::from)
294 .collect::<Vec<_>>();
295 for delim in &all_delimiters[1..] {
296 if delim != &all_delimiters[0] {
299 return Err(USimpleError::new(
301 1,
302 "multiple conflicting output delimiters specified",
303 ));
304 }
305 }
306 let delim = match &*all_delimiters[0] {
307 "" => "\0",
308 delim => delim,
309 };
310
311 comm(&mut f1, &mut f2, delim, &matches)
312}
313
314pub fn uu_app() -> Command {
315 Command::new(uucore::util_name())
316 .version(crate_version!())
317 .about(ABOUT)
318 .override_usage(format_usage(USAGE))
319 .infer_long_args(true)
320 .args_override_self(true)
321 .arg(
322 Arg::new(options::COLUMN_1)
323 .short('1')
324 .help("suppress column 1 (lines unique to FILE1)")
325 .action(ArgAction::SetTrue),
326 )
327 .arg(
328 Arg::new(options::COLUMN_2)
329 .short('2')
330 .help("suppress column 2 (lines unique to FILE2)")
331 .action(ArgAction::SetTrue),
332 )
333 .arg(
334 Arg::new(options::COLUMN_3)
335 .short('3')
336 .help("suppress column 3 (lines that appear in both files)")
337 .action(ArgAction::SetTrue),
338 )
339 .arg(
340 Arg::new(options::DELIMITER)
341 .long(options::DELIMITER)
342 .help("separate columns with STR")
343 .value_name("STR")
344 .default_value(options::DELIMITER_DEFAULT)
345 .allow_hyphen_values(true)
346 .action(ArgAction::Append)
347 .hide_default_value(true),
348 )
349 .arg(
350 Arg::new(options::ZERO_TERMINATED)
351 .long(options::ZERO_TERMINATED)
352 .short('z')
353 .overrides_with(options::ZERO_TERMINATED)
354 .help("line delimiter is NUL, not newline")
355 .action(ArgAction::SetTrue),
356 )
357 .arg(
358 Arg::new(options::FILE_1)
359 .required(true)
360 .value_hint(clap::ValueHint::FilePath),
361 )
362 .arg(
363 Arg::new(options::FILE_2)
364 .required(true)
365 .value_hint(clap::ValueHint::FilePath),
366 )
367 .arg(
368 Arg::new(options::TOTAL)
369 .long(options::TOTAL)
370 .help("output a summary")
371 .action(ArgAction::SetTrue),
372 )
373 .arg(
374 Arg::new(options::CHECK_ORDER)
375 .long(options::CHECK_ORDER)
376 .help("check that the input is correctly sorted, even if all input lines are pairable")
377 .action(ArgAction::SetTrue),
378 )
379 .arg(
380 Arg::new(options::NO_CHECK_ORDER)
381 .long(options::NO_CHECK_ORDER)
382 .help("do not check that the input is correctly sorted")
383 .action(ArgAction::SetTrue)
384 .conflicts_with(options::CHECK_ORDER),
385 )
386}