[go: up one dir, main page]

uu_comm/
comm.rs

1// This file is part of the uutils coreutils package.
2//
3// For the full copyright and license information, please view the LICENSE
4// file that was distributed with this source code.
5
6// spell-checker:ignore (ToDO) delim mkdelim pairable
7
8use std::cmp::Ordering;
9use std::fs::{metadata, File};
10use std::io::{self, stdin, BufRead, BufReader, Read, Stdin};
11use uucore::error::{FromIo, UResult, USimpleError};
12use uucore::fs::paths_refer_to_same_file;
13use uucore::line_ending::LineEnding;
14use uucore::{format_usage, help_about, help_usage};
15
16use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
17
18const ABOUT: &str = help_about!("comm.md");
19const USAGE: &str = help_usage!("comm.md");
20
/// Argument identifiers and fixed strings shared by the command-line
/// definition and the code that reads the parsed matches.
mod options {
    // Positional operands: the two inputs being compared.
    pub const FILE_1: &str = "FILE1";
    pub const FILE_2: &str = "FILE2";

    // Column-suppression switches (`-1`, `-2`, `-3`).
    pub const COLUMN_1: &str = "1";
    pub const COLUMN_2: &str = "2";
    pub const COLUMN_3: &str = "3";

    // Output formatting.
    pub const DELIMITER: &str = "output-delimiter";
    pub const DELIMITER_DEFAULT: &str = "\t";
    pub const ZERO_TERMINATED: &str = "zero-terminated";
    pub const TOTAL: &str = "total";

    // Sort-order verification (the two flags conflict with each other).
    pub const CHECK_ORDER: &str = "check-order";
    pub const NO_CHECK_ORDER: &str = "nocheck-order";
}
34
/// Identifies which of the two input operands a diagnostic refers to.
#[derive(Debug, Clone, Copy)]
enum FileNumber {
    One,
    Two,
}

impl FileNumber {
    /// Returns the operand number as text (`"1"` or `"2"`), as used in the
    /// "file N is not in sorted order" diagnostic.
    fn as_str(&self) -> &'static str {
        match self {
            Self::One => "1",
            Self::Two => "2",
        }
    }
}
49
50struct OrderChecker {
51    last_line: Vec<u8>,
52    file_num: FileNumber,
53    check_order: bool,
54    has_error: bool,
55}
56
57enum Input {
58    Stdin(Stdin),
59    FileIn(BufReader<File>),
60}
61
62struct LineReader {
63    line_ending: LineEnding,
64    input: Input,
65}
66
67impl LineReader {
68    fn new(input: Input, line_ending: LineEnding) -> Self {
69        Self { line_ending, input }
70    }
71
72    fn read_line(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
73        let line_ending = self.line_ending.into();
74
75        let result = match &mut self.input {
76            Input::Stdin(r) => r.lock().read_until(line_ending, buf),
77            Input::FileIn(r) => r.read_until(line_ending, buf),
78        };
79
80        if !buf.ends_with(&[line_ending]) {
81            buf.push(line_ending);
82        }
83
84        result
85    }
86}
87
88impl OrderChecker {
89    fn new(file_num: FileNumber, check_order: bool) -> Self {
90        Self {
91            last_line: Vec::new(),
92            file_num,
93            check_order,
94            has_error: false,
95        }
96    }
97
98    fn verify_order(&mut self, current_line: &[u8]) -> bool {
99        if self.last_line.is_empty() {
100            self.last_line = current_line.to_vec();
101            return true;
102        }
103
104        let is_ordered = current_line >= &self.last_line;
105        if !is_ordered && !self.has_error {
106            eprintln!(
107                "comm: file {} is not in sorted order",
108                self.file_num.as_str()
109            );
110            self.has_error = true;
111        }
112
113        self.last_line = current_line.to_vec();
114        is_ordered || !self.check_order
115    }
116}
117
/// Fills `buf` from `reader`, retrying after short reads and interrupted
/// calls, and returns how many bytes were stored. A value smaller than
/// `buf.len()` means end of input was reached.
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}

/// Checks whether two files have byte-for-byte identical contents.
///
/// The sizes reported by the file system are compared first so that files of
/// different length are rejected without reading them.
///
/// # Errors
///
/// Returns any I/O error raised while querying metadata, opening, or reading
/// either file.
pub fn are_files_identical(path1: &str, path2: &str) -> io::Result<bool> {
    const CHUNK_SIZE: usize = 8192;

    if std::fs::metadata(path1)?.len() != std::fs::metadata(path2)?.len() {
        return Ok(false);
    }

    let mut file1 = File::open(path1)?;
    let mut file2 = File::open(path2)?;

    let mut buffer1 = [0u8; CHUNK_SIZE];
    let mut buffer2 = [0u8; CHUNK_SIZE];

    loop {
        // A single `read` may legally return fewer bytes than requested, so
        // each chunk is filled completely (or up to EOF) before comparing;
        // otherwise identical files could be reported as different.
        let len1 = read_full(&mut file1, &mut buffer1)?;
        let len2 = read_full(&mut file2, &mut buffer2)?;

        // Slices of different length never compare equal, which also covers
        // one file ending before the other.
        if buffer1[..len1] != buffer2[..len2] {
            return Ok(false);
        }

        // A partially filled chunk means both inputs are exhausted.
        if len1 < CHUNK_SIZE {
            return Ok(true);
        }
    }
}
154
155fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
156    let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
157    let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));
158
159    let delim_col_2 = delim.repeat(width_col_1);
160    let delim_col_3 = delim.repeat(width_col_1 + width_col_2);
161
162    let ra = &mut Vec::new();
163    let mut na = a.read_line(ra);
164    let rb = &mut Vec::new();
165    let mut nb = b.read_line(rb);
166
167    let mut total_col_1 = 0;
168    let mut total_col_2 = 0;
169    let mut total_col_3 = 0;
170
171    let check_order = opts.get_flag(options::CHECK_ORDER);
172    let no_check_order = opts.get_flag(options::NO_CHECK_ORDER);
173
174    // Determine if we should perform order checking
175    let should_check_order = !no_check_order
176        && (check_order
177            || if let (Some(file1), Some(file2)) = (
178                opts.get_one::<String>(options::FILE_1),
179                opts.get_one::<String>(options::FILE_2),
180            ) {
181                !(paths_refer_to_same_file(file1, file2, true)
182                    || are_files_identical(file1, file2).unwrap_or(false))
183            } else {
184                true
185            });
186
187    let mut checker1 = OrderChecker::new(FileNumber::One, check_order);
188    let mut checker2 = OrderChecker::new(FileNumber::Two, check_order);
189    let mut input_error = false;
190
191    while na.is_ok() || nb.is_ok() {
192        let ord = match (na.is_ok(), nb.is_ok()) {
193            (false, true) => Ordering::Greater,
194            (true, false) => Ordering::Less,
195            (true, true) => match (&na, &nb) {
196                (&Ok(0), &Ok(0)) => break,
197                (&Ok(0), _) => Ordering::Greater,
198                (_, &Ok(0)) => Ordering::Less,
199                _ => ra.cmp(&rb),
200            },
201            _ => unreachable!(),
202        };
203
204        match ord {
205            Ordering::Less => {
206                if should_check_order && !checker1.verify_order(ra) {
207                    break;
208                }
209                if !opts.get_flag(options::COLUMN_1) {
210                    print!("{}", String::from_utf8_lossy(ra));
211                }
212                ra.clear();
213                na = a.read_line(ra);
214                total_col_1 += 1;
215            }
216            Ordering::Greater => {
217                if should_check_order && !checker2.verify_order(rb) {
218                    break;
219                }
220                if !opts.get_flag(options::COLUMN_2) {
221                    print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
222                }
223                rb.clear();
224                nb = b.read_line(rb);
225                total_col_2 += 1;
226            }
227            Ordering::Equal => {
228                if should_check_order && (!checker1.verify_order(ra) || !checker2.verify_order(rb))
229                {
230                    break;
231                }
232                if !opts.get_flag(options::COLUMN_3) {
233                    print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
234                }
235                ra.clear();
236                rb.clear();
237                na = a.read_line(ra);
238                nb = b.read_line(rb);
239                total_col_3 += 1;
240            }
241        }
242
243        // Track if we've seen any order errors
244        if (checker1.has_error || checker2.has_error) && !input_error && !check_order {
245            input_error = true;
246        }
247    }
248
249    if opts.get_flag(options::TOTAL) {
250        let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
251        print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}");
252    }
253
254    if should_check_order && (checker1.has_error || checker2.has_error) {
255        // Print the input error message once at the end
256        if input_error {
257            eprintln!("comm: input is not in sorted order");
258        }
259        Err(USimpleError::new(1, ""))
260    } else {
261        Ok(())
262    }
263}
264
265fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
266    if name == "-" {
267        Ok(LineReader::new(Input::Stdin(stdin()), line_ending))
268    } else {
269        if metadata(name)?.is_dir() {
270            return Err(io::Error::new(io::ErrorKind::Other, "Is a directory"));
271        }
272        let f = File::open(name)?;
273        Ok(LineReader::new(
274            Input::FileIn(BufReader::new(f)),
275            line_ending,
276        ))
277    }
278}
279
280#[uucore::main]
281pub fn uumain(args: impl uucore::Args) -> UResult<()> {
282    let matches = uu_app().try_get_matches_from(args)?;
283    let line_ending = LineEnding::from_zero_flag(matches.get_flag(options::ZERO_TERMINATED));
284    let filename1 = matches.get_one::<String>(options::FILE_1).unwrap();
285    let filename2 = matches.get_one::<String>(options::FILE_2).unwrap();
286    let mut f1 = open_file(filename1, line_ending).map_err_context(|| filename1.to_string())?;
287    let mut f2 = open_file(filename2, line_ending).map_err_context(|| filename2.to_string())?;
288
289    // Due to default_value(), there must be at least one value here, thus unwrap() must not panic.
290    let all_delimiters = matches
291        .get_many::<String>(options::DELIMITER)
292        .unwrap()
293        .map(String::from)
294        .collect::<Vec<_>>();
295    for delim in &all_delimiters[1..] {
296        // Note that this check is very different from ".conflicts_with_self(true).action(ArgAction::Set)",
297        // as this accepts duplicate *identical* arguments.
298        if delim != &all_delimiters[0] {
299            // Note: This intentionally deviate from the GNU error message by inserting the word "conflicting".
300            return Err(USimpleError::new(
301                1,
302                "multiple conflicting output delimiters specified",
303            ));
304        }
305    }
306    let delim = match &*all_delimiters[0] {
307        "" => "\0",
308        delim => delim,
309    };
310
311    comm(&mut f1, &mut f2, delim, &matches)
312}
313
314pub fn uu_app() -> Command {
315    Command::new(uucore::util_name())
316        .version(crate_version!())
317        .about(ABOUT)
318        .override_usage(format_usage(USAGE))
319        .infer_long_args(true)
320        .args_override_self(true)
321        .arg(
322            Arg::new(options::COLUMN_1)
323                .short('1')
324                .help("suppress column 1 (lines unique to FILE1)")
325                .action(ArgAction::SetTrue),
326        )
327        .arg(
328            Arg::new(options::COLUMN_2)
329                .short('2')
330                .help("suppress column 2 (lines unique to FILE2)")
331                .action(ArgAction::SetTrue),
332        )
333        .arg(
334            Arg::new(options::COLUMN_3)
335                .short('3')
336                .help("suppress column 3 (lines that appear in both files)")
337                .action(ArgAction::SetTrue),
338        )
339        .arg(
340            Arg::new(options::DELIMITER)
341                .long(options::DELIMITER)
342                .help("separate columns with STR")
343                .value_name("STR")
344                .default_value(options::DELIMITER_DEFAULT)
345                .allow_hyphen_values(true)
346                .action(ArgAction::Append)
347                .hide_default_value(true),
348        )
349        .arg(
350            Arg::new(options::ZERO_TERMINATED)
351                .long(options::ZERO_TERMINATED)
352                .short('z')
353                .overrides_with(options::ZERO_TERMINATED)
354                .help("line delimiter is NUL, not newline")
355                .action(ArgAction::SetTrue),
356        )
357        .arg(
358            Arg::new(options::FILE_1)
359                .required(true)
360                .value_hint(clap::ValueHint::FilePath),
361        )
362        .arg(
363            Arg::new(options::FILE_2)
364                .required(true)
365                .value_hint(clap::ValueHint::FilePath),
366        )
367        .arg(
368            Arg::new(options::TOTAL)
369                .long(options::TOTAL)
370                .help("output a summary")
371                .action(ArgAction::SetTrue),
372        )
373        .arg(
374            Arg::new(options::CHECK_ORDER)
375                .long(options::CHECK_ORDER)
376                .help("check that the input is correctly sorted, even if all input lines are pairable")
377                .action(ArgAction::SetTrue),
378        )
379        .arg(
380            Arg::new(options::NO_CHECK_ORDER)
381                .long(options::NO_CHECK_ORDER)
382                .help("do not check that the input is correctly sorted")
383                .action(ArgAction::SetTrue)
384                .conflicts_with(options::CHECK_ORDER),
385        )
386}