File: dedup.rs
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `rustc` command
  27 as follows:
  28 
  29 rustc -C lto=true -C codegen-units=1 -C debuginfo=0 -C strip=symbols \
  30     -C opt-level=3 dedup.rs
  31 */
  32 
  33 use std::collections::HashSet;
  34 use std::env;
  35 use std::ffi::OsString;
  36 use std::fs::File;
  37 use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Error, Read, Write};
  38 use std::io::{Seek, SeekFrom};
  39 use std::process::exit;
  40 
/// Help message for the app, kept as raw bytes so it can be handed directly
/// to a writer: `main` sends it to stdout when the first command-line
/// argument is one of the help options.
static INFO: &[u8] = b"\
dedup [options...] [file...]


DEDUPlicate lines prevents the same line from appearing again in the output,
after the first time. Unique lines are remembered across inputs.

Input is assumed to be UTF-8, and all trailing CRLF byte-pairs on lines are
turned into line feeds.

All (optional) leading options start with either single or double-dash:

    -h          show this help message
    -help       show this help message
";
  56 
  57 fn main() {
  58     let mut w = stdout();
  59     let mut skip_args = 1;
  60 
  61     if let Some(s) = env::args_os().nth(1) {
  62         if let Some(s) = s.to_str() {
  63             match s {
  64                 "-h" | "--h" | "-help" | "--help" => {
  65                     _ = (&mut w).write(INFO);
  66                     return;
  67                 }
  68                 "--" => skip_args = 2,
  69                 _ => (),
  70             }
  71         }
  72     }
  73 
  74     let live = match is_live() {
  75         Ok(ok) => ok,
  76         Err(e) => {
  77             eprintln!("error checking /dev/stdout: {}", e);
  78             exit(1)
  79         }
  80     };
  81 
  82     let mut errors = 0;
  83     let mut files: HashSet<OsString> = HashSet::new();
  84     let mut seen: HashSet<String> = HashSet::new();
  85 
  86     for arg in env::args_os().skip(skip_args) {
  87         if files.contains(&arg) {
  88             continue;
  89         }
  90         files.insert(arg.clone());
  91 
  92         if arg == "-" {
  93             match dedup(&mut w, stdin(), &mut seen, live) {
  94                 Ok(()) => continue,
  95                 Err(_) => return,
  96             }
  97         }
  98 
  99         let file = File::open(arg);
 100         match file {
 101             Ok(r) => match dedup(&mut w, r, &mut seen, live) {
 102                 Ok(()) => (),
 103                 Err(_) => return,
 104             },
 105 
 106             Err(e) => {
 107                 errors += 1;
 108                 eprintln!("{}", e);
 109             }
 110         }
 111     }
 112 
 113     if env::args_os().len() < 2 {
 114         _ = dedup(&mut w, stdin(), &mut seen, live);
 115     }
 116 
 117     exit(if errors > 0 { 1 } else { 0 });
 118 }
 119 
 120 fn is_live() -> Result<bool, Error> {
 121     let file = File::open("/dev/stdout");
 122     return match file {
 123         Ok(mut w) => match w.seek(SeekFrom::Current(0)) {
 124             Ok(_) => Ok(false),
 125             Err(_) => Ok(true),
 126         },
 127         Err(e) => Err(e),
 128     }
 129 }
 130 
 131 fn dedup(w: impl Write, r: impl Read, seen: &mut HashSet<String>, live: bool)
 132     -> Result<(), Error> {
 133     let br = BufReader::new(r);
 134     let mut bw = BufWriter::new(w);
 135 
 136     for line in br.lines() {
 137         match line {
 138             Ok(l) => {
 139                 if seen.contains(&l) {
 140                     continue;
 141                 }
 142 
 143                 seen.insert(l.clone());
 144                 _ = bw.write(l.as_bytes());
 145                 _ = bw.write(b"\n");
 146 
 147                 if !live {
 148                     continue
 149                 }
 150 
 151                 match bw.flush() {
 152                     Ok(()) => (),
 153                     Err(e) => return Err(e),
 154                 };
 155             }
 156 
 157             Err(e) => {
 158                 _ = bw.flush();
 159                 return Err(e)
 160             },
 161         }
 162     }
 163 
 164     Ok(())
 165 }