File: dedup.rs
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `rustc` command
  27 as follows:
  28 
  29 rustc -C lto=true -C codegen-units=1 -C debuginfo=0 -C strip=symbols \
  30     -C opt-level=3 dedup.rs
  31 */
  32 
  33 use std::collections::HashSet;
  34 use std::env;
  35 use std::ffi::OsString;
  36 use std::fs::File;
  37 use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Error, Read, Write};
  38 use std::io::{Seek, SeekFrom};
  39 use std::process::exit;
  40 
  41 static INFO: &[u8] = b"\
  42 dedup [options...] [file...]
  43 
  44 
  45 DEDUPlicate lines prevents the same line from appearing again in the output,
  46 after the first time. Unique lines are remembered across inputs.
  47 
  48 Input is assumed to be UTF-8, and all trailing CRLF byte-pairs on lines are
  49 turned into line feeds.
  50 
  51 All (optional) leading options start with either single or double-dash:
  52 
  53     -h          show this help message
  54     -help       show this help message
  55 ";
  56 
  57 fn main() {
  58     let mut w = stdout();
  59     let mut skip_args = 1;
  60     let mut buffered = false;
  61 
  62     if let Some(s) = env::args_os().nth(skip_args) {
  63         if let Some(s) = s.to_str() {
  64             match s {
  65                 "-h" | "--h" | "-help" | "--help" => {
  66                     _ = (&mut w).write(INFO);
  67                     return;
  68                 }
  69                 "-b" | "--b" | "-buffered" | "--buffered" => {
  70                     buffered = true;
  71                     skip_args += 1
  72                 }
  73                 _ => (),
  74             }
  75         }
  76     }
  77 
  78     if let Some(s) = env::args_os().nth(skip_args) {
  79         if let Some("--") = s.to_str() {
  80             skip_args += 1
  81         }
  82     }
  83 
  84     let live = !buffered
  85         && match is_live() {
  86             Ok(ok) => ok,
  87             Err(e) => {
  88                 eprintln!("error checking /dev/stdout: {}", e);
  89                 exit(1)
  90             }
  91         };
  92 
  93     let mut errors = 0;
  94     let mut files: HashSet<OsString> = HashSet::new();
  95     let mut seen: HashSet<String> = HashSet::new();
  96 
  97     for arg in env::args_os().skip(skip_args) {
  98         if files.contains(&arg) {
  99             continue;
 100         }
 101         files.insert(arg.clone());
 102 
 103         if arg == "-" {
 104             match dedup(&mut w, stdin(), &mut seen, live) {
 105                 Ok(()) => continue,
 106                 Err(_) => return,
 107             }
 108         }
 109 
 110         let file = File::open(arg);
 111         match file {
 112             Ok(r) => match dedup(&mut w, r, &mut seen, live) {
 113                 Ok(()) => (),
 114                 Err(_) => return,
 115             },
 116 
 117             Err(e) => {
 118                 errors += 1;
 119                 eprintln!("{}", e)
 120             }
 121         }
 122     }
 123 
 124     if files.len() == 0 {
 125         _ = dedup(&mut w, stdin(), &mut seen, live);
 126     }
 127 
 128     exit(if errors > 0 { 1 } else { 0 });
 129 }
 130 
 131 fn is_live() -> Result<bool, Error> {
 132     let file = File::open("/dev/stdout");
 133     return match file {
 134         Ok(mut w) => match w.seek(SeekFrom::Current(0)) {
 135             Ok(_) => Ok(false),
 136             Err(_) => Ok(true),
 137         },
 138         Err(e) => Err(e),
 139     };
 140 }
 141 
 142 fn dedup(w: impl Write, r: impl Read, seen: &mut HashSet<String>, live: bool) -> Result<(), Error> {
 143     let br = BufReader::new(r);
 144     let mut bw = BufWriter::new(w);
 145 
 146     for line in br.lines() {
 147         match line {
 148             Ok(l) => {
 149                 if seen.contains(&l) {
 150                     continue;
 151                 }
 152 
 153                 seen.insert(l.clone());
 154                 _ = bw.write(l.as_bytes());
 155                 _ = bw.write(b"\n");
 156 
 157                 if !live {
 158                     continue;
 159                 }
 160 
 161                 match bw.flush() {
 162                     Ok(()) => (),
 163                     Err(e) => return Err(e),
 164                 };
 165             }
 166 
 167             Err(e) => return Err(e),
 168         }
 169     }
 170 
 171     Ok(())
 172 }