File: dedup.rs
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for dedup.
  27 
  28 To compile a smaller-sized command-line app, you can use the `rustc` command
  29 as follows:
  30 
  31 rustc -C lto=true -C codegen-units=1 -C debuginfo=0 -C strip=symbols -C opt-level=3 dedup.rs
  32 */
  33 
  34 use std::collections::HashSet;
  35 use std::env;
  36 use std::ffi::OsString;
  37 use std::fs::File;
  38 use std::io::{stdin, stdout, BufRead, BufReader, Error, Read, Write};
  39 use std::io::{Seek, SeekFrom, StdoutLock};
  40 use std::process::exit;
  41 
/// Help message; `main` writes it to standard output when the first
/// command-line argument is one of the help options.
static INFO: &[u8] = b"dedup [files...]

DEDUPlicate lines, emitting each distinct line once, when it first appears.
Unlike `uniq`, this app doesn't require pre-sorted lines to work correctly,
and keeps lines in their original order.
";
  48 
  49 fn main() {
  50     let w = stdout();
  51     let mut files: HashSet<OsString> = HashSet::new();
  52     let mut seen: HashSet<String> = HashSet::new();
  53     let mut errors = 0;
  54     let mut skip_args = 1;
  55 
  56     if let Some(s) = env::args_os().nth(1) {
  57         if let Some(s) = s.to_str() {
  58             match s {
  59                 "-h" | "--h" | "-help" | "--help" => {
  60                     _ = (&mut w.lock()).write(INFO);
  61                     return;
  62                 }
  63                 "--" => skip_args = 2,
  64                 _ => (),
  65             }
  66         }
  67     }
  68 
  69     // let live = match (&mut w.lock()).seek(SeekFrom::Current(0)) {
  70     //     Ok(_) => false,
  71     //     Err(_) => true,
  72     // };
  73     let live = match is_live() {
  74         Ok(ok) => ok,
  75         Err(e) => {
  76             eprintln!("error checking /dev/stdout: {}", e);
  77             exit(1)
  78         }
  79     };
  80 
  81     for arg in env::args_os().skip(skip_args) {
  82         if files.contains(&arg) {
  83             continue;
  84         }
  85         files.insert(arg.clone());
  86 
  87         if arg == "-" {
  88             match dedup(&mut w.lock(), stdin().lock(), &mut seen, live) {
  89                 Ok(()) => continue,
  90                 Err(_) => return,
  91             }
  92         }
  93 
  94         let file = File::open(arg);
  95         match file {
  96             Ok(r) => match dedup(&mut w.lock(), r, &mut seen, live) {
  97                 Ok(()) => (),
  98                 Err(_) => return,
  99             },
 100 
 101             Err(e) => {
 102                 errors += 1;
 103                 eprintln!("\x1b[31m{}\x1b[0m", e);
 104             }
 105         }
 106     }
 107 
 108     if env::args_os().len() < 2 {
 109         _ = dedup(&mut w.lock(), stdin().lock(), &mut seen, live);
 110     }
 111 
 112     exit(if errors > 0 { 1 } else { 0 });
 113 }
 114 
 115 fn is_live() -> Result<bool, Error> {
 116     let file = File::open("/dev/stdout");
 117     return match file {
 118         Ok(mut w) => match w.seek(SeekFrom::Current(0)) {
 119             Ok(_) => Ok(false),
 120             Err(_) => Ok(true),
 121         },
 122         Err(e) => Err(e),
 123     }
 124 }
 125 
 126 fn dedup(w: impl Write, r: impl Read, seen: &mut HashSet<String>, live: bool) -> Result<(), Error> {
 127     let br = BufReader::new(r);
 128 
 129     for line in br.lines() {
 130         match line {
 131             Ok(l) => {
 132                 if seen.contains(&l) {
 133                     continue;
 134                 }
 135 
 136                 seen.insert(l.clone());
 137                 _ = w.write(l.as_bytes());
 138                 _ = w.write(b"\n");
 139 
 140                 if !live {
 141                     continue
 142                 }
 143 
 144                 match w.flush() {
 145                     Ok(()) => (),
 146                     Err(e) => return Err(e),
 147                 };
 148             }
 149 
 150             Err(e) => return Err(e),
 151         }
 152     }
 153 
 154     Ok(())
 155 }
 156 
 157 fn dedup(w: &mut StdoutLock, r: impl Read, seen: &mut HashSet<String>, live: bool) -> Result<(), Error> {
 158     let br = BufReader::new(r);
 159 
 160     for line in br.lines() {
 161         match line {
 162             Ok(l) => {
 163                 if seen.contains(&l) {
 164                     continue;
 165                 }
 166 
 167                 seen.insert(l.clone());
 168                 _ = w.write(l.as_bytes());
 169                 _ = w.write(b"\n");
 170 
 171                 if !live {
 172                     continue
 173                 }
 174 
 175                 match w.flush() {
 176                     Ok(()) => (),
 177                     Err(e) => return Err(e),
 178                 };
 179             }
 180 
 181             Err(e) => return Err(e),
 182         }
 183     }
 184 
 185     Ok(())
 186 }