// File: dedup.rs
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for dedup.

To compile a smaller-sized command-line app, you can use the `rustc` command
as follows:

rustc -C lto=true -C codegen-units=1 -C debuginfo=0 -C strip=symbols -C opt-level=3 dedup.rs
*/

use std::collections::HashSet;
use std::env;
use std::ffi::OsString;
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, Error, ErrorKind, Read, Write};
use std::io::{Seek, SeekFrom, StdoutLock};
use std::process::exit;

static INFO: &[u8] = b"dedup [files...]

DEDUPlicate lines, emitting each distinct line once, when it first appears.
Unlike `uniq`, this app doesn't require pre-sorted lines to work correctly,
and keeps lines in their original order.
";

/// Entry point: dedups the lines of all named inputs (`-` means stdin), or of
/// stdin when no input is named, sharing one set of seen lines across inputs.
///
/// The exit status is 1 when any input failed to open or an I/O error other
/// than a closed output pipe stopped the run, and 0 otherwise.
fn main() {
    let w = stdout();
    // lock stdout once, instead of re-locking it for every input
    let mut out: StdoutLock = w.lock();
    let mut files: HashSet<OsString> = HashSet::new();
    let mut seen: HashSet<String> = HashSet::new();
    let mut errors = 0;
    let mut skip_args = 1;

    if let Some(first) = env::args_os().nth(1) {
        match first.to_str() {
            Some("-h" | "--h" | "-help" | "--help") => {
                _ = out.write_all(INFO);
                return;
            }
            // a leading `--` isn't an input name
            Some("--") => skip_args = 2,
            _ => (),
        }
    }

    let live = match is_live() {
        Ok(live) => live,
        Err(e) => {
            eprintln!("error checking /dev/stdout: {}", e);
            exit(1)
        }
    };

    for arg in env::args_os().skip(skip_args) {
        // each distinct name is handled only the first time it's given
        if !files.insert(arg.clone()) {
            continue;
        }

        let res = if arg == "-" {
            dedup(&mut out, stdin().lock(), &mut seen, live)
        } else {
            match File::open(&arg) {
                Ok(f) => dedup(&mut out, f, &mut seen, live),
                Err(e) => {
                    // a file which can't be opened doesn't stop later ones
                    errors += 1;
                    eprintln!("\x1b[31m{}\x1b[0m", e);
                    continue;
                }
            }
        };

        if let Err(e) = res {
            quit_on(e);
        }
    }

    // no input names given (even after a leading `--`): use stdin
    if env::args_os().len() <= skip_args {
        if let Err(e) = dedup(&mut out, stdin().lock(), &mut seen, live) {
            quit_on(e);
        }
    }

    if let Err(e) = out.flush() {
        quit_on(e);
    }
    exit(if errors > 0 { 1 } else { 0 });
}

/// Ends the app after an I/O error from reading input or writing output: a
/// closed output pipe quits quietly with status 0, while any other error is
/// shown on stderr and quits with status 1.
fn quit_on(e: Error) -> ! {
    if e.kind() == ErrorKind::BrokenPipe {
        exit(0);
    }
    eprintln!("\x1b[31m{}\x1b[0m", e);
    exit(1)
}

/// Tells whether output should be flushed after every line: `/dev/stdout` is
/// opened and the result is `true` when seeking on it fails.
///
/// Returns an error when `/dev/stdout` can't be opened.
fn is_live() -> Result<bool, Error> {
    let mut out = File::open("/dev/stdout")?;
    Ok(out.seek(SeekFrom::Current(0)).is_err())
}

/// Copies to `w` each line from `r` which isn't in `seen` yet, each followed
/// by a line-feed, adding these lines to `seen` along the way.
///
/// Lines are read via `BufRead::lines`, so input must be valid UTF-8. When
/// `live` is true, `w` is flushed after each emitted line.
///
/// Returns the first error from reading `r`, or from writing/flushing `w`.
fn dedup(
    mut w: impl Write,
    r: impl Read,
    seen: &mut HashSet<String>,
    live: bool,
) -> Result<(), Error> {
    for line in BufReader::new(r).lines() {
        let line = line?;
        if seen.contains(&line) {
            continue;
        }

        // write_all never leaves a line half-written without reporting it
        w.write_all(line.as_bytes())?;
        w.write_all(b"\n")?;
        if live {
            w.flush()?;
        }

        // remembering the line after emitting it avoids having to clone it
        seen.insert(line);
    }

    Ok(())
}