use std::cell::Cell;
use std::collections::{HashMap, HashSet};
use std::rc::Rc;
use csv::Error;
use itoa;
use maplit::hashset;
/// Identifier of a CSV row: the raw bytes of an ID field behind a
/// reference-counted pointer, so the same ID can be stored as a map key and as
/// a bubble member without copying the bytes again.
type ID = Rc<[u8]>;
/// Reads every input CSV file, collecting the ID connections of all files into
/// one shared bubble map, then writes one `New<input name>` output file per
/// input file and prints the elapsed time after each phase.
///
/// # Errors
///
/// Returns the first error reported by `read_file` or `write_file`.
fn main() -> Result<(), Error> {
    let t0 = std::time::Instant::now();
    let mut bubbles = HashMap::new();
    // Every input file is listed exactly once; listing a file twice would make
    // it be parsed twice and its output file be written twice.
    let input_files = [
        "TestDatei_Typ1_Nr1.csv",
        "TestDatei_Typ1_Nr2.csv",
        "TestDatei_Typ1_Nr3.csv",
        "TestDatei_Typ1_Nr4.csv",
        "TestDatei_Typ2_Nr1.csv",
        "TestDatei_Typ2_Nr2.csv",
        "TestDatei_Typ2_Nr3.csv",
        "TestDatei_Typ2_Nr4.csv",
    ];

    // All files are read before anything is written, so the bubble map is
    // complete by the time the first output file is produced.
    let mut intermediate_representations = Vec::with_capacity(input_files.len());
    for file_name in input_files.iter() {
        intermediate_representations.push(read_file(&mut bubbles, file_name)?);
    }
    println!("read complete: {}", t0.elapsed().as_secs_f64());

    for (entries, file_name) in intermediate_representations
        .into_iter()
        .zip(input_files.iter())
    {
        let output_name = format!("New{}", file_name);
        write_file(&bubbles, entries, &output_name)?;
    }
    println!("write complete: {}", t0.elapsed().as_secs_f64());
    Ok(())
}
/// Parses one semicolon-delimited CSV file.
///
/// For every data row the first field is taken as the row's ID. When the
/// second field is present and non-empty, the pair (first field, second field)
/// is registered in `bubbles` via `memorize_connection`.
///
/// # Returns
///
/// A map from each row ID to a cell holding that row's value. The value is
/// currently always `Some(0)` (see the TODO below).
///
/// # Errors
///
/// Returns an error if the file cannot be opened or a record cannot be parsed.
fn read_file(
    bubbles: &mut HashMap<ID, Rc<Cell<HashSet<ID>>>>,
    file_name: &str,
) -> Result<HashMap<ID, Cell<Option<i64>>>, Error> {
    let mut entries = HashMap::new();
    // With `has_headers(true)` (also the crate default) the reader consumes the
    // header row itself and `read_byte_record` only ever yields data rows.
    // Skipping one more record by hand would silently drop the first data row.
    let mut reader = csv::ReaderBuilder::new()
        .delimiter(b';')
        .has_headers(true)
        .from_path(file_name)?;
    let mut record = csv::ByteRecord::new();
    while reader.read_byte_record(&mut record)? {
        // `get` instead of indexing: a short row must not panic.
        let first_id: ID = match record.get(0) {
            Some(field) => Rc::from(field),
            None => continue,
        };
        if let Some(second_id) = record.get(1).filter(|field| !field.is_empty()) {
            memorize_connection(bubbles, first_id.clone(), second_id);
        }
        entries.insert(
            first_id,
            Cell::new(Some(0)), //TODO: placeholder; decode fields from data
        );
    }
    Ok(entries)
}
fn memorize_connection(hash_map: &mut HashMap<ID, Rc<Cell<HashSet<ID>>>>, id1_rc: ID, id2: &[u8]) {
let id1_opt = hash_map.get(&id1_rc).cloned();
let id2_opt = hash_map.get(id2).cloned();
match (id1_opt, id2_opt) {
(Some(h1), Some(h2)) => {
let mut h1_local = h1.take();
let h2_local = h2.take();
if h1_local != h2_local {
let second_set_local = hash_map.remove(id2).unwrap().take();
for key in second_set_local.iter() {
hash_map.insert(key.clone(), h1.clone());
}
h1_local.extend(second_set_local);
};
h1.set(h1_local)
}
(Some(h1), None) => add_unknown_key_to_bubble(hash_map, Rc::from(id2), &h1),
(None, Some(h2)) => add_unknown_key_to_bubble(hash_map, id1_rc, &h2),
(None, None) => {
let id2_rc: Rc<[u8]> = Rc::from(id2);
let new_set = Rc::new(Cell::new(hashset! {id1_rc.clone(), id2_rc.clone()}));
hash_map.insert(id1_rc, new_set.clone());
hash_map.insert(id2_rc, new_set);
}
}
}
/// Adds `unknown_id` to the bubble `hash_set_found` and registers the ID in
/// `bubbles` so that it maps to that same shared bubble.
fn add_unknown_key_to_bubble(
    bubbles: &mut HashMap<ID, Rc<Cell<HashSet<ID>>>>,
    unknown_id: ID,
    hash_set_found: &Rc<Cell<HashSet<ID>>>,
) {
    let shared_bubble = Rc::clone(hash_set_found);
    // `Cell` gives no reference to its contents, so the set is moved out
    // (leaving an empty one behind), extended, and moved back in.
    let mut members = shared_bubble.replace(HashSet::new());
    members.insert(Rc::clone(&unknown_id));
    shared_bubble.set(members);
    bubbles.insert(unknown_id, shared_bubble);
}
/// Writes the aggregated values of one file's `entries` to `file_name`.
///
/// Each entry that still holds a value produces one output record: the entry's
/// ID followed by the sum returned by `extract_aggregation`. That call clears
/// the values of all bubble members found in `entries`, so entries visited
/// later that were already summed are skipped; which member of a bubble ends up
/// in the output therefore depends on the map's iteration order.
///
/// NOTE(review): the writer is built with `csv::Writer::from_path`, i.e. without
/// an explicit delimiter, while `read_file` parses `;`-delimited input — confirm
/// the output delimiter is intended.
///
/// # Errors
///
/// Returns an error if the file cannot be created, written or flushed.
fn write_file(
    bubbles: &HashMap<ID, Rc<Cell<HashSet<ID>>>>,
    entries: HashMap<ID, Cell<Option<i64>>>,
    file_name: &str,
) -> Result<(), Error> {
    let mut writer = csv::Writer::from_path(file_name)?;
    // One formatting buffer serves every row; `format` overwrites it each time.
    let mut digits = itoa::Buffer::new();
    for (key, slot) in &entries {
        if let Some(own_value) = slot.get() {
            let sum = extract_aggregation(bubbles, &entries, key, own_value);
            //TODO: generate and write new ID, write other fields
            writer.write_field(key)?;
            writer.write_field(digits.format(sum))?;
            // An empty record only terminates the row started by `write_field`.
            writer.write_record(None::<&[u8]>)?;
        }
    }
    writer.flush()?;
    Ok(())
}
/// Sums the values of all members of `key`'s bubble that are present in
/// `entries`, clearing each value that was counted.
///
/// * `bubbles` – map from ID to the shared set of connected IDs.
/// * `entries` – values of the file currently being written; every value that
///   contributes to the sum is replaced by `None`.
/// * `key` – the ID whose bubble is aggregated.
/// * `value` – returned unchanged when `key` has no bubble; in that case
///   `entries` is left untouched.
///
/// Returns the sum over the bubble members found in `entries`, or `value` when
/// `key` is not part of any bubble.
fn extract_aggregation(
    bubbles: &HashMap<ID, Rc<Cell<HashSet<ID>>>>,
    entries: &HashMap<ID, Cell<Option<i64>>>,
    key: &ID,
    value: i64,
) -> i64 {
    if let Some(shared) = bubbles.get(key) {
        // Borrow the member set out of the cell for the duration of the scan.
        let members = shared.take();
        let mut total: i64 = 0;
        for member in &members {
            if let Some(slot) = entries.get(member) {
                // Taking the value marks this entry as already aggregated.
                if let Some(amount) = slot.take() {
                    total += amount;
                }
            }
        }
        // Put the set back so the bubble stays usable for other files.
        shared.set(members);
        total
    } else {
        value
    }
}