Skip to content

Commit

Permalink
Fix edge cases around small data slices or few sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
Moritz Borcherding committed Nov 21, 2024
1 parent 4dc00ee commit 61c3480
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 28 deletions.
56 changes: 29 additions & 27 deletions src/encoding/blocks/compressed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,34 +71,36 @@ pub fn compress_block<'a>(matcher: &mut MatchGenerator<'a>, data: &'a [u8], outp
writer.write_bits(of_add_bits, of_num_bits);

// encode backwards so the decoder reads the first sequence first
for sequence in (0..=sequences.len() - 2).rev() {
let sequence = sequences[sequence];
let (ll_code, ll_add_bits, ll_num_bits) = encode_literal_length(sequence.ll);
let (of_code, of_add_bits, of_num_bits) = encode_offset(sequence.of);
let (ml_code, ml_add_bits, ml_num_bits) = encode_match_len(sequence.ml);

{
let next = of_table.next_state(of_code, of_state.index);
let diff = of_state.index - next.baseline;
writer.write_bits(diff as u64, next.num_bits as usize);
of_state = next;
if sequences.len() > 1 {
for sequence in (0..=sequences.len() - 2).rev() {
let sequence = sequences[sequence];
let (ll_code, ll_add_bits, ll_num_bits) = encode_literal_length(sequence.ll);
let (of_code, of_add_bits, of_num_bits) = encode_offset(sequence.of);
let (ml_code, ml_add_bits, ml_num_bits) = encode_match_len(sequence.ml);

{
let next = of_table.next_state(of_code, of_state.index);
let diff = of_state.index - next.baseline;
writer.write_bits(diff as u64, next.num_bits as usize);
of_state = next;
}
{
let next = ml_table.next_state(ml_code, ml_state.index);
let diff = ml_state.index - next.baseline;
writer.write_bits(diff as u64, next.num_bits as usize);
ml_state = next;
}
{
let next = ll_table.next_state(ll_code, ll_state.index);
let diff = ll_state.index - next.baseline;
writer.write_bits(diff as u64, next.num_bits as usize);
ll_state = next;
}

writer.write_bits(ll_add_bits, ll_num_bits);
writer.write_bits(ml_add_bits, ml_num_bits);
writer.write_bits(of_add_bits, of_num_bits);
}
{
let next = ml_table.next_state(ml_code, ml_state.index);
let diff = ml_state.index - next.baseline;
writer.write_bits(diff as u64, next.num_bits as usize);
ml_state = next;
}
{
let next = ll_table.next_state(ll_code, ll_state.index);
let diff = ll_state.index - next.baseline;
writer.write_bits(diff as u64, next.num_bits as usize);
ll_state = next;
}

writer.write_bits(ll_add_bits, ll_num_bits);
writer.write_bits(ml_add_bits, ml_num_bits);
writer.write_bits(of_add_bits, of_num_bits);
}
writer.write_bits(ml_state.index as u64, ml_table.table_size.ilog2() as usize);
writer.write_bits(of_state.index as u64, of_table.table_size.ilog2() as usize);
Expand Down
4 changes: 3 additions & 1 deletion src/encoding/match_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ impl<'data> MatchGenerator<'data> {
let literals = &last_entry.data[self.last_idx_in_sequence..self.suffix_idx];
let offset = match_entry.base_offset + self.suffix_idx - match_index;


#[cfg(debug_assertions)]
{
let unprocessed = last_entry.data.len() - self.suffix_idx;
Expand Down Expand Up @@ -139,6 +138,9 @@ impl<'data> MatchGenerator<'data> {

fn add_suffixes_till(&mut self, idx: usize) {
let last_entry = self.window.last_mut().unwrap();
if last_entry.data.len() < MIN_MATCH_LEN {
return;
}
let last_idx = usize::min(idx, last_entry.data.len() - MIN_MATCH_LEN);
for idx in self.suffix_idx..=last_idx {
let mut key = [0u8; MIN_MATCH_LEN];
Expand Down

0 comments on commit 61c3480

Please sign in to comment.