libStatGen Software 1
Loading...
Searching...
No Matches
MiniDeflate.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __MINIDEFLATE_H__
19#define __MINIDEFLATE_H__
20
21#include <stdio.h>
22
23// MiniDeflate reads and writes files in a simple Deflate like format
24// A quick overview of this format follows, at the bottom of this file
25//
26
27// Performance tuning constants
28//
29
30// Hash table size is HASH_SIZE (a prime)
31#define HASH_SIZE 4093
32// Hash table depth is HASH_DEPTH (a power of 2)
33#define HASH_DEPTH 8
34// Matches that are not at least OKAY_MATCH chars are added to hash table
35#define OKAY_MATCH 32
36// Buffer size for FILE I/O
37#define BUFFER_SIZE (32 * 1024)
38
40{
41public:
44
45 void Deflate(FILE * output, void * input, size_t bytes);
46 void Inflate(FILE * input, void * ouput, size_t bytes);
47
48private:
49 unsigned char * buffer;
50 unsigned char * hash_keys;
51 unsigned char ** hash_values;
52
53 // Inline functions used during file compression
54 inline void EvaluateMatch(unsigned char * in, int len, int hash,
55 unsigned char * & best_pos, int & best_match);
56 inline void QuoteLiterals(unsigned char * & in, int literal,
57 unsigned char * & out, int & buffer_len,
58 FILE * output);
59 inline void OutputLiterals(unsigned char * & in, int literal,
60 unsigned char * & out, int & buffer_len,
61 FILE * output);
62 inline void CiteLiteral(unsigned char * & out, int literal,
63 unsigned char * & in, int & buffer_len,
64 FILE * input);
65};
66
67// Format specification for deflate files
68//
69// A compressed file is a sequence of bytes {0 .. N}.
70// Each byte is a sequence of bits [0 .. 7] with 0 as the Most Significant Bit.
71//
72// The following tokens are recognized:
73//
74// Literal quotes -- refer to unique strings
75//
76// BYTE0 BYTE1 BYTE2 Description
77// 0 HI LO Quote of 31 bytes of more
78// Followed by (HI << 8 + LO + 31) quoted chars
79// 0:4|LEN Quote of up to 1-15 bytes
80// Followed by LEN quoted chars
81//
82// String matches -- refer to previous strings in the input stream
83//
84// BYTE0 BYTE1 BYTE2 BYTE3 BYTE4 Description
85// 1:4|OFF OFF1 OFF2:2|0 HI LO Long match of > 66 bytes
86// Offset of OFF|OFF1|OFF2 + 1
87// Length of HI|LO + 66
88// 1:4|OFF OFF1 OFF2:2|LEN Distant match of < 66 bytes
89// Offset of OFF|OFF1|OFF2 + 1
90// Length of LEN + 2
91// LEN|OFF OFF1 Nearby short match
92// Offset OFF|OFF1 + 1
93// Length LEN
94//
95
96// NOTE: When partitioning bytes, I use the notation X:n|Y so that
97// X takes the n MSB bits of byte and Y takes the remaining bits.
98
99
100#endif
101
102