1 /*-
2  * Copyright (c) 2008 Joerg Sonnenberger
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 /*-
27  * Copyright (c) 1985, 1986, 1992, 1993
28  *        The Regents of the University of California.  All rights reserved.
29  *
30  * This code is derived from software contributed to Berkeley by
31  * Diomidis Spinellis and James A. Woods, derived from original
32  * work by Spencer Thomas and Joseph Orost.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  * 3. Neither the name of the University nor the names of its contributors
43  *    may be used to endorse or promote products derived from this software
44  *    without specific prior written permission.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56  * SUCH DAMAGE.
57  */
58 
59 #include "archive_platform.h"
60 
61 #ifdef HAVE_ERRNO_H
62 #include <errno.h>
63 #endif
64 #ifdef HAVE_STDLIB_H
65 #include <stdlib.h>
66 #endif
67 #ifdef HAVE_STRING_H
68 #include <string.h>
69 #endif
70 
71 #include "archive.h"
72 #include "archive_private.h"
73 #include "archive_write_private.h"
74 
/* Hash table geometry and ratio-check tuning. */
#define	HSIZE		69001	/* Hash slots; 95% occupancy. */
#define	HSHIFT		8	/* 8 - trunc(log2(HSIZE / 65536)) */
#define	CHECK_GAP	10000	/* Ratio check interval, in input bytes. */

/* Largest code value that fits in the given number of bits. */
#define	MAXCODE(bits)	((1 << (bits)) - 1)

/*
 * The next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */
#define	CLEAR		256	/* Table clear output code. */
#define	FIRST		257	/* First free entry. */
87 
88 struct private_data {
89           int64_t in_count, out_count, checkpoint;
90 
91           int code_len;                           /* Number of bits/code. */
92           int cur_maxcode;              /* Maximum code, given n_bits. */
93           int max_maxcode;              /* Should NEVER generate this code. */
94           int hashtab [HSIZE];
95           unsigned short codetab [HSIZE];
96           int first_free;               /* First unused entry. */
97           int compress_ratio;
98 
99           int cur_code, cur_fcode;
100 
101           int bit_offset;
102           unsigned char bit_buf;
103 
104           unsigned char       *compressed;
105           size_t               compressed_buffer_size;
106           size_t               compressed_offset;
107 };
108 
/* Filter callbacks implemented further down in this file. */
static int	archive_compressor_compress_open(struct archive_write_filter *f);
static int	archive_compressor_compress_write(struct archive_write_filter *f,
		    const void *buff, size_t length);
static int	archive_compressor_compress_close(struct archive_write_filter *f);
static int	archive_compressor_compress_free(struct archive_write_filter *f);
114 
115 #if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Pre-4.0 entry point (only compiled while ARCHIVE_VERSION_NUMBER is
 * below 4000000).  Calls __archive_write_filters_free() on the handle
 * and then adds the compress filter, returning that call's result.
 */
int
archive_write_set_compression_compress(struct archive *a)
{
	__archive_write_filters_free(a);

	return archive_write_add_filter_compress(a);
}
122 #endif
123 
124 /*
125  * Add a compress filter to this write handle.
126  */
127 int
archive_write_add_filter_compress(struct archive * _a)128 archive_write_add_filter_compress(struct archive *_a)
129 {
130           struct archive_write *a = (struct archive_write *)_a;
131           struct archive_write_filter *f = __archive_write_allocate_filter(_a);
132 
133           archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
134               ARCHIVE_STATE_NEW, "archive_write_add_filter_compress");
135           f->open = &archive_compressor_compress_open;
136           f->code = ARCHIVE_FILTER_COMPRESS;
137           f->name = "compress";
138           return (ARCHIVE_OK);
139 }
140 
141 /*
142  * Setup callback.
143  */
144 static int
archive_compressor_compress_open(struct archive_write_filter * f)145 archive_compressor_compress_open(struct archive_write_filter *f)
146 {
147           struct private_data *state;
148           size_t bs = 65536, bpb;
149 
150           f->code = ARCHIVE_FILTER_COMPRESS;
151           f->name = "compress";
152 
153           state = calloc(1, sizeof(*state));
154           if (state == NULL) {
155                     archive_set_error(f->archive, ENOMEM,
156                         "Can't allocate data for compression");
157                     return (ARCHIVE_FATAL);
158           }
159 
160           if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
161                     /* Buffer size should be a multiple number of the bytes
162                      * per block for performance. */
163                     bpb = archive_write_get_bytes_per_block(f->archive);
164                     if (bpb > bs)
165                               bs = bpb;
166                     else if (bpb != 0)
167                               bs -= bs % bpb;
168           }
169           state->compressed_buffer_size = bs;
170           state->compressed = malloc(state->compressed_buffer_size);
171 
172           if (state->compressed == NULL) {
173                     archive_set_error(f->archive, ENOMEM,
174                         "Can't allocate data for compression buffer");
175                     free(state);
176                     return (ARCHIVE_FATAL);
177           }
178 
179           f->write = archive_compressor_compress_write;
180           f->close = archive_compressor_compress_close;
181           f->free = archive_compressor_compress_free;
182 
183           state->max_maxcode = 0x10000; /* Should NEVER generate this code. */
184           state->in_count = 0;                    /* Length of input. */
185           state->bit_buf = 0;
186           state->bit_offset = 0;
187           state->out_count = 3;                   /* Includes 3-byte header mojo. */
188           state->compress_ratio = 0;
189           state->checkpoint = CHECK_GAP;
190           state->code_len = 9;
191           state->cur_maxcode = MAXCODE(state->code_len);
192           state->first_free = FIRST;
193 
194           memset(state->hashtab, 0xff, sizeof(state->hashtab));
195 
196           /* Prime output buffer with a gzip header. */
197           state->compressed[0] = 0x1f; /* Compress */
198           state->compressed[1] = 0x9d;
199           state->compressed[2] = 0x90; /* Block mode, 16bit max */
200           state->compressed_offset = 3;
201 
202           f->data = state;
203           return (0);
204 }
205 
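/*-
 * Output the given code.
 * Inputs:
 *	code:	A code_len-bit integer.  Unlike the historical compress
 *		sources, -1 is not used to signal EOF here; the trailing
 *		partial byte is written by output_flush() instead.  This
 *		assumes that code_len <= (long)wordsize - 1.
 * Outputs:
 *	Appends the code to the compressed output buffer.
 * Assumptions:
 *	Chars are 8 bits long.
 * Algorithm:
 *	Instead of the historical BITS character long buffer filled with
 * the VAX insv instruction, codes are packed least-significant bit
 * first: completed bytes are emitted immediately and the leftover bits
 * are kept in bit_buf.  bit_offset counts the bits written in the
 * current group of code_len bytes (so that 8 codes fit in it exactly);
 * when the code width changes the group is padded to its full length.
 */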
220 
/* rmask[n] selects the low n bits of a byte, for n = 0..8. */
static const unsigned char rmask[9] = {
	0x00,			/* no bits */
	0x01, 0x03, 0x07, 0x0f,	/* 1..4 bits */
	0x1f, 0x3f, 0x7f, 0xff	/* 5..8 bits */
};
223 
224 static int
output_byte(struct archive_write_filter * f,unsigned char c)225 output_byte(struct archive_write_filter *f, unsigned char c)
226 {
227           struct private_data *state = f->data;
228 
229           state->compressed[state->compressed_offset++] = c;
230           ++state->out_count;
231 
232           if (state->compressed_buffer_size == state->compressed_offset) {
233                     int ret = __archive_write_filter(f->next_filter,
234                         state->compressed, state->compressed_buffer_size);
235                     if (ret != ARCHIVE_OK)
236                               return ARCHIVE_FATAL;
237                     state->compressed_offset = 0;
238           }
239 
240           return ARCHIVE_OK;
241 }
242 
243 static int
output_code(struct archive_write_filter * f,int ocode)244 output_code(struct archive_write_filter *f, int ocode)
245 {
246           struct private_data *state = f->data;
247           int bits, ret, clear_flg, bit_offset;
248 
249           clear_flg = ocode == CLEAR;
250 
251           /*
252            * Since ocode is always >= 8 bits, only need to mask the first
253            * hunk on the left.
254            */
255           bit_offset = state->bit_offset % 8;
256           state->bit_buf |= (ocode << bit_offset) & 0xff;
257           output_byte(f, state->bit_buf);
258 
259           bits = state->code_len - (8 - bit_offset);
260           ocode >>= 8 - bit_offset;
261           /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
262           if (bits >= 8) {
263                     output_byte(f, ocode & 0xff);
264                     ocode >>= 8;
265                     bits -= 8;
266           }
267           /* Last bits. */
268           state->bit_offset += state->code_len;
269           state->bit_buf = ocode & rmask[bits];
270           if (state->bit_offset == state->code_len * 8)
271                     state->bit_offset = 0;
272 
273           /*
274            * If the next entry is going to be too big for the ocode size,
275            * then increase it, if possible.
276            */
277           if (clear_flg || state->first_free > state->cur_maxcode) {
278                  /*
279                     * Write the whole buffer, because the input side won't
280                     * discover the size increase until after it has read it.
281                     */
282                     if (state->bit_offset > 0) {
283                               while (state->bit_offset < state->code_len * 8) {
284                                         ret = output_byte(f, state->bit_buf);
285                                         if (ret != ARCHIVE_OK)
286                                                   return ret;
287                                         state->bit_offset += 8;
288                                         state->bit_buf = 0;
289                               }
290                     }
291                     state->bit_buf = 0;
292                     state->bit_offset = 0;
293 
294                     if (clear_flg) {
295                               state->code_len = 9;
296                               state->cur_maxcode = MAXCODE(state->code_len);
297                     } else {
298                               state->code_len++;
299                               if (state->code_len == 16)
300                                         state->cur_maxcode = state->max_maxcode;
301                               else
302                                         state->cur_maxcode = MAXCODE(state->code_len);
303                     }
304           }
305 
306           return (ARCHIVE_OK);
307 }
308 
309 static int
output_flush(struct archive_write_filter * f)310 output_flush(struct archive_write_filter *f)
311 {
312           struct private_data *state = f->data;
313           int ret;
314 
315           /* At EOF, write the rest of the buffer. */
316           if (state->bit_offset % 8) {
317                     state->code_len = (state->bit_offset % 8 + 7) / 8;
318                     ret = output_byte(f, state->bit_buf);
319                     if (ret != ARCHIVE_OK)
320                               return ret;
321           }
322 
323           return (ARCHIVE_OK);
324 }
325 
326 /*
327  * Write data to the compressed stream.
328  */
329 static int
archive_compressor_compress_write(struct archive_write_filter * f,const void * buff,size_t length)330 archive_compressor_compress_write(struct archive_write_filter *f,
331     const void *buff, size_t length)
332 {
333           struct private_data *state = (struct private_data *)f->data;
334           int i;
335           int ratio;
336           int c, disp, ret;
337           const unsigned char *bp;
338 
339           if (length == 0)
340                     return ARCHIVE_OK;
341 
342           bp = buff;
343 
344           if (state->in_count == 0) {
345                     state->cur_code = *bp++;
346                     ++state->in_count;
347                     --length;
348           }
349 
350           while (length--) {
351                     c = *bp++;
352                     state->in_count++;
353                     state->cur_fcode = (c << 16) | state->cur_code;
354                     i = ((c << HSHIFT) ^ state->cur_code);  /* Xor hashing. */
355 
356                     if (state->hashtab[i] == state->cur_fcode) {
357                               state->cur_code = state->codetab[i];
358                               continue;
359                     }
360                     if (state->hashtab[i] < 0)    /* Empty slot. */
361                               goto nomatch;
362                     /* Secondary hash (after G. Knott). */
363                     if (i == 0)
364                               disp = 1;
365                     else
366                               disp = HSIZE - i;
367  probe:
368                     if ((i -= disp) < 0)
369                               i += HSIZE;
370 
371                     if (state->hashtab[i] == state->cur_fcode) {
372                               state->cur_code = state->codetab[i];
373                               continue;
374                     }
375                     if (state->hashtab[i] >= 0)
376                               goto probe;
377  nomatch:
378                     ret = output_code(f, state->cur_code);
379                     if (ret != ARCHIVE_OK)
380                               return ret;
381                     state->cur_code = c;
382                     if (state->first_free < state->max_maxcode) {
383                               state->codetab[i] = state->first_free++;          /* code -> hashtable */
384                               state->hashtab[i] = state->cur_fcode;
385                               continue;
386                     }
387                     if (state->in_count < state->checkpoint)
388                               continue;
389 
390                     state->checkpoint = state->in_count + CHECK_GAP;
391 
392                     if (state->in_count <= 0x007fffff && state->out_count != 0)
393                               ratio = (int)(state->in_count * 256 / state->out_count);
394                     else if ((ratio = (int)(state->out_count / 256)) == 0)
395                               ratio = 0x7fffffff;
396                     else
397                               ratio = (int)(state->in_count / ratio);
398 
399                     if (ratio > state->compress_ratio)
400                               state->compress_ratio = ratio;
401                     else {
402                               state->compress_ratio = 0;
403                               memset(state->hashtab, 0xff, sizeof(state->hashtab));
404                               state->first_free = FIRST;
405                               ret = output_code(f, CLEAR);
406                               if (ret != ARCHIVE_OK)
407                                         return ret;
408                     }
409           }
410 
411           return (ARCHIVE_OK);
412 }
413 
414 
415 /*
416  * Finish the compression...
417  */
418 static int
archive_compressor_compress_close(struct archive_write_filter * f)419 archive_compressor_compress_close(struct archive_write_filter *f)
420 {
421           struct private_data *state = (struct private_data *)f->data;
422           int ret;
423 
424           ret = output_code(f, state->cur_code);
425           if (ret != ARCHIVE_OK)
426                     return ret;
427           ret = output_flush(f);
428           if (ret != ARCHIVE_OK)
429                     return ret;
430 
431           /* Write the last block */
432           ret = __archive_write_filter(f->next_filter,
433               state->compressed, state->compressed_offset);
434           return (ret);
435 }
436 
437 static int
archive_compressor_compress_free(struct archive_write_filter * f)438 archive_compressor_compress_free(struct archive_write_filter *f)
439 {
440           struct private_data *state = (struct private_data *)f->data;
441 
442           free(state->compressed);
443           free(state);
444           return (ARCHIVE_OK);
445 }
446