1 /*-
2  * Copyright (c) 2003-2011 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 /*
27  * This file contains the "essential" portions of the read API, that
28  * is, stuff that will probably always be used by any client that
29  * actually needs to read an archive.  Optional pieces have been, as
30  * far as possible, separated out into separate files to avoid
31  * needlessly bloating statically-linked clients.
32  */
33 
34 #include "archive_platform.h"
35 
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #include <stdio.h>
40 #ifdef HAVE_STDLIB_H
41 #include <stdlib.h>
42 #endif
43 #ifdef HAVE_STRING_H
44 #include <string.h>
45 #endif
46 #ifdef HAVE_UNISTD_H
47 #include <unistd.h>
48 #endif
49 
50 #include "archive.h"
51 #include "archive_entry.h"
52 #include "archive_private.h"
53 #include "archive_read_private.h"
54 
/*
 * Smaller of two values.  Every argument and the whole expansion are
 * parenthesized so that operands containing lower-precedence operators
 * (e.g. `x | y`) expand correctly.  Each argument may still be evaluated
 * twice, so do not pass expressions with side effects.
 */
#define minimum(a, b) ((a) < (b) ? (a) : (b))
56 
/*
 * Forward declarations for file-local helpers and for the functions
 * installed in archive_read_vtable below.
 */
static int	choose_filters(struct archive_read *);
static int	choose_format(struct archive_read *);
static int	close_filters(struct archive_read *);
static int64_t	_archive_filter_bytes(struct archive *, int);
static int	_archive_filter_code(struct archive *, int);
static const char *_archive_filter_name(struct archive *, int);
static int  _archive_filter_count(struct archive *);
static int	_archive_read_close(struct archive *);
static int	_archive_read_data_block(struct archive *,
		    const void **, size_t *, int64_t *);
static int	_archive_read_free(struct archive *);
static int	_archive_read_next_header(struct archive *,
		    struct archive_entry **);
static int	_archive_read_next_header2(struct archive *,
		    struct archive_entry *);
static int64_t  advance_file_pointer(struct archive_read_filter *, int64_t);
73 
/*
 * Dispatch table for read handles.  archive_read_new() stores a pointer
 * to this table in the handle's `archive.vtable` field.
 */
static const struct archive_vtable
archive_read_vtable = {
	.archive_filter_bytes = _archive_filter_bytes,
	.archive_filter_code = _archive_filter_code,
	.archive_filter_name = _archive_filter_name,
	.archive_filter_count = _archive_filter_count,
	.archive_read_data_block = _archive_read_data_block,
	.archive_read_next_header = _archive_read_next_header,
	.archive_read_next_header2 = _archive_read_next_header2,
	.archive_free = _archive_read_free,
	.archive_close = _archive_read_close,
};
86 
87 /*
88  * Allocate, initialize and return a struct archive object.
89  */
90 struct archive *
archive_read_new(void)91 archive_read_new(void)
92 {
93           struct archive_read *a;
94 
95           a = calloc(1, sizeof(*a));
96           if (a == NULL)
97                     return (NULL);
98           a->archive.magic = ARCHIVE_READ_MAGIC;
99 
100           a->archive.state = ARCHIVE_STATE_NEW;
101           a->entry = archive_entry_new2(&a->archive);
102           a->archive.vtable = &archive_read_vtable;
103 
104           a->passphrases.last = &a->passphrases.first;
105 
106           return (&a->archive);
107 }
108 
109 /*
110  * Record the do-not-extract-to file. This belongs in archive_read_extract.c.
111  */
112 void
archive_read_extract_set_skip_file(struct archive * _a,la_int64_t d,la_int64_t i)113 archive_read_extract_set_skip_file(struct archive *_a, la_int64_t d,
114     la_int64_t i)
115 {
116           struct archive_read *a = (struct archive_read *)_a;
117 
118           if (ARCHIVE_OK != __archive_check_magic(_a, ARCHIVE_READ_MAGIC,
119                     ARCHIVE_STATE_ANY, "archive_read_extract_set_skip_file"))
120                     return;
121           a->skip_file_set = 1;
122           a->skip_file_dev = d;
123           a->skip_file_ino = i;
124 }
125 
126 /*
127  * Open the archive
128  */
129 int
archive_read_open(struct archive * a,void * client_data,archive_open_callback * client_opener,archive_read_callback * client_reader,archive_close_callback * client_closer)130 archive_read_open(struct archive *a, void *client_data,
131     archive_open_callback *client_opener, archive_read_callback *client_reader,
132     archive_close_callback *client_closer)
133 {
134           /* Old archive_read_open() is just a thin shell around
135            * archive_read_open1. */
136           archive_read_set_open_callback(a, client_opener);
137           archive_read_set_read_callback(a, client_reader);
138           archive_read_set_close_callback(a, client_closer);
139           archive_read_set_callback_data(a, client_data);
140           return archive_read_open1(a);
141 }
142 
143 
144 int
archive_read_open2(struct archive * a,void * client_data,archive_open_callback * client_opener,archive_read_callback * client_reader,archive_skip_callback * client_skipper,archive_close_callback * client_closer)145 archive_read_open2(struct archive *a, void *client_data,
146     archive_open_callback *client_opener,
147     archive_read_callback *client_reader,
148     archive_skip_callback *client_skipper,
149     archive_close_callback *client_closer)
150 {
151           /* Old archive_read_open2() is just a thin shell around
152            * archive_read_open1. */
153           archive_read_set_callback_data(a, client_data);
154           archive_read_set_open_callback(a, client_opener);
155           archive_read_set_read_callback(a, client_reader);
156           archive_read_set_skip_callback(a, client_skipper);
157           archive_read_set_close_callback(a, client_closer);
158           return archive_read_open1(a);
159 }
160 
161 static ssize_t
client_read_proxy(struct archive_read_filter * self,const void ** buff)162 client_read_proxy(struct archive_read_filter *self, const void **buff)
163 {
164           ssize_t r;
165           r = (self->archive->client.reader)(&self->archive->archive,
166               self->data, buff);
167           return (r);
168 }
169 
170 static int64_t
client_skip_proxy(struct archive_read_filter * self,int64_t request)171 client_skip_proxy(struct archive_read_filter *self, int64_t request)
172 {
173           if (request < 0)
174                     __archive_errx(1, "Negative skip requested.");
175           if (request == 0)
176                     return 0;
177 
178           if (self->archive->client.skipper != NULL) {
179                     /* Seek requests over 1GiB are broken down into
180                      * multiple seeks.  This avoids overflows when the
181                      * requests get passed through 32-bit arguments. */
182                     int64_t skip_limit = (int64_t)1 << 30;
183                     int64_t total = 0;
184                     for (;;) {
185                               int64_t get, ask = request;
186                               if (ask > skip_limit)
187                                         ask = skip_limit;
188                               get = (self->archive->client.skipper)
189                                         (&self->archive->archive, self->data, ask);
190                               total += get;
191                               if (get == 0 || get == request)
192                                         return (total);
193                               if (get > request)
194                                         return ARCHIVE_FATAL;
195                               request -= get;
196                     }
197           } else if (self->archive->client.seeker != NULL
198                     && request > 64 * 1024) {
199                     /* If the client provided a seeker but not a skipper,
200                      * we can use the seeker to skip forward.
201                      *
202                      * Note: This isn't always a good idea.  The client
203                      * skipper is allowed to skip by less than requested
204                      * if it needs to maintain block alignment.  The
205                      * seeker is not allowed to play such games, so using
206                      * the seeker here may be a performance loss compared
207                      * to just reading and discarding.  That's why we
208                      * only do this for skips of over 64k.
209                      */
210                     int64_t before = self->position;
211                     int64_t after = (self->archive->client.seeker)
212                         (&self->archive->archive, self->data, request, SEEK_CUR);
213                     if (after != before + request)
214                               return ARCHIVE_FATAL;
215                     return after - before;
216           }
217           return 0;
218 }
219 
220 static int64_t
client_seek_proxy(struct archive_read_filter * self,int64_t offset,int whence)221 client_seek_proxy(struct archive_read_filter *self, int64_t offset, int whence)
222 {
223           /* DO NOT use the skipper here!  If we transparently handled
224            * forward seek here by using the skipper, that will break
225            * other libarchive code that assumes a successful forward
226            * seek means it can also seek backwards.
227            */
228           if (self->archive->client.seeker == NULL) {
229                     archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
230                         "Current client reader does not support seeking a device");
231                     return (ARCHIVE_FAILED);
232           }
233           return (self->archive->client.seeker)(&self->archive->archive,
234               self->data, offset, whence);
235 }
236 
237 static int
read_client_close_proxy(struct archive_read * a)238 read_client_close_proxy(struct archive_read *a)
239 {
240           int r = ARCHIVE_OK, r2;
241           unsigned int i;
242 
243           if (a->client.closer == NULL)
244                     return (r);
245           for (i = 0; i < a->client.nodes; i++)
246           {
247                     r2 = (a->client.closer)
248                               ((struct archive *)a, a->client.dataset[i].data);
249                     if (r > r2)
250                               r = r2;
251           }
252           return (r);
253 }
254 
255 static int
client_close_proxy(struct archive_read_filter * self)256 client_close_proxy(struct archive_read_filter *self)
257 {
258           return read_client_close_proxy(self->archive);
259 }
260 
261 static int
client_open_proxy(struct archive_read_filter * self)262 client_open_proxy(struct archive_read_filter *self)
263 {
264   int r = ARCHIVE_OK;
265           if (self->archive->client.opener != NULL)
266                     r = (self->archive->client.opener)(
267                         (struct archive *)self->archive, self->data);
268           return (r);
269 }
270 
271 static int
client_switch_proxy(struct archive_read_filter * self,unsigned int iindex)272 client_switch_proxy(struct archive_read_filter *self, unsigned int iindex)
273 {
274   int r1 = ARCHIVE_OK, r2 = ARCHIVE_OK;
275           void *data2 = NULL;
276 
277           /* Don't do anything if already in the specified data node */
278           if (self->archive->client.cursor == iindex)
279                     return (ARCHIVE_OK);
280 
281           self->archive->client.cursor = iindex;
282           data2 = self->archive->client.dataset[self->archive->client.cursor].data;
283           if (self->archive->client.switcher != NULL)
284           {
285                     r1 = r2 = (self->archive->client.switcher)
286                               ((struct archive *)self->archive, self->data, data2);
287                     self->data = data2;
288           }
289           else
290           {
291                     /* Attempt to call close and open instead */
292                     if (self->archive->client.closer != NULL)
293                               r1 = (self->archive->client.closer)
294                                         ((struct archive *)self->archive, self->data);
295                     self->data = data2;
296                     r2 = client_open_proxy(self);
297           }
298           return (r1 < r2) ? r1 : r2;
299 }
300 
301 int
archive_read_set_open_callback(struct archive * _a,archive_open_callback * client_opener)302 archive_read_set_open_callback(struct archive *_a,
303     archive_open_callback *client_opener)
304 {
305           struct archive_read *a = (struct archive_read *)_a;
306           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
307               "archive_read_set_open_callback");
308           a->client.opener = client_opener;
309           return ARCHIVE_OK;
310 }
311 
312 int
archive_read_set_read_callback(struct archive * _a,archive_read_callback * client_reader)313 archive_read_set_read_callback(struct archive *_a,
314     archive_read_callback *client_reader)
315 {
316           struct archive_read *a = (struct archive_read *)_a;
317           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
318               "archive_read_set_read_callback");
319           a->client.reader = client_reader;
320           return ARCHIVE_OK;
321 }
322 
323 int
archive_read_set_skip_callback(struct archive * _a,archive_skip_callback * client_skipper)324 archive_read_set_skip_callback(struct archive *_a,
325     archive_skip_callback *client_skipper)
326 {
327           struct archive_read *a = (struct archive_read *)_a;
328           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
329               "archive_read_set_skip_callback");
330           a->client.skipper = client_skipper;
331           return ARCHIVE_OK;
332 }
333 
334 int
archive_read_set_seek_callback(struct archive * _a,archive_seek_callback * client_seeker)335 archive_read_set_seek_callback(struct archive *_a,
336     archive_seek_callback *client_seeker)
337 {
338           struct archive_read *a = (struct archive_read *)_a;
339           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
340               "archive_read_set_seek_callback");
341           a->client.seeker = client_seeker;
342           return ARCHIVE_OK;
343 }
344 
345 int
archive_read_set_close_callback(struct archive * _a,archive_close_callback * client_closer)346 archive_read_set_close_callback(struct archive *_a,
347     archive_close_callback *client_closer)
348 {
349           struct archive_read *a = (struct archive_read *)_a;
350           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
351               "archive_read_set_close_callback");
352           a->client.closer = client_closer;
353           return ARCHIVE_OK;
354 }
355 
356 int
archive_read_set_switch_callback(struct archive * _a,archive_switch_callback * client_switcher)357 archive_read_set_switch_callback(struct archive *_a,
358     archive_switch_callback *client_switcher)
359 {
360           struct archive_read *a = (struct archive_read *)_a;
361           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
362               "archive_read_set_switch_callback");
363           a->client.switcher = client_switcher;
364           return ARCHIVE_OK;
365 }
366 
/*
 * Set the client data for the first data node (index 0); shorthand for
 * archive_read_set_callback_data2() with that index.
 */
int
archive_read_set_callback_data(struct archive *_a, void *client_data)
{
	const unsigned int first_node = 0;

	return (archive_read_set_callback_data2(_a, client_data, first_node));
}
372 
373 int
archive_read_set_callback_data2(struct archive * _a,void * client_data,unsigned int iindex)374 archive_read_set_callback_data2(struct archive *_a, void *client_data,
375     unsigned int iindex)
376 {
377           struct archive_read *a = (struct archive_read *)_a;
378           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
379               "archive_read_set_callback_data2");
380 
381           if (a->client.nodes == 0)
382           {
383                     a->client.dataset = (struct archive_read_data_node *)
384                         calloc(1, sizeof(*a->client.dataset));
385                     if (a->client.dataset == NULL)
386                     {
387                               archive_set_error(&a->archive, ENOMEM,
388                                         "No memory.");
389                               return ARCHIVE_FATAL;
390                     }
391                     a->client.nodes = 1;
392           }
393 
394           if (iindex > a->client.nodes - 1)
395           {
396                     archive_set_error(&a->archive, EINVAL,
397                               "Invalid index specified.");
398                     return ARCHIVE_FATAL;
399           }
400           a->client.dataset[iindex].data = client_data;
401           a->client.dataset[iindex].begin_position = -1;
402           a->client.dataset[iindex].total_size = -1;
403           return ARCHIVE_OK;
404 }
405 
406 int
archive_read_add_callback_data(struct archive * _a,void * client_data,unsigned int iindex)407 archive_read_add_callback_data(struct archive *_a, void *client_data,
408     unsigned int iindex)
409 {
410           struct archive_read *a = (struct archive_read *)_a;
411           void *p;
412           unsigned int i;
413 
414           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
415               "archive_read_add_callback_data");
416           if (iindex > a->client.nodes) {
417                     archive_set_error(&a->archive, EINVAL,
418                               "Invalid index specified.");
419                     return ARCHIVE_FATAL;
420           }
421           p = realloc(a->client.dataset, sizeof(*a->client.dataset)
422                     * (++(a->client.nodes)));
423           if (p == NULL) {
424                     archive_set_error(&a->archive, ENOMEM,
425                               "No memory.");
426                     return ARCHIVE_FATAL;
427           }
428           a->client.dataset = (struct archive_read_data_node *)p;
429           for (i = a->client.nodes - 1; i > iindex; i--) {
430                     a->client.dataset[i].data = a->client.dataset[i-1].data;
431                     a->client.dataset[i].begin_position = -1;
432                     a->client.dataset[i].total_size = -1;
433           }
434           a->client.dataset[iindex].data = client_data;
435           a->client.dataset[iindex].begin_position = -1;
436           a->client.dataset[iindex].total_size = -1;
437           return ARCHIVE_OK;
438 }
439 
440 int
archive_read_append_callback_data(struct archive * _a,void * client_data)441 archive_read_append_callback_data(struct archive *_a, void *client_data)
442 {
443           struct archive_read *a = (struct archive_read *)_a;
444           return archive_read_add_callback_data(_a, client_data, a->client.nodes);
445 }
446 
/*
 * Add `client_data` as a new data node ahead of all existing nodes, by
 * inserting at index 0.
 */
int
archive_read_prepend_callback_data(struct archive *_a, void *client_data)
{
	const unsigned int front_index = 0;

	return (archive_read_add_callback_data(_a, client_data, front_index));
}
452 
/*
 * Filter vtable for the "none" filter that archive_read_open1() creates
 * to wrap the client callbacks: reads and closes are forwarded to the
 * client through the proxy functions above.
 */
static const struct archive_read_filter_vtable
none_reader_vtable = {
	.read = client_read_proxy,
	.close = client_close_proxy,
};
458 
459 int
archive_read_open1(struct archive * _a)460 archive_read_open1(struct archive *_a)
461 {
462           struct archive_read *a = (struct archive_read *)_a;
463           struct archive_read_filter *filter, *tmp;
464           int slot, e = ARCHIVE_OK;
465 
466           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
467               "archive_read_open");
468           archive_clear_error(&a->archive);
469 
470           if (a->client.reader == NULL) {
471                     archive_set_error(&a->archive, EINVAL,
472                         "No reader function provided to archive_read_open");
473                     a->archive.state = ARCHIVE_STATE_FATAL;
474                     return (ARCHIVE_FATAL);
475           }
476 
477           /* Open data source. */
478           if (a->client.opener != NULL) {
479                     e = (a->client.opener)(&a->archive, a->client.dataset[0].data);
480                     if (e != 0) {
481                               /* If the open failed, call the closer to clean up. */
482                               read_client_close_proxy(a);
483                               return (e);
484                     }
485           }
486 
487           filter = calloc(1, sizeof(*filter));
488           if (filter == NULL)
489                     return (ARCHIVE_FATAL);
490           filter->bidder = NULL;
491           filter->upstream = NULL;
492           filter->archive = a;
493           filter->data = a->client.dataset[0].data;
494           filter->vtable = &none_reader_vtable;
495           filter->name = "none";
496           filter->code = ARCHIVE_FILTER_NONE;
497           filter->can_skip = 1;
498           filter->can_seek = 1;
499 
500           a->client.dataset[0].begin_position = 0;
501           if (!a->filter || !a->bypass_filter_bidding)
502           {
503                     a->filter = filter;
504                     /* Build out the input pipeline. */
505                     e = choose_filters(a);
506                     if (e < ARCHIVE_WARN) {
507                               a->archive.state = ARCHIVE_STATE_FATAL;
508                               return (ARCHIVE_FATAL);
509                     }
510           }
511           else
512           {
513                     /* Need to add "NONE" type filter at the end of the filter chain */
514                     tmp = a->filter;
515                     while (tmp->upstream)
516                               tmp = tmp->upstream;
517                     tmp->upstream = filter;
518           }
519 
520           if (!a->format)
521           {
522                     slot = choose_format(a);
523                     if (slot < 0) {
524                               close_filters(a);
525                               a->archive.state = ARCHIVE_STATE_FATAL;
526                               return (ARCHIVE_FATAL);
527                     }
528                     a->format = &(a->formats[slot]);
529           }
530 
531           a->archive.state = ARCHIVE_STATE_HEADER;
532 
533           /* Ensure libarchive starts from the first node in a multivolume set */
534           client_switch_proxy(a->filter, 0);
535           return (e);
536 }
537 
538 /*
539  * Allow each registered stream transform to bid on whether
540  * it wants to handle this stream.  Repeat until we've finished
541  * building the pipeline.
542  */
543 
544 /* We won't build a filter pipeline with more stages than this. */
545 #define MAX_NUMBER_FILTERS 25
546 
547 static int
choose_filters(struct archive_read * a)548 choose_filters(struct archive_read *a)
549 {
550           int number_bidders, i, bid, best_bid, number_filters;
551           struct archive_read_filter_bidder *bidder, *best_bidder;
552           struct archive_read_filter *filter;
553           ssize_t avail;
554           int r;
555 
556           for (number_filters = 0; number_filters < MAX_NUMBER_FILTERS; ++number_filters) {
557                     number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]);
558 
559                     best_bid = 0;
560                     best_bidder = NULL;
561 
562                     bidder = a->bidders;
563                     for (i = 0; i < number_bidders; i++, bidder++) {
564                               if (bidder->vtable == NULL)
565                                         continue;
566                               bid = (bidder->vtable->bid)(bidder, a->filter);
567                               if (bid > best_bid) {
568                                         best_bid = bid;
569                                         best_bidder = bidder;
570                               }
571                     }
572 
573                     /* If no bidder, we're done. */
574                     if (best_bidder == NULL) {
575                               /* Verify the filter by asking it for some data. */
576                               __archive_read_filter_ahead(a->filter, 1, &avail);
577                               if (avail < 0) {
578                                         __archive_read_free_filters(a);
579                                         return (ARCHIVE_FATAL);
580                               }
581                               return (ARCHIVE_OK);
582                     }
583 
584                     filter
585                         = calloc(1, sizeof(*filter));
586                     if (filter == NULL)
587                               return (ARCHIVE_FATAL);
588                     filter->bidder = best_bidder;
589                     filter->archive = a;
590                     filter->upstream = a->filter;
591                     a->filter = filter;
592                     r = (best_bidder->vtable->init)(a->filter);
593                     if (r != ARCHIVE_OK) {
594                               __archive_read_free_filters(a);
595                               return (ARCHIVE_FATAL);
596                     }
597           }
598           archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
599               "Input requires too many filters for decoding");
600           return (ARCHIVE_FATAL);
601 }
602 
603 int
__archive_read_header(struct archive_read * a,struct archive_entry * entry)604 __archive_read_header(struct archive_read *a, struct archive_entry *entry)
605 {
606           if (!a->filter->vtable->read_header)
607                     return (ARCHIVE_OK);
608           return a->filter->vtable->read_header(a->filter, entry);
609 }
610 
611 /*
612  * Read header of next entry.
613  */
614 static int
_archive_read_next_header2(struct archive * _a,struct archive_entry * entry)615 _archive_read_next_header2(struct archive *_a, struct archive_entry *entry)
616 {
617           struct archive_read *a = (struct archive_read *)_a;
618           int r1 = ARCHIVE_OK, r2;
619 
620           archive_check_magic(_a, ARCHIVE_READ_MAGIC,
621               ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA,
622               "archive_read_next_header");
623 
624           archive_entry_clear(entry);
625           archive_clear_error(&a->archive);
626 
627           /*
628            * If client didn't consume entire data, skip any remainder
629            * (This is especially important for GNU incremental directories.)
630            */
631           if (a->archive.state == ARCHIVE_STATE_DATA) {
632                     r1 = archive_read_data_skip(&a->archive);
633                     if (r1 == ARCHIVE_EOF)
634                               archive_set_error(&a->archive, EIO,
635                                   "Premature end-of-file.");
636                     if (r1 == ARCHIVE_EOF || r1 == ARCHIVE_FATAL) {
637                               a->archive.state = ARCHIVE_STATE_FATAL;
638                               return (ARCHIVE_FATAL);
639                     }
640           }
641 
642           /* Record start-of-header offset in uncompressed stream. */
643           a->header_position = a->filter->position;
644 
645           ++_a->file_count;
646           r2 = (a->format->read_header)(a, entry);
647 
648           /*
649            * EOF and FATAL are persistent at this layer.  By
650            * modifying the state, we guarantee that future calls to
651            * read a header or read data will fail.
652            */
653           switch (r2) {
654           case ARCHIVE_EOF:
655                     a->archive.state = ARCHIVE_STATE_EOF;
656                     --_a->file_count;/* Revert a file counter. */
657                     break;
658           case ARCHIVE_OK:
659                     a->archive.state = ARCHIVE_STATE_DATA;
660                     break;
661           case ARCHIVE_WARN:
662                     a->archive.state = ARCHIVE_STATE_DATA;
663                     break;
664           case ARCHIVE_RETRY:
665                     break;
666           case ARCHIVE_FATAL:
667                     a->archive.state = ARCHIVE_STATE_FATAL;
668                     break;
669           }
670 
671           __archive_reset_read_data(&a->archive);
672 
673           a->data_start_node = a->client.cursor;
674           /* EOF always wins; otherwise return the worst error. */
675           return (r2 < r1 || r2 == ARCHIVE_EOF) ? r2 : r1;
676 }
677 
678 static int
_archive_read_next_header(struct archive * _a,struct archive_entry ** entryp)679 _archive_read_next_header(struct archive *_a, struct archive_entry **entryp)
680 {
681           int ret;
682           struct archive_read *a = (struct archive_read *)_a;
683           *entryp = NULL;
684           ret = _archive_read_next_header2(_a, a->entry);
685           *entryp = a->entry;
686           return ret;
687 }
688 
689 /*
690  * Allow each registered format to bid on whether it wants to handle
691  * the next entry.  Return index of winning bidder.
692  */
693 static int
choose_format(struct archive_read * a)694 choose_format(struct archive_read *a)
695 {
696           int slots;
697           int i;
698           int bid, best_bid;
699           int best_bid_slot;
700 
701           slots = sizeof(a->formats) / sizeof(a->formats[0]);
702           best_bid = -1;
703           best_bid_slot = -1;
704 
705           /* Set up a->format for convenience of bidders. */
706           a->format = &(a->formats[0]);
707           for (i = 0; i < slots; i++, a->format++) {
708                     if (a->format->bid) {
709                               bid = (a->format->bid)(a, best_bid);
710                               if (bid == ARCHIVE_FATAL)
711                                         return (ARCHIVE_FATAL);
712                               if (a->filter->position != 0)
713                                         __archive_read_seek(a, 0, SEEK_SET);
714                               if ((bid > best_bid) || (best_bid_slot < 0)) {
715                                         best_bid = bid;
716                                         best_bid_slot = i;
717                               }
718                     }
719           }
720 
721           /*
722            * There were no bidders; this is a serious programmer error
723            * and demands a quick and definitive abort.
724            */
725           if (best_bid_slot < 0) {
726                     archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
727                         "No formats registered");
728                     return (ARCHIVE_FATAL);
729           }
730 
731           /*
732            * There were bidders, but no non-zero bids; this means we
733            * can't support this stream.
734            */
735           if (best_bid < 1) {
736                     archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
737                         "Unrecognized archive format");
738                     return (ARCHIVE_FATAL);
739           }
740 
741           return (best_bid_slot);
742 }
743 
744 /*
745  * Return the file offset (within the uncompressed data stream) where
746  * the last header started.
747  */
748 la_int64_t
archive_read_header_position(struct archive * _a)749 archive_read_header_position(struct archive *_a)
750 {
751           struct archive_read *a = (struct archive_read *)_a;
752           archive_check_magic(_a, ARCHIVE_READ_MAGIC,
753               ARCHIVE_STATE_ANY, "archive_read_header_position");
754           return (a->header_position);
755 }
756 
757 /*
758  * Returns 1 if the archive contains at least one encrypted entry.
759  * If the archive format not support encryption at all
760  * ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED is returned.
761  * If for any other reason (e.g. not enough data read so far)
762  * we cannot say whether there are encrypted entries, then
763  * ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW is returned.
764  * In general, this function will return values below zero when the
765  * reader is uncertain or totally incapable of encryption support.
766  * When this function returns 0 you can be sure that the reader
767  * supports encryption detection but no encrypted entries have
768  * been found yet.
769  *
770  * NOTE: If the metadata/header of an archive is also encrypted, you
771  * cannot rely on the number of encrypted entries. That is why this
772  * function does not return the number of encrypted entries but#
773  * just shows that there are some.
774  */
775 int
archive_read_has_encrypted_entries(struct archive * _a)776 archive_read_has_encrypted_entries(struct archive *_a)
777 {
778           struct archive_read *a = (struct archive_read *)_a;
779           int format_supports_encryption = archive_read_format_capabilities(_a)
780                               & (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA);
781 
782           if (!_a || !format_supports_encryption) {
783                     /* Format in general doesn't support encryption */
784                     return ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED;
785           }
786 
787           /* A reader potentially has read enough data now. */
788           if (a->format && a->format->has_encrypted_entries) {
789                     return (a->format->has_encrypted_entries)(a);
790           }
791 
792           /* For any other reason we cannot say how many entries are there. */
793           return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW;
794 }
795 
796 /*
797  * Returns a bitmask of capabilities that are supported by the archive format reader.
798  * If the reader has no special capabilities, ARCHIVE_READ_FORMAT_CAPS_NONE is returned.
799  */
800 int
archive_read_format_capabilities(struct archive * _a)801 archive_read_format_capabilities(struct archive *_a)
802 {
803           struct archive_read *a = (struct archive_read *)_a;
804           if (a && a->format && a->format->format_capabilties) {
805                     return (a->format->format_capabilties)(a);
806           }
807           return ARCHIVE_READ_FORMAT_CAPS_NONE;
808 }
809 
810 /*
811  * Read data from an archive entry, using a read(2)-style interface.
812  * This is a convenience routine that just calls
813  * archive_read_data_block and copies the results into the client
814  * buffer, filling any gaps with zero bytes.  Clients using this
815  * API can be completely ignorant of sparse-file issues; sparse files
816  * will simply be padded with nulls.
817  *
818  * DO NOT intermingle calls to this function and archive_read_data_block
819  * to read a single entry body.
820  */
821 la_ssize_t
archive_read_data(struct archive * _a,void * buff,size_t s)822 archive_read_data(struct archive *_a, void *buff, size_t s)
823 {
824           struct archive *a = (struct archive *)_a;
825           char      *dest;
826           const void *read_buf;
827           size_t     bytes_read;
828           size_t     len;
829           int        r;
830 
831           bytes_read = 0;
832           dest = (char *)buff;
833 
834           while (s > 0) {
835                     if (a->read_data_offset == a->read_data_output_offset &&
836                         a->read_data_remaining == 0) {
837                               read_buf = a->read_data_block;
838                               a->read_data_is_posix_read = 1;
839                               a->read_data_requested = s;
840                               r = archive_read_data_block(a, &read_buf,
841                                   &a->read_data_remaining, &a->read_data_offset);
842                               a->read_data_block = read_buf;
843                               if (r == ARCHIVE_EOF)
844                                         return (bytes_read);
845                               /*
846                                * Error codes are all negative, so the status
847                                * return here cannot be confused with a valid
848                                * byte count.  (ARCHIVE_OK is zero.)
849                                */
850                               if (r < ARCHIVE_OK)
851                                         return (r);
852                     }
853 
854                     if (a->read_data_offset < a->read_data_output_offset) {
855                               archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
856                                   "Encountered out-of-order sparse blocks");
857                               return (ARCHIVE_RETRY);
858                     }
859 
860                     /* Compute the amount of zero padding needed. */
861                     if (a->read_data_output_offset + (int64_t)s <
862                         a->read_data_offset) {
863                               len = s;
864                     } else if (a->read_data_output_offset <
865                         a->read_data_offset) {
866                               len = (size_t)(a->read_data_offset -
867                                   a->read_data_output_offset);
868                     } else
869                               len = 0;
870 
871                     /* Add zeroes. */
872                     memset(dest, 0, len);
873                     s -= len;
874                     a->read_data_output_offset += len;
875                     dest += len;
876                     bytes_read += len;
877 
878                     /* Copy data if there is any space left. */
879                     if (s > 0) {
880                               len = a->read_data_remaining;
881                               if (len > s)
882                                         len = s;
883                               if (len) {
884                                         memcpy(dest, a->read_data_block, len);
885                                         s -= len;
886                                         a->read_data_block += len;
887                                         a->read_data_remaining -= len;
888                                         a->read_data_output_offset += len;
889                                         a->read_data_offset += len;
890                                         dest += len;
891                                         bytes_read += len;
892                               }
893                     }
894           }
895           a->read_data_is_posix_read = 0;
896           a->read_data_requested = 0;
897           return (bytes_read);
898 }
899 
900 /*
901  * Reset the read_data_* variables, used for starting a new entry.
902  */
__archive_reset_read_data(struct archive * a)903 void __archive_reset_read_data(struct archive * a)
904 {
905           a->read_data_output_offset = 0;
906           a->read_data_remaining = 0;
907           a->read_data_is_posix_read = 0;
908           a->read_data_requested = 0;
909 
910    /* extra resets, from rar.c */
911    a->read_data_block = NULL;
912    a->read_data_offset = 0;
913 }
914 
915 /*
916  * Skip over all remaining data in this entry.
917  */
918 int
archive_read_data_skip(struct archive * _a)919 archive_read_data_skip(struct archive *_a)
920 {
921           struct archive_read *a = (struct archive_read *)_a;
922           int r;
923           const void *buff;
924           size_t size;
925           int64_t offset;
926 
927           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
928               "archive_read_data_skip");
929 
930           if (a->format->read_data_skip != NULL)
931                     r = (a->format->read_data_skip)(a);
932           else {
933                     while ((r = archive_read_data_block(&a->archive,
934                                   &buff, &size, &offset))
935                         == ARCHIVE_OK)
936                               ;
937           }
938 
939           if (r == ARCHIVE_EOF)
940                     r = ARCHIVE_OK;
941 
942           a->archive.state = ARCHIVE_STATE_HEADER;
943           return (r);
944 }
945 
946 la_int64_t
archive_seek_data(struct archive * _a,int64_t offset,int whence)947 archive_seek_data(struct archive *_a, int64_t offset, int whence)
948 {
949           struct archive_read *a = (struct archive_read *)_a;
950           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
951               "archive_seek_data_block");
952 
953           if (a->format->seek_data == NULL) {
954                     archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
955                         "Internal error: "
956                         "No format_seek_data_block function registered");
957                     return (ARCHIVE_FATAL);
958           }
959 
960           return (a->format->seek_data)(a, offset, whence);
961 }
962 
963 /*
964  * Read the next block of entry data from the archive.
965  * This is a zero-copy interface; the client receives a pointer,
966  * size, and file offset of the next available block of data.
967  *
968  * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if
969  * the end of entry is encountered.
970  */
971 static int
_archive_read_data_block(struct archive * _a,const void ** buff,size_t * size,int64_t * offset)972 _archive_read_data_block(struct archive *_a,
973     const void **buff, size_t *size, int64_t *offset)
974 {
975           struct archive_read *a = (struct archive_read *)_a;
976           archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
977               "archive_read_data_block");
978 
979           if (a->format->read_data == NULL) {
980                     archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
981                         "Internal error: "
982                         "No format->read_data function registered");
983                     return (ARCHIVE_FATAL);
984           }
985 
986           return (a->format->read_data)(a, buff, size, offset);
987 }
988 
989 static int
close_filters(struct archive_read * a)990 close_filters(struct archive_read *a)
991 {
992           struct archive_read_filter *f = a->filter;
993           int r = ARCHIVE_OK;
994           /* Close each filter in the pipeline. */
995           while (f != NULL) {
996                     struct archive_read_filter *t = f->upstream;
997                     if (!f->closed && f->vtable != NULL) {
998                               int r1 = (f->vtable->close)(f);
999                               f->closed = 1;
1000                               if (r1 < r)
1001                                         r = r1;
1002                     }
1003                     free(f->buffer);
1004                     f->buffer = NULL;
1005                     f = t;
1006           }
1007           return r;
1008 }
1009 
1010 void
__archive_read_free_filters(struct archive_read * a)1011 __archive_read_free_filters(struct archive_read *a)
1012 {
1013           /* Make sure filters are closed and their buffers are freed */
1014           close_filters(a);
1015 
1016           while (a->filter != NULL) {
1017                     struct archive_read_filter *t = a->filter->upstream;
1018                     free(a->filter);
1019                     a->filter = t;
1020           }
1021 }
1022 
1023 /*
1024  * return the count of # of filters in use
1025  */
1026 static int
_archive_filter_count(struct archive * _a)1027 _archive_filter_count(struct archive *_a)
1028 {
1029           struct archive_read *a = (struct archive_read *)_a;
1030           struct archive_read_filter *p = a->filter;
1031           int count = 0;
1032           while(p) {
1033                     count++;
1034                     p = p->upstream;
1035           }
1036           return count;
1037 }
1038 
1039 /*
1040  * Close the file and all I/O.
1041  */
1042 static int
_archive_read_close(struct archive * _a)1043 _archive_read_close(struct archive *_a)
1044 {
1045           struct archive_read *a = (struct archive_read *)_a;
1046           int r = ARCHIVE_OK, r1 = ARCHIVE_OK;
1047 
1048           archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
1049               ARCHIVE_STATE_ANY | ARCHIVE_STATE_FATAL, "archive_read_close");
1050           if (a->archive.state == ARCHIVE_STATE_CLOSED)
1051                     return (ARCHIVE_OK);
1052           archive_clear_error(&a->archive);
1053           a->archive.state = ARCHIVE_STATE_CLOSED;
1054 
1055           /* TODO: Clean up the formatters. */
1056 
1057           /* Release the filter objects. */
1058           r1 = close_filters(a);
1059           if (r1 < r)
1060                     r = r1;
1061 
1062           return (r);
1063 }
1064 
1065 /*
1066  * Release memory and other resources.
1067  */
1068 static int
_archive_read_free(struct archive * _a)1069 _archive_read_free(struct archive *_a)
1070 {
1071           struct archive_read *a = (struct archive_read *)_a;
1072           struct archive_read_passphrase *p;
1073           int i, n;
1074           int slots;
1075           int r = ARCHIVE_OK;
1076 
1077           if (_a == NULL)
1078                     return (ARCHIVE_OK);
1079           archive_check_magic(_a, ARCHIVE_READ_MAGIC,
1080               ARCHIVE_STATE_ANY | ARCHIVE_STATE_FATAL, "archive_read_free");
1081           if (a->archive.state != ARCHIVE_STATE_CLOSED
1082               && a->archive.state != ARCHIVE_STATE_FATAL)
1083                     r = archive_read_close(&a->archive);
1084 
1085           /* Call cleanup functions registered by optional components. */
1086           if (a->cleanup_archive_extract != NULL)
1087                     r = (a->cleanup_archive_extract)(a);
1088 
1089           /* Cleanup format-specific data. */
1090           slots = sizeof(a->formats) / sizeof(a->formats[0]);
1091           for (i = 0; i < slots; i++) {
1092                     a->format = &(a->formats[i]);
1093                     if (a->formats[i].cleanup)
1094                               (a->formats[i].cleanup)(a);
1095           }
1096 
1097           /* Free the filters */
1098           __archive_read_free_filters(a);
1099 
1100           /* Release the bidder objects. */
1101           n = sizeof(a->bidders)/sizeof(a->bidders[0]);
1102           for (i = 0; i < n; i++) {
1103                     if (a->bidders[i].vtable == NULL ||
1104                         a->bidders[i].vtable->free == NULL)
1105                               continue;
1106                     (a->bidders[i].vtable->free)(&a->bidders[i]);
1107           }
1108 
1109           /* Release passphrase list. */
1110           p = a->passphrases.first;
1111           while (p != NULL) {
1112                     struct archive_read_passphrase *np = p->next;
1113 
1114                     /* A passphrase should be cleaned. */
1115                     memset(p->passphrase, 0, strlen(p->passphrase));
1116                     free(p->passphrase);
1117                     free(p);
1118                     p = np;
1119           }
1120 
1121           archive_string_free(&a->archive.error_string);
1122           archive_entry_free(a->entry);
1123           a->archive.magic = 0;
1124           __archive_clean(&a->archive);
1125           free(a->client.dataset);
1126           free(a);
1127           return (r);
1128 }
1129 
1130 static struct archive_read_filter *
get_filter(struct archive * _a,int n)1131 get_filter(struct archive *_a, int n)
1132 {
1133           struct archive_read *a = (struct archive_read *)_a;
1134           struct archive_read_filter *f = a->filter;
1135           /* We use n == -1 for 'the last filter', which is always the
1136            * client proxy. */
1137           if (n == -1 && f != NULL) {
1138                     struct archive_read_filter *last = f;
1139                     f = f->upstream;
1140                     while (f != NULL) {
1141                               last = f;
1142                               f = f->upstream;
1143                     }
1144                     return (last);
1145           }
1146           if (n < 0)
1147                     return NULL;
1148           while (n > 0 && f != NULL) {
1149                     f = f->upstream;
1150                     --n;
1151           }
1152           return (f);
1153 }
1154 
1155 static int
_archive_filter_code(struct archive * _a,int n)1156 _archive_filter_code(struct archive *_a, int n)
1157 {
1158           struct archive_read_filter *f = get_filter(_a, n);
1159           return f == NULL ? -1 : f->code;
1160 }
1161 
1162 static const char *
_archive_filter_name(struct archive * _a,int n)1163 _archive_filter_name(struct archive *_a, int n)
1164 {
1165           struct archive_read_filter *f = get_filter(_a, n);
1166           return f != NULL ? f->name : NULL;
1167 }
1168 
1169 static int64_t
_archive_filter_bytes(struct archive * _a,int n)1170 _archive_filter_bytes(struct archive *_a, int n)
1171 {
1172           struct archive_read_filter *f = get_filter(_a, n);
1173           return f == NULL ? -1 : f->position;
1174 }
1175 
1176 /*
1177  * Used internally by read format handlers to register their bid and
1178  * initialization functions.
1179  */
1180 int
__archive_read_register_format(struct archive_read * a,void * format_data,const char * name,int (* bid)(struct archive_read *,int),int (* options)(struct archive_read *,const char *,const char *),int (* read_header)(struct archive_read *,struct archive_entry *),int (* read_data)(struct archive_read *,const void **,size_t *,int64_t *),int (* read_data_skip)(struct archive_read *),int64_t (* seek_data)(struct archive_read *,int64_t,int),int (* cleanup)(struct archive_read *),int (* format_capabilities)(struct archive_read *),int (* has_encrypted_entries)(struct archive_read *))1181 __archive_read_register_format(struct archive_read *a,
1182     void *format_data,
1183     const char *name,
1184     int (*bid)(struct archive_read *, int),
1185     int (*options)(struct archive_read *, const char *, const char *),
1186     int (*read_header)(struct archive_read *, struct archive_entry *),
1187     int (*read_data)(struct archive_read *, const void **, size_t *, int64_t *),
1188     int (*read_data_skip)(struct archive_read *),
1189     int64_t (*seek_data)(struct archive_read *, int64_t, int),
1190     int (*cleanup)(struct archive_read *),
1191     int (*format_capabilities)(struct archive_read *),
1192     int (*has_encrypted_entries)(struct archive_read *))
1193 {
1194           int i, number_slots;
1195 
1196           archive_check_magic(&a->archive,
1197               ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
1198               "__archive_read_register_format");
1199 
1200           number_slots = sizeof(a->formats) / sizeof(a->formats[0]);
1201 
1202           for (i = 0; i < number_slots; i++) {
1203                     if (a->formats[i].bid == bid)
1204                               return (ARCHIVE_WARN); /* We've already installed */
1205                     if (a->formats[i].bid == NULL) {
1206                               a->formats[i].bid = bid;
1207                               a->formats[i].options = options;
1208                               a->formats[i].read_header = read_header;
1209                               a->formats[i].read_data = read_data;
1210                               a->formats[i].read_data_skip = read_data_skip;
1211                               a->formats[i].seek_data = seek_data;
1212                               a->formats[i].cleanup = cleanup;
1213                               a->formats[i].data = format_data;
1214                               a->formats[i].name = name;
1215                               a->formats[i].format_capabilties = format_capabilities;
1216                               a->formats[i].has_encrypted_entries = has_encrypted_entries;
1217                               return (ARCHIVE_OK);
1218                     }
1219           }
1220 
1221           archive_set_error(&a->archive, ENOMEM,
1222               "Not enough slots for format registration");
1223           return (ARCHIVE_FATAL);
1224 }
1225 
1226 /*
1227  * Used internally by decompression routines to register their bid and
1228  * initialization functions.
1229  */
1230 int
__archive_read_register_bidder(struct archive_read * a,void * bidder_data,const char * name,const struct archive_read_filter_bidder_vtable * vtable)1231 __archive_read_register_bidder(struct archive_read *a,
1232           void *bidder_data,
1233           const char *name,
1234           const struct archive_read_filter_bidder_vtable *vtable)
1235 {
1236           struct archive_read_filter_bidder *bidder;
1237           int i, number_slots;
1238 
1239           archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
1240               ARCHIVE_STATE_NEW, "__archive_read_register_bidder");
1241 
1242           number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]);
1243 
1244           for (i = 0; i < number_slots; i++) {
1245                     if (a->bidders[i].vtable != NULL)
1246                               continue;
1247                     memset(a->bidders + i, 0, sizeof(a->bidders[0]));
1248                     bidder = (a->bidders + i);
1249                     bidder->data = bidder_data;
1250                     bidder->name = name;
1251                     bidder->vtable = vtable;
1252                     if (bidder->vtable->bid == NULL || bidder->vtable->init == NULL) {
1253                               archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1254                                                   "Internal error: "
1255                                                   "no bid/init for filter bidder");
1256                               return (ARCHIVE_FATAL);
1257                     }
1258 
1259                     return (ARCHIVE_OK);
1260           }
1261 
1262           archive_set_error(&a->archive, ENOMEM,
1263               "Not enough slots for filter registration");
1264           return (ARCHIVE_FATAL);
1265 }
1266 
1267 /*
1268  * The next section implements the peek/consume internal I/O
1269  * system used by archive readers.  This system allows simple
1270  * read-ahead for consumers while preserving zero-copy operation
1271  * most of the time.
1272  *
1273  * The two key operations:
1274  *  * The read-ahead function returns a pointer to a block of data
1275  *    that satisfies a minimum request.
1276  *  * The consume function advances the file pointer.
1277  *
1278  * In the ideal case, filters generate blocks of data
1279  * and __archive_read_ahead() just returns pointers directly into
1280  * those blocks.  Then __archive_read_consume() just bumps those
1281  * pointers.  Only if your request would span blocks does the I/O
1282  * layer use a copy buffer to provide you with a contiguous block of
1283  * data.
1284  *
1285  * A couple of useful idioms:
1286  *  * "I just want some data."  Ask for 1 byte and pay attention to
1287  *    the "number of bytes available" from __archive_read_ahead().
1288  *    Consume whatever you actually use.
1289  *  * "I want to output a large block of data."  As above, ask for 1 byte,
1290  *    emit all that's available (up to whatever limit you have), consume
1291  *    it all, then repeat until you're done.  This effectively means that
1292  *    you're passing along the blocks that came from your provider.
1293  *  * "I want to peek ahead by a large amount."  Ask for 4k or so, then
1294  *    double and repeat until you get an error or have enough.  Note
1295  *    that the I/O layer will likely end up expanding its copy buffer
1296  *    to fit your request, so use this technique cautiously.  This
1297  *    technique is used, for example, by some of the format tasting
1298  *    code that has uncertain look-ahead needs.
1299  */
1300 
1301 /*
1302  * Looks ahead in the input stream:
1303  *  * If 'avail' pointer is provided, that returns number of bytes available
1304  *    in the current buffer, which may be much larger than requested.
1305  *  * If end-of-file, *avail gets set to zero.
1306  *  * If error, *avail gets error code.
1307  *  * If request can be met, returns pointer to data.
1308  *  * If minimum request cannot be met, returns NULL.
1309  *
1310  * Note: If you just want "some data", ask for 1 byte and pay attention
1311  * to *avail, which will have the actual amount available.  If you
1312  * know exactly how many bytes you need, just ask for that and treat
1313  * a NULL return as an error.
1314  *
1315  * Important:  This does NOT move the file pointer.  See
1316  * __archive_read_consume() below.
1317  */
1318 const void *
__archive_read_ahead(struct archive_read * a,size_t min,ssize_t * avail)1319 __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
1320 {
1321           return (__archive_read_filter_ahead(a->filter, min, avail));
1322 }
1323 
1324 const void *
__archive_read_filter_ahead(struct archive_read_filter * filter,size_t min,ssize_t * avail)1325 __archive_read_filter_ahead(struct archive_read_filter *filter,
1326     size_t min, ssize_t *avail)
1327 {
1328           ssize_t bytes_read;
1329           size_t tocopy;
1330 
1331           if (filter->fatal) {
1332                     if (avail)
1333                               *avail = ARCHIVE_FATAL;
1334                     return (NULL);
1335           }
1336 
1337           /*
1338            * Keep pulling more data until we can satisfy the request.
1339            */
1340           for (;;) {
1341 
1342                     /*
1343                      * If we can satisfy from the copy buffer (and the
1344                      * copy buffer isn't empty), we're done.  In particular,
1345                      * note that min == 0 is a perfectly well-defined
1346                      * request.
1347                      */
1348                     if (filter->avail >= min && filter->avail > 0) {
1349                               if (avail != NULL)
1350                                         *avail = filter->avail;
1351                               return (filter->next);
1352                     }
1353 
1354                     /*
1355                      * We can satisfy directly from client buffer if everything
1356                      * currently in the copy buffer is still in the client buffer.
1357                      */
1358                     if (filter->client_total >= filter->client_avail + filter->avail
1359                         && filter->client_avail + filter->avail >= min) {
1360                               /* "Roll back" to client buffer. */
1361                               filter->client_avail += filter->avail;
1362                               filter->client_next -= filter->avail;
1363                               /* Copy buffer is now empty. */
1364                               filter->avail = 0;
1365                               filter->next = filter->buffer;
1366                               /* Return data from client buffer. */
1367                               if (avail != NULL)
1368                                         *avail = filter->client_avail;
1369                               return (filter->client_next);
1370                     }
1371 
1372                     /* Move data forward in copy buffer if necessary. */
1373                     if (filter->next > filter->buffer &&
1374                         filter->next + min > filter->buffer + filter->buffer_size) {
1375                               if (filter->avail > 0)
1376                                         memmove(filter->buffer, filter->next,
1377                                             filter->avail);
1378                               filter->next = filter->buffer;
1379                     }
1380 
1381                     /* If we've used up the client data, get more. */
1382                     if (filter->client_avail <= 0) {
1383                               if (filter->end_of_file) {
1384                                         if (avail != NULL)
1385                                                   *avail = filter->avail;
1386                                         return (NULL);
1387                               }
1388                               bytes_read = (filter->vtable->read)(filter,
1389                                   &filter->client_buff);
1390                               if (bytes_read < 0) {                   /* Read error. */
1391                                         filter->client_total = filter->client_avail = 0;
1392                                         filter->client_next =
1393                                             filter->client_buff = NULL;
1394                                         filter->fatal = 1;
1395                                         if (avail != NULL)
1396                                                   *avail = ARCHIVE_FATAL;
1397                                         return (NULL);
1398                               }
1399                               if (bytes_read == 0) {
1400                                         /* Check for another client object first */
1401                                         if (filter->archive->client.cursor !=
1402                                               filter->archive->client.nodes - 1) {
1403                                                   if (client_switch_proxy(filter,
1404                                                       filter->archive->client.cursor + 1)
1405                                                       == ARCHIVE_OK)
1406                                                             continue;
1407                                         }
1408                                         /* Premature end-of-file. */
1409                                         filter->client_total = filter->client_avail = 0;
1410                                         filter->client_next =
1411                                             filter->client_buff = NULL;
1412                                         filter->end_of_file = 1;
1413                                         /* Return whatever we do have. */
1414                                         if (avail != NULL)
1415                                                   *avail = filter->avail;
1416                                         return (NULL);
1417                               }
1418                               filter->client_total = bytes_read;
1419                               filter->client_avail = filter->client_total;
1420                               filter->client_next = filter->client_buff;
1421                     } else {
1422                               /*
1423                                * We can't satisfy the request from the copy
1424                                * buffer or the existing client data, so we
1425                                * need to copy more client data over to the
1426                                * copy buffer.
1427                                */
1428 
1429                               /* Ensure the buffer is big enough. */
1430                               if (min > filter->buffer_size) {
1431                                         size_t s, t;
1432                                         char *p;
1433 
1434                                         /* Double the buffer; watch for overflow. */
1435                                         s = t = filter->buffer_size;
1436                                         if (s == 0)
1437                                                   s = min;
1438                                         while (s < min) {
1439                                                   t *= 2;
1440                                                   if (t <= s) { /* Integer overflow! */
1441                                                             archive_set_error(
1442                                                                 &filter->archive->archive,
1443                                                                 ENOMEM,
1444                                                                 "Unable to allocate copy"
1445                                                                 " buffer");
1446                                                             filter->fatal = 1;
1447                                                             if (avail != NULL)
1448                                                                       *avail = ARCHIVE_FATAL;
1449                                                             return (NULL);
1450                                                   }
1451                                                   s = t;
1452                                         }
1453                                         /* Now s >= min, so allocate a new buffer. */
1454                                         p = malloc(s);
1455                                         if (p == NULL) {
1456                                                   archive_set_error(
1457                                                             &filter->archive->archive,
1458                                                             ENOMEM,
1459                                                       "Unable to allocate copy buffer");
1460                                                   filter->fatal = 1;
1461                                                   if (avail != NULL)
1462                                                             *avail = ARCHIVE_FATAL;
1463                                                   return (NULL);
1464                                         }
1465                                         /* Move data into newly-enlarged buffer. */
1466                                         if (filter->avail > 0)
1467                                                   memmove(p, filter->next, filter->avail);
1468                                         free(filter->buffer);
1469                                         filter->next = filter->buffer = p;
1470                                         filter->buffer_size = s;
1471                               }
1472 
1473                               /* We can add client data to copy buffer. */
1474                               /* First estimate: copy to fill rest of buffer. */
1475                               tocopy = (filter->buffer + filter->buffer_size)
1476                                   - (filter->next + filter->avail);
1477                               /* Don't waste time buffering more than we need to. */
1478                               if (tocopy + filter->avail > min)
1479                                         tocopy = min - filter->avail;
1480                               /* Don't copy more than is available. */
1481                               if (tocopy > filter->client_avail)
1482                                         tocopy = filter->client_avail;
1483 
1484                               memcpy(filter->next + filter->avail,
1485                                   filter->client_next, tocopy);
1486                               /* Remove this data from client buffer. */
1487                               filter->client_next += tocopy;
1488                               filter->client_avail -= tocopy;
1489                               /* add it to copy buffer. */
1490                               filter->avail += tocopy;
1491                     }
1492           }
1493 }
1494 
1495 /*
1496  * Move the file pointer forward.
1497  */
1498 int64_t
__archive_read_consume(struct archive_read * a,int64_t request)1499 __archive_read_consume(struct archive_read *a, int64_t request)
1500 {
1501           return (__archive_read_filter_consume(a->filter, request));
1502 }
1503 
1504 int64_t
__archive_read_filter_consume(struct archive_read_filter * filter,int64_t request)1505 __archive_read_filter_consume(struct archive_read_filter * filter,
1506     int64_t request)
1507 {
1508           int64_t skipped;
1509 
1510           if (request < 0)
1511                     return ARCHIVE_FATAL;
1512           if (request == 0)
1513                     return 0;
1514 
1515           skipped = advance_file_pointer(filter, request);
1516           if (skipped == request)
1517                     return (skipped);
1518           /* We hit EOF before we satisfied the skip request. */
1519           if (skipped < 0)  /* Map error code to 0 for error message below. */
1520                     skipped = 0;
1521           archive_set_error(&filter->archive->archive,
1522               ARCHIVE_ERRNO_MISC,
1523               "Truncated input file (needed %jd bytes, only %jd available)",
1524               (intmax_t)request, (intmax_t)skipped);
1525           return (ARCHIVE_FATAL);
1526 }
1527 
1528 /*
1529  * Advance the file pointer by the amount requested.
1530  * Returns the amount actually advanced, which may be less than the
1531  * request if EOF is encountered first.
1532  * Returns a negative value if there's an I/O error.
1533  */
1534 static int64_t
advance_file_pointer(struct archive_read_filter * filter,int64_t request)1535 advance_file_pointer(struct archive_read_filter *filter, int64_t request)
1536 {
1537           int64_t bytes_skipped, total_bytes_skipped = 0;
1538           ssize_t bytes_read;
1539           size_t min;
1540 
1541           if (filter->fatal)
1542                     return (-1);
1543 
1544           /* Use up the copy buffer first. */
1545           if (filter->avail > 0) {
1546                     min = (size_t)minimum(request, (int64_t)filter->avail);
1547                     filter->next += min;
1548                     filter->avail -= min;
1549                     request -= min;
1550                     filter->position += min;
1551                     total_bytes_skipped += min;
1552           }
1553 
1554           /* Then use up the client buffer. */
1555           if (filter->client_avail > 0) {
1556                     min = (size_t)minimum(request, (int64_t)filter->client_avail);
1557                     filter->client_next += min;
1558                     filter->client_avail -= min;
1559                     request -= min;
1560                     filter->position += min;
1561                     total_bytes_skipped += min;
1562           }
1563           if (request == 0)
1564                     return (total_bytes_skipped);
1565 
1566           /* If there's an optimized skip function, use it. */
1567           if (filter->can_skip != 0) {
1568                     bytes_skipped = client_skip_proxy(filter, request);
1569                     if (bytes_skipped < 0) {      /* error */
1570                               filter->fatal = 1;
1571                               return (bytes_skipped);
1572                     }
1573                     filter->position += bytes_skipped;
1574                     total_bytes_skipped += bytes_skipped;
1575                     request -= bytes_skipped;
1576                     if (request == 0)
1577                               return (total_bytes_skipped);
1578           }
1579 
1580           /* Use ordinary reads as necessary to complete the request. */
1581           for (;;) {
1582                     bytes_read = (filter->vtable->read)(filter, &filter->client_buff);
1583                     if (bytes_read < 0) {
1584                               filter->client_buff = NULL;
1585                               filter->fatal = 1;
1586                               return (bytes_read);
1587                     }
1588 
1589                     if (bytes_read == 0) {
1590                               if (filter->archive->client.cursor !=
1591                                     filter->archive->client.nodes - 1) {
1592                                         if (client_switch_proxy(filter,
1593                                             filter->archive->client.cursor + 1)
1594                                             == ARCHIVE_OK)
1595                                                   continue;
1596                               }
1597                               filter->client_buff = NULL;
1598                               filter->end_of_file = 1;
1599                               return (total_bytes_skipped);
1600                     }
1601 
1602                     if (bytes_read >= request) {
1603                               filter->client_next =
1604                                   ((const char *)filter->client_buff) + request;
1605                               filter->client_avail = (size_t)(bytes_read - request);
1606                               filter->client_total = bytes_read;
1607                               total_bytes_skipped += request;
1608                               filter->position += request;
1609                               return (total_bytes_skipped);
1610                     }
1611 
1612                     filter->position += bytes_read;
1613                     total_bytes_skipped += bytes_read;
1614                     request -= bytes_read;
1615           }
1616 }
1617 
1618 /**
1619  * Returns ARCHIVE_FAILED if seeking isn't supported.
1620  */
1621 int64_t
__archive_read_seek(struct archive_read * a,int64_t offset,int whence)1622 __archive_read_seek(struct archive_read *a, int64_t offset, int whence)
1623 {
1624           return __archive_read_filter_seek(a->filter, offset, whence);
1625 }
1626 
1627 int64_t
__archive_read_filter_seek(struct archive_read_filter * filter,int64_t offset,int whence)1628 __archive_read_filter_seek(struct archive_read_filter *filter, int64_t offset,
1629     int whence)
1630 {
1631           struct archive_read_client *client;
1632           int64_t r;
1633           unsigned int cursor;
1634 
1635           if (filter->closed || filter->fatal)
1636                     return (ARCHIVE_FATAL);
1637           if (filter->can_seek == 0)
1638                     return (ARCHIVE_FAILED);
1639 
1640           client = &(filter->archive->client);
1641           switch (whence) {
1642           case SEEK_CUR:
1643                     /* Adjust the offset and use SEEK_SET instead */
1644                     offset += filter->position;
1645                     __LA_FALLTHROUGH;
1646           case SEEK_SET:
1647                     cursor = 0;
1648                     while (1)
1649                     {
1650                               if (client->dataset[cursor].begin_position < 0 ||
1651                                   client->dataset[cursor].total_size < 0 ||
1652                                   client->dataset[cursor].begin_position +
1653                                     client->dataset[cursor].total_size - 1 > offset ||
1654                                   cursor + 1 >= client->nodes)
1655                                         break;
1656                               r = client->dataset[cursor].begin_position +
1657                                         client->dataset[cursor].total_size;
1658                               client->dataset[++cursor].begin_position = r;
1659                     }
1660                     while (1) {
1661                               r = client_switch_proxy(filter, cursor);
1662                               if (r != ARCHIVE_OK)
1663                                         return r;
1664                               if ((r = client_seek_proxy(filter, 0, SEEK_END)) < 0)
1665                                         return r;
1666                               client->dataset[cursor].total_size = r;
1667                               if (client->dataset[cursor].begin_position +
1668                                   client->dataset[cursor].total_size - 1 > offset ||
1669                                   cursor + 1 >= client->nodes)
1670                                         break;
1671                               r = client->dataset[cursor].begin_position +
1672                                         client->dataset[cursor].total_size;
1673                               client->dataset[++cursor].begin_position = r;
1674                     }
1675                     offset -= client->dataset[cursor].begin_position;
1676                     if (offset < 0
1677                         || offset > client->dataset[cursor].total_size)
1678                               return ARCHIVE_FATAL;
1679                     if ((r = client_seek_proxy(filter, offset, SEEK_SET)) < 0)
1680                               return r;
1681                     break;
1682 
1683           case SEEK_END:
1684                     cursor = 0;
1685                     while (1) {
1686                               if (client->dataset[cursor].begin_position < 0 ||
1687                                   client->dataset[cursor].total_size < 0 ||
1688                                   cursor + 1 >= client->nodes)
1689                                         break;
1690                               r = client->dataset[cursor].begin_position +
1691                                         client->dataset[cursor].total_size;
1692                               client->dataset[++cursor].begin_position = r;
1693                     }
1694                     while (1) {
1695                               r = client_switch_proxy(filter, cursor);
1696                               if (r != ARCHIVE_OK)
1697                                         return r;
1698                               if ((r = client_seek_proxy(filter, 0, SEEK_END)) < 0)
1699                                         return r;
1700                               client->dataset[cursor].total_size = r;
1701                               r = client->dataset[cursor].begin_position +
1702                                         client->dataset[cursor].total_size;
1703                               if (cursor + 1 >= client->nodes)
1704                                         break;
1705                               client->dataset[++cursor].begin_position = r;
1706                     }
1707                     while (1) {
1708                               if (r + offset >=
1709                                   client->dataset[cursor].begin_position)
1710                                         break;
1711                               offset += client->dataset[cursor].total_size;
1712                               if (cursor == 0)
1713                                         break;
1714                               cursor--;
1715                               r = client->dataset[cursor].begin_position +
1716                                         client->dataset[cursor].total_size;
1717                     }
1718                     offset = (r + offset) - client->dataset[cursor].begin_position;
1719                     if ((r = client_switch_proxy(filter, cursor)) != ARCHIVE_OK)
1720                               return r;
1721                     r = client_seek_proxy(filter, offset, SEEK_SET);
1722                     if (r < ARCHIVE_OK)
1723                               return r;
1724                     break;
1725 
1726           default:
1727                     return (ARCHIVE_FATAL);
1728           }
1729           r += client->dataset[cursor].begin_position;
1730 
1731           if (r >= 0) {
1732                     /*
1733                      * Ouch.  Clearing the buffer like this hurts, especially
1734                      * at bid time.  A lot of our efficiency at bid time comes
1735                      * from having bidders reuse the data we've already read.
1736                      *
1737                      * TODO: If the seek request is in data we already
1738                      * have, then don't call the seek callback.
1739                      *
1740                      * TODO: Zip seeks to end-of-file at bid time.  If
1741                      * other formats also start doing this, we may need to
1742                      * find a way for clients to fudge the seek offset to
1743                      * a block boundary.
1744                      *
1745                      * Hmmm... If whence was SEEK_END, we know the file
1746                      * size is (r - offset).  Can we use that to simplify
1747                      * the TODO items above?
1748                      */
1749                     filter->avail = filter->client_avail = 0;
1750                     filter->next = filter->buffer;
1751                     filter->position = r;
1752                     filter->end_of_file = 0;
1753           }
1754           return r;
1755 }
1756