1 /* $NetBSD: http.c,v 1.6 2024/09/01 15:07:31 christos Exp $ */
2 /*-
3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5 * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32 */
33
34 /*
35 * The following copyright applies to the base64 code:
36 *
37 *-
38 * Copyright 1997 Massachusetts Institute of Technology
39 *
40 * Permission to use, copy, modify, and distribute this software and
41 * its documentation for any purpose and without fee is hereby
42 * granted, provided that both the above copyright notice and this
43 * permission notice appear in all copies, that both the above
44 * copyright notice and this permission notice appear in all
45 * supporting documentation, and that the name of M.I.T. not be used
46 * in advertising or publicity pertaining to distribution of the
47 * software without specific, written prior permission. M.I.T. makes
48 * no representations about the suitability of this software for any
49 * purpose. It is provided "as is" without express or implied
50 * warranty.
51 *
52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65
66 #if defined(__linux__) || defined(__MINT__) || defined(__FreeBSD_kernel__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 /*
69 __FreeBSD_kernel__ is defined for GNU/kFreeBSD.
70 See http://glibc-bsd.alioth.debian.org/porting/PORTING .
71 */
72 #define _GNU_SOURCE
73 #endif
74
75 #ifndef _REENTRANT
76 /* Needed for gmtime_r on Interix */
77 #define _REENTRANT
78 #endif
79
80 #if HAVE_CONFIG_H
81 #include "config.h"
82 #endif
83 #ifndef NETBSD
84 #include <nbcompat.h>
85 #endif
86
87 #include <sys/types.h>
88 #include <sys/socket.h>
89
90 #include <ctype.h>
91 #include <errno.h>
92 #include <locale.h>
93 #include <stdarg.h>
94 #ifndef NETBSD
95 #include <nbcompat/stdio.h>
96 #else
97 #include <stdio.h>
98 #endif
99 #include <stdlib.h>
100 #include <string.h>
101 #include <time.h>
102 #include <unistd.h>
103
104 #include <netinet/in.h>
105 #include <netinet/tcp.h>
106
107 #ifndef NETBSD
108 #include <nbcompat/netdb.h>
109 #else
110 #include <netdb.h>
111 #endif
112
113 #include <arpa/inet.h>
114
115 #include "fetch.h"
116 #include "common.h"
117 #include "httperr.h"
118
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* Not a real status code: used when the status line cannot be parsed */
#define HTTP_PROTOCOL_ERROR	999

/* True for the reply codes this file treats as redirects */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every client (4xx) and server (5xx) error status.  Both bounds
 * are inclusive so that 400 (Bad Request) is classified as an error too.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
141
142
143 /*****************************************************************************
144 * I/O functions for decoding chunked streams
145 */
146
147 struct httpio
148 {
149 conn_t *conn; /* connection */
150 int chunked; /* chunked mode */
151 int keep_alive; /* keep-alive mode */
152 char *buf; /* chunk buffer */
153 size_t bufsize; /* size of chunk buffer */
154 ssize_t buflen; /* amount of data currently in buffer */
155 size_t bufpos; /* current read offset in buffer */
156 int eof; /* end-of-file flag */
157 int error; /* error flag */
158 size_t chunksize; /* remaining size of current chunk */
159 off_t contentlength; /* remaining size of the content */
160 };
161
162 /*
163 * Get next chunk header
164 */
165 static ssize_t
http_new_chunk(struct httpio * io)166 http_new_chunk(struct httpio *io)
167 {
168 char *p;
169
170 if (fetch_getln(io->conn) == -1)
171 return (-1);
172
173 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
174 return (-1);
175
176 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
177 if (*p == ';')
178 break;
179 if (!isxdigit((unsigned char)*p))
180 return (-1);
181 if (isdigit((unsigned char)*p)) {
182 io->chunksize = io->chunksize * 16 +
183 *p - '0';
184 } else {
185 io->chunksize = io->chunksize * 16 +
186 10 + tolower((unsigned char)*p) - 'a';
187 }
188 }
189
190 return (io->chunksize);
191 }
192
193 /*
194 * Grow the input buffer to at least len bytes
195 */
196 static int
http_growbuf(struct httpio * io,size_t len)197 http_growbuf(struct httpio *io, size_t len)
198 {
199 char *tmp;
200
201 if (io->bufsize >= len)
202 return (0);
203
204 if ((tmp = realloc(io->buf, len)) == NULL)
205 return (-1);
206 io->buf = tmp;
207 io->bufsize = len;
208 return (0);
209 }
210
211 /*
212 * Fill the input buffer, do chunk decoding on the fly
213 */
214 static ssize_t
http_fillbuf(struct httpio * io,size_t len)215 http_fillbuf(struct httpio *io, size_t len)
216 {
217 if (io->error)
218 return (-1);
219 if (io->eof)
220 return (0);
221
222 if (io->contentlength >= 0 && (off_t)len > io->contentlength)
223 len = io->contentlength;
224
225 if (io->chunked == 0) {
226 if (http_growbuf(io, len) == -1)
227 return (-1);
228 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
229 io->error = 1;
230 return (-1);
231 }
232 if (io->contentlength)
233 io->contentlength -= io->buflen;
234 io->bufpos = 0;
235 return (io->buflen);
236 }
237
238 if (io->chunksize == 0) {
239 switch (http_new_chunk(io)) {
240 case -1:
241 io->error = 1;
242 return (-1);
243 case 0:
244 io->eof = 1;
245 if (fetch_getln(io->conn) == -1)
246 return (-1);
247 return (0);
248 }
249 }
250
251 if (len > io->chunksize)
252 len = io->chunksize;
253 if (http_growbuf(io, len) == -1)
254 return (-1);
255 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
256 io->error = 1;
257 return (-1);
258 }
259 io->chunksize -= io->buflen;
260 if (io->contentlength >= 0)
261 io->contentlength -= io->buflen;
262
263 if (io->chunksize == 0) {
264 char endl[2];
265 ssize_t len2;
266
267 len2 = fetch_read(io->conn, endl, 2);
268 if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
269 return (-1);
270 if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
271 return (-1);
272 }
273
274 io->bufpos = 0;
275
276 return (io->buflen);
277 }
278
279 /*
280 * Read function
281 */
282 static ssize_t
http_readfn(void * v,void * buf,size_t len)283 http_readfn(void *v, void *buf, size_t len)
284 {
285 struct httpio *io = (struct httpio *)v;
286 size_t l, pos;
287
288 if (io->error)
289 return (-1);
290 if (io->eof)
291 return (0);
292
293 for (pos = 0; len > 0; pos += l, len -= l) {
294 /* empty buffer */
295 if (!io->buf || (ssize_t)io->bufpos == io->buflen)
296 if (http_fillbuf(io, len) < 1)
297 break;
298 l = io->buflen - io->bufpos;
299 if (len < l)
300 l = len;
301 memcpy((char *)buf + pos, io->buf + io->bufpos, l);
302 io->bufpos += l;
303 }
304
305 if (!pos && io->error)
306 return (-1);
307 return (pos);
308 }
309
310 /*
311 * Write function
312 */
313 static ssize_t
http_writefn(void * v,const void * buf,size_t len)314 http_writefn(void *v, const void *buf, size_t len)
315 {
316 struct httpio *io = (struct httpio *)v;
317
318 return (fetch_write(io->conn, buf, len));
319 }
320
321 /*
322 * Close function
323 */
324 static void
http_closefn(void * v)325 http_closefn(void *v)
326 {
327 struct httpio *io = (struct httpio *)v;
328
329 if (io->keep_alive) {
330 int val;
331
332 val = 0;
333 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
334 (socklen_t)sizeof(val));
335 fetch_cache_put(io->conn, fetch_close);
336 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
337 val = 1;
338 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
339 sizeof(val));
340 #endif
341 } else {
342 fetch_close(io->conn);
343 }
344
345 free(io->buf);
346 free(io);
347 }
348
349 /*
350 * Wrap a file descriptor up
351 */
352 static fetchIO *
http_funopen(conn_t * conn,int chunked,int keep_alive,off_t clength)353 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
354 {
355 struct httpio *io;
356 fetchIO *f;
357
358 if ((io = calloc(1, sizeof(*io))) == NULL) {
359 fetch_syserr();
360 return (NULL);
361 }
362 io->conn = conn;
363 io->chunked = chunked;
364 io->contentlength = clength;
365 io->keep_alive = keep_alive;
366 f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
367 if (f == NULL) {
368 fetch_syserr();
369 free(io);
370 return (NULL);
371 }
372 return (f);
373 }
374
375
376 /*****************************************************************************
377 * Helper functions for talking to the server and parsing its replies
378 */
379
/*
 * Header types
 *
 * Values at or below hdr_end (0) terminate header processing: hdr_end
 * marks the empty line after the headers, the negative values report
 * failures.  Values above hdr_end identify a header line.
 */
typedef enum {
	hdr_syserror = -2,
	hdr_error = -1,
	hdr_end = 0,
	hdr_unknown = 1,
	hdr_connection,
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate
} hdr_t;

/*
 * Names of interesting headers
 *
 * Read-only lookup table; the hdr_unknown entry with a NULL name marks
 * the end of the list.
 */
static const struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ hdr_connection,		"Connection" },
	{ hdr_content_length,		"Content-Length" },
	{ hdr_content_range,		"Content-Range" },
	{ hdr_last_modified,		"Last-Modified" },
	{ hdr_location,			"Location" },
	{ hdr_transfer_encoding,	"Transfer-Encoding" },
	{ hdr_www_authenticate,		"WWW-Authenticate" },
	{ hdr_unknown,			NULL },
};
409
410 /*
411 * Send a formatted line; optionally echo to terminal
412 */
413 LIBFETCH_PRINTFLIKE(2, 3)
414 static int
http_cmd(conn_t * conn,const char * fmt,...)415 http_cmd(conn_t *conn, const char *fmt, ...)
416 {
417 va_list ap;
418 size_t len;
419 char *msg;
420 ssize_t r;
421
422 va_start(ap, fmt);
423 len = vasprintf(&msg, fmt, ap);
424 va_end(ap);
425
426 if (msg == NULL) {
427 errno = ENOMEM;
428 fetch_syserr();
429 return (-1);
430 }
431
432 r = fetch_write(conn, msg, len);
433 free(msg);
434
435 if (r == -1) {
436 fetch_syserr();
437 return (-1);
438 }
439
440 return (0);
441 }
442
443 /*
444 * Get and parse status line
445 */
446 static int
http_get_reply(conn_t * conn)447 http_get_reply(conn_t *conn)
448 {
449 char *p;
450
451 if (fetch_getln(conn) == -1)
452 return (-1);
453 /*
454 * A valid status line looks like "HTTP/m.n xyz reason" where m
455 * and n are the major and minor protocol version numbers and xyz
456 * is the reply code.
457 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
458 * just one) that do not send a version number, so we can't rely
459 * on finding one, but if we do, insist on it being 1.0 or 1.1.
460 * We don't care about the reason phrase.
461 */
462 if (strncmp(conn->buf, "HTTP", 4) != 0)
463 return (HTTP_PROTOCOL_ERROR);
464 p = conn->buf + 4;
465 if (*p == '/') {
466 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
467 return (HTTP_PROTOCOL_ERROR);
468 p += 4;
469 }
470 if (*p != ' ' ||
471 !isdigit((unsigned char)p[1]) ||
472 !isdigit((unsigned char)p[2]) ||
473 !isdigit((unsigned char)p[3]))
474 return (HTTP_PROTOCOL_ERROR);
475
476 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
477 return (conn->err);
478 }
479
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for, hdr the complete header line.  The
 * name is compared case-insensitively and must be followed directly by a
 * colon.  Returns a pointer to the first non-whitespace character after
 * the colon (possibly the terminating NUL), or NULL if the line is not
 * this header.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only past characters that actually matched; otherwise a
	 * mismatch on the last character of the name would be stepped over
	 * and e.g. "Locatiox:" would be accepted as "Location".
	 */
	while (*str && *hdr &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str || *hdr != ':')
		return (NULL);
	/* step over the colon and any whitespace that follows it */
	do {
		++hdr;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
496
497 /*
498 * Get the next header and return the appropriate symbolic code.
499 */
500 static hdr_t
http_next_header(conn_t * conn,const char ** p)501 http_next_header(conn_t *conn, const char **p)
502 {
503 int i;
504
505 if (fetch_getln(conn) == -1)
506 return (hdr_syserror);
507 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
508 conn->buflen--;
509 conn->buf[conn->buflen] = '\0';
510 if (conn->buflen == 0)
511 return (hdr_end);
512 /*
513 * We could check for malformed headers but we don't really care.
514 * A valid header starts with a token immediately followed by a
515 * colon; a token is any sequence of non-control, non-whitespace
516 * characters except "()<>@,;:\\\"{}".
517 */
518 for (i = 0; hdr_names[i].num != hdr_unknown; i++)
519 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
520 return (hdr_names[i].num);
521 return (hdr_unknown);
522 }
523
/*
 * Parse a last-modified header
 *
 * p is the header value in the form "Sun, 06 Nov 1994 08:49:37 GMT".
 * On success the timestamp is stored in *mtime and 0 is returned; -1 is
 * returned when the value does not parse or, in the setlocale() branch,
 * when the current locale cannot be queried or saved.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	struct tm tm;
	char *r;

	/* start from a defined state for fields strptime() leaves alone */
	memset(&tm, 0, sizeof(tm));
#ifdef LC_C_LOCALE
	r = strptime_l(p, "%a, %d %b %Y %H:%M:%S GMT", &tm, LC_C_LOCALE);
#else
	{
		const char *current;
		char *saved;

		/* setlocale() may return NULL; never hand that to strdup() */
		current = setlocale(LC_TIME, NULL);
		if (current == NULL)
			return (-1);
		saved = strdup(current);
		if (saved == NULL)
			return (-1);

		/* day and month names must be matched in English */
		setlocale(LC_TIME, "C");
		r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
		/* XXX should add support for date-2 and date-3 */
		setlocale(LC_TIME, saved);
		free(saved);
	}
#endif
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
553
/*
 * Parse a content-length header
 *
 * p must consist of one or more decimal digits and nothing else.  On
 * success the value is stored in *length and 0 is returned.  -1 is
 * returned, with *length untouched, when the value is empty, contains a
 * non-digit, or does not fit in an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/*
	 * Largest off_t, computed without overflowing the signed type.
	 * Assumes 8-bit bytes, as POSIX requires.
	 */
	const off_t off_max =
	    ((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2) + 1;
	off_t len;
	int digit;

	if (!isdigit((unsigned char)*p))
		return (-1);
	for (len = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* would len * 10 + digit exceed off_max? */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
569
/*
 * Parse a non-empty run of decimal digits starting at *pp.
 *
 * On success the value is stored in *out, *pp is advanced past the
 * digits and 0 is returned.  Returns -1, leaving *pp and *out untouched,
 * when there is no digit or the value does not fit in an off_t.
 */
static int
http_range_number(const char **pp, off_t *out)
{
	/*
	 * Largest off_t, computed without overflowing the signed type.
	 * Assumes 8-bit bytes, as POSIX requires.
	 */
	const off_t off_max =
	    ((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2) + 1;
	const char *p = *pp;
	off_t val = 0;
	int digit;

	if (!isdigit((unsigned char)*p))
		return (-1);
	for (; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		if (val > (off_max - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	*pp = p;
	*out = val;
	return (0);
}

/*
 * Parse a content-range header
 *
 * Accepted forms are "bytes first-last/size" and "bytes * /size" (written
 * without the space).  On success 0 is returned and:
 *   *offset = first, or -1 for the '*' form
 *   *length = last - first + 1, or 0 for the '*' form
 *   *size   = size
 * Returns -1 and leaves the outputs untouched when the value is
 * malformed, a number is missing or too large, first > last, or the
 * range does not fit within size.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_range_number(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_range_number(&p, &last) == -1)
			return (-1);
		if (first > last)
			return (-1);
	}
	if (*p != '/')
		return (-1);
	++p;
	if (http_range_number(&p, &len) == -1 || *p != '\0')
		return (-1);
	/*
	 * The range must fit in the total size.  Written without the "+ 1"
	 * so the comparison itself cannot overflow; the '*' form carries no
	 * range to check.
	 */
	if (first != -1 && len <= last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
606
607
608 /*****************************************************************************
609 * Helper functions for authorization
610 */
611
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src, using '=' padding.  Returns a
 * newly allocated NUL-terminated string that the caller must free(), or
 * NULL if the allocation fails.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: with a signed plain char, bytes
	 * >= 0x80 would be sign-extended and corrupt the neighbouring bits.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per (started) group of three input bytes */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3;
		l -= 3;
		dst += 4;
	}

	/* encode the one or two leftover bytes and pad the group */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
667
668 /*
669 * Encode username and password
670 */
671 static int
http_basic_auth(conn_t * conn,const char * hdr,const char * usr,const char * pwd)672 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
673 {
674 char *upw, *auth;
675 int r;
676
677 if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
678 return (-1);
679 auth = http_base64(upw);
680 free(upw);
681 if (auth == NULL)
682 return (-1);
683 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
684 free(auth);
685 return (r);
686 }
687
688 /*
689 * Send an authorization header
690 */
691 static int
http_authorize(conn_t * conn,const char * hdr,const char * p)692 http_authorize(conn_t *conn, const char *hdr, const char *p)
693 {
694 /* basic authorization */
695 if (strncasecmp(p, "basic:", 6) == 0) {
696 char *user, *pwd, *str;
697 int r;
698
699 /* skip realm */
700 for (p += 6; *p && *p != ':'; ++p)
701 /* nothing */ ;
702 if (!*p || strchr(++p, ':') == NULL)
703 return (-1);
704 if ((str = strdup(p)) == NULL)
705 return (-1); /* XXX */
706 user = str;
707 pwd = strchr(str, ':');
708 *pwd++ = '\0';
709 r = http_basic_auth(conn, hdr, user, pwd);
710 free(str);
711 return (r);
712 }
713 return (-1);
714 }
715
716
717 /*****************************************************************************
718 * Helper functions for connecting to a server or proxy
719 */
720
721 /*
722 * Connect to the correct HTTP server or proxy.
723 */
724 static conn_t *
http_connect(struct url * URL,struct url * purl,const char * flags,int * cached)725 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
726 {
727 struct url *curl;
728 conn_t *conn;
729 hdr_t h;
730 const char *p;
731 int af, verbose;
732 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
733 int val;
734 #endif
735
736 *cached = 0;
737
738 #ifdef INET6
739 af = AF_UNSPEC;
740 #else
741 af = AF_INET;
742 #endif
743
744 verbose = CHECK_FLAG('v');
745 if (CHECK_FLAG('4'))
746 af = AF_INET;
747 #ifdef INET6
748 else if (CHECK_FLAG('6'))
749 af = AF_INET6;
750 #endif
751
752 curl = (purl != NULL) ? purl : URL;
753 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
754 URL = purl;
755 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
756 /* can't talk http to an ftp server */
757 /* XXX should set an error code */
758 return (NULL);
759 }
760
761 if ((conn = fetch_cache_get(curl, af)) != NULL) {
762 *cached = 1;
763 return (conn);
764 }
765
766 if ((conn = fetch_connect(curl, af, verbose)) == NULL)
767 /* fetch_connect() has already set an error code */
768 return (NULL);
769 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && purl) {
770 http_cmd(conn, "CONNECT %s:%d HTTP/1.1\r\n",
771 URL->host, URL->port);
772 http_cmd(conn, "Host: %s:%d\r\n",
773 URL->host, URL->port);
774 /* proxy authorization */
775 if (*purl->user || *purl->pwd)
776 http_basic_auth(conn, "Proxy-Authorization",
777 purl->user, purl->pwd);
778 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
779 http_authorize(conn, "Proxy-Authorization", p);
780 http_cmd(conn, "\r\n");
781 if (http_get_reply(conn) != HTTP_OK) {
782 http_seterr(conn->err);
783 goto ouch;
784 }
785 /* Read and discard the rest of the proxy response (if any) */
786 do {
787 switch ((h = http_next_header(conn, &p))) {
788 case hdr_syserror:
789 fetch_syserr();
790 goto ouch;
791 case hdr_error:
792 http_seterr(HTTP_PROTOCOL_ERROR);
793 goto ouch;
794 default:
795 /* ignore */ ;
796 }
797 } while (h > hdr_end);
798 }
799 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
800 fetch_ssl(conn, URL, verbose) == -1) {
801 /* grrr */
802 #ifdef EAUTH
803 errno = EAUTH;
804 #else
805 errno = EPERM;
806 #endif
807 goto ouch;
808 }
809
810 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
811 val = 1;
812 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
813 #endif
814
815 return (conn);
816 ouch:
817 fetch_close(conn);
818 return (NULL);
819 }
820
821 static struct url *
http_get_proxy(struct url * url,const char * flags)822 http_get_proxy(struct url * url, const char *flags)
823 {
824 struct url *purl;
825 char *p;
826
827 if (flags != NULL && strchr(flags, 'd') != NULL)
828 return (NULL);
829 if (fetch_no_proxy_match(url->host))
830 return (NULL);
831 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
832 *p && (purl = fetchParseURL(p))) {
833 if (!*purl->scheme)
834 strcpy(purl->scheme, SCHEME_HTTP);
835 if (!purl->port)
836 purl->port = fetch_default_proxy_port(purl->scheme);
837 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
838 return (purl);
839 fetchFreeURL(purl);
840 }
841 return (NULL);
842 }
843
844 static void
set_if_modified_since(conn_t * conn,time_t last_modified)845 set_if_modified_since(conn_t *conn, time_t last_modified)
846 {
847 static const char weekdays[] = "SunMonTueWedThuFriSat";
848 static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
849 struct tm tm;
850 char buf[80];
851 gmtime_r(&last_modified, &tm);
852 snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4ld %02d:%02d:%02d GMT",
853 weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
854 (long)tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
855 http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
856 }
857
858
859 /*****************************************************************************
860 * Core
861 */
862
863 /*
864 * Send a request and process the reply
865 *
866 * XXX This function is way too long, the do..while loop should be split
867 * XXX off into a separate function.
868 */
869 fetchIO *
http_request(struct url * URL,const char * op,struct url_stat * us,struct url * purl,const char * flags)870 http_request(struct url *URL, const char *op, struct url_stat *us,
871 struct url *purl, const char *flags)
872 {
873 conn_t *conn;
874 struct url *url, *new;
875 int chunked, direct, if_modified_since, need_auth, noredirect;
876 int keep_alive, verbose, cached;
877 int e, i, n, val;
878 off_t offset, clength, length, size;
879 time_t mtime;
880 const char *p;
881 fetchIO *f;
882 hdr_t h;
883 char hbuf[URL_HOSTLEN + 7], *host;
884
885 direct = CHECK_FLAG('d');
886 noredirect = CHECK_FLAG('A');
887 verbose = CHECK_FLAG('v');
888 if_modified_since = CHECK_FLAG('i');
889 keep_alive = 0;
890
891 if (direct && purl) {
892 fetchFreeURL(purl);
893 purl = NULL;
894 }
895
896 /* try the provided URL first */
897 url = URL;
898
899 /* if the A flag is set, we only get one try */
900 n = noredirect ? 1 : MAX_REDIRECT;
901 i = 0;
902
903 e = HTTP_PROTOCOL_ERROR;
904 need_auth = 0;
905 do {
906 new = NULL;
907 chunked = 0;
908 offset = 0;
909 clength = -1;
910 length = -1;
911 size = -1;
912 mtime = 0;
913
914 /* check port */
915 if (!url->port)
916 url->port = fetch_default_port(url->scheme);
917
918 /* were we redirected to an FTP URL? */
919 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
920 if (strcmp(op, "GET") == 0)
921 return (ftp_request(url, "RETR", NULL, us, purl, flags));
922 else if (strcmp(op, "HEAD") == 0)
923 return (ftp_request(url, "STAT", NULL, us, purl, flags));
924 }
925
926 /* connect to server or proxy */
927 if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
928 goto ouch;
929
930 host = url->host;
931 #ifdef INET6
932 if (strchr(url->host, ':')) {
933 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
934 host = hbuf;
935 }
936 #endif
937 if (url->port != fetch_default_port(url->scheme)) {
938 if (host != hbuf) {
939 strcpy(hbuf, host);
940 host = hbuf;
941 }
942 snprintf(hbuf + strlen(hbuf),
943 sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
944 }
945
946 /* send request */
947 if (verbose)
948 fetch_info("requesting %s://%s%s",
949 url->scheme, host, url->doc);
950 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
951 http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
952 op, url->scheme, host, url->doc);
953 } else {
954 http_cmd(conn, "%s %s HTTP/1.1\r\n",
955 op, url->doc);
956 }
957
958 if (if_modified_since && url->last_modified > 0)
959 set_if_modified_since(conn, url->last_modified);
960
961 /* virtual host */
962 http_cmd(conn, "Host: %s\r\n", host);
963
964 /* proxy authorization */
965 if (purl) {
966 if (*purl->user || *purl->pwd)
967 http_basic_auth(conn, "Proxy-Authorization",
968 purl->user, purl->pwd);
969 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
970 http_authorize(conn, "Proxy-Authorization", p);
971 }
972
973 /* server authorization */
974 if (need_auth || *url->user || *url->pwd) {
975 if (*url->user || *url->pwd)
976 http_basic_auth(conn, "Authorization", url->user, url->pwd);
977 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
978 http_authorize(conn, "Authorization", p);
979 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
980 http_basic_auth(conn, "Authorization", url->user, url->pwd);
981 } else {
982 http_seterr(HTTP_NEED_AUTH);
983 goto ouch;
984 }
985 }
986
987 /* other headers */
988 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
989 if (strcasecmp(p, "auto") == 0)
990 http_cmd(conn, "Referer: %s://%s%s\r\n",
991 url->scheme, host, url->doc);
992 else
993 http_cmd(conn, "Referer: %s\r\n", p);
994 }
995 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
996 http_cmd(conn, "User-Agent: %s\r\n", p);
997 else
998 http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
999 if (url->offset > 0)
1000 http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
1001 http_cmd(conn, "\r\n");
1002
1003 /*
1004 * Force the queued request to be dispatched. Normally, one
1005 * would do this with shutdown(2) but squid proxies can be
1006 * configured to disallow such half-closed connections. To
1007 * be compatible with such configurations, fiddle with socket
1008 * options to force the pending data to be written.
1009 */
1010 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
1011 val = 0;
1012 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1013 sizeof(val));
1014 #endif
1015 val = 1;
1016 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1017 (socklen_t)sizeof(val));
1018
1019 /* get reply */
1020 switch (http_get_reply(conn)) {
1021 case HTTP_OK:
1022 case HTTP_PARTIAL:
1023 case HTTP_NOT_MODIFIED:
1024 /* fine */
1025 break;
1026 case HTTP_MOVED_PERM:
1027 case HTTP_MOVED_TEMP:
1028 case HTTP_SEE_OTHER:
1029 /*
1030 * Not so fine, but we still have to read the
1031 * headers to get the new location.
1032 */
1033 break;
1034 case HTTP_NEED_AUTH:
1035 if (need_auth) {
1036 /*
1037 * We already sent out authorization code,
1038 * so there's nothing more we can do.
1039 */
1040 http_seterr(conn->err);
1041 goto ouch;
1042 }
1043 /* try again, but send the password this time */
1044 if (verbose)
1045 fetch_info("server requires authorization");
1046 break;
1047 case HTTP_NEED_PROXY_AUTH:
1048 /*
1049 * If we're talking to a proxy, we already sent
1050 * our proxy authorization code, so there's
1051 * nothing more we can do.
1052 */
1053 http_seterr(conn->err);
1054 goto ouch;
1055 case HTTP_BAD_RANGE:
1056 /*
1057 * This can happen if we ask for 0 bytes because
1058 * we already have the whole file. Consider this
1059 * a success for now, and check sizes later.
1060 */
1061 break;
1062 case HTTP_PROTOCOL_ERROR:
1063 /* fall through */
1064 case -1:
1065 --i;
1066 if (cached)
1067 continue;
1068 fetch_syserr();
1069 goto ouch;
1070 default:
1071 http_seterr(conn->err);
1072 if (!verbose)
1073 goto ouch;
1074 /* fall through so we can get the full error message */
1075 }
1076
1077 /* get headers */
1078 do {
1079 switch ((h = http_next_header(conn, &p))) {
1080 case hdr_syserror:
1081 fetch_syserr();
1082 goto ouch;
1083 case hdr_error:
1084 http_seterr(HTTP_PROTOCOL_ERROR);
1085 goto ouch;
1086 case hdr_connection:
1087 /* XXX too weak? */
1088 keep_alive = (strcasecmp(p, "keep-alive") == 0);
1089 break;
1090 case hdr_content_length:
1091 http_parse_length(p, &clength);
1092 break;
1093 case hdr_content_range:
1094 http_parse_range(p, &offset, &length, &size);
1095 break;
1096 case hdr_last_modified:
1097 http_parse_mtime(p, &mtime);
1098 break;
1099 case hdr_location:
1100 if (!HTTP_REDIRECT(conn->err))
1101 break;
1102 if (new)
1103 free(new);
1104 if (verbose)
1105 fetch_info("%d redirect to %s", conn->err, p);
1106 if (*p == '/')
1107 /* absolute path */
1108 new = fetchMakeURL(url->scheme, url->host, url->port, p,
1109 url->user, url->pwd);
1110 else
1111 new = fetchParseURL(p);
1112 if (new == NULL) {
1113 /* XXX should set an error code */
1114 goto ouch;
1115 }
1116 if (!*new->user && !*new->pwd) {
1117 strcpy(new->user, url->user);
1118 strcpy(new->pwd, url->pwd);
1119 }
1120 new->offset = url->offset;
1121 new->length = url->length;
1122 break;
1123 case hdr_transfer_encoding:
1124 /* XXX weak test*/
1125 chunked = (strcasecmp(p, "chunked") == 0);
1126 break;
1127 case hdr_www_authenticate:
1128 if (conn->err != HTTP_NEED_AUTH)
1129 break;
1130 /* if we were smarter, we'd check the method and realm */
1131 break;
1132 case hdr_end:
1133 /* fall through */
1134 case hdr_unknown:
1135 /* ignore */
1136 break;
1137 }
1138 } while (h > hdr_end);
1139
1140 /* we need to provide authentication */
1141 if (conn->err == HTTP_NEED_AUTH) {
1142 e = conn->err;
1143 need_auth = 1;
1144 fetch_close(conn);
1145 conn = NULL;
1146 continue;
1147 }
1148
1149 /* requested range not satisfiable */
1150 if (conn->err == HTTP_BAD_RANGE) {
1151 if (url->offset == size && url->length == 0) {
1152 /* asked for 0 bytes; fake it */
1153 offset = url->offset;
1154 conn->err = HTTP_OK;
1155 break;
1156 } else {
1157 http_seterr(conn->err);
1158 goto ouch;
1159 }
1160 }
1161
1162 /* we have a hit or an error */
1163 if (conn->err == HTTP_OK ||
1164 conn->err == HTTP_PARTIAL ||
1165 conn->err == HTTP_NOT_MODIFIED ||
1166 HTTP_ERROR(conn->err))
1167 break;
1168
1169 /* all other cases: we got a redirect */
1170 e = conn->err;
1171 need_auth = 0;
1172 fetch_close(conn);
1173 conn = NULL;
1174 if (!new)
1175 break;
1176 if (url != URL)
1177 fetchFreeURL(url);
1178 url = new;
1179 } while (++i < n);
1180
1181 /* we failed, or ran out of retries */
1182 if (conn == NULL) {
1183 http_seterr(e);
1184 goto ouch;
1185 }
1186
1187 /* check for inconsistencies */
1188 if (clength != -1 && length != -1 && clength != length) {
1189 http_seterr(HTTP_PROTOCOL_ERROR);
1190 goto ouch;
1191 }
1192 if (clength == -1)
1193 clength = length;
1194 if (clength != -1)
1195 length = offset + clength;
1196 if (length != -1 && size != -1 && length != size) {
1197 http_seterr(HTTP_PROTOCOL_ERROR);
1198 goto ouch;
1199 }
1200 if (size == -1)
1201 size = length;
1202
1203 /* fill in stats */
1204 if (us) {
1205 us->size = size;
1206 us->atime = us->mtime = mtime;
1207 }
1208
1209 /* too far? */
1210 if (URL->offset > 0 && offset > URL->offset) {
1211 http_seterr(HTTP_PROTOCOL_ERROR);
1212 goto ouch;
1213 }
1214
1215 /* report back real offset and size */
1216 URL->offset = offset;
1217 URL->length = clength;
1218
1219 if (clength == -1 && !chunked)
1220 keep_alive = 0;
1221
1222 if (conn->err == HTTP_NOT_MODIFIED) {
1223 http_seterr(HTTP_NOT_MODIFIED);
1224 if (keep_alive) {
1225 fetch_cache_put(conn, fetch_close);
1226 conn = NULL;
1227 }
1228 goto ouch;
1229 }
1230
1231 /* wrap it up in a fetchIO */
1232 if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1233 fetch_syserr();
1234 goto ouch;
1235 }
1236
1237 if (url != URL)
1238 fetchFreeURL(url);
1239 if (purl)
1240 fetchFreeURL(purl);
1241
1242 if (HTTP_ERROR(conn->err)) {
1243
1244 if (keep_alive) {
1245 char buf[512];
1246 do {
1247 } while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1248 }
1249
1250 fetchIO_close(f);
1251 f = NULL;
1252 }
1253
1254 return (f);
1255
1256 ouch:
1257 if (url != URL)
1258 fetchFreeURL(url);
1259 if (purl)
1260 fetchFreeURL(purl);
1261 if (conn != NULL)
1262 fetch_close(conn);
1263 return (NULL);
1264 }
1265
1266
1267 /*****************************************************************************
1268 * Entry points
1269 */
1270
1271 /*
1272 * Retrieve and stat a file by HTTP
1273 */
1274 fetchIO *
fetchXGetHTTP(struct url * URL,struct url_stat * us,const char * flags)1275 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1276 {
1277 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1278 }
1279
1280 /*
1281 * Retrieve a file by HTTP
1282 */
1283 fetchIO *
fetchGetHTTP(struct url * URL,const char * flags)1284 fetchGetHTTP(struct url *URL, const char *flags)
1285 {
1286 return (fetchXGetHTTP(URL, NULL, flags));
1287 }
1288
1289 /*
1290 * Store a file by HTTP
1291 */
1292 fetchIO *
1293 /*ARGSUSED*/
fetchPutHTTP(struct url * URL __unused,const char * flags __unused)1294 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1295 {
1296 fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1297 return (NULL);
1298 }
1299
1300 /*
1301 * Get an HTTP document's metadata
1302 */
1303 int
fetchStatHTTP(struct url * URL,struct url_stat * us,const char * flags)1304 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1305 {
1306 fetchIO *f;
1307
1308 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1309 if (f == NULL)
1310 return (-1);
1311 fetchIO_close(f);
1312 return (0);
1313 }
1314
/*
 * States of the character-driven scanner in parse_index(), which looks
 * for <a href="..."> attributes in an HTML index page.
 */
enum http_states {
	ST_NONE = 0,	/* plain text, not in markup */
	ST_LT,		/* "<" seen */
	ST_LTA,		/* "<a" seen */
	ST_TAGA,	/* inside an a-tag, between attributes */
	ST_H,		/* "<a h" seen */
	ST_R,		/* "<a hr" seen */
	ST_E,		/* "<a hre" seen */
	ST_F,		/* "<a href" seen */
	ST_HREF,	/* "<a href=" seen */
	ST_HREFQ,	/* inside the quoted href value */
	ST_TAG,		/* inside a tag that is not an a-tag */
	ST_TAGAX,	/* inside an unknown keyword of an a-tag */
	ST_TAGAQ	/* inside a quoted value of an unknown a-tag keyword */
};
1330
1331 struct index_parser {
1332 struct url_list *ue;
1333 struct url *url;
1334 enum http_states state;
1335 };
1336
1337 static ssize_t
parse_index(struct index_parser * parser,const char * buf,size_t len)1338 parse_index(struct index_parser *parser, const char *buf, size_t len)
1339 {
1340 char *end_attr, p = *buf;
1341
1342 switch (parser->state) {
1343 case ST_NONE:
1344 /* Plain text, not in markup */
1345 if (p == '<')
1346 parser->state = ST_LT;
1347 return 1;
1348 case ST_LT:
1349 /* In tag -- "<" already found */
1350 if (p == '>')
1351 parser->state = ST_NONE;
1352 else if (p == 'a' || p == 'A')
1353 parser->state = ST_LTA;
1354 else if (!isspace((unsigned char)p))
1355 parser->state = ST_TAG;
1356 return 1;
1357 case ST_LTA:
1358 /* In tag -- "<a" already found */
1359 if (p == '>')
1360 parser->state = ST_NONE;
1361 else if (p == '"')
1362 parser->state = ST_TAGAQ;
1363 else if (isspace((unsigned char)p))
1364 parser->state = ST_TAGA;
1365 else
1366 parser->state = ST_TAG;
1367 return 1;
1368 case ST_TAG:
1369 /* In tag, but not "<a" -- disregard */
1370 if (p == '>')
1371 parser->state = ST_NONE;
1372 return 1;
1373 case ST_TAGA:
1374 /* In a-tag -- "<a " already found */
1375 if (p == '>')
1376 parser->state = ST_NONE;
1377 else if (p == '"')
1378 parser->state = ST_TAGAQ;
1379 else if (p == 'h' || p == 'H')
1380 parser->state = ST_H;
1381 else if (!isspace((unsigned char)p))
1382 parser->state = ST_TAGAX;
1383 return 1;
1384 case ST_TAGAX:
1385 /* In unknown keyword in a-tag */
1386 if (p == '>')
1387 parser->state = ST_NONE;
1388 else if (p == '"')
1389 parser->state = ST_TAGAQ;
1390 else if (isspace((unsigned char)p))
1391 parser->state = ST_TAGA;
1392 return 1;
1393 case ST_TAGAQ:
1394 /* In a-tag, unknown argument for keys. */
1395 if (p == '>')
1396 parser->state = ST_NONE;
1397 else if (p == '"')
1398 parser->state = ST_TAGA;
1399 return 1;
1400 case ST_H:
1401 /* In a-tag -- "<a h" already found */
1402 if (p == '>')
1403 parser->state = ST_NONE;
1404 else if (p == '"')
1405 parser->state = ST_TAGAQ;
1406 else if (p == 'r' || p == 'R')
1407 parser->state = ST_R;
1408 else if (isspace((unsigned char)p))
1409 parser->state = ST_TAGA;
1410 else
1411 parser->state = ST_TAGAX;
1412 return 1;
1413 case ST_R:
1414 /* In a-tag -- "<a hr" already found */
1415 if (p == '>')
1416 parser->state = ST_NONE;
1417 else if (p == '"')
1418 parser->state = ST_TAGAQ;
1419 else if (p == 'e' || p == 'E')
1420 parser->state = ST_E;
1421 else if (isspace((unsigned char)p))
1422 parser->state = ST_TAGA;
1423 else
1424 parser->state = ST_TAGAX;
1425 return 1;
1426 case ST_E:
1427 /* In a-tag -- "<a hre" already found */
1428 if (p == '>')
1429 parser->state = ST_NONE;
1430 else if (p == '"')
1431 parser->state = ST_TAGAQ;
1432 else if (p == 'f' || p == 'F')
1433 parser->state = ST_F;
1434 else if (isspace((unsigned char)p))
1435 parser->state = ST_TAGA;
1436 else
1437 parser->state = ST_TAGAX;
1438 return 1;
1439 case ST_F:
1440 /* In a-tag -- "<a href" already found */
1441 if (p == '>')
1442 parser->state = ST_NONE;
1443 else if (p == '"')
1444 parser->state = ST_TAGAQ;
1445 else if (p == '=')
1446 parser->state = ST_HREF;
1447 else if (!isspace((unsigned char)p))
1448 parser->state = ST_TAGAX;
1449 return 1;
1450 case ST_HREF:
1451 /* In a-tag -- "<a href=" already found */
1452 if (p == '>')
1453 parser->state = ST_NONE;
1454 else if (p == '"')
1455 parser->state = ST_HREFQ;
1456 else if (!isspace((unsigned char)p))
1457 parser->state = ST_TAGA;
1458 return 1;
1459 case ST_HREFQ:
1460 /* In href of the a-tag */
1461 end_attr = memchr(buf, '"', len);
1462 if (end_attr == NULL)
1463 return 0;
1464 *end_attr = '\0';
1465 parser->state = ST_TAGA;
1466 if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1467 return -1;
1468 return end_attr + 1 - buf;
1469 }
1470 /* NOTREACHED */
1471 abort();
1472 }
1473
1474 struct http_index_cache {
1475 struct http_index_cache *next;
1476 struct url *location;
1477 struct url_list ue;
1478 };
1479
1480 static struct http_index_cache *index_cache;
1481
1482 /*
1483 * List a directory
1484 */
1485 int
1486 /*ARGSUSED*/
fetchListHTTP(struct url_list * ue,struct url * url,const char * pattern __unused,const char * flags)1487 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1488 {
1489 fetchIO *f;
1490 char buf[2 * PATH_MAX];
1491 size_t buf_len, sum_processed;
1492 ssize_t read_len, processed;
1493 struct index_parser state;
1494 struct http_index_cache *cache = NULL;
1495 int do_cache, ret;
1496
1497 do_cache = CHECK_FLAG('c');
1498
1499 if (do_cache) {
1500 for (cache = index_cache; cache != NULL; cache = cache->next) {
1501 if (strcmp(cache->location->scheme, url->scheme))
1502 continue;
1503 if (strcmp(cache->location->user, url->user))
1504 continue;
1505 if (strcmp(cache->location->pwd, url->pwd))
1506 continue;
1507 if (strcmp(cache->location->host, url->host))
1508 continue;
1509 if (cache->location->port != url->port)
1510 continue;
1511 if (strcmp(cache->location->doc, url->doc))
1512 continue;
1513 return fetchAppendURLList(ue, &cache->ue);
1514 }
1515
1516 cache = malloc(sizeof(*cache));
1517 fetchInitURLList(&cache->ue);
1518 cache->location = fetchCopyURL(url);
1519 }
1520
1521 f = fetchGetHTTP(url, flags);
1522 if (f == NULL) {
1523 if (do_cache) {
1524 fetchFreeURLList(&cache->ue);
1525 fetchFreeURL(cache->location);
1526 free(cache);
1527 }
1528 return -1;
1529 }
1530
1531 state.url = url;
1532 state.state = ST_NONE;
1533 if (do_cache) {
1534 state.ue = &cache->ue;
1535 } else {
1536 state.ue = ue;
1537 }
1538
1539 buf_len = 0;
1540
1541 while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1542 buf_len += read_len;
1543 sum_processed = 0;
1544 do {
1545 processed = parse_index(&state, buf + sum_processed, buf_len);
1546 if (processed == -1)
1547 break;
1548 buf_len -= processed;
1549 sum_processed += processed;
1550 } while (processed != 0 && buf_len > 0);
1551 if (processed == -1) {
1552 read_len = -1;
1553 break;
1554 }
1555 memmove(buf, buf + sum_processed, buf_len);
1556 }
1557
1558 fetchIO_close(f);
1559
1560 ret = read_len < 0 ? -1 : 0;
1561
1562 if (do_cache) {
1563 if (ret == 0) {
1564 cache->next = index_cache;
1565 index_cache = cache;
1566 }
1567
1568 if (fetchAppendURLList(ue, &cache->ue))
1569 ret = -1;
1570 }
1571
1572 return ret;
1573 }
1574