1 /*        $NetBSD: http.c,v 1.6 2024/09/01 15:07:31 christos Exp $    */
2 /*-
 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4  * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5  * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer
13  *    in this position and unchanged.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32  */
33 
34 /*
35  * The following copyright applies to the base64 code:
36  *
37  *-
38  * Copyright 1997 Massachusetts Institute of Technology
39  *
40  * Permission to use, copy, modify, and distribute this software and
41  * its documentation for any purpose and without fee is hereby
42  * granted, provided that both the above copyright notice and this
43  * permission notice appear in all copies, that both the above
44  * copyright notice and this permission notice appear in all
45  * supporting documentation, and that the name of M.I.T. not be used
46  * in advertising or publicity pertaining to distribution of the
47  * software without specific, written prior permission.  M.I.T. makes
48  * no representations about the suitability of this software for any
49  * purpose.  It is provided "as is" without express or implied
50  * warranty.
51  *
52  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
53  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  */
65 
66 #if defined(__linux__) || defined(__MINT__) || defined(__FreeBSD_kernel__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 /*
69    __FreeBSD_kernel__ is defined for GNU/kFreeBSD.
70    See http://glibc-bsd.alioth.debian.org/porting/PORTING .
71 */
72 #define _GNU_SOURCE
73 #endif
74 
75 #ifndef _REENTRANT
76 /* Needed for gmtime_r on Interix */
77 #define _REENTRANT
78 #endif
79 
80 #if HAVE_CONFIG_H
81 #include "config.h"
82 #endif
83 #ifndef NETBSD
84 #include <nbcompat.h>
85 #endif
86 
#include <sys/types.h>
#include <sys/socket.h>

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdarg.h>
#include <stdint.h>
#ifndef NETBSD
#include <nbcompat/stdio.h>
#else
#include <stdio.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#ifndef NETBSD
#include <nbcompat/netdb.h>
#else
#include <netdb.h>
#endif

#include <arpa/inet.h>

#include "fetch.h"
#include "common.h"
#include "httperr.h"
118 
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* Not a three-digit reply code: stands for an unparsable reply. */
#define HTTP_PROTOCOL_ERROR	999

/* True for the reply codes that send the client to another location. */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every client (4xx) and server (5xx) error reply code.
 * Both ends of the range are inclusive, so 400 and 599 count as errors.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
141 
142 
143 /*****************************************************************************
144  * I/O functions for decoding chunked streams
145  */
146 
147 struct httpio
148 {
149           conn_t              *conn;              /* connection */
150           int                  chunked; /* chunked mode */
151           int                  keep_alive;        /* keep-alive mode */
152           char                *buf;               /* chunk buffer */
153           size_t               bufsize; /* size of chunk buffer */
154           ssize_t              buflen;  /* amount of data currently in buffer */
155           size_t               bufpos;  /* current read offset in buffer */
156           int                  eof;               /* end-of-file flag */
157           int                  error;             /* error flag */
158           size_t               chunksize;         /* remaining size of current chunk */
159           off_t                contentlength;     /* remaining size of the content */
160 };
161 
162 /*
163  * Get next chunk header
164  */
165 static ssize_t
http_new_chunk(struct httpio * io)166 http_new_chunk(struct httpio *io)
167 {
168           char *p;
169 
170           if (fetch_getln(io->conn) == -1)
171                     return (-1);
172 
173           if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
174                     return (-1);
175 
176           for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
177                     if (*p == ';')
178                               break;
179                     if (!isxdigit((unsigned char)*p))
180                               return (-1);
181                     if (isdigit((unsigned char)*p)) {
182                               io->chunksize = io->chunksize * 16 +
183                                   *p - '0';
184                     } else {
185                               io->chunksize = io->chunksize * 16 +
186                                   10 + tolower((unsigned char)*p) - 'a';
187                     }
188           }
189 
190           return (io->chunksize);
191 }
192 
193 /*
194  * Grow the input buffer to at least len bytes
195  */
196 static int
http_growbuf(struct httpio * io,size_t len)197 http_growbuf(struct httpio *io, size_t len)
198 {
199           char *tmp;
200 
201           if (io->bufsize >= len)
202                     return (0);
203 
204           if ((tmp = realloc(io->buf, len)) == NULL)
205                     return (-1);
206           io->buf = tmp;
207           io->bufsize = len;
208           return (0);
209 }
210 
211 /*
212  * Fill the input buffer, do chunk decoding on the fly
213  */
214 static ssize_t
http_fillbuf(struct httpio * io,size_t len)215 http_fillbuf(struct httpio *io, size_t len)
216 {
217           if (io->error)
218                     return (-1);
219           if (io->eof)
220                     return (0);
221 
222           if (io->contentlength >= 0 && (off_t)len > io->contentlength)
223                     len = io->contentlength;
224 
225           if (io->chunked == 0) {
226                     if (http_growbuf(io, len) == -1)
227                               return (-1);
228                     if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
229                               io->error = 1;
230                               return (-1);
231                     }
232                     if (io->contentlength)
233                               io->contentlength -= io->buflen;
234                     io->bufpos = 0;
235                     return (io->buflen);
236           }
237 
238           if (io->chunksize == 0) {
239                     switch (http_new_chunk(io)) {
240                     case -1:
241                               io->error = 1;
242                               return (-1);
243                     case 0:
244                               io->eof = 1;
245                               if (fetch_getln(io->conn) == -1)
246                                         return (-1);
247                               return (0);
248                     }
249           }
250 
251           if (len > io->chunksize)
252                     len = io->chunksize;
253           if (http_growbuf(io, len) == -1)
254                     return (-1);
255           if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
256                     io->error = 1;
257                     return (-1);
258           }
259           io->chunksize -= io->buflen;
260           if (io->contentlength >= 0)
261                     io->contentlength -= io->buflen;
262 
263           if (io->chunksize == 0) {
264                     char endl[2];
265                     ssize_t len2;
266 
267                     len2 = fetch_read(io->conn, endl, 2);
268                     if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
269                               return (-1);
270                     if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
271                               return (-1);
272           }
273 
274           io->bufpos = 0;
275 
276           return (io->buflen);
277 }
278 
279 /*
280  * Read function
281  */
282 static ssize_t
http_readfn(void * v,void * buf,size_t len)283 http_readfn(void *v, void *buf, size_t len)
284 {
285           struct httpio *io = (struct httpio *)v;
286           size_t l, pos;
287 
288           if (io->error)
289                     return (-1);
290           if (io->eof)
291                     return (0);
292 
293           for (pos = 0; len > 0; pos += l, len -= l) {
294                     /* empty buffer */
295                     if (!io->buf || (ssize_t)io->bufpos == io->buflen)
296                               if (http_fillbuf(io, len) < 1)
297                                         break;
298                     l = io->buflen - io->bufpos;
299                     if (len < l)
300                               l = len;
301                     memcpy((char *)buf + pos, io->buf + io->bufpos, l);
302                     io->bufpos += l;
303           }
304 
305           if (!pos && io->error)
306                     return (-1);
307           return (pos);
308 }
309 
310 /*
311  * Write function
312  */
313 static ssize_t
http_writefn(void * v,const void * buf,size_t len)314 http_writefn(void *v, const void *buf, size_t len)
315 {
316           struct httpio *io = (struct httpio *)v;
317 
318           return (fetch_write(io->conn, buf, len));
319 }
320 
321 /*
322  * Close function
323  */
324 static void
http_closefn(void * v)325 http_closefn(void *v)
326 {
327           struct httpio *io = (struct httpio *)v;
328 
329           if (io->keep_alive) {
330                     int val;
331 
332                     val = 0;
333                     setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
334                                  (socklen_t)sizeof(val));
335                                 fetch_cache_put(io->conn, fetch_close);
336 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
337                     val = 1;
338                     setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
339                         sizeof(val));
340 #endif
341           } else {
342                     fetch_close(io->conn);
343           }
344 
345           free(io->buf);
346           free(io);
347 }
348 
349 /*
350  * Wrap a file descriptor up
351  */
352 static fetchIO *
http_funopen(conn_t * conn,int chunked,int keep_alive,off_t clength)353 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
354 {
355           struct httpio *io;
356           fetchIO *f;
357 
358           if ((io = calloc(1, sizeof(*io))) == NULL) {
359                     fetch_syserr();
360                     return (NULL);
361           }
362           io->conn = conn;
363           io->chunked = chunked;
364           io->contentlength = clength;
365           io->keep_alive = keep_alive;
366           f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
367           if (f == NULL) {
368                     fetch_syserr();
369                     free(io);
370                     return (NULL);
371           }
372           return (f);
373 }
374 
375 
376 /*****************************************************************************
377  * Helper functions for talking to the server and parsing its replies
378  */
379 
/*
 * Header types
 *
 * Values up to hdr_end are conditions rather than headers, which lets
 * callers loop with "while (h > hdr_end)".
 */
typedef enum {
	hdr_syserror		= -2,	/* reading the header line failed */
	hdr_error		= -1,	/* treated as a protocol error */
	hdr_end			= 0,	/* empty line: end of the headers */
	hdr_unknown		= 1,	/* a header we have no use for */
	hdr_connection		= 2,
	hdr_content_length	= 3,
	hdr_content_range	= 4,
	hdr_last_modified	= 5,
	hdr_location		= 6,
	hdr_transfer_encoding	= 7,
	hdr_www_authenticate	= 8
} hdr_t;
394 
395 /* Names of interesting headers */
396 static struct {
397           hdr_t                num;
398           const char          *name;
399 } hdr_names[] = {
400           { hdr_connection,             "Connection" },
401           { hdr_content_length,                   "Content-Length" },
402           { hdr_content_range,                    "Content-Range" },
403           { hdr_last_modified,                    "Last-Modified" },
404           { hdr_location,                         "Location" },
405           { hdr_transfer_encoding,      "Transfer-Encoding" },
406           { hdr_www_authenticate,                 "WWW-Authenticate" },
407           { hdr_unknown,                          NULL },
408 };
409 
410 /*
411  * Send a formatted line; optionally echo to terminal
412  */
413 LIBFETCH_PRINTFLIKE(2, 3)
414 static int
http_cmd(conn_t * conn,const char * fmt,...)415 http_cmd(conn_t *conn, const char *fmt, ...)
416 {
417           va_list ap;
418           size_t len;
419           char *msg;
420           ssize_t r;
421 
422           va_start(ap, fmt);
423           len = vasprintf(&msg, fmt, ap);
424           va_end(ap);
425 
426           if (msg == NULL) {
427                     errno = ENOMEM;
428                     fetch_syserr();
429                     return (-1);
430           }
431 
432           r = fetch_write(conn, msg, len);
433           free(msg);
434 
435           if (r == -1) {
436                     fetch_syserr();
437                     return (-1);
438           }
439 
440           return (0);
441 }
442 
443 /*
444  * Get and parse status line
445  */
446 static int
http_get_reply(conn_t * conn)447 http_get_reply(conn_t *conn)
448 {
449           char *p;
450 
451           if (fetch_getln(conn) == -1)
452                     return (-1);
453           /*
454            * A valid status line looks like "HTTP/m.n xyz reason" where m
455            * and n are the major and minor protocol version numbers and xyz
456            * is the reply code.
457            * Unfortunately, there are servers out there (NCSA 1.5.1, to name
458            * just one) that do not send a version number, so we can't rely
459            * on finding one, but if we do, insist on it being 1.0 or 1.1.
460            * We don't care about the reason phrase.
461            */
462           if (strncmp(conn->buf, "HTTP", 4) != 0)
463                     return (HTTP_PROTOCOL_ERROR);
464           p = conn->buf + 4;
465           if (*p == '/') {
466                     if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
467                               return (HTTP_PROTOCOL_ERROR);
468                     p += 4;
469           }
470           if (*p != ' ' ||
471               !isdigit((unsigned char)p[1]) ||
472               !isdigit((unsigned char)p[2]) ||
473               !isdigit((unsigned char)p[3]))
474                     return (HTTP_PROTOCOL_ERROR);
475 
476           conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
477           return (conn->err);
478 }
479 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name is compared without regard to case and must be followed
 * directly by a colon.  Whitespace after the colon is skipped.  Returns
 * NULL if the line carries a different header.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Step forward only while the characters agree, so that a
	 * mismatch leaves str on the offending character and the name
	 * is rejected below.
	 */
	while (*str && *hdr &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str || *hdr != ':')
		return (NULL);
	while (*hdr && isspace((unsigned char)*++hdr))
		/* nothing */;
	return (hdr);
}
496 
497 /*
498  * Get the next header and return the appropriate symbolic code.
499  */
500 static hdr_t
http_next_header(conn_t * conn,const char ** p)501 http_next_header(conn_t *conn, const char **p)
502 {
503           int i;
504 
505           if (fetch_getln(conn) == -1)
506                     return (hdr_syserror);
507           while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
508                     conn->buflen--;
509           conn->buf[conn->buflen] = '\0';
510           if (conn->buflen == 0)
511                     return (hdr_end);
512           /*
513            * We could check for malformed headers but we don't really care.
514            * A valid header starts with a token immediately followed by a
515            * colon; a token is any sequence of non-control, non-whitespace
516            * characters except "()<>@,;:\\\"{}".
517            */
518           for (i = 0; hdr_names[i].num != hdr_unknown; i++)
519                     if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
520                               return (hdr_names[i].num);
521           return (hdr_unknown);
522 }
523 
/*
 * Parse a last-modified header
 *
 * p is the header value; only the "Sun, 06 Nov 1994 08:49:37 GMT" date
 * form is understood.  On success the time is stored in *mtime and 0 is
 * returned; otherwise -1 is returned and *mtime is left alone.
 *
 * Without LC_C_LOCALE the LC_TIME locale is switched to "C" for the
 * duration of the parse and restored afterwards.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	struct tm tm;
	char *r;
#ifndef LC_C_LOCALE
	const char *cur;
	char *locale;
#endif

	/* strptime() sets only the fields the format names */
	memset(&tm, 0, sizeof(tm));

#ifdef LC_C_LOCALE
	r = strptime_l(p, "%a, %d %b %Y %H:%M:%S GMT", &tm, LC_C_LOCALE);
#else
	/* the locale query may fail; do not hand NULL to strdup() */
	cur = setlocale(LC_TIME, NULL);
	if (cur == NULL)
		return (-1);
	locale = strdup(cur);
	if (locale == NULL)
		return (-1);

	setlocale(LC_TIME, "C");
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	free(locale);
#endif
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
553 
/*
 * Parse a possibly empty run of decimal digits starting at *pp.
 *
 * On success the value is stored in *out (0 for an empty run), *pp is
 * advanced past the digits and 0 is returned.  If the value does not fit
 * in an off_t, -1 is returned and neither *pp nor *out is modified.
 */
static int
http_parse_digits(const char **pp, off_t *out)
{
	/* largest off_t: every value bit set, sign bit clear */
	const off_t off_max =
	    (off_t)((((uintmax_t)1 << (sizeof(off_t) * CHAR_BIT - 1)) - 1));
	const char *p;
	off_t val;
	int digit;

	val = 0;
	for (p = *pp; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* the digits come from the peer: stop before val overflows */
		if (val > (off_max - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	*pp = p;
	*out = val;
	return (0);
}

/*
 * Parse a content-length header
 *
 * p is the header value, which must consist of decimal digits only.
 * Returns 0 and stores the value in *length, or returns -1 if anything
 * else is found or the value does not fit in an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	off_t len;

	if (http_parse_digits(&p, &len) == -1 || *p != '\0')
		return (-1);
	*length = len;
	return (0);
}

/*
 * Parse a content-range header
 *
 * Accepts "bytes first-last/size" and "bytes * /size" (without the blank).
 * On success *offset receives first (-1 for the '*' form), *length the
 * number of bytes in the range (0 for the '*' form) and *size the total
 * size, and 0 is returned.  Returns -1 for anything malformed, for
 * numbers that do not fit in an off_t, for first > last and for a range
 * that is longer than the total size.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_parse_digits(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_parse_digits(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_parse_digits(&p, &len) == -1)
		return (-1);
	/* same test as "len < last - first + 1", minus the overflow */
	if (*p || len <= last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
606 
607 
608 /*****************************************************************************
609  * Helper functions for authorization
610  */
611 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * freshly allocated, NUL-terminated string that the caller must free.
 * Returns NULL if the allocation fails.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/* read the input as unsigned bytes so values >= 0x80 stay positive */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per group of up to three input bytes */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* a trailing group of one or two bytes is padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = '\0';
	return (str);
}
667 
668 /*
669  * Encode username and password
670  */
671 static int
http_basic_auth(conn_t * conn,const char * hdr,const char * usr,const char * pwd)672 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
673 {
674           char *upw, *auth;
675           int r;
676 
677           if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
678                     return (-1);
679           auth = http_base64(upw);
680           free(upw);
681           if (auth == NULL)
682                     return (-1);
683           r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
684           free(auth);
685           return (r);
686 }
687 
688 /*
689  * Send an authorization header
690  */
691 static int
http_authorize(conn_t * conn,const char * hdr,const char * p)692 http_authorize(conn_t *conn, const char *hdr, const char *p)
693 {
694           /* basic authorization */
695           if (strncasecmp(p, "basic:", 6) == 0) {
696                     char *user, *pwd, *str;
697                     int r;
698 
699                     /* skip realm */
700                     for (p += 6; *p && *p != ':'; ++p)
701                               /* nothing */ ;
702                     if (!*p || strchr(++p, ':') == NULL)
703                               return (-1);
704                     if ((str = strdup(p)) == NULL)
705                               return (-1); /* XXX */
706                     user = str;
707                     pwd = strchr(str, ':');
708                     *pwd++ = '\0';
709                     r = http_basic_auth(conn, hdr, user, pwd);
710                     free(str);
711                     return (r);
712           }
713           return (-1);
714 }
715 
716 
717 /*****************************************************************************
718  * Helper functions for connecting to a server or proxy
719  */
720 
721 /*
722  * Connect to the correct HTTP server or proxy.
723  */
724 static conn_t *
http_connect(struct url * URL,struct url * purl,const char * flags,int * cached)725 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
726 {
727           struct url *curl;
728           conn_t *conn;
729           hdr_t h;
730           const char *p;
731           int af, verbose;
732 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
733           int val;
734 #endif
735 
736           *cached = 0;
737 
738 #ifdef INET6
739           af = AF_UNSPEC;
740 #else
741           af = AF_INET;
742 #endif
743 
744           verbose = CHECK_FLAG('v');
745           if (CHECK_FLAG('4'))
746                     af = AF_INET;
747 #ifdef INET6
748           else if (CHECK_FLAG('6'))
749                     af = AF_INET6;
750 #endif
751 
752           curl = (purl != NULL) ? purl : URL;
753           if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
754                     URL = purl;
755           } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
756                     /* can't talk http to an ftp server */
757                     /* XXX should set an error code */
758                     return (NULL);
759           }
760 
761           if ((conn = fetch_cache_get(curl, af)) != NULL) {
762                     *cached = 1;
763                     return (conn);
764           }
765 
766           if ((conn = fetch_connect(curl, af, verbose)) == NULL)
767                     /* fetch_connect() has already set an error code */
768                     return (NULL);
769           if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 && purl) {
770                     http_cmd(conn, "CONNECT %s:%d HTTP/1.1\r\n",
771                                         URL->host, URL->port);
772                     http_cmd(conn, "Host: %s:%d\r\n",
773                                         URL->host, URL->port);
774                     /* proxy authorization */
775                     if (*purl->user || *purl->pwd)
776                               http_basic_auth(conn, "Proxy-Authorization",
777                                   purl->user, purl->pwd);
778                     else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
779                               http_authorize(conn, "Proxy-Authorization", p);
780                     http_cmd(conn, "\r\n");
781                     if (http_get_reply(conn) != HTTP_OK) {
782                               http_seterr(conn->err);
783                               goto ouch;
784                     }
785                     /* Read and discard the rest of the proxy response (if any) */
786                     do {
787                               switch ((h = http_next_header(conn, &p))) {
788                               case hdr_syserror:
789                                         fetch_syserr();
790                                         goto ouch;
791                               case hdr_error:
792                                         http_seterr(HTTP_PROTOCOL_ERROR);
793                                         goto ouch;
794                               default:
795                                         /* ignore */ ;
796                               }
797                     } while (h > hdr_end);
798           }
799           if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
800               fetch_ssl(conn, URL, verbose) == -1) {
801                     /* grrr */
802 #ifdef EAUTH
803                     errno = EAUTH;
804 #else
805                     errno = EPERM;
806 #endif
807                     goto ouch;
808           }
809 
810 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
811           val = 1;
812           setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
813 #endif
814 
815           return (conn);
816 ouch:
817           fetch_close(conn);
818           return (NULL);
819 }
820 
821 static struct url *
http_get_proxy(struct url * url,const char * flags)822 http_get_proxy(struct url * url, const char *flags)
823 {
824           struct url *purl;
825           char *p;
826 
827           if (flags != NULL && strchr(flags, 'd') != NULL)
828                     return (NULL);
829           if (fetch_no_proxy_match(url->host))
830                     return (NULL);
831           if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
832               *p && (purl = fetchParseURL(p))) {
833                     if (!*purl->scheme)
834                               strcpy(purl->scheme, SCHEME_HTTP);
835                     if (!purl->port)
836                               purl->port = fetch_default_proxy_port(purl->scheme);
837                     if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
838                               return (purl);
839                     fetchFreeURL(purl);
840           }
841           return (NULL);
842 }
843 
844 static void
set_if_modified_since(conn_t * conn,time_t last_modified)845 set_if_modified_since(conn_t *conn, time_t last_modified)
846 {
847           static const char weekdays[] = "SunMonTueWedThuFriSat";
848           static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
849           struct tm tm;
850           char buf[80];
851           gmtime_r(&last_modified, &tm);
852           snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4ld %02d:%02d:%02d GMT",
853               weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
854               (long)tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
855           http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
856 }
857 
858 
859 /*****************************************************************************
860  * Core
861  */
862 
863 /*
864  * Send a request and process the reply
865  *
866  * XXX This function is way too long, the do..while loop should be split
867  * XXX off into a separate function.
868  */
869 fetchIO *
http_request(struct url * URL,const char * op,struct url_stat * us,struct url * purl,const char * flags)870 http_request(struct url *URL, const char *op, struct url_stat *us,
871     struct url *purl, const char *flags)
872 {
873           conn_t *conn;
874           struct url *url, *new;
875           int chunked, direct, if_modified_since, need_auth, noredirect;
876           int keep_alive, verbose, cached;
877           int e, i, n, val;
878           off_t offset, clength, length, size;
879           time_t mtime;
880           const char *p;
881           fetchIO *f;
882           hdr_t h;
883           char hbuf[URL_HOSTLEN + 7], *host;
884 
885           direct = CHECK_FLAG('d');
886           noredirect = CHECK_FLAG('A');
887           verbose = CHECK_FLAG('v');
888           if_modified_since = CHECK_FLAG('i');
889           keep_alive = 0;
890 
891           if (direct && purl) {
892                     fetchFreeURL(purl);
893                     purl = NULL;
894           }
895 
896           /* try the provided URL first */
897           url = URL;
898 
899           /* if the A flag is set, we only get one try */
900           n = noredirect ? 1 : MAX_REDIRECT;
901           i = 0;
902 
903           e = HTTP_PROTOCOL_ERROR;
904           need_auth = 0;
905           do {
906                     new = NULL;
907                     chunked = 0;
908                     offset = 0;
909                     clength = -1;
910                     length = -1;
911                     size = -1;
912                     mtime = 0;
913 
914                     /* check port */
915                     if (!url->port)
916                               url->port = fetch_default_port(url->scheme);
917 
918                     /* were we redirected to an FTP URL? */
919                     if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
920                               if (strcmp(op, "GET") == 0)
921                                         return (ftp_request(url, "RETR", NULL, us, purl, flags));
922                               else if (strcmp(op, "HEAD") == 0)
923                                         return (ftp_request(url, "STAT", NULL, us, purl, flags));
924                     }
925 
926                     /* connect to server or proxy */
927                     if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
928                               goto ouch;
929 
930                     host = url->host;
931 #ifdef INET6
932                     if (strchr(url->host, ':')) {
933                               snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
934                               host = hbuf;
935                     }
936 #endif
937                     if (url->port != fetch_default_port(url->scheme)) {
938                               if (host != hbuf) {
939                                         strcpy(hbuf, host);
940                                         host = hbuf;
941                               }
942                               snprintf(hbuf + strlen(hbuf),
943                                   sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
944                     }
945 
946                     /* send request */
947                     if (verbose)
948                               fetch_info("requesting %s://%s%s",
949                                   url->scheme, host, url->doc);
950                     if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
951                               http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
952                                   op, url->scheme, host, url->doc);
953                     } else {
954                               http_cmd(conn, "%s %s HTTP/1.1\r\n",
955                                   op, url->doc);
956                     }
957 
958                     if (if_modified_since && url->last_modified > 0)
959                               set_if_modified_since(conn, url->last_modified);
960 
961                     /* virtual host */
962                     http_cmd(conn, "Host: %s\r\n", host);
963 
964                     /* proxy authorization */
965                     if (purl) {
966                               if (*purl->user || *purl->pwd)
967                                         http_basic_auth(conn, "Proxy-Authorization",
968                                             purl->user, purl->pwd);
969                               else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
970                                         http_authorize(conn, "Proxy-Authorization", p);
971                     }
972 
973                     /* server authorization */
974                     if (need_auth || *url->user || *url->pwd) {
975                               if (*url->user || *url->pwd)
976                                         http_basic_auth(conn, "Authorization", url->user, url->pwd);
977                               else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
978                                         http_authorize(conn, "Authorization", p);
979                               else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
980                                         http_basic_auth(conn, "Authorization", url->user, url->pwd);
981                               } else {
982                                         http_seterr(HTTP_NEED_AUTH);
983                                         goto ouch;
984                               }
985                     }
986 
987                     /* other headers */
988                     if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
989                               if (strcasecmp(p, "auto") == 0)
990                                         http_cmd(conn, "Referer: %s://%s%s\r\n",
991                                             url->scheme, host, url->doc);
992                               else
993                                         http_cmd(conn, "Referer: %s\r\n", p);
994                     }
995                     if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
996                               http_cmd(conn, "User-Agent: %s\r\n", p);
997                     else
998                               http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
999                     if (url->offset > 0)
1000                               http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
1001                     http_cmd(conn, "\r\n");
1002 
1003                     /*
1004                      * Force the queued request to be dispatched.  Normally, one
1005                      * would do this with shutdown(2) but squid proxies can be
1006                      * configured to disallow such half-closed connections.  To
1007                      * be compatible with such configurations, fiddle with socket
1008                      * options to force the pending data to be written.
1009                      */
1010 #if defined(TCP_NOPUSH) && !defined(__APPLE__)
1011                     val = 0;
1012                     setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
1013                                  sizeof(val));
1014 #endif
1015                     val = 1;
1016                     setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
1017                         (socklen_t)sizeof(val));
1018 
1019                     /* get reply */
1020                     switch (http_get_reply(conn)) {
1021                     case HTTP_OK:
1022                     case HTTP_PARTIAL:
1023                     case HTTP_NOT_MODIFIED:
1024                               /* fine */
1025                               break;
1026                     case HTTP_MOVED_PERM:
1027                     case HTTP_MOVED_TEMP:
1028                     case HTTP_SEE_OTHER:
1029                               /*
1030                                * Not so fine, but we still have to read the
1031                                * headers to get the new location.
1032                                */
1033                               break;
1034                     case HTTP_NEED_AUTH:
1035                               if (need_auth) {
1036                                         /*
1037                                          * We already sent out authorization code,
1038                                          * so there's nothing more we can do.
1039                                          */
1040                                         http_seterr(conn->err);
1041                                         goto ouch;
1042                               }
1043                               /* try again, but send the password this time */
1044                               if (verbose)
1045                                         fetch_info("server requires authorization");
1046                               break;
1047                     case HTTP_NEED_PROXY_AUTH:
1048                               /*
1049                                * If we're talking to a proxy, we already sent
1050                                * our proxy authorization code, so there's
1051                                * nothing more we can do.
1052                                */
1053                               http_seterr(conn->err);
1054                               goto ouch;
1055                     case HTTP_BAD_RANGE:
1056                               /*
1057                                * This can happen if we ask for 0 bytes because
1058                                * we already have the whole file.  Consider this
1059                                * a success for now, and check sizes later.
1060                                */
1061                               break;
1062                     case HTTP_PROTOCOL_ERROR:
1063                               /* fall through */
1064                     case -1:
1065                               --i;
1066                               if (cached)
1067                                         continue;
1068                               fetch_syserr();
1069                               goto ouch;
1070                     default:
1071                               http_seterr(conn->err);
1072                               if (!verbose)
1073                                         goto ouch;
1074                               /* fall through so we can get the full error message */
1075                     }
1076 
1077                     /* get headers */
1078                     do {
1079                               switch ((h = http_next_header(conn, &p))) {
1080                               case hdr_syserror:
1081                                         fetch_syserr();
1082                                         goto ouch;
1083                               case hdr_error:
1084                                         http_seterr(HTTP_PROTOCOL_ERROR);
1085                                         goto ouch;
1086                               case hdr_connection:
1087                                         /* XXX too weak? */
1088                                         keep_alive = (strcasecmp(p, "keep-alive") == 0);
1089                                         break;
1090                               case hdr_content_length:
1091                                         http_parse_length(p, &clength);
1092                                         break;
1093                               case hdr_content_range:
1094                                         http_parse_range(p, &offset, &length, &size);
1095                                         break;
1096                               case hdr_last_modified:
1097                                         http_parse_mtime(p, &mtime);
1098                                         break;
1099                               case hdr_location:
1100                                         if (!HTTP_REDIRECT(conn->err))
1101                                                   break;
1102                                         if (new)
1103                                                   free(new);
1104                                         if (verbose)
1105                                                   fetch_info("%d redirect to %s", conn->err, p);
1106                                         if (*p == '/')
1107                                                   /* absolute path */
1108                                                   new = fetchMakeURL(url->scheme, url->host, url->port, p,
1109                                                       url->user, url->pwd);
1110                                         else
1111                                                   new = fetchParseURL(p);
1112                                         if (new == NULL) {
1113                                                   /* XXX should set an error code */
1114                                                   goto ouch;
1115                                         }
1116                                         if (!*new->user && !*new->pwd) {
1117                                                   strcpy(new->user, url->user);
1118                                                   strcpy(new->pwd, url->pwd);
1119                                         }
1120                                         new->offset = url->offset;
1121                                         new->length = url->length;
1122                                         break;
1123                               case hdr_transfer_encoding:
1124                                         /* XXX weak test*/
1125                                         chunked = (strcasecmp(p, "chunked") == 0);
1126                                         break;
1127                               case hdr_www_authenticate:
1128                                         if (conn->err != HTTP_NEED_AUTH)
1129                                                   break;
1130                                         /* if we were smarter, we'd check the method and realm */
1131                                         break;
1132                               case hdr_end:
1133                                         /* fall through */
1134                               case hdr_unknown:
1135                                         /* ignore */
1136                                         break;
1137                               }
1138                     } while (h > hdr_end);
1139 
1140                     /* we need to provide authentication */
1141                     if (conn->err == HTTP_NEED_AUTH) {
1142                               e = conn->err;
1143                               need_auth = 1;
1144                               fetch_close(conn);
1145                               conn = NULL;
1146                               continue;
1147                     }
1148 
1149                     /* requested range not satisfiable */
1150                     if (conn->err == HTTP_BAD_RANGE) {
1151                               if (url->offset == size && url->length == 0) {
1152                                         /* asked for 0 bytes; fake it */
1153                                         offset = url->offset;
1154                                         conn->err = HTTP_OK;
1155                                         break;
1156                               } else {
1157                                         http_seterr(conn->err);
1158                                         goto ouch;
1159                               }
1160                     }
1161 
1162                     /* we have a hit or an error */
1163                     if (conn->err == HTTP_OK ||
1164                         conn->err == HTTP_PARTIAL ||
1165                         conn->err == HTTP_NOT_MODIFIED ||
1166                         HTTP_ERROR(conn->err))
1167                               break;
1168 
1169                     /* all other cases: we got a redirect */
1170                     e = conn->err;
1171                     need_auth = 0;
1172                     fetch_close(conn);
1173                     conn = NULL;
1174                     if (!new)
1175                               break;
1176                     if (url != URL)
1177                               fetchFreeURL(url);
1178                     url = new;
1179           } while (++i < n);
1180 
1181           /* we failed, or ran out of retries */
1182           if (conn == NULL) {
1183                     http_seterr(e);
1184                     goto ouch;
1185           }
1186 
1187           /* check for inconsistencies */
1188           if (clength != -1 && length != -1 && clength != length) {
1189                     http_seterr(HTTP_PROTOCOL_ERROR);
1190                     goto ouch;
1191           }
1192           if (clength == -1)
1193                     clength = length;
1194           if (clength != -1)
1195                     length = offset + clength;
1196           if (length != -1 && size != -1 && length != size) {
1197                     http_seterr(HTTP_PROTOCOL_ERROR);
1198                     goto ouch;
1199           }
1200           if (size == -1)
1201                     size = length;
1202 
1203           /* fill in stats */
1204           if (us) {
1205                     us->size = size;
1206                     us->atime = us->mtime = mtime;
1207           }
1208 
1209           /* too far? */
1210           if (URL->offset > 0 && offset > URL->offset) {
1211                     http_seterr(HTTP_PROTOCOL_ERROR);
1212                     goto ouch;
1213           }
1214 
1215           /* report back real offset and size */
1216           URL->offset = offset;
1217           URL->length = clength;
1218 
1219           if (clength == -1 && !chunked)
1220                     keep_alive = 0;
1221 
1222           if (conn->err == HTTP_NOT_MODIFIED) {
1223                     http_seterr(HTTP_NOT_MODIFIED);
1224                     if (keep_alive) {
1225                               fetch_cache_put(conn, fetch_close);
1226                               conn = NULL;
1227                     }
1228                     goto ouch;
1229           }
1230 
1231           /* wrap it up in a fetchIO */
1232           if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1233                     fetch_syserr();
1234                     goto ouch;
1235           }
1236 
1237           if (url != URL)
1238                     fetchFreeURL(url);
1239           if (purl)
1240                     fetchFreeURL(purl);
1241 
1242           if (HTTP_ERROR(conn->err)) {
1243 
1244                     if (keep_alive) {
1245                               char buf[512];
1246                               do {
1247                               } while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1248                     }
1249 
1250                     fetchIO_close(f);
1251                     f = NULL;
1252           }
1253 
1254           return (f);
1255 
1256 ouch:
1257           if (url != URL)
1258                     fetchFreeURL(url);
1259           if (purl)
1260                     fetchFreeURL(purl);
1261           if (conn != NULL)
1262                     fetch_close(conn);
1263           return (NULL);
1264 }
1265 
1266 
1267 /*****************************************************************************
1268  * Entry points
1269  */
1270 
1271 /*
1272  * Retrieve and stat a file by HTTP
1273  */
1274 fetchIO *
fetchXGetHTTP(struct url * URL,struct url_stat * us,const char * flags)1275 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1276 {
1277           return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1278 }
1279 
1280 /*
1281  * Retrieve a file by HTTP
1282  */
1283 fetchIO *
fetchGetHTTP(struct url * URL,const char * flags)1284 fetchGetHTTP(struct url *URL, const char *flags)
1285 {
1286           return (fetchXGetHTTP(URL, NULL, flags));
1287 }
1288 
1289 /*
1290  * Store a file by HTTP
1291  */
1292 fetchIO *
1293 /*ARGSUSED*/
fetchPutHTTP(struct url * URL __unused,const char * flags __unused)1294 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1295 {
1296           fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1297           return (NULL);
1298 }
1299 
1300 /*
1301  * Get an HTTP document's metadata
1302  */
1303 int
fetchStatHTTP(struct url * URL,struct url_stat * us,const char * flags)1304 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1305 {
1306           fetchIO *f;
1307 
1308           f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1309           if (f == NULL)
1310                     return (-1);
1311           fetchIO_close(f);
1312           return (0);
1313 }
1314 
/*
 * States of the character-driven HTML scanner in parse_index(), which
 * looks for quoted href values inside <a ...> tags.
 */
enum http_states {
	ST_NONE,	/* plain text, not in markup */
	ST_LT,		/* in tag: "<" seen */
	ST_LTA,		/* in tag: "<a" seen */
	ST_TAGA,	/* in a-tag: "<a " seen, between keywords */
	ST_H,		/* in a-tag: "<a h" seen */
	ST_R,		/* in a-tag: "<a hr" seen */
	ST_E,		/* in a-tag: "<a hre" seen */
	ST_F,		/* in a-tag: "<a href" seen */
	ST_HREF,	/* in a-tag: "<a href=" seen */
	ST_HREFQ,	/* inside the quoted href value */
	ST_TAG,		/* in a tag other than "<a"; disregarded */
	ST_TAGAX,	/* in an unknown keyword of an a-tag */
	ST_TAGAQ	/* in a quoted argument of another a-tag keyword */
};
1330 
/*
 * Scanner context handed to parse_index() on every call.
 */
struct index_parser {
	struct url_list *ue;	/* list that found href entries are added to */
	struct url *url;	/* URL passed to fetch_add_entry() with each href */
	enum http_states state;	/* current scanner state */
};
1336 
1337 static ssize_t
parse_index(struct index_parser * parser,const char * buf,size_t len)1338 parse_index(struct index_parser *parser, const char *buf, size_t len)
1339 {
1340           char *end_attr, p = *buf;
1341 
1342           switch (parser->state) {
1343           case ST_NONE:
1344                     /* Plain text, not in markup */
1345                     if (p == '<')
1346                               parser->state = ST_LT;
1347                     return 1;
1348           case ST_LT:
1349                     /* In tag -- "<" already found */
1350                     if (p == '>')
1351                               parser->state = ST_NONE;
1352                     else if (p == 'a' || p == 'A')
1353                               parser->state = ST_LTA;
1354                     else if (!isspace((unsigned char)p))
1355                               parser->state = ST_TAG;
1356                     return 1;
1357           case ST_LTA:
1358                     /* In tag -- "<a" already found */
1359                     if (p == '>')
1360                               parser->state = ST_NONE;
1361                     else if (p == '"')
1362                               parser->state = ST_TAGAQ;
1363                     else if (isspace((unsigned char)p))
1364                               parser->state = ST_TAGA;
1365                     else
1366                               parser->state = ST_TAG;
1367                     return 1;
1368           case ST_TAG:
1369                     /* In tag, but not "<a" -- disregard */
1370                     if (p == '>')
1371                               parser->state = ST_NONE;
1372                     return 1;
1373           case ST_TAGA:
1374                     /* In a-tag -- "<a " already found */
1375                     if (p == '>')
1376                               parser->state = ST_NONE;
1377                     else if (p == '"')
1378                               parser->state = ST_TAGAQ;
1379                     else if (p == 'h' || p == 'H')
1380                               parser->state = ST_H;
1381                     else if (!isspace((unsigned char)p))
1382                               parser->state = ST_TAGAX;
1383                     return 1;
1384           case ST_TAGAX:
1385                     /* In unknown keyword in a-tag */
1386                     if (p == '>')
1387                               parser->state = ST_NONE;
1388                     else if (p == '"')
1389                               parser->state = ST_TAGAQ;
1390                     else if (isspace((unsigned char)p))
1391                               parser->state = ST_TAGA;
1392                     return 1;
1393           case ST_TAGAQ:
1394                     /* In a-tag, unknown argument for keys. */
1395                     if (p == '>')
1396                               parser->state = ST_NONE;
1397                     else if (p == '"')
1398                               parser->state = ST_TAGA;
1399                     return 1;
1400           case ST_H:
1401                     /* In a-tag -- "<a h" already found */
1402                     if (p == '>')
1403                               parser->state = ST_NONE;
1404                     else if (p == '"')
1405                               parser->state = ST_TAGAQ;
1406                     else if (p == 'r' || p == 'R')
1407                               parser->state = ST_R;
1408                     else if (isspace((unsigned char)p))
1409                               parser->state = ST_TAGA;
1410                     else
1411                               parser->state = ST_TAGAX;
1412                     return 1;
1413           case ST_R:
1414                     /* In a-tag -- "<a hr" already found */
1415                     if (p == '>')
1416                               parser->state = ST_NONE;
1417                     else if (p == '"')
1418                               parser->state = ST_TAGAQ;
1419                     else if (p == 'e' || p == 'E')
1420                               parser->state = ST_E;
1421                     else if (isspace((unsigned char)p))
1422                               parser->state = ST_TAGA;
1423                     else
1424                               parser->state = ST_TAGAX;
1425                     return 1;
1426           case ST_E:
1427                     /* In a-tag -- "<a hre" already found */
1428                     if (p == '>')
1429                               parser->state = ST_NONE;
1430                     else if (p == '"')
1431                               parser->state = ST_TAGAQ;
1432                     else if (p == 'f' || p == 'F')
1433                               parser->state = ST_F;
1434                     else if (isspace((unsigned char)p))
1435                               parser->state = ST_TAGA;
1436                     else
1437                               parser->state = ST_TAGAX;
1438                     return 1;
1439           case ST_F:
1440                     /* In a-tag -- "<a href" already found */
1441                     if (p == '>')
1442                               parser->state = ST_NONE;
1443                     else if (p == '"')
1444                               parser->state = ST_TAGAQ;
1445                     else if (p == '=')
1446                               parser->state = ST_HREF;
1447                     else if (!isspace((unsigned char)p))
1448                               parser->state = ST_TAGAX;
1449                     return 1;
1450           case ST_HREF:
1451                     /* In a-tag -- "<a href=" already found */
1452                     if (p == '>')
1453                               parser->state = ST_NONE;
1454                     else if (p == '"')
1455                               parser->state = ST_HREFQ;
1456                     else if (!isspace((unsigned char)p))
1457                               parser->state = ST_TAGA;
1458                     return 1;
1459           case ST_HREFQ:
1460                     /* In href of the a-tag */
1461                     end_attr = memchr(buf, '"', len);
1462                     if (end_attr == NULL)
1463                               return 0;
1464                     *end_attr = '\0';
1465                     parser->state = ST_TAGA;
1466                     if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1467                               return -1;
1468                     return end_attr + 1 - buf;
1469           }
1470           /* NOTREACHED */
1471           abort();
1472 }
1473 
/*
 * One cached directory listing, kept in a singly linked list so that
 * fetchListHTTP() can answer repeated requests for the same URL
 * without contacting the server again.
 */
struct http_index_cache {
	struct http_index_cache *next;	/* next cached listing, NULL at the end */
	struct url *location;		/* copy of the URL that was listed */
	struct url_list ue;		/* entries parsed from that listing */
};

/* Head of the listing cache; fetchListHTTP() inserts new entries at the front. */
static struct http_index_cache *index_cache;
1481 
1482 /*
1483  * List a directory
1484  */
1485 int
1486 /*ARGSUSED*/
fetchListHTTP(struct url_list * ue,struct url * url,const char * pattern __unused,const char * flags)1487 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1488 {
1489           fetchIO *f;
1490           char buf[2 * PATH_MAX];
1491           size_t buf_len, sum_processed;
1492           ssize_t read_len, processed;
1493           struct index_parser state;
1494           struct http_index_cache *cache = NULL;
1495           int do_cache, ret;
1496 
1497           do_cache = CHECK_FLAG('c');
1498 
1499           if (do_cache) {
1500                     for (cache = index_cache; cache != NULL; cache = cache->next) {
1501                               if (strcmp(cache->location->scheme, url->scheme))
1502                                         continue;
1503                               if (strcmp(cache->location->user, url->user))
1504                                         continue;
1505                               if (strcmp(cache->location->pwd, url->pwd))
1506                                         continue;
1507                               if (strcmp(cache->location->host, url->host))
1508                                         continue;
1509                               if (cache->location->port != url->port)
1510                                         continue;
1511                               if (strcmp(cache->location->doc, url->doc))
1512                                         continue;
1513                               return fetchAppendURLList(ue, &cache->ue);
1514                     }
1515 
1516                     cache = malloc(sizeof(*cache));
1517                     fetchInitURLList(&cache->ue);
1518                     cache->location = fetchCopyURL(url);
1519           }
1520 
1521           f = fetchGetHTTP(url, flags);
1522           if (f == NULL) {
1523                     if (do_cache) {
1524                               fetchFreeURLList(&cache->ue);
1525                               fetchFreeURL(cache->location);
1526                               free(cache);
1527                     }
1528                     return -1;
1529           }
1530 
1531           state.url = url;
1532           state.state = ST_NONE;
1533           if (do_cache) {
1534                     state.ue = &cache->ue;
1535           } else {
1536                     state.ue = ue;
1537           }
1538 
1539           buf_len = 0;
1540 
1541           while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1542                     buf_len += read_len;
1543                     sum_processed = 0;
1544                     do {
1545                               processed = parse_index(&state, buf + sum_processed, buf_len);
1546                               if (processed == -1)
1547                                         break;
1548                               buf_len -= processed;
1549                               sum_processed += processed;
1550                     } while (processed != 0 && buf_len > 0);
1551                     if (processed == -1) {
1552                               read_len = -1;
1553                               break;
1554                     }
1555                     memmove(buf, buf + sum_processed, buf_len);
1556           }
1557 
1558           fetchIO_close(f);
1559 
1560           ret = read_len < 0 ? -1 : 0;
1561 
1562           if (do_cache) {
1563                     if (ret == 0) {
1564                               cache->next = index_cache;
1565                               index_cache = cache;
1566                     }
1567 
1568                     if (fetchAppendURLList(ue, &cache->ue))
1569                               ret = -1;
1570           }
1571 
1572           return ret;
1573 }
1574