/* wget.c - Simple downloader to get the resource file in HTTP server
 *
 * Copyright 2016 Lipi C.H. Lee <lipisoft@gmail.com>
 *

USE_WGET(NEWTOY(wget, "f:", TOYFLAG_USR|TOYFLAG_BIN))

config WGET
  bool "wget"
  default n
  help
    usage: wget -f filename URL
    -f filename: specify the filename to be saved
    URL: HTTP uniform resource location and only HTTP, not HTTPS

    examples:
      wget -f index.html http://www.example.com
      wget -f sample.jpg http://www.example.com:8080/sample.jpg
*/

215e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee#define FOR_wget
225e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee#include "toys.h"
235e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
245e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. LeeGLOBALS(
255e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  char *filename;
265e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee)
275e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
// Extract the hostname from url, which must already point past "http://".
//
// Copies characters up to (not including) the first ':', '/' or end of
// string into hostname and NUL-terminates it. hostname must provide 1024
// bytes, so at most 1023 hostname characters are accepted; a longer name is
// a fatal error.
//
// Returns the number of characters consumed from url.
static unsigned get_hn(const char *url, char *hostname)
{
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != ':' && url[i] != '/'; i++) {
    // Keep the final byte free for the terminator written after the loop.
    if (i >= 1023) error_exit("too long hostname in URL");
    hostname[i] = url[i];
  }
  hostname[i] = '\0';

  return i;
}

// Extract the decimal port number that follows the ':' in a URL.
//
// url points at the first character after the ':'. Digits are copied into
// port up to the next '/' or end of string, then NUL-terminated. port must
// provide 6 bytes, so at most 5 digits are accepted. Any non-digit, or a
// sixth digit, is a fatal error.
//
// url_i is the caller's index of url[0] within the complete URL; the return
// value is that index advanced past the port digits.
static unsigned get_port(const char *url, char *port, unsigned url_i)
{
  unsigned i;

  for (i = 0; url[i] != '\0' && url[i] != '/'; i++, url_i++) {
    if (url[i] < '0' || url[i] > '9') error_exit("wrong decimal port number");
    // Check the bound before storing so port[] is never overrun.
    if (i >= 5) error_exit("too long port number");
    port[i] = url[i];
  }
  port[i] = '\0';

  return url_i;
}

// Split an "http://host[:port][/path]" URL into hostname, port and path.
//
// hostname and path must each provide 1024 bytes and port 6 bytes. port
// defaults to "80" and path to "/" when the URL omits them. Any URL that
// does not start with "http://", or has something other than ':', '/' or
// end of string after the hostname, is a fatal error.
static void get_info(const char *url, char *hostname, char *port, char *path)
{
  unsigned i = 7; // strlen("http://")

  if (strncmp(url, "http://", i)) error_exit("only HTTP support");
  i += get_hn(url+i, hostname);

  // get port if exists
  if (url[i] != ':') strcpy(port, "80");
  else {
    i++; // skip the ':'
    i = get_port(url+i, port, i);
  }

  // get uri in URL
  if (url[i] == '\0') strcpy(path, "/");
  else if (url[i] != '/') error_exit("wrong URL");
  else if (strlen(url+i) >= 1024) error_exit("too long path in URL");
  else strcpy(path, url+i);
}

// Connect to hostname:port over TCP, using either IPv4 or IPv6.
//
// Resolves the name with getaddrinfo() and tries each returned address in
// order until one connects. Failures on individual addresses are reported
// with perror_msg(); failing to resolve, or to connect to any address, is a
// fatal error.
//
// Returns the connected socket descriptor.
static int conn_svr(const char *hostname, const char *port)
{
  struct addrinfo hints, *result, *rp;
  int sock = -1, rc;

  memset(&hints, 0, sizeof(hints));
  hints.ai_family = AF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;

  // getaddrinfo() reports failure through its return value, not errno, so
  // keep the code in a local rather than overwriting errno with it.
  if ((rc = getaddrinfo(hostname, port, &hints, &result)))
    error_exit("getaddrinfo: %s", gai_strerror(rc));

  // try all address list(IPv4 or IPv6) until success
  for (rp = result; rp; rp = rp->ai_next) {
    sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
    if (sock == -1) {
      perror_msg("socket error");
      continue;
    }
    // succeed in connecting to any server IP
    if (connect(sock, rp->ai_addr, rp->ai_addrlen) != -1) break;
    perror_msg("connect error");
    close(sock);
  }
  freeaddrinfo(result);
  // rp is only compared against NULL here, never dereferenced after free.
  if (!rp) error_exit("can't connect");

  return sock;
}

1095e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee// make HTTP request header field
1105e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Leestatic void mk_fld(char *name, char *value) {
1115e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strcat(toybuf, name);
1125e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strcat(toybuf, ": ");
1135e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strcat(toybuf, value);
1145e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strcat(toybuf, "\r\n");
1155e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee}
1165e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1175e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee// get http response body starting address and its length
11821701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Leestatic char *get_body(ssize_t len, ssize_t *body_len) {
11921701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  int i;
1205e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1215e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  for (i = 0; i < len-4; i++)
1225e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee    if (!strncmp(toybuf+i, "\r\n\r\n", 4)) break;
1235e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
12421701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  *body_len = len - i - 4;
1255e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  return toybuf+i+4;
1265e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee}
1275e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1285e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Leevoid wget_main(void)
1295e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee{
1305e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  int sock;
1315e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  FILE *fp;
13221701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  ssize_t len, body_len;
13321701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  char *body, *result, *rc, *r_str;
13421701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  char ua[18] = "toybox wget/", ver[6], hostname[1024], port[6], path[1024];
1355e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1365e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  // TODO extract filename to be saved from URL
13721701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (!(toys.optflags & FLAG_f)) help_exit("no filename");
13821701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (fopen(TT.filename, "r")) perror_exit("file already exists");
1395e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
14021701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if(!toys.optargs[0]) help_exit("no URL");
14121701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  get_info(toys.optargs[0], hostname, port, path);
1425e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
14321701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  sock = conn_svr(hostname, port);
1445e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1455e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  // compose HTTP request
14621701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  sprintf(toybuf, "GET %s HTTP/1.1\r\n", path);
14721701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  mk_fld("Host", hostname);
1485e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strncpy(ver, TOYBOX_VERSION, 5);
1495e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strcat(ua, ver);
1505e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  mk_fld("User-Agent", ua);
1515e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  mk_fld("Connection", "close");
1525e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strcat(toybuf, "\r\n");
1535e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1545e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  // send the HTTP request
1555e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  len = strlen(toybuf);
15621701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (write(sock, toybuf, len) != len) perror_exit("write error");
1575e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1585e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  // read HTTP response
15921701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if ((len = read(sock, toybuf, 4096)) == -1) perror_exit("read error");
16021701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (!strstr(toybuf, "\r\n\r\n")) error_exit("too long HTTP response");
1615e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  body = get_body(len, &body_len);
1625e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  result = strtok(toybuf, "\r");
1635e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  strtok(result, " ");
1645e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  rc = strtok(NULL, " ");
1655e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  r_str = strtok(NULL, " ");
1665e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
1675e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  // HTTP res code check
1685e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  // TODO handle HTTP 302 Found(Redirection)
16921701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (strcmp(rc, "200")) error_exit("res: %s(%s)", rc, r_str);
1705e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee
17121701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (!(fp = fopen(TT.filename, "w"))) perror_exit("fopen error");
17221701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (fwrite(body, 1, body_len, fp) != body_len)
17321701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee    error_exit("fwrite error");
1745e3d1e35997315df044af3de0633b8298ad63988Lipi C. H. Lee  while ((len = read(sock, toybuf, 4096)) > 0)
17521701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee    if (fwrite(toybuf, 1, len, fp) != len)
17621701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee      error_exit("fwrite error");
17721701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee  if (fclose(fp) == EOF) perror_exit("fclose error");
17821701f1b61f1d92db05062be4ef6fd5989cacba7Lipi Lee}
179